In [1]:
import numpy as np
import glob
import tensorflow as tf

from music21 import converter, instrument, note, chord, stream

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras import utils


import pickle
import os
# Flat list of tokens extracted from the MIDI corpus: a pitch name such as "C4"
# for a single note, or dot-joined normal-order integers such as "0.4.7" for a chord.
notes = []

if os.path.exists("data/notes"):
    # Reuse the cached token list instead of re-parsing every MIDI file.
    # NOTE(review): pickle.load can execute arbitrary code; only load a cache
    # file that this notebook wrote itself.
    with open("data/notes", "rb") as data_file:
        notes = pickle.load(data_file)
else:
    for file in glob.glob("midi_songs/*.mid"):
        # create a stream object
        midi = converter.parse(file)

        # get all the notes and chords in the file
        try:
            # Has instrument parts: read the first part only.
            parts = instrument.partitionByInstrument(midi)
            notes_to_parse = parts.parts[0].recurse()
        except Exception:
            # Partitioning failed or produced no usable part: fall back to the
            # flattened notes. `except Exception` keeps Ctrl-C working, which a
            # bare `except:` would swallow.
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    # Make sure the cache directory exists so the parse result is not lost
    # to a FileNotFoundError at the very end.
    os.makedirs("data", exist_ok=True)
    with open("data/notes", "wb") as data_file:
        pickle.dump(notes, data_file)


In [2]:

# Mapping from note and chord (str) to number

# all pitch names, sorted so the token -> integer mapping is deterministic
pitch_names = sorted(set(notes))
n_vocab = len(pitch_names)

# create a dictionary to map pitches to integers
note_to_int = {name: number for number, name in enumerate(pitch_names)}

# Create input sequences and corresponding outputs for network
sequence_len = 100

# Fail early with a clear message instead of building an empty training set.
if len(notes) <= sequence_len:
    raise ValueError(
        "Need more than %d notes to build training sequences, got %d"
        % (sequence_len, len(notes))
    )

network_input = []
network_output = []

# Slide a window of `sequence_len` tokens over the corpus; the token that
# follows each window is the prediction target.
for i in range(len(notes) - sequence_len):
    sequence_input = notes[i:i + sequence_len]
    sequence_output = notes[i + sequence_len]
    network_input.append([note_to_int[name] for name in sequence_input])
    network_output.append(note_to_int[sequence_output])

n_patterns = len(network_input)

# reshape input to (patterns, timesteps, features) for the LSTM
network_input = np.reshape(network_input, (n_patterns, sequence_len, 1))

# Normalize input
network_input = network_input / float(n_vocab)

# One-hot encode the targets. num_classes is pinned to n_vocab so the width
# always matches the Dense(n_vocab) output layer, even when the highest-index
# token never occurs as a target.
network_output = utils.to_categorical(network_output, num_classes=n_vocab)

network_input.shape

(5399, 100, 1)

In [3]:
# Build Model
# Three stacked 512-unit LSTM layers, then a 256-unit dense block, then a
# softmax over the n_vocab tokens.

model = Sequential()

# Recurrent stack: the first two layers return full sequences so the next
# LSTM receives one vector per timestep; the last returns only its final state.
model.add(LSTM(units=512,
               input_shape=network_input.shape[1:3],
               recurrent_dropout=.3,
               return_sequences=True))
model.add(LSTM(units=512, return_sequences=True, recurrent_dropout=.3))
model.add(LSTM(512))

# Dense head with batch normalization and dropout around the hidden layer.
model.add(BatchNormalization())
model.add(Dropout(.3))
model.add(Dense(256))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(.3))
model.add(Dense(n_vocab))
model.add(Activation("softmax"))

model.compile(optimizer='rmsprop', loss="categorical_crossentropy")

In [4]:
# ModelCheckpoint expands str.format-style fields such as {epoch}; a printf-style
# "%d" is left untouched, so every save would go to the same literal "%d" name.
file_path = "model_checkpoint-epoch-{epoch:03d}.ckpt"

# save_weights_only=True writes weight checkpoints rather than a full saved
# model, which is what the generation cell looks up with
# tf.train.latest_checkpoint.
# NOTE(review): assumes the TF2-bundled Keras; newer Keras releases may require
# a ".weights.h5" suffix here - confirm against the installed version.
checkpoint = ModelCheckpoint(
    file_path,
    monitor="loss",
    save_best_only=True,
    save_weights_only=True,
)

model.fit(network_input, network_output, epochs=200, batch_size=128, callbacks=[checkpoint])

Epoch 1/200
 2/43 [>.............................] - ETA: 6:06 - loss: 5.7176 

KeyboardInterrupt: 

In [None]:
# Generate
# Rebuild exactly the architecture used in the training cell. The weights can
# only be restored into a model whose layers match the one that was saved
# (including the BatchNormalization layers, which carry their own weights).
rate = .3
model = Sequential((
    LSTM(512,
         input_shape=(network_input.shape[1], network_input.shape[2]),
         recurrent_dropout=rate, return_sequences=True),
    LSTM(512, return_sequences=True, recurrent_dropout=rate),
    LSTM(512),
    BatchNormalization(),
    Dropout(rate),
    Dense(256),
    Activation("relu"),
    BatchNormalization(),
    Dropout(rate),
    Dense(n_vocab),
    Activation(activation="softmax"),
))

model.compile(loss="categorical_crossentropy", optimizer="adam")

# An empty string means "look for checkpoints in the current working directory".
checkpoint_dir = ""
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    # latest_checkpoint returns None when nothing is found; fail with a clear
    # message instead of passing None on to load_weights.
    raise FileNotFoundError(
        "No checkpoint found in %r; train the model first" % (checkpoint_dir or ".")
    )
model.load_weights(latest_checkpoint)

In [None]:
def generate_notes(model, network_input, pitchnames, n_vocab, n_notes=500):
    """
    Generate notes from trained network

    A random sequence from ``network_input`` seeds a sliding window. On each
    step the model predicts the next token, the arg-max class is recorded, and
    the window is advanced by one position.

    :param model: object with a Keras-style ``predict(x, verbose=0)`` method
        that returns class scores for an input shaped ``(1, sequence_len, 1)``
    :param network_input: seed sequences, either raw integer indices or values
        already divided by ``n_vocab``; nested lists and arrays are accepted
    :param pitchnames: ordered token names; position ``i`` is the name for class ``i``
    :param n_vocab: number of distinct tokens, used to scale indices
    :param n_notes: how many tokens to generate (default 500)
    :return: list of ``n_notes`` generated token names
    :raises ValueError: if ``network_input`` contains no sequences
    """
    if len(network_input) == 0:
        raise ValueError("network_input is empty")

    # randint's upper bound is exclusive, so this can pick any sequence and
    # also works when there is only one.
    start = np.random.randint(0, len(network_input))

    int_to_note = dict(enumerate(pitchnames))

    # Work on a flat copy so the caller's data is never mutated and array rows
    # (which have no .append) are supported.
    seed = np.asarray(network_input[start], dtype=float).reshape(-1)
    # Raw indices are whole numbers, so any non-zero raw value is >= 1, while
    # scaled values are always < 1. Scale only when the seed is still raw so an
    # already-normalized input is not divided twice.
    if seed.size and seed.max() >= 1:
        seed = seed / float(n_vocab)
    pattern = seed.tolist()

    predict_output = []
    for _ in range(n_notes):
        predict_input = np.reshape(pattern, (1, len(pattern), 1))

        prediction = model.predict(predict_input, verbose=0)

        index = int(np.argmax(prediction))
        predict_output.append(int_to_note[index])

        # Slide the window: append the new token in the same scaled form as
        # the rest of the window, then drop the oldest entry.
        pattern.append(index / float(n_vocab))
        pattern = pattern[1:]

    return predict_output


In [None]:
def create_midi(prediction_output, output_path="midi_output.mid"):
    """
    Generate midi from prediction notes

    Tokens that contain ``.`` or consist only of digits are treated as chords:
    they are split on ``.`` and each integer becomes one note of the chord.
    Every other token is passed to ``note.Note`` as-is. Each event is placed
    0.5 offset units after the previous one.

    :param prediction_output: iterable of token strings such as ``"C4"`` or ``"0.4.7"``
    :param output_path: path of the MIDI file to write (default ``"midi_output.mid"``)
    :return: None
    """

    offset = 0
    output_notes = []

    for pattern in prediction_output:
        # str.isdigit catches single-integer chords that contain no "."
        if ("." in pattern) or pattern.isdigit():
            chord_notes = []
            for current_note in pattern.split('.'):
                new_note = note.Note(int(current_note))
                # music21 spells this attribute `storedInstrument`
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)

            new_chord = chord.Chord(chord_notes)
            new_chord.offset = offset
            output_notes.append(new_chord)

        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # Fixed step between events so they do not stack on the same offset.
        offset += .5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=output_path)

