First, let's do all of the imports. We use music21 for computer-aided musical analysis and computational musicology,
glob to find all the training files, the LSTM, Dense and Dropout layers and the Sequential model from Keras, and some standard libraries like numpy and tqdm.

In [18]:
from music21 import *
import glob
from tqdm import tqdm
import numpy as np
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split

We're going to read files repeatedly, so let's create a function that reads a single MIDI file. The function `extract_notes_from_piano` gets the piano notes from a MIDI file using a helper function called `_parse_notes_from_part`.

In [19]:
def extract_notes_from_piano(file_path):
    """Return the encoded notes and chords of the piano parts of a MIDI file.

    The file is parsed with music21 and partitioned by instrument. Every
    part whose string representation contains ``'Piano'`` is passed to
    ``_parse_notes_from_part`` and the results are concatenated.

    Args:
        file_path: Path of the MIDI file to read.

    Returns:
        A list of note/chord strings in the order the parts are visited.
        The list is empty when the file has no piano part or when the
        score could not be partitioned by instrument.
    """
    midi = converter.parse(file_path)
    instr_stream = instrument.partitionByInstrument(midi)

    # partitionByInstrument can return None when the score carries no
    # instrument information; without this guard `.parts` would raise
    # AttributeError and abort the whole directory load.
    if instr_stream is None:
        return []

    notes = []
    for part in instr_stream.parts:
        if 'Piano' in str(part):
            notes.extend(_parse_notes_from_part(part))

    return notes

def _parse_notes_from_part(part):
    """Encode every note and chord found in ``part`` as a string.

    Single notes become the string form of their pitch; chords become
    their normal-order entries joined by dots. Every other element
    yielded by ``part.recurse()`` is skipped.

    Args:
        part: A music21 stream-like object exposing ``recurse()``.

    Returns:
        The list of encoded strings, in traversal order.
    """
    def encode(element):
        # Returns None for anything that is neither a note nor a chord.
        if isinstance(element, note.Note):
            return str(element.pitch)
        if isinstance(element, chord.Chord):
            return '.'.join(map(str, element.normalOrder))
        return None

    encoded = (encode(element) for element in part.recurse())
    return [token for token in encoded if token is not None]

Retrieving the files that will create our training set.

In [20]:
def get_unique_notes_from_directory(directory):
    """Read all MIDI files of one directory and collect their piano notes.

    Args:
        directory: Name of the sub-directory of ``All Midi Files`` whose
            ``*.mid`` files are read.

    Returns:
        A tuple ``(unique_notes, notess, notes_array)``:
        ``notes_array`` holds one list of note strings per file,
        ``notess`` is those lists concatenated in file order, and
        ``unique_notes`` is the sorted list of distinct note strings.
    """
    # The pattern contains no '**', so only files directly inside the
    # directory match. Sorting makes the file order reproducible.
    all_files = sorted(glob.glob(f'All Midi Files/{directory}/*.mid'))

    # Extract notes from each MIDI file and store in an array
    notes_array = [extract_notes_from_piano(file) for file in tqdm(all_files, position=0, leave=True)]

    # Flatten once (linear time) and reuse the result for both outputs
    notess = [name for file_notes in notes_array for name in file_notes]

    # Sorted so that any index mapping derived from it is stable across runs
    unique_notes = sorted(set(notess))

    return unique_notes, notess, notes_array

# Sub-directory of "All Midi Files" that provides the training pieces
file_path = "schumann"
unique_notes, notess, notes_array = get_unique_notes_from_directory(file_path)
print(f"Unique Notes: {len(unique_notes)}")

100%|██████████| 24/24 [00:10<00:00,  2.19it/s]

Unique Notes: 247





Create some utility variables for training and inference.

In [21]:
from collections import Counter

threshold = 50
timesteps = 50

# The frequency (count) of each note, keyed in the order of `unique_notes`.
# We will use these values to filter the note set in order to make training faster.
# A single Counter pass replaces one full `notess.count(...)` scan per unique note.
note_counts = Counter(notess)
freq = {note: note_counts[note] for note in unique_notes}

# Report how many notes occur at least 30, 50, 70 and 90 times
print("\nFrequency notes")
for i in range(30, 100, 20):
    threshold_notes = {note: count for note, count in freq.items() if count >= i}
    print(i, ":", len(threshold_notes))

# Keep only the notes that occur at least `threshold` times, e.g., 50
freq_notes = {note: count for note, count in freq.items() if count >= threshold}

# Rebuild each piece using only the frequent notes
new_notes = [[note for note in notes if note in freq_notes] for notes in notes_array]

# Dictionary with key as note index and value as note
ind2note = dict(enumerate(freq_notes.keys()))

# Dictionary with key as note and value as note index
note2ind = {note: index for index, note in ind2note.items()}


Frequency notes
30 : 121
50 : 97
70 : 79
90 : 64


Create the input and output sets.

In [22]:
x, y = [], []

# Slide a window of `timesteps` notes over every piece: the window is the
# input sample and the note immediately after it is the target.
for piece in new_notes:
    # Translate the whole piece to note indices once, then slice windows
    encoded = [note2ind[name] for name in piece]

    for start in range(len(encoded) - timesteps):
        stop = start + timesteps
        x.append(encoded[start:stop])
        y.append(encoded[stop])

x_new = np.array(x)
y_new = np.array(y)

Split the training and testing sets.

In [23]:
# Inputs get a trailing axis of size 1; targets become a single column
x_new = x_new.reshape(len(x_new), timesteps, 1)
y_new = y_new.reshape(-1, 1)

# Hold out 20% of the samples, with a fixed seed for a repeatable split
x_train, x_test, y_train, y_test = train_test_split(
    x_new,
    y_new,
    test_size=0.2,
    random_state=42,
)

Building the model. We stack two LSTM layers with a latent dimension of 256, each followed by a dropout layer. The last dropout layer feeds a fully connected layer of dimension 256, which is finally connected to an output layer of dimension `len(note2ind)`.

In [24]:
def build_model():
    """Build the (uncompiled) next-note classifier.

    The input shape is taken from the global ``x_new`` (its second and
    third dimensions) and the size of the softmax output from the global
    ``note2ind``.

    Returns:
        A ``Sequential`` model: two 256-unit LSTM layers, each followed by
        20% dropout, then a 256-unit ReLU dense layer and a softmax layer.
    """
    window_shape = (x_new.shape[1], x_new.shape[2])
    n_classes = len(note2ind)

    layers = [
        # First LSTM returns the full sequence so the second one can consume it
        LSTM(256, return_sequences=True, input_shape=window_shape),
        Dropout(0.2),
        LSTM(256),
        Dropout(0.2),
        Dense(256, activation='relu'),
        Dense(n_classes, activation='softmax'),
    ]
    return Sequential(layers)

# Instantiate the network and print its layer-by-layer summary
model = build_model()

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 50, 256)           264192    
                                                                 
 dropout_2 (Dropout)         (None, 50, 256)           0         
                                                                 
 lstm_3 (LSTM)               (None, 256)               525312    
                                                                 
 dropout_3 (Dropout)         (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 256)               65792     
                                                                 
 dense_3 (Dense)             (None, 97)                24929     
                                                                 
Total params: 880,225
Trainable params: 880,225
Non-tr

Training the model. We use `sparse_categorical_crossentropy` because the targets are integer class indices (one of 97 note classes) rather than one-hot vectors.

In [25]:
# Integer targets, so the sparse variant of categorical cross-entropy is used
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# The held-out split is evaluated at the end of every epoch
held_out = (x_test, y_test)
model.fit(x_train, y_train, epochs=60, batch_size=128, validation_data=held_out)

# Persist the trained model under the name "seq2seq"
model.save("seq2seq")

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60




INFO:tensorflow:Assets written to: seq2seq\assets


INFO:tensorflow:Assets written to: seq2seq\assets


Inference. Let's create some music. We will pick a random sequence from the test set to be our starting input, then repeatedly predict the next note, append it to the current sequence, and drop the oldest note.

In [28]:
num_of_notes = 200

# Pick a random sequence from x_test to seed the generation.
# randint's upper bound is exclusive, so passing len(x_test) makes every
# sample eligible (the previous `len(x_test) - 1` skipped the last sample
# and failed outright for a one-sample test set).
music_pattern = x_test[np.random.randint(len(x_test))].reshape(-1)

out_pred = []  # It will store predicted notes

for _ in range(num_of_notes):
    # The model expects a batch of one window with a trailing axis of size 1
    model_input = music_pattern.reshape(1, -1, 1)

    # Index of the most probable next note; verbose=0 avoids printing a
    # progress bar for every generated note
    pred_index = int(np.argmax(model.predict(model_input, verbose=0)))

    # Get the note using the predicted index and append to the output prediction list
    out_pred.append(ind2note[pred_index])

    # Slide the window one step: drop the oldest note, add the prediction
    music_pattern = np.append(music_pattern[1:], pred_index)



Saving the file. We convert the predicted notes and chords to music21 objects and write them to a MIDI file.

In [29]:
def _piano_note(pitch):
    """Build a ``note.Note`` from ``pitch`` and tag it with a piano instrument."""
    piano_note = note.Note(pitch)
    piano_note.storedInstrument = instrument.Piano()
    return piano_note


output_notes = []

for position, token in enumerate(out_pred):
    # Chords were encoded as dot-separated integers (or one bare integer)
    is_chord = token.isdigit() or '.' in token

    if not is_chord:
        element = _piano_note(token)
    else:
        members = [_piano_note(int(member)) for member in token.split('.')]
        element = chord.Chord(members)

    # Each prediction is placed at an offset equal to its position in the output
    element.offset = position
    output_notes.append(element)

# Save the MIDI file
midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp='output.mid')

'output.mid'