# 0 - Globals
Let's first define some variables that are required throughout the different stages.  Think of this as a configuration section.  Always run this cell first, since the later sections depend on these values being defined.

In [None]:
# Resolution to sample at: quarter-length offsets and durations are multiplied by this
# to get timesteps.  Must be consistent between all stages.
SAMPLE_FREQ = 12
# What is the highest note allowed (as a MIDI pitch number)? 108 is the top of an 88 key piano.
# Must be consistent between all stages.
HIGHEST_NOTE = 108

# How far ahead (in roll tokens) to search for a note's END_KEY when converting back to midi.
END_SEARCH_DIST = 50

# The token prefix that's used for waits (followed by the number of timesteps to wait).
WAIT_KEY = 'wait'
# The token prefix that's used for ends (followed by the pitch that ends).
END_KEY = 'end'

# Where are the source midi files located?
MIDI_PATH = './data/midi/'
# OUTPUT path for converted rolls (one text file per midi file).
ROLL_PATH = './data/roll/'
# Pickle cache of the nested rolls list (one sublist of tokens per piece).
ROLL_FILE = './notes.bin'

# The width of the sliding window when generating X and y data.
SEQUENCE_LENGTH = 128

# The actual model checkpoint (written during training, loaded if it already exists).
MODEL_FILE = './model.hdf5'

# 1 - Conversion
First, the source MIDI files need to be converted into a notewise format.  This is a preprocessing step that writes new files for the training process to use, rather than having to ingest the MIDI files each time.  If these files already exist and don't need updating, the conversion itself does not need to be run again.

In [None]:
import math
import datetime
import numpy as np
import music21 as m21

def convert_midi_to_roll(file, sample_freq):
    """Convert a MIDI file into a space-separated "notewise" token string.

    The piece is sampled onto a piano roll (one row per timestep, one column
    per MIDI pitch from 0 to HIGHEST_NOTE inclusive) and then flattened into
    tokens:

      * '<pitch>'            - the pitch is struck at the current timestep,
      * END_KEY + '<pitch>'  - the pitch is silent at the next timestep,
      * WAIT_KEY + '<n>'     - advance n timesteps.

    Args:
        file: The MIDI file to parse (passed to music21's converter).
        sample_freq: How many timesteps make up one quarter note.

    Returns:
        All tokens joined by single spaces.
    """
    midi = m21.converter.parse(file)

    # Get all note pitches (in midi format), offsets, and durations.
    notes = []
    for el in midi.recurse():
        if isinstance(el, m21.note.Note):
            pitches = [el.pitch]
        elif isinstance(el, m21.chord.Chord):
            pitches = el.pitches
        else:
            continue
        #end

        # `el.offset` is only relative to the element's immediate container,
        # so resolve the position against the whole score to stay correct when
        # notes are nested inside measures or voices.
        start = math.floor(el.getOffsetInHierarchy(midi) * sample_freq)
        length = math.floor(el.duration.quarterLength * sample_freq)
        notes.extend((pitch.midi, start, length) for pitch in pitches)
    #end

    # Create piano roll array.  The width is HIGHEST_NOTE + 1 so that
    # HIGHEST_NOTE itself is a valid column.
    max_timestep = math.floor(midi.duration.quarterLength * sample_freq) + 1
    roll_array = np.zeros((max_timestep, HIGHEST_NOTE + 1), dtype=np.uint8)
    for pitch, start, length in notes:
        if pitch < 0 or pitch > HIGHEST_NOTE:
            print(f'Ignoring out of range pitch: {pitch}')
            continue
        #end
        roll_array[start, pitch] = 1 # Strike note.
        roll_array[start + 1:start + length, pitch] = 2 # Hold note.
    #end

    # Convert the roll into a notewise format, one 'wait' per timestep.
    notewise = []
    last_step = len(roll_array) - 1
    for step, row in enumerate(roll_array):
        for pitch in np.flatnonzero(row):
            note = str(pitch)
            if row[pitch] == 1:
                notewise.append(note)
            #end
            # The note ends here if nothing sounds on this pitch next timestep.
            if step == last_step or roll_array[step + 1, pitch] == 0:
                notewise.append(f'{END_KEY}{note}')
            #end
        #end

        notewise.append(WAIT_KEY)
    #end

    # Merge successive 'wait' values into a single 'wait<n>' token.  A run may
    # only keep growing while it is no longer than sample_freq * 2, so very
    # long silences are split into several tokens.
    merged = []
    run = 0
    for token in notewise:
        if token == WAIT_KEY and 0 < run <= sample_freq * 2:
            run += 1
            continue
        #end
        if run:
            merged.append(f'{WAIT_KEY}{run}')
            run = 0
        #end
        if token == WAIT_KEY:
            run = 1
        else:
            merged.append(token)
        #end
    #end
    if run:
        merged.append(f'{WAIT_KEY}{run}')
    #end

    return ' '.join(merged)
#end

def convert_roll_to_midi(roll, sample_freq, output_file):
    """Convert a list of notewise tokens back into a MIDI file.

    Args:
        roll: Sequence of tokens: '<pitch>' strikes, END_KEY + '<pitch>' ends
            and WAIT_KEY + '<n>' waits.
        sample_freq: How many timesteps make up one quarter note.
        output_file: Path of the MIDI file to write.

    Prints a confirmation line once the file has been written.  Tokens that
    cannot be turned into a note are reported and skipped.
    """
    speed = 1.0 / sample_freq
    wait_len = len(WAIT_KEY)
    time_offset = 0
    notes = []

    for i, curr in enumerate(roll):
        # Skip ends; they are only consumed by the look-ahead of their note.
        if curr.startswith(END_KEY):
            continue
        #end

        # Handle waits.
        if curr.startswith(WAIT_KEY):
            time_offset += int(curr[wait_len:])
            continue
        #end

        # Look ahead to find a matching end, accumulating the waits passed on
        # the way.  A re-strike of the same pitch also terminates the note.
        # Tokens are compared exactly: a prefix comparison would let e.g.
        # 'end60' or '60' terminate note '6'.
        duration = 1.0
        has_end = False
        end_token = END_KEY + curr
        for upcoming in roll[i + 1:i + END_SEARCH_DIST]:
            if upcoming.startswith(WAIT_KEY):
                duration += int(upcoming[wait_len:])
            elif upcoming == end_token or upcoming == curr:
                has_end = True
                break
            #end
        #end

        # No end within reach: fall back to one quarter note.
        if not has_end:
            duration = sample_freq
        #end

        try:
            new_note = m21.note.Note(int(curr))
            new_note.duration = m21.duration.Duration(duration * speed)
            new_note.offset = time_offset * speed
            notes.append(new_note)
        except Exception:
            # Best effort: report the token and carry on with the rest.
            print(f'Unknown note: {curr}')
        #end
    #end

    piano = m21.instrument.fromString('Piano')
    notes.insert(0, piano)
    stream = m21.stream.Stream(notes)
    # NOTE(review): confirm that music21 honours each of these attribute
    # assignments on a plain Stream when writing MIDI.
    stream.timeSignature = m21.meter.TimeSignature('3/4')
    stream.tempo = m21.tempo.MetronomeMark(number=80)
    stream.keySignature = m21.key.KeySignature(0)

    stream.write('midi', fp=output_file)
    curr_time = datetime.datetime.now().strftime('%H:%M:%S')
    print(f'{output_file} written at {curr_time}')
#end

# Testing!
# convert_roll_to_midi(output, SAMPLE_FREQ, './output.mid')
# convert_roll_to_midi(convert_midi_to_roll('../data/C_mapleaf.mid', SAMPLE_FREQ).split(' '), SAMPLE_FREQ, './output.mid')

In [None]:
import os
import glob
import pickle

# Just load the notes file if it already exists.
# NOTE: unpickling can execute arbitrary code, so only load a cache file that
# was produced by this notebook.
if os.path.exists(ROLL_FILE):
    print('Loading existing notes file...', end='')
    with open(ROLL_FILE, 'rb') as file:
        rolls = pickle.load(file)
    #end
    print('Done!')
else:
    # Make roll path if doesn't exist.
    os.makedirs(ROLL_PATH, exist_ok=True)

    # Convert all midi files, save their text to a file, and append their text
    # split by space to a nested list (one sublist per piece).
    rolls = []
    midi_files = glob.glob(MIDI_PATH + '*.mid')
    for midi_file in midi_files:
        filename = os.path.basename(midi_file)
        print(f'Processing `{filename}`...', end='')

        try:
            # Convert before opening the output so that a failed conversion
            # does not leave an empty text file behind.
            roll = convert_midi_to_roll(midi_file, SAMPLE_FREQ)
            # splitext only strips the extension, so `a.b.mid` and `a.c.mid`
            # do not both collapse onto `a.txt`.
            roll_file = ROLL_PATH + os.path.splitext(filename)[0] + '.txt'
            with open(roll_file, 'w') as file:
                file.write(roll)
            #end
        except Exception as err:
            # Best effort: report why this piece failed and move on.
            print(f'failed! ({err})')
        else:
            rolls.append(roll.split(' '))
            print('done!')
        #end
    #end

    # Save the combined rolls to a file to save having to process it in the future.
    print('Writing binary pickle...', end='')
    try:
        with open(ROLL_FILE, 'wb') as file:
            pickle.dump(rolls, file)
        #end
    except Exception as err:
        print(f'failed! ({err})')
    else:
        print('done!')
    #end
#end

# 2 - Training
Now let's take all of the processed files and create our `X` and `y` data for training.

In [None]:
# Run this if you haven't run the first stage.
import numpy as np

## Mappings
Generate some mappings to go between unique notes and integers.  The integers are what will be used in the neural network.

In [None]:
# Every token of every piece, in order, as one flat list.
flat_notes = []
for piece in rolls:
    flat_notes.extend(piece)
#end

# The vocabulary: each distinct token once, in sorted order.
unique_notes = sorted(set(flat_notes))

# Two lookup tables over the vocabulary.  `note_to_int` encodes the rolls as
# integers for the network; `int_to_note` decodes predictions back to tokens.
note_to_int = {note: num for num, note in enumerate(unique_notes)}
int_to_note = dict(enumerate(unique_notes))

## `X` and `y` data
Building the training data using a sliding window.  Since the rolls are a nested list - one for each piece - I'm going to ensure that the sliding window does not go over a boundary (hence the nested lists).  Essentially, I'm creating `X` and `y` from sliding windows over different pieces joined together rather than treating the entire thing as one giant sequence.

In [None]:
from keras.utils import np_utils

data_X = []
data_y = []

# Slide the window over each piece separately while collecting into the same
# two lists, so that no window ever straddles the boundary between pieces.
for roll in rolls:
    encoded = [note_to_int[n] for n in roll]
    for start in range(len(encoded) - SEQUENCE_LENGTH):
        stop = start + SEQUENCE_LENGTH
        # The window itself is the X sample ...
        data_X.append(encoded[start:stop])
        # ... and the token right after it is the y value to predict.
        data_y.append(encoded[stop])
    #end
#end

# Create and shape the final X and y data for the network.
# NOTE(review): X is scaled by the total token count (`len(flat_notes)`), not
# by the vocabulary size; the prediction cell scales its input the same way.
sample_shape = (len(data_X), SEQUENCE_LENGTH, 1)
X = np.reshape(data_X, sample_shape)
X = X / float(len(flat_notes))
y = np_utils.to_categorical(data_y)

## Neural network
And now building the model.  It uses stacked LSTMs.

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
import keras.backend as K

# Reset the Keras session so re-running this cell while debugging starts fresh.
K.clear_session()

# Two stacked LSTMs (each followed by dropout) feeding a softmax over the
# classes present in `y`.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

Training the model and/or loading existing weights.

In [None]:
from keras.callbacks import ModelCheckpoint

# Training settings.
# Should the training take place?
should_train = True
# How many epochs to train?
train_epochs = 2
# Batch size passed to `fit`.
batch_size = 32

# Checkpoint callback: writes the model to MODEL_FILE, keeping only the run
# with the lowest training loss.
checkpoint = ModelCheckpoint(MODEL_FILE, monitor='loss', verbose=0, save_best_only=True, mode='min')

# Pick up the weights from a previous run if a checkpoint already exists.
if os.path.exists(MODEL_FILE):
    model.load_weights(MODEL_FILE)
    print('Loaded existing weights...')
#end

# Perform the actual training.
if should_train:
    history = model.fit(
        X,
        y,
        batch_size=batch_size,
        epochs=train_epochs,
        callbacks=[checkpoint],
    )
#end

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Plot the accuracy recorded for each epoch of the last training run.
accuracy_per_epoch = history.history['accuracy']
plt.plot(accuracy_per_epoch)
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.show()

# 3 - Prediction
Time to make some music!  Don't forget to run the training section first, so that the model is built and its weights are loaded; set `should_train = False` there if you don't want to actually retrain the model.

In [None]:
# How many of the top predictions to choose from rather than taking the top choice only.
# Setting to 1 always takes the best choice but may result in more repetition.
num_top_preds = 5

# Get a random starting point.
start_idx = np.random.randint(0, len(data_X))
# Copy the window: the feedback loop below must not modify the training data
# or the saved starting pattern.
start_pattern = list(data_X[start_idx])
pattern = list(start_pattern)
print(f'Start: {start_idx}')

output = []
for _ in range(5 * SEQUENCE_LENGTH):
    # Shape the input and make a prediction (scaled the same way as X).
    pred_input = np.reshape(pattern, (1, len(pattern), 1))
    pred_input = pred_input / float(len(flat_notes))
    prediction = model.predict(pred_input)

    # Sample uniformly from the best few choices.  Never ask for more
    # candidates than there are classes.
    top_k = min(num_top_preds, len(prediction[0]))
    top_predictions = np.argpartition(prediction[0], -top_k)[-top_k:]
    predicted_index = int(top_predictions[np.random.randint(0, top_k)])

    # Convert and save the chosen token out.
    output.append(int_to_note[predicted_index])

    # Feed the data back through for the next prediction: drop the oldest
    # token and append the new one, keeping the window length constant.
    pattern = pattern[1:] + [predicted_index]
#end

output

In [None]:
# Write the generated tokens out as a MIDI file.
convert_roll_to_midi(output, SAMPLE_FREQ, './output.mid')

In [None]:
# Write the starting pattern (decoded back to note tokens) out as a MIDI file.
convert_roll_to_midi([int_to_note[n] for n in start_pattern], SAMPLE_FREQ, './pattern.mid')

In [None]:
# Write the starting pattern followed by the generated tokens as a single MIDI file.
roll = [int_to_note[n] for n in start_pattern] + output
convert_roll_to_midi(roll, SAMPLE_FREQ, './combined.mid')