In [1]:
import glob
import pickle
from music21 import instrument, note, stream, chord, converter, duration

MAX_DURATION = 8.0 # 2 bars
NOTE_SEPARATOR = '!'
REST_VALUE = '@'

def get_notes(files):
    notes = []

    for f in files:
        print(f'Parsing \"{f}\"...')
        midi = converter.parse(f)
        
        # get raw midi notes from the first instrument
        notes_to_parse = None
        try:
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except:
            notes_to_parse = midi.flat.notes
        #end

        num_skipped = 0 # how many notes did we skip for this piece?
        prev_offset = 0.0 # what was the offset of the last set of elements?
        notes_to_dump = [] # accumulation of notes with the same offset
        durations_to_dump = [] # list of durations matching notes_to_dump
        for idx, el in enumerate(notes_to_parse):
            if not isinstance(el, note.Note) and not isinstance(el, chord.Chord) and not isinstance(el, note.Rest):
                # print(f'Skipping element (not a note/chord): {el}')
                num_skipped += 1
                continue
            #end

            # skip zero length notes (alternatively we could give them a minimum duration here instead)
            if 'zero' == el.duration.type:
                # print(f'Skipping note {idx} with zero duration: {el}')
                num_skipped += 1
                continue
            #end
            
            
            # skip lengthy durations
            if el.duration.quarterLength > MAX_DURATION:
                #print(f'Skipped long duration ({el.duration.quarterLength}).')
                num_skipped += 1
                continue
            #end

            # dump notes when required and reset trackers
            if el.offset != prev_offset:
                if len(notes_to_dump): # only dump when there's something to dump - cannot put this in the main IF as we need to run it for the first element to update the offset
                    notes.append(NOTE_SEPARATOR.join(str(n.pitch if isinstance(n, note.Note) else n) + '$' + str(d) for n, d in zip(notes_to_dump, durations_to_dump)))
                    notes_to_dump = []
                    durations_to_dump = []
                #end
                prev_offset = el.offset
            #end

            # append the notes to dump list
            if isinstance(el, note.Note):
                notes_to_dump.append(el)
                durations_to_dump.append(el.duration.quarterLength)
            elif isinstance(el, chord.Chord):
                notes_to_dump.extend(el.notes)
                durations_to_dump.extend([el.duration.quarterLength for n in el.notes])
            elif isinstance(el, note.Rest):
                # dump immediately if no pending notes exist.
                # this is to avoid having rests accompanying notes.
                # if you want this behavior, use `notes_to_dump.append(REST_VALUE)` and nothing more.
                if not len(notes_to_dump):
                    notes.append(f'{REST_VALUE}${el.duration.quarterLength}')
        #end

        if len(notes_to_dump) != 0:
            notes.append(NOTE_SEPARATOR.join(str(n.pitch if isinstance(n, note.Note) else n) + '$' + str(d) for n, d in zip(notes_to_dump, durations_to_dump)))
            notes_to_dump = []
            durations_to_dump = []
        #end

        print(f'Finished parsing. Skipped {num_skipped} notes.')
    #end

    return notes
#end

# TESTING:
# notes_array_to_midi(get_notes(['./data/C_mapleaf.mid']))

In [2]:
import os
if os.path.exists('./raggen-notes.bin'):
    notes = pickle.load(open('./raggen-notes.bin', 'rb'))
else:
    pieces = glob.glob('./data/*.mid')
    notes = get_notes(pieces)
    with open('./raggen-notes.bin', 'wb') as notes_file:
        pickle.dump(notes, notes_file)

In [3]:
from keras.utils import np_utils
import numpy as np

TIMESTEP = 0.5 # 16th notes
SEQ_LEN = int(4 / TIMESTEP) # 8 per bar

num_unique_notes = len(set(notes))
print(f'Number of unique notes: {num_unique_notes}')

# all unique pitches (including rests)
pitch_names = sorted(set(i for i in notes))
# map pitches to integers
note_to_int = dict((note, num) for num, note in enumerate(pitch_names))

data_X = []
data_y = []
for i in range(0, len(notes) - SEQ_LEN, 1):
    seq_in = notes[i:i + SEQ_LEN]
    data_X.append([note_to_int[n] for n in seq_in])

    seq_out = notes[i + SEQ_LEN]
    data_y.append(note_to_int[seq_out])
#end

X = np.reshape(data_X, (len(data_X), SEQ_LEN, 1))
X = X / float(len(notes))
y = np_utils.to_categorical(data_y)

Using TensorFlow backend.


Number of unique notes: 5806


In [4]:
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
import keras.backend as K

K.clear_session()

model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(256))
model.add(Dropout(0.3))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 8, 256)            264192    
_________________________________________________________________
dropout_1 (Dropout)          (None, 8, 256)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 5806)              1492142   
Total params: 2,281,646
Trainable params: 2,281,646
Non-trainable params: 0
_________________________________________________________________


In [5]:
from keras.callbacks import ModelCheckpoint

import matplotlib.pyplot as plt
%matplotlib inline

model_path = './raggen-model.hdf5'
checkpoint = ModelCheckpoint(
    model_path,
    monitor='loss',
    verbose=0,
    save_best_only=True,
    mode='min'
)

use_existing_weights = True
if use_existing_weights:
      model.load_weights(model_path)

should_train = False
if should_train:
    history = model.fit(X, y, epochs=50, batch_size=64, callbacks=[checkpoint])

In [6]:
# manually updated each time I run above
total_epochs = 300

plt.plot(history.history['accuracy'])
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title(f'Naive LSTM 1-1 Mapping Accuracy ({total_epochs} epochs total)')
plt.show()

NameError: name 'history' is not defined

In [16]:
# reverse mapping
int_to_note = dict((num, note) for num, note in enumerate(pitch_names))

best_pred_only = False
num_best_preds = 2

# random start point
start_idx = np.random.randint(0, len(data_X)-1)
v1_pattern = data_X[start_idx]
print(f'Starting pattern: {v1_pattern}')
v1_output = []
for idx in range(100 * SEQ_LEN):
#     print(f'Pattern {idx}: {v1_pattern}')
    prediction_input = np.reshape(v1_pattern, (1, len(v1_pattern), 1))
    prediction_input = prediction_input / float(len(notes))
    prediction = model.predict(prediction_input)
    
    if best_pred_only:
        pred_idx = np.argmax(prediction[0])
    else:
        top_5_idx = np.argpartition(prediction[0], -num_best_preds)[-num_best_preds:]
        pred_idx = top_5_idx[np.random.randint(0, len(top_5_idx))]
    
    result = int_to_note[pred_idx]
    v1_output.append(result)

#     print(f'\tPredicted index: {pred_idx}')

    v1_pattern.append(pred_idx)
    v1_pattern = v1_pattern[1:len(v1_pattern)]
#end

Starting pattern: [599, 1751, 3866, 4536, 3062, 2354, 1694, 1385]


In [17]:
v1_pattern

[8, 950, 8, 8, 2786, 3540, 8, 2662]

In [18]:
from music21 import stream, duration, key, meter, note, chord, instrument

def split_note_duration(pattern):
    n, d = pattern.split('$')
    if '/' in d:
        a, b = d.split('/')
        d = float(a) / float(b)
    else:
        d = float(d)
    #end
    return n, d
#end

def notes_array_to_midi(notes_array):
    offset = 0.0
    output_notes = []
    
    for pattern in notes_array:
#         print(pattern)
        # handle chords (i.e. multiple notes split by NOTE_SEPARATOR)
        if NOTE_SEPARATOR in pattern:
            chord_notes = []
            for chord_note in pattern.split(NOTE_SEPARATOR):
                note_name, note_duration = split_note_duration(chord_note)
                new_note = note.Note(note_name)
                new_note.offset = offset
                new_note.storedInstrument = instrument.Piano
                new_note.duration = duration.Duration(note_duration)
                output_notes.append(new_note)
#                 chord_notes.append(new_note)
            #end
            new_chord = chord.Chord(chord_notes)
            new_chord.offset = offset
#             output_notes.append(new_chord)
        #end
        else:
            note_name, note_duration = split_note_duration(pattern)
            # handle rests
            if REST_VALUE == note_name:
                new_rest = note.Rest()
                new_rest.offset = offset
                new_rest.duration = duration.Duration(note_duration)
                output_notes.append(new_rest)
            else:
                new_note = note.Note(note_name)
                new_note.offset = offset
                new_note.duration = duration.Duration(note_duration)
                output_notes.append(new_note)
        #end
        
        offset += TIMESTEP
    #end
    
    midi_stream = stream.Stream(output_notes)
    midi_stream.timeSignature = meter.TimeSignature('2/4')
    midi_stream.keySignature = key.KeySignature(0)
    midi_stream.write('midi', fp='./output.mid')
    return output_notes
#end

s = notes_array_to_midi(v1_output)
print('Done!')

Done!
