In [1]:
import numpy as np
import os
import pretty_midi
import data

import keras
from keras import layers

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Create the project's MIDI data generator for the "clean_midi" path.
# NOTE(review): the meaning of 500 is defined by data.MidiDataGenerator (presumably a
# sequence length) — confirm against data.py.
mdg = data.MidiDataGenerator("clean_midi", 500)
# `g` is advanced with next() by the batch-collection loop that follows.
# NOTE(review): 50 is presumably the batch size — confirm against MidiDataGenerator.flow.
g = mdg.flow(50)

In [3]:
# Pull a fixed number of batches from the generator up front and keep them as a plain
# list; they are merged stream-by-stream afterwards, so they are not concatenated here.
batches = []
for batch_count in range(1, 21):
    batches.append(next(g))
    print("batches so far: {}".format(batch_count))

len:1349




len:753
len:1716
len:626
len:2558
len:613
error on clean_midi/Nirvana/Polly.1.mid data byte must be in range 0..127
len:1386
error on clean_midi/Bush/Glycerine.1.mid Could not decode key with 1 flats and mode 255
len:826
len:1268
error on clean_midi/Rascel/Arrividerci Roma.mid data byte must be in range 0..127
error on clean_midi/Celine Dion/Where Does My Heart Beat Now.mid Could not decode key with 2 flats and mode 255
len:2862
len:3926
len:1850
error on clean_midi/Billy Joel/Pressure.mid data byte must be in range 0..127
len:1319
error on clean_midi/Jimi Hendrix/Purple Haze.2.mid data byte must be in range 0..127
len:687
error on clean_midi/Rednex/The Ultimate Rednex Mega Mix, Part 3: Cotton Eye Joe.mid data byte must be in range 0..127
len:3244
error on clean_midi/Clayderman Richard/Ballade Pour Adeline.mid data byte must be in range 0..127
error on clean_midi/Jennifer Lopez/If You Had My Love.mid MThd not found. Probably not a MIDI file
len:1190
len:1235
len:1403
error on clean_mid

In [26]:
# One accumulator per stream, in the order the batches store them:
# octaves, notes, velocities, durations, rests.
streams = [[] for _ in range(5)]

for batch in batches:
    for field, collected in enumerate(streams):
        collected.extend(batch[field])

# Turn each accumulated stream into a single array for training.
x_train_octaves, x_train_notes, x_train_velocities, x_train_durations, x_train_rests = [
    np.array(collected) for collected in streams
]

In [30]:
def create_model(stateful=False, batch_size=None, seq_len=None):
    """Build the five-stream note-event LSTM.

    The network takes five parallel sequences (an octave stream of width 8, a
    note stream of width 12, and scalar velocity, duration and rest streams),
    concatenates them per time step, runs three stacked 256-unit CuDNN LSTMs
    and emits one prediction per stream at every time step. The octave and
    note heads end in a softmax; the three scalar heads are linear.

    Args:
        stateful: When False, every output drops its final time step through a
            Lambda layer named ``octaves_out``, ``notes_out``,
            ``velocities_out``, ``durations_out`` or ``rests_out``, so outputs
            have one step fewer than the inputs. When True, the LSTMs are
            created stateful and the outputs are returned untrimmed under
            Keras' auto-generated layer names.
        batch_size: Fixed batch size of the inputs. Required when ``stateful``
            is True; ignored otherwise.
        seq_len: Number of time steps per input. ``None`` (the default) means
            variable length when not stateful and exactly one step when
            stateful.

    Returns:
        An uncompiled ``keras.models.Model`` whose five inputs and five outputs
        are both ordered octaves, notes, velocities, durations, rests.

    Raises:
        ValueError: If ``stateful`` is True and ``batch_size`` is None.
    """
    if stateful and batch_size is None:
        raise ValueError("create_model(stateful=True) requires a fixed batch_size")

    reg = keras.regularizers.l2(1e-6)

    def make_input(width):
        # Stateful inputs pin the batch size (and default to one step at a time);
        # otherwise the batch dimension is left open.
        if stateful:
            steps = 1 if seq_len is None else seq_len
            return layers.Input(batch_shape=(batch_size, steps, width))
        return layers.Input((seq_len, width))

    def make_lstm():
        # Always return full sequences: the per-step heads below need every step,
        # and stateful callers index the single returned step.
        return layers.CuDNNLSTM(256, unit_forget_bias=True, kernel_regularizer=reg, recurrent_regularizer=reg,
                                activity_regularizer=reg, return_sequences=True, stateful=stateful)

    def make_head(width):
        return layers.TimeDistributed(layers.Dense(width, kernel_regularizer=reg))

    # Layers are created in a fixed order so that get_weights()/set_weights()
    # line up between models built by separate calls to this function.
    x_input_octaves = make_input(8)
    x_input_notes = make_input(12)
    x_input_velocities = make_input(1)
    x_input_durations = make_input(1)
    x_input_rests = make_input(1)

    # The scalar streams are batch-normalised before being mixed with the others.
    x_input_velocities_bn = layers.BatchNormalization()(x_input_velocities)
    x_input_durations_bn = layers.BatchNormalization()(x_input_durations)
    x_input_rests_bn = layers.BatchNormalization()(x_input_rests)

    x = layers.Concatenate(2)([x_input_octaves, x_input_notes, x_input_velocities_bn, x_input_durations_bn, x_input_rests_bn])

    for _ in range(3):
        x = make_lstm()(x)

    x_octaves = make_head(8)(x)
    x_notes = make_head(12)(x)
    x_velocities = make_head(1)(x)
    x_durations = make_head(1)(x)
    x_rests = make_head(1)(x)

    x_octaves = layers.Softmax(axis=2)(x_octaves)
    x_notes = layers.Softmax(axis=2)(x_notes)

    def cut_last(sequence):
        # Drop the final time step along axis 1.
        return sequence[:, :-1]

    if not stateful:
        x_octaves = layers.Lambda(cut_last, name="octaves_out")(x_octaves)
        x_notes = layers.Lambda(cut_last, name="notes_out")(x_notes)
        x_velocities = layers.Lambda(cut_last, name="velocities_out")(x_velocities)
        x_durations = layers.Lambda(cut_last, name="durations_out")(x_durations)
        x_rests = layers.Lambda(cut_last, name="rests_out")(x_rests)

    model = keras.models.Model([x_input_octaves, x_input_notes, x_input_velocities, x_input_durations, x_input_rests],
                               [x_octaves, x_notes, x_velocities, x_durations, x_rests])
    return model

In [33]:
model = create_model()
model.summary()

# Losses and their weights are listed in model output order:
# octaves, notes, velocities, durations, rests.
output_losses = ['categorical_crossentropy', 'categorical_crossentropy', 'mse', 'mse', 'mse']
output_loss_weights = [1, 1, 5e-4, 5, 5]
# Accuracy is tracked only for the two softmax outputs.
accuracy_metrics = {'octaves_out': 'acc', 'notes_out': 'acc'}

opt = keras.optimizers.Adam(1e-3)
model.compile(optimizer=opt, loss=output_losses, metrics=accuracy_metrics,
              loss_weights=output_loss_weights)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_23 (InputLayer)           (None, None, 1)      0                                            
__________________________________________________________________________________________________
input_24 (InputLayer)           (None, None, 1)      0                                            
__________________________________________________________________________________________________
input_25 (InputLayer)           (None, None, 1)      0                                            
__________________________________________________________________________________________________
input_21 (InputLayer)           (None, None, 8)      0                                            
__________________________________________________________________________________________________
input_22 (

In [34]:
train_inputs = [x_train_octaves, x_train_notes, x_train_velocities, x_train_durations, x_train_rests]
# Targets are the same streams with the first time step removed.
train_targets = [stream[:, 1:] for stream in train_inputs]

model.fit(x=train_inputs, y=train_targets, epochs=1000, validation_split=0.2)

Epoch 541/1000
Epoch 542/1000
Epoch 543/1000
Epoch 544/1000
Epoch 545/1000
Epoch 546/1000
Epoch 547/1000
Epoch 548/1000
Epoch 549/1000
Epoch 550/1000
Epoch 551/1000
Epoch 552/1000
Epoch 553/1000
Epoch 554/1000
Epoch 555/1000
Epoch 556/1000
Epoch 557/1000
Epoch 558/1000
Epoch 559/1000
Epoch 560/1000
Epoch 561/1000
Epoch 562/1000
Epoch 563/1000
Epoch 564/1000
Epoch 565/1000
Epoch 566/1000
Epoch 567/1000
Epoch 568/1000
Epoch 569/1000
Epoch 570/1000
Epoch 571/1000
Epoch 572/1000
Epoch 573/1000
Epoch 574/1000
Epoch 575/1000
Epoch 576/1000
Epoch 577/1000
Epoch 578/1000
Epoch 579/1000
Epoch 580/1000
Epoch 581/1000
Epoch 582/1000
Epoch 583/1000
Epoch 584/1000
Epoch 585/1000
Epoch 586/1000
Epoch 587/1000
Epoch 588/1000
Epoch 589/1000
Epoch 590/1000
Epoch 591/1000
Epoch 592/1000
Epoch 593/1000
Epoch 594/1000
Epoch 595/1000
Epoch 596/1000
Epoch 597/1000
Epoch 598/1000
Epoch 599/1000
Epoch 600/1000
Epoch 601/1000


KeyboardInterrupt: 

In [22]:
# Rebind `model` to the network stored in 'first.h5'.
# NOTE(review): this cell's execution count (22) is lower than that of the training cells
# placed before it, so it was run out of order; executed top to bottom it replaces the
# freshly trained `model` — confirm that is intended.
model = keras.models.load_model('first.h5')

In [35]:
# Stateful copy of the network for one-event-at-a-time prediction (batch of one),
# initialised with the weights of `model`.
predict_model = create_model(stateful=True, batch_size=1)
trained_weights = model.get_weights()
predict_model.set_weights(trained_weights)

In [40]:
sample = False  # True: draw octave/note from the predicted distributions; False: take the argmax

# Seed event. Every entry of `song` is [octave index, note index, velocity, duration, rest].
song = [[4, 6, 50, 0.25, 0]]


def _pick_index(probabilities):
    """Draw an index from `probabilities` by walking the cumulative sum.

    Falls back to the last index when the probabilities sum to slightly less
    than the drawn threshold.
    """
    threshold = np.random.random()
    for index, probability in enumerate(probabilities):
        threshold -= probability
        if threshold <= 0:
            return index
    return len(probabilities) - 1


# Start from a clean recurrent state so an earlier (possibly interrupted) run of this
# cell cannot leak LSTM state into this generation.
predict_model.reset_states()

for _ in range(500):
    prev_octave, prev_note, prev_velocity, prev_duration, prev_rest = song[-1]

    # Encode the previous event as a single time step for each of the five model inputs.
    last_out = [np.zeros((1, 1, 8)), np.zeros((1, 1, 12)), np.zeros((1, 1, 1)), np.zeros((1, 1, 1)), np.zeros((1, 1, 1))]
    last_out[0][0, 0, prev_octave] = 1
    last_out[1][0, 0, prev_note] = 1
    last_out[2][0, 0, 0] = prev_velocity
    last_out[3][0, 0, 0] = prev_duration
    last_out[4][0, 0, 0] = prev_rest

    output = predict_model.predict(last_out)

    octaves = output[0][0, 0]
    notes = output[1][0, 0]

    if sample:
        # Octave is drawn first, then note, so the random stream is consumed in a fixed order.
        octave = _pick_index(octaves)
        note = _pick_index(notes)
    else:
        octave = np.argmax(octaves)
        note = np.argmax(notes)

    velocity = int(output[2][0, 0, 0])
    duration = output[3][0, 0, 0]
    rest = output[4][0, 0, 0]

    song.append([octave, note, velocity, duration, rest])

# Leave the stateful model reset for whatever uses it next.
predict_model.reset_states()

In [41]:
midi = pretty_midi.PrettyMIDI()
instrument = pretty_midi.Instrument(program=0)

# Running start time of the next note.
time = 0

for octave, note, velocity, duration, rest in song:
    # Velocity, duration and rest come from unbounded regression outputs, so they are
    # forced into ranges a MIDI file can hold: data bytes must lie in 0..127, and a
    # negative duration or rest would put a note's end before its start or move the
    # clock backwards.
    velocity = int(min(max(velocity, 0), 127))
    # NOTE(review): the +21 offset presumably mirrors the pitch encoding used by the
    # data module — confirm against data.py.
    pitch = int(min(max(octave * 12 + note + 21, 0), 127))
    duration = max(float(duration), 0.0)
    rest = max(float(rest), 0.0)

    pm_note = pretty_midi.Note(velocity, pitch, time, time + duration)
    time += duration + rest
    instrument.notes.append(pm_note)

midi.instruments.append(instrument)
midi.write('out.mid')