In [2]:
from mido import MidiFile, MidiTrack, Message
from keras.layers import LSTM, Dense, Activation, Dropout
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.optimizers import RMSprop
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from pathlib import Path
import mido


Using TensorFlow backend.


In [2]:
notes = []

time = float(0)
prev_time = float(0)


music_path = Path('./pop-music-collection/Pop_Music_Midi/')

for fil in music_path.glob('*.midi'):
    mid = MidiFile(fil)
    for msg in mid:
        time += msg.time
        if not msg.is_meta:
            ### only interested in one channel
            if msg.type == 'note_on':  
            # note in vector form to train on
                note = msg.bytes() 
            # only interested in the note and velocity. note message is in the form of [type, note, velocity]
                note = note[1:3]
                note.append(time-prev_time)
                note.append(msg.channel)
                prev_time = time
                notes.append(note)
print(len(notes))
print(time)

21870
2744.0826523281203


In [3]:
t = []
for note in notes:
    note[0] = (note[0]-24)/88
    note[1] = note[1]/127
    t.append(note[2])
    note[3] /=4
max_t = max(t) # scale based on the biggest time of any note
for note in notes:
    note[2] = note[2]/max_t

In [4]:
X = []
Y = []
n_prev = 15
# n_prev notes to predict the (n_prev+1)th note
for i in range(len(notes)-n_prev):
    x = notes[i:i+n_prev]
    y = notes[i+n_prev]
    X.append(x)
    Y.append(y)
# print(Y)
# save a seed to do prediction later
X = np.array(X)
Y = np.array(Y)


[[0.375, 0.8031496062992126, 0.0, 0.25], [0.45454545454545453, 0.6377952755905512, 0.0, 0.5], [0.48863636363636365, 0.6377952755905512, 0.0, 0.5], [0.4090909090909091, 0.6377952755905512, 0.0, 0.5], [0.2727272727272727, 0.6377952755905512, 0.0, 0.75], [0.45454545454545453, 0.6377952755905512, 0.04801200300075019, 0.5], [0.48863636363636365, 0.6377952755905512, 0.0, 0.5], [0.4090909090909091, 0.6377952755905512, 0.0, 0.5], [0.2727272727272727, 0.6377952755905512, 0.0, 0.75], [0.3522727272727273, 0.8031496062992126, 0.04801200300075019, 0.25], [0.3181818181818182, 0.8031496062992126, 0.04801200300075019, 0.25], [0.45454545454545453, 0.6377952755905512, 0.0, 0.5], [0.48863636363636365, 0.6377952755905512, 0.0, 0.5], [0.4090909090909091, 0.6377952755905512, 0.0, 0.5], [0.2727272727272727, 0.6377952755905512, 0.0, 0.75]]


In [12]:
temp = np.random.randint(0,len(notes)-n_prev)
seed = notes[temp:n_prev+temp]
print(seed)

[[0.19318181818181818, 0.6377952755905512, 0.0, 0.75], [0.45454545454545453, 0.8031496062992126, 0.09602400600150038, 0.25], [0.4318181818181818, 0.8031496062992126, 0.04801200300075019, 0.25], [0.4659090909090909, 0.2440944881889764, 0.0, 0.5], [0.375, 0.2440944881889764, 0.0, 0.5], [0.4090909090909091, 0.2440944881889764, 0.0, 0.5], [0.4090909090909091, 0.8031496062992126, 0.04801200300075019, 0.25], [0.19318181818181818, 0.4645669291338583, 0.0, 0.75], [0.4090909090909091, 0.8031496062992126, 0.04801200300075019, 0.25], [0.4090909090909091, 0.6377952755905512, 0.0, 0.5], [0.45454545454545453, 0.6377952755905512, 0.0, 0.5], [0.375, 0.6377952755905512, 0.0, 0.5], [0.23863636363636365, 0.6377952755905512, 0.0, 0.75], [0.3522727272727273, 0.8031496062992126, 0.04801200300075019, 0.25], [0.4318181818181818, 0.8031496062992126, 0.04801200300075019, 0.25]]


In [13]:
print('Build model...')
# del model
model = Sequential()
model.add(LSTM(64, input_shape=(n_prev, 4), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32,return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(4))
model.add(Activation('linear'))

model.compile(loss='mse', optimizer='rmsprop',metrics = ['accuracy'])
model.summary()
model.fit(X, Y, batch_size=300, epochs=20, verbose=1)


Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 15, 64)            17664     
_________________________________________________________________
dropout_3 (Dropout)          (None, 15, 64)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dropout_4 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 132       
_________________________________________________________________
activation_2 (Activation)    (None, 4)                 0         
Total params: 30,212
Trainable params: 30,212
Non-trainable params: 0
_________________________________________________________

<keras.callbacks.History at 0x7f7b984fc588>

In [14]:
prediction = []
x = seed
x = np.expand_dims(x, axis=0)

for i in range(300):
    preds = model.predict(x)
    print (preds)
    x = np.squeeze(x)
    x = np.concatenate((x, preds))
    x = x[1:]
    x = np.expand_dims(x, axis=0)
    preds = np.squeeze(preds)
    prediction.append(preds)

for pred in prediction:
    pred[0] = int(88*pred[0] + 24)
    pred[1] = int(127*pred[1])
    pred[2] *= max_t
    pred[3] = round(pred[3]*4)
    # to reject values that will be out of range
    if pred[0] < 24:
        pred[0] = 24
    elif pred[0] > 102:
        pred[0] = 102
    if pred[1] < 0:
        pred[1] = 0
    elif pred[1] > 127:
        pred[1] = 127
    if pred[2] < 0:
        pred[2] = 0

[[0.4214821  0.5074451  0.02069405 0.41685882]]
[[0.4426972  0.37787592 0.00134451 0.45705417]]
[[ 0.42384055  0.35452577 -0.00406138  0.5151391 ]]
[[0.36096576 0.57173014 0.02299416 0.52801204]]
[[0.35670432 0.65355545 0.0329365  0.5098709 ]]
[[0.4109872  0.631384   0.03485459 0.43546224]]
[[0.4382682  0.59031725 0.02482702 0.4398391 ]]
[[0.4162904  0.54327136 0.00853969 0.5189128 ]]
[[0.38393152 0.56355387 0.00850657 0.5717631 ]]
[[0.37335917 0.6176305  0.02313991 0.538267  ]]
[[0.40547228 0.62139416 0.03477341 0.4567175 ]]
[[0.42534375 0.54812187 0.02506678 0.44903505]]
[[0.4243667  0.4735213  0.01055434 0.49176556]]
[[0.40650153 0.47439736 0.00761961 0.52951276]]
[[0.38568205 0.5514532  0.01726341 0.5300418 ]]
[[0.39479327 0.5931583  0.02446689 0.49848413]]
[[0.4144498  0.57563555 0.02135945 0.48122624]]
[[0.4201085  0.5443222  0.01385935 0.4978528 ]]
[[0.40830204 0.53387654 0.00997069 0.528895  ]]
[[0.39301205 0.5608159  0.01390245 0.53897214]]
[[0.39527097 0.5863498  0.02081754 0

[[0.39709276 0.54845    0.01503329 0.5175283 ]]
[[0.39709282 0.54844993 0.01503328 0.5175283 ]]
[[0.3970928  0.5484499  0.01503328 0.5175283 ]]
[[0.39709276 0.5484499  0.01503327 0.5175284 ]]
[[0.39709276 0.54845    0.01503328 0.51752836]]
[[0.39709276 0.54845    0.01503329 0.5175283 ]]
[[0.3970928  0.54844993 0.01503329 0.5175283 ]]
[[0.39709282 0.5484499  0.01503328 0.5175283 ]]
[[0.39709276 0.5484499  0.01503327 0.5175284 ]]
[[0.39709276 0.54844993 0.01503328 0.51752836]]
[[0.39709276 0.54845    0.01503329 0.5175283 ]]
[[0.39709282 0.54845    0.01503329 0.5175283 ]]
[[0.3970928  0.54844993 0.01503328 0.5175283 ]]
[[0.3970928  0.54844993 0.01503327 0.51752836]]
[[0.39709276 0.54844993 0.01503328 0.51752836]]
[[0.39709276 0.54844993 0.01503328 0.5175283 ]]
[[0.3970928  0.54845    0.01503328 0.5175283 ]]
[[0.3970928  0.54844993 0.01503328 0.5175283 ]]
[[0.39709276 0.5484499  0.01503327 0.51752836]]
[[0.39709276 0.54844993 0.01503328 0.5175283 ]]
[[0.39709276 0.54844993 0.01503328 0.517

In [15]:
mid = MidiFile()
track = MidiTrack()
mid.tracks.append(track)

for note in prediction:
    # 147 means note_on
    channel = note[3]*4
    note = note[:3]
    note = np.insert(note, 0, 147)
    bytes = note.astype(int)
  
    msg = Message.from_bytes(bytes[0:3]) 
    time = int(note[3]/0.001025) # to rescale to midi's delta ticks. arbitrary value for now.
    msg.time = time
    msg.channel = int(channel)
    track.append(msg)
    print (note)

mid.save('new_song.midi')

[1.4700000e+02 6.1000000e+01 6.4000000e+01 1.0775457e-01]
[1.4700000e+02 6.2000000e+01 4.7000000e+01 7.0009218e-03]
[147.  61.  45.   0.]
[1.47000000e+02 5.50000000e+01 7.20000000e+01 1.19731285e-01]
[147.          55.          83.           0.17150138]
[147.          60.          80.           0.18148895]
[1.4700000e+02 6.2000000e+01 7.4000000e+01 1.2927505e-01]
[1.470000e+02 6.000000e+01 6.800000e+01 4.446642e-02]
[1.4700000e+02 5.7000000e+01 7.1000000e+01 4.4293955e-02]
[1.4700000e+02 5.6000000e+01 7.8000000e+01 1.2049024e-01]
[147.          59.          78.           0.18106624]
[1.4700000e+02 6.1000000e+01 6.9000000e+01 1.3052352e-01]
[1.4700000e+02 6.1000000e+01 6.0000000e+01 5.4956786e-02]
[1.4700000e+02 5.9000000e+01 6.0000000e+01 3.9675556e-02]
[1.47000e+02 5.70000e+01 7.00000e+01 8.98911e-02]
[1.4700000e+02 5.8000000e+01 7.5000000e+01 1.2739986e-01]
[1.4700000e+02 6.0000000e+01 7.3000000e+01 1.1121933e-01]
[1.470000e+02 6.000000e+01 6.900000e+01 7.216607e-02]
[1.470000e+02 5.