# Realizing Velocity Prediction with CNN

This part of the notebook implements velocity prediction with a CNN instead of the LSTM models used in the original [paper](https://arxiv.org/pdf/1708.03535.pdf).

In [None]:
import os
import mido
import keras
import numpy as np
import sklearn.model_selection as ms
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving it all up front, then make Keras use that session.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)
set_session(session)

In [None]:
from keras import backend as K
# Sanity check before training: show the GPUs the Keras TensorFlow backend can see.
# NOTE(review): `_get_available_gpus` is a private helper (leading underscore),
# so it may not exist in other Keras versions.
K.tensorflow_backend._get_available_gpus()

###  Modeling
Now that we have represented our data, we would like to see whether we can build a model that predicts the velocities from the 3D matrix we generated.

In [None]:
# Load the pre-computed arrays: note matrices (model input), velocity
# matrices (regression target) and the per-sample genre labels.
X, Y, labels = (
    np.load(path)
    for path in (
        'matricies/notes.npy',
        'matricies/velocities.npy',
        'matricies/labels.npy',
    )
)

In [None]:
# Samples labelled 1 are the classical pieces; build the mask once and
# apply it to inputs and targets alike.
classical_mask = (labels == 1).ravel()
X_classical = X[classical_mask]
Y_classical = Y[classical_mask]

In [None]:
# Hold out 10% of the classical samples for testing (fixed seed for repeatability).
X_train, X_test, Y_train, Y_test = ms.train_test_split(
    X_classical,
    Y_classical,
    test_size=0.1,
    random_state=43,
)
print(*(split.shape for split in (X_train, X_test, Y_train, Y_test)))

#### 2.1 Attempt to use a CNN 

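The code block below defines the CNN: a stack of dilated 1D convolutions over the time axis, whose output is masked by the note on/off channel. (The train/test split was already done in the cell above.)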

In [None]:
def model1(input_shape):
    """Build the dilated 1-D CNN used to predict note velocities.

    The input is split along its last axis into two planes: index 0 (the
    on/off plane) and index 1 (the sustain plane). Only the sustain plane is
    fed through the convolution stack; the on/off plane is multiplied
    element-wise with the network output, so positions where it is zero
    produce a zero output.

    Args:
        input_shape: Shape of one sample without the batch axis. The second
            to last entry is used as the number of convolution filters.
            Presumably (time, notes, 2) -- confirm against the data
            preparation code.

    Returns:
        An uncompiled ``keras.models.Model`` named ``'basic'``. Its output has
        the same shape as the on/off plane of the input.
    """
    # (kernel_size, dilation_rate) of Conv0, Conv1 and Conv2, in order.
    conv_specs = [(64, 1), (32, 2), (16, 4)]
    # Every 'valid' convolution shortens the time axis by
    # (kernel_size - 1) * dilation_rate. Padding that total on the left only
    # keeps the output length equal to the input length and makes the stack
    # causal. For the specs above this is 63 + 62 + 60 = 185.
    left_padding = sum((kernel - 1) * dilation for kernel, dilation in conv_specs)
    n_filters = input_shape[-2]

    X_input = keras.layers.Input(input_shape)
    X_on_off = keras.layers.Lambda(lambda t: t[:, :, :, 0])(X_input)
    X_sustain = keras.layers.Lambda(lambda t: t[:, :, :, 1])(X_input)

    X = keras.layers.ZeroPadding1D((left_padding, 0))(X_sustain)
    for index, (kernel, dilation) in enumerate(conv_specs):
        X = keras.layers.Conv1D(
            filters=n_filters,
            kernel_size=kernel,
            dilation_rate=dilation,
            padding='valid',
            name='Conv{}'.format(index),
            kernel_initializer=keras.initializers.glorot_normal(seed=None),
            bias_initializer=keras.initializers.glorot_normal(seed=None),
            data_format="channels_last",
        )(X)
        X = keras.layers.BatchNormalization(axis=2, name='bn{}'.format(index))(X)
    X = keras.layers.Activation('relu')(X)

    # Mask the prediction with the on/off plane.
    X = keras.layers.Multiply()([X, X_on_off])
    return keras.models.Model(inputs=X_input, outputs=X, name='basic')

In [None]:
# Build the classical velocity model from the per-sample input shape
# (batch axis dropped) and compile it for MSE regression.
# NOTE(review): 'accuracy' is a classification metric and is unlikely to be
# informative with a mean-squared-error loss -- consider 'mae'.
sample_shape = X_train.shape[1:]
m1 = model1(sample_shape)
m1.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the classical model.
m1.summary()

In [None]:
# Train the classical model on the training split; re-running this cell
# continues training from the current weights.
m1.fit(X_train, Y_train, epochs = 20, batch_size=8) # Run multiple times to train further!

In [None]:
# Save the trained classical model to 'classical.h5'.
m1.save('classical.h5')

In [None]:
# Reload the classical model from the file written by the save cell
# ('classical.h5'), so this cell also works after a kernel restart.
m1 = keras.models.load_model('classical.h5')

In [None]:
# Evaluate on the held-out split; `evaluate` returns [loss, metric] in the
# order given to `compile`.
preds = m1.evaluate(X_test, Y_test)
test_loss, test_accuracy = preds[0], preds[1]
print("Loss = " + str(test_loss))
print("Test Accuracy = " + str(test_accuracy))

In [None]:
# Predict velocities for the first test sample: add a batch axis of size 1,
# then flatten the result back to a 2-D matrix.
first_test_batch = X_test[0][np.newaxis, ...]
n_columns = X_test.shape[-2]
Y_0_hat = m1.predict(first_test_batch).reshape(-1, n_columns)

In [None]:
# Shape of the reshaped prediction: (rows, X_test.shape[-2]).
Y_0_hat.shape

In [None]:
# Show the predicted velocity matrix as an image (very wide figure).
plt.figure(figsize = (200,10))
plt.imshow(Y_0_hat)

In [None]:
# Show the ground-truth velocity matrix of the same test sample for comparison.
Y_0_true = Y_test[0].reshape(-1, X_test.shape[-2])
plt.figure(figsize=(200, 10))
plt.imshow(Y_0_true)

The prediction doesn't look bad compared with the ground truth — I'm happy!

# Jazz

Now we move on to training another velocity generator for jazz, using the same architecture.

In [None]:
# Samples labelled 0 are the jazz pieces; build the mask once and apply it
# to inputs and targets alike.
jazz_mask = (labels == 0).ravel()
X_jazz = X[jazz_mask]
Y_jazz = Y[jazz_mask]

In [None]:
# Hold out 10% of the jazz samples for testing (fixed seed for repeatability).
# This rebinds X_train / X_test / Y_train / Y_test to the jazz data.
X_train, X_test, Y_train, Y_test = ms.train_test_split(
    X_jazz,
    Y_jazz,
    test_size=0.1,
    random_state=43,
)
print(*(split.shape for split in (X_train, X_test, Y_train, Y_test)))

In [None]:
# Build the jazz velocity model with the same architecture as the classical
# one and compile it for MSE regression.
# NOTE(review): 'accuracy' is a classification metric and is unlikely to be
# informative with a mean-squared-error loss -- consider 'mae'.
sample_shape = X_train.shape[1:]
m2 = model1(sample_shape)
m2.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the jazz model.
m2.summary()

In [None]:
# Train the jazz model on the jazz training split; re-running this cell
# continues training from the current weights.
m2.fit(X_train, Y_train, epochs = 50, batch_size=16) # Run multiple times to train further!

In [None]:
# Save the trained jazz model to 'jazz.h5'.
m2.save('jazz.h5')

In [None]:
# Reload the jazz model from 'jazz.h5' (the function is `load_model`).
m2 = keras.models.load_model('jazz.h5')

In [None]:
# Predict velocities for the first jazz test sample with the jazz model (m2),
# flatten the result to a 2-D matrix and show it as an image.
Y_0_hat = m2.predict(np.expand_dims(X_test[0], axis=0))
Y_0_hat = Y_0_hat.reshape(-1, X_test.shape[-2])
plt.figure(figsize = (200,10))
plt.imshow(Y_0_hat)

In [None]:
# Show the ground-truth velocity matrix of the same jazz test sample for comparison.
Y_0_true = Y_test[0].reshape(-1, X_test.shape[-2])
plt.figure(figsize=(200, 10))
plt.imshow(Y_0_true)

# Generate

Now let us generate some music with this.

In [None]:
# Time resolution of the generated MIDI file: one matrix row per tick.
target_ticks_per_beat = 8
# Offset added to a matrix row index to obtain its MIDI note number.
notes_to_keep_down = 32


def _to_midi_velocity(value):
    """Round ``value`` to the nearest integer and clamp it to the MIDI range 0..127."""
    return max(0, min(127, int(round(float(value)))))


def generate(notes_mat, velocity_mat, bpm, seconds):
    """Render a note matrix and a velocity matrix into a single-track MIDI file.

    Args:
        notes_mat: Array of rank 3, indexed ``[tick, row, channel]``. Channel 0
            marks an onset at that tick and channel 1 marks that the key or
            pedal is still held. The last two rows are the sustain pedal and
            the soft pedal; all earlier rows are notes.
        velocity_mat: Array indexed ``[tick, row]`` holding the velocity to use
            for a note onset. Values are rounded and clamped to 0..127.
        bpm: Tempo written to the file, in beats per minute.
        seconds: Requested length of the rendering. It is converted to ticks
            and capped at the number of rows in ``notes_mat``.

    Returns:
        A ``mido.MidiFile`` (type 0) with one track.
    """
    new_midi = mido.MidiFile(type=0)
    new_midi.ticks_per_beat = target_ticks_per_beat
    track = mido.MidiTrack()
    new_midi.tracks.append(track)

    track.append(mido.MetaMessage('set_tempo', tempo=mido.bpm2tempo(bpm), time=0))
    track.append(mido.MetaMessage('time_signature', numerator=4, denominator=4,
                                  clocks_per_click=24, notated_32nd_notes_per_beat=8, time=0))
    track.append(mido.MetaMessage('track_name', name='Test Track', time=0))

    T, N, _ = notes_mat.shape
    # (matrix row, MIDI controller number): CC 64 is sustain, CC 67 is soft pedal.
    pedals = ((N - 2, 64), (N - 1, 67))
    notes_on = [False] * N
    prev_event_t = 0
    # Never read past the end of the matrix, whatever duration was requested.
    Ty = min(seconds * bpm // 60 * target_ticks_per_beat, T)
    print('generating {0} ticks of music'.format(Ty))
    for t in range(Ty):
        for n in range(N - 2):
            if notes_mat[t, n, 0] > 0:
                velocity = _to_midi_velocity(velocity_mat[t, n])
                track.append(mido.Message('note_on', note=n + notes_to_keep_down,
                                          velocity=velocity, time=t - prev_event_t))
                prev_event_t = t
                notes_on[n] = True
            if notes_on[n] and notes_mat[t, n, 1] == 0:
                # A note_on with velocity 0 is the conventional note-off.
                track.append(mido.Message('note_on', note=n + notes_to_keep_down,
                                          velocity=0, time=t - prev_event_t))
                prev_event_t = t
                notes_on[n] = False
        # Pedal presses first, then pedal releases, each in sustain/soft order.
        for row, control in pedals:
            if notes_mat[t, row, 0] == 1:
                track.append(mido.Message('control_change', control=control,
                                          value=127, time=t - prev_event_t))
                prev_event_t = t
                # Track each pedal under its own row so it is released correctly.
                notes_on[row] = True
        for row, control in pedals:
            if notes_on[row] and notes_mat[t, row, 1] == 0:
                track.append(mido.Message('control_change', control=control,
                                          value=0, time=t - prev_event_t))
                prev_event_t = t
                notes_on[row] = False
    return new_midi

In [None]:
# Render the first test sample twice (120 bpm, 30 s): once with the predicted
# velocities rounded to integers, once with the ground-truth velocities.
predicted_velocities = np.round(Y_0_hat).astype(np.int32)
new_midi = generate(X_test[0], predicted_velocities, bpm=120, seconds=30)
old_midi = generate(X_test[0], Y_test[0], bpm=120, seconds=30)
new_midi.save('vel_generate.midi')
old_midi.save('vel_original.midi')

# Cross Generate

Now let us see whether playing-style transfer works: we generate a velocity matrix for a classical piece with the jazz model, and then for a jazz piece with the classical model.


In [None]:
# Number and shape of the classical samples available for cross-style generation.
X_classical.shape

In [None]:
# Cross-style prediction: run the jazz model (m2) on a classical piece.
classical_batch = X_classical[100][np.newaxis, ...]
n_columns = X_test.shape[-2]
Y_0_hat = m2.predict(classical_batch).reshape(-1, n_columns)
Y_0_hat.shape
# plt.figure(figsize = (200,10))
# plt.imshow(Y_0_hat)

In [None]:
# plt.figure(figsize = (200,10))
# plt.imshow(Y_classical[100])

In [None]:
# Render the classical piece twice (110 bpm, 30 s): once with the jazz
# model's velocities, once with its original velocities.
predicted_velocities = np.round(Y_0_hat).astype(np.int32)
new_midi = generate(X_classical[100], predicted_velocities, bpm=110, seconds=30)
old_midi = generate(X_classical[100], Y_classical[100], bpm=110, seconds=30)
new_midi.save('vel_generate_c2j.midi')
old_midi.save('vel_original_c2j.midi')

In [None]:
# Cross-style prediction the other way round: run the classical model (m1)
# on a jazz piece, then render it with predicted and with original velocities.
jazz_batch = X_jazz[64][np.newaxis, ...]
Y_0_hat = m1.predict(jazz_batch).reshape(-1, X_test.shape[-2])
predicted_velocities = np.round(Y_0_hat).astype(np.int32)
new_midi = generate(X_jazz[64], predicted_velocities, bpm=110, seconds=30)
old_midi = generate(X_jazz[64], Y_jazz[64], bpm=110, seconds=30)
new_midi.save('vel_generate_j2c.midi')
old_midi.save('vel_original_j2c.midi')


In [None]:
# Show the velocity matrix currently held in Y_0_hat as an image.
plt.figure(figsize = (200,10))
plt.imshow(Y_0_hat)

In [None]:
# Show the original velocity matrix of jazz sample 64 for comparison.
plt.figure(figsize = (200,10))
plt.imshow(Y_jazz[64])