In [2]:
%matplotlib inline

from keras.models import Model
from keras.callbacks import TensorBoard, EarlyStopping
from keras.layers import LSTM, Dropout, Dense, BatchNormalization, Activation, Input, TimeDistributed, Masking
from keras.regularizers import l2
from keras.layers.advanced_activations import LeakyReLU, ELU, PReLU
from keras.optimizers import RMSprop, Adam, Adadelta, Adagrad
from keras.preprocessing.sequence import pad_sequences
import numpy as np
from time import time
from matplotlib import pyplot as plt
from sklearn.preprocessing import binarize

from assignment.helpers import datapreparation as prep


# The MIDI can be made audible by multiplying the note values (the range is 0-128, so a value of 1 is basically silence).

# Note on specifying the initial state of RNNs
# Seems as though you can reset state (probably stateful=True), and then pass an array of initial states that can be used
# in the RNN - so initialize based on composer :D
# https://keras.io/layers/recurrent/

# Directory holding the piano-roll training data.
fs1_dirpath = "./assignment/datasets/training/piano_roll_fs1"

datasets = prep.load_all_dataset(fs1_dirpath)
dataset_names = prep.load_all_dataset_names(fs1_dirpath)

# Drop the first column of every roll (the header column).
datasets = [roll[:, 1:] for roll in datasets]

# Lookup table: positional song id -> dataset name.
dataset_id_names = dict(zip(np.arange(len(dataset_names)), dataset_names))

num_songs = len(datasets)
num_keys = len(datasets[0])  # size of the first axis of a roll
sequence_length = 10  # time steps per window

# Enough whole windows to hold the longest song; pad_length is that span in time steps.
longest_song = max(roll.shape[1] for roll in datasets)
num_batches = longest_song // sequence_length + 1
pad_length = sequence_length * num_batches

# NOTE(review): the original remark on this line was about feeding samples one at a time to
# control state resets of the stateful layers (slow); the value in use is 21 - confirm intent.
b_size = 21

def preprocess_for_stateful_with_padding(dataset, num_songs, sequence_length, num_batches, num_keys, pad_length):
    """Pad every song to ``pad_length`` steps and cut it into consecutive windows.

    Songs are post-padded through ``pad_sequences`` with rows of ``-1.0`` (one
    entry per key). The result is indexed window-first: entry ``[b][s]`` holds
    steps ``b * sequence_length`` up to ``(b + 1) * sequence_length`` of padded
    song ``s``.

    Args:
        dataset: songs handed to ``pad_sequences``; the caller in this notebook
            passes one (time_steps, num_keys) array per song.
        num_songs: number of songs to read from the padded data.
        sequence_length: number of time steps per window.
        num_batches: number of windows taken from every song.
        num_keys: length of the ``-1.0`` padding row.
        pad_length: ``maxlen`` passed to ``pad_sequences``.

    Returns:
        ``np.array`` built from the ``num_batches`` x ``num_songs`` nested list
        of windows.
    """
    pad_row = np.full(num_keys, -1.0)
    songs_padded = pad_sequences(dataset, maxlen=pad_length, padding="post", value=pad_row)
    windows = [slice(b * sequence_length, (b + 1) * sequence_length) for b in range(num_batches)]
    return np.array([[songs_padded[song, window] for song in range(num_songs)] for window in windows])

def preprocess_for_3d(dataset, num_songs, sequence_length, num_batches, num_keys, pad_length):
    """Pad every song to ``pad_length`` steps and group its windows per song.

    Songs are post-padded through ``pad_sequences`` with rows of ``-1.0`` (one
    entry per key). The result is indexed song-first, and every window is
    wrapped in its own one-element list, which adds a singleton axis right
    after the window axis.

    Args:
        dataset: songs handed to ``pad_sequences``; one output row is created
            per element.
        num_songs: number of songs whose windows are filled in.
        sequence_length: number of time steps per window.
        num_batches: number of windows taken from every song.
        num_keys: length of the ``-1.0`` padding row.
        pad_length: ``maxlen`` passed to ``pad_sequences``.

    Returns:
        ``np.array`` built from the per-song lists of wrapped windows.
    """
    pad_row = np.full(num_keys, -1.0)
    songs_padded = pad_sequences(dataset, maxlen=pad_length, padding="post", value=pad_row)
    windows = [slice(b * sequence_length, (b + 1) * sequence_length) for b in range(num_batches)]
    per_song = [[] for _ in dataset]
    for song_idx in range(num_songs):
        for window in windows:
            per_song[song_idx].append([songs_padded[song_idx, window]])
    return np.array(per_song)

# Put time on the first axis of every roll: (keys, steps) -> (steps, keys).
# The songs differ in length, so they are kept in a plain list: wrapping such a ragged
# collection in np.array() raises ValueError on NumPy >= 1.24, and pad_sequences only
# needs a sequence of per-song arrays anyway.
datasets = [dataset.T for dataset in datasets]
xs = preprocess_for_stateful_with_padding(datasets, num_songs, sequence_length, num_batches, num_keys, pad_length)

# Targets are the inputs shifted one step ahead; the final target row of each song is all ones.
datasets_labels = [np.append(dataset[1:, :], np.ones((1, num_keys)), axis=0) for dataset in datasets]
ys = preprocess_for_stateful_with_padding(datasets_labels, num_songs, sequence_length, num_batches, num_keys, pad_length)
print(xs.shape)
print(ys.shape)
# 84 batches x 43 songs x 10 time_steps x 128 piano_keys



(84, 43, 10, 128)
(84, 43, 10, 128)


In [3]:
# Network input: a fixed batch of b_size samples, each shaped
# (num_songs, sequence_length, num_keys).
inputs = Input(batch_shape=(b_size, num_songs, sequence_length, num_keys))

# NOTE(review): `mask` is built here, but the first LSTM below is wired to `inputs`, so nothing
# downstream consumes the masked tensor - confirm whether it was meant to feed lstm1.
mask = TimeDistributed(Masking(mask_value=-1.0))(inputs)

# Settings shared by both recurrent layers. Values noted as alternatives in the original
# comments: dropout 0.2 / 0.25, recurrent_dropout 0.25, and l2(0.0001) for the kernel,
# recurrent and activity regularizers.
lstm_kwargs = dict(
    activation='relu',
    return_sequences=True,
    stateful=True,
    dropout=0.0,
    recurrent_dropout=0.0,
    kernel_regularizer=None,
    recurrent_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
)

# Units = units per timestep LSTM block, i.e. the output dimensionality
# (num_keys here, since input and output both have one value per key).
lstm1 = TimeDistributed(LSTM(num_keys, **lstm_kwargs))(inputs)
normalized1 = TimeDistributed(BatchNormalization())(lstm1)
dense1 = TimeDistributed(Dense(num_keys, activation="sigmoid"))(normalized1)

lstm2 = TimeDistributed(LSTM(num_keys, **lstm_kwargs))(dense1)
normalized2 = TimeDistributed(BatchNormalization())(lstm2)

# NOTE(review): `dense2` is created but `outputs` below is computed from `normalized2`,
# so this layer is not part of the model graph - confirm whether it can be dropped.
dense2 = TimeDistributed(TimeDistributed(Dense(num_keys, activation="sigmoid")))(normalized2)

# Sigmoid keeps the per-key probabilities independent of each other, while softmax does not.
outputs = TimeDistributed(Dense(num_keys, activation="sigmoid"))(normalized2)

model = Model(inputs=inputs, outputs=outputs)

# Candidate optimizers; `adam` is the one handed to model.compile in the next cell.
rmsprop = RMSprop(lr=0.001)
adagrad = Adagrad(lr=0.001)
# Author's observation: training ends up at a point where the gradients and the
# denominator become really small, and then the loss explodes.
adam = Adam(lr=0.001, amsgrad=True)
adadelta = Adadelta(lr=1.0)

In [None]:
# Each output node should be penalized independently, so a binary loss is used and the
# network output is modelled as independent Bernoulli distributions, one per label.

# NOTE(review): categorical_accuracy compares the arg-max over keys; for independent sigmoid
# outputs binary_accuracy may be the metric that was intended - confirm.
model.compile(loss='binary_crossentropy',
              optimizer=adam,  # consider swapping this one for the others
              metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()

tensorboard = TensorBoard(log_dir="./logs/{}".format(time()))
# NOTE(review): early_stop is not in the callbacks list passed to fit() below, and it monitors
# val_loss although fit() receives no validation data - confirm how it should be used.
early_stop = EarlyStopping(monitor="val_loss", min_delta=0, patience=3, verbose=0, mode="auto")


# The LSTM layers are stateful, so the sample order has to be kept from batch to batch:
# fit() shuffles by default, hence shuffle=False.
model.fit(xs, ys, batch_size=b_size, epochs=50, shuffle=False, callbacks=[tensorboard])

In [None]:
# Predict on the training windows with the batch size the model input was declared with.
a = model.predict(xs, batch_size=b_size, verbose=True)
print(model.layers)
# Debug aid kept from the original cell:
#print(model.layers[1].states[0])


In [None]:
# Quick visual checks of the predictions `a`, which are indexed like xs:
# (batch, song, time_step, key).
# TODO: the songs/entries inspected here are still ad hoc picks.

# Histogram of the largest activation inside each first-axis entry.
maxes = [np.max(c) for c in a]
plt.hist(maxes)
plt.show()

# Histogram of every activation at the last time step. The slice is 3-D and
# plt.hist accepts at most 2-D input, so it is flattened first.
plt.hist(a[:, :, -1].ravel())
plt.show()

b = np.max(a[1][-1])

# For the first few entries: activation curves of the last song, then the piano roll of
# the prediction and of the matching input for the first song. The index is bounded by
# len(a); the previous hard-coded a[100] / a[200] lie outside the 84 entries that the
# shape printout for xs reports.
for chunk in range(min(3, len(a))):
    plt.plot(a[chunk][-1])
    plt.show()
    prep.visualize_piano_roll(a[chunk][0].T, fs=1)
    prep.visualize_piano_roll(xs[chunk][0].T, fs=1)

In [None]:
def make_song_from_predict(model, initial_data, limit):
    """Generate time steps by repeatedly feeding the model its own thresholded output.

    Each round wraps the current window in a batch of one, calls
    ``model.predict`` and binarises the result (values above 0.5 become 1,
    everything else 0). The last row of that result is appended to the song and
    pushed onto the end of the window while the oldest row is dropped.
    Generation stops once ``limit`` steps exist or right after a step whose
    entries are all 1; that all-ones step is still included in the result.

    Args:
        model: object with a ``predict(batch)`` method.
        initial_data: seed window; its first axis is the one that slides.
        limit: maximum number of generated steps.

    Returns:
        ``np.array`` of the generated steps, in order.
    """
    generated = []
    window = initial_data
    finished = False
    while not finished and len(generated) < limit:
        probabilities = model.predict(np.array([window]))[0]
        # Activations are weak, so snap them to hard 0/1 values.
        newest_step = np.where(probabilities > 0.5, 1.0, 0.0)[-1]
        # An all-ones step ends generation.
        finished = np.sum(newest_step) == len(newest_step)
        generated.append(newest_step)
        # Slide the window: drop the oldest row, append the new one.
        window = np.concatenate((window[1:], [newest_step]), axis=0)
    return np.array(generated)

# Generate up to sequence_length steps from one seed window, then show the generated
# roll next to the seed and play the result.
# NOTE(review): `test_xs` is not assigned in any cell visible here - confirm where it comes from.
initial_step = 1250
seed_window = test_xs[initial_step]
song = make_song_from_predict(model, seed_window, sequence_length)
song_roll = song.T
prep.visualize_piano_roll(song_roll, fs=1)
prep.visualize_piano_roll(seed_window.T, fs=1)
prep.embed_play_v1(song_roll, fs=1)


In [None]:
# Show and play the window that directly follows the seed window, for comparison.
next_roll = test_xs[initial_step + 1].T
prep.visualize_piano_roll(next_roll, fs=1)
prep.embed_play_v1(next_roll, fs=1)