In [2]:
%matplotlib inline


from keras.models import Model
from keras import backend as K
from keras.callbacks import TensorBoard, EarlyStopping
from keras.layers import LSTM, Dropout, Dense, BatchNormalization, Activation, Input, TimeDistributed
from keras.regularizers import l2
from keras.layers.advanced_activations import LeakyReLU, ELU, PReLU
from keras.optimizers import RMSprop, Adam, Adadelta, Adagrad
from keras.utils import to_categorical
import numpy as np
from time import time
from matplotlib import pyplot as plt

from assignment.helpers import datapreparation as prep



fs1_dirpath = "./assignment/datasets/training/piano_roll_fs1"
fs5_dirpath = "./assignment/datasets/training/piano_roll_fs5"


# Load the piano rolls and the name attached to each of them.
datasets = prep.load_all_dataset(fs5_dirpath)
dataset_names = prep.load_all_dataset_names(fs5_dirpath)

# Sort the distinct names so the same name gets the same integer (and thus the
# same one-hot column) on every run. Iterating a plain set of strings follows
# hash order, which changes from one interpreter session to the next, so a
# model saved in one session would see permuted composer inputs in another.
unique_names = sorted(set(dataset_names))
name_to_int = {name: index for index, name in enumerate(unique_names)}
int_to_name = dict(enumerate(unique_names))
dataset_names = to_categorical([name_to_int[name] for name in dataset_names])  # one-hot encode the composers
datasets = [dataset[:, 1:] for dataset in datasets]  # Remove the headers (first column of every roll)

# Setting initial parameters
dataset_id_names = dict(zip(np.arange(len(dataset_names)), dataset_names))  # dataset index -> one-hot row
longest_song = max(dataset.shape[1] for dataset in datasets)  # in timesteps (second axis of a roll)
sequence_length = 20  # timesteps per training window
length = longest_song//sequence_length + 1
num_keys = len(datasets[0])  # rows of a roll
parts_per_song = int(longest_song/sequence_length)
composer_encoding_len = len(dataset_names[0])  # width of the one-hot composer vector (4 composers)

# Build several shifted copies of the dataset so that the "gaps" between two consecutive sequences are covered as well:
# every song is cut into subsequences starting at different offsets ([0:20], [5:25], [10:30], ...) to retain information.
# A stateful LSTM could not be made to work, so this keeps as much context as possible across subsequence boundaries.
# It also acts as dataset augmentation (a form of regularization) because it increases the size of the dataset.

def transpose_and_label_more(dataset_names, datasets, num_keys, seq_len=None, offset_step=5):
    """Cut every song into shuffled (input, target, composer) training windows.

    Each roll in ``datasets`` is transposed to (timesteps, keys) and sliced into
    windows of ``seq_len`` timesteps. The slicing is repeated for every start
    offset in ``range(0, seq_len, offset_step)``. The target of a window is the
    same window shifted one timestep ahead.

    For the window whose index equals the last full window of the song
    (``len(song) // seq_len - 1``) the final target frame is replaced by an
    all-ones frame of ``num_keys`` entries, which serves as the EOF marker.

    Args:
        dataset_names: per-song label (here the one-hot composer row), aligned
            with ``datasets``.
        datasets: sequence of 2-D arrays laid out as (keys, timesteps).
        num_keys: width of the all-ones EOF frame.
        seq_len: window length in timesteps; ``None`` falls back to the
            notebook-level ``sequence_length``.
        offset_step: distance between consecutive start offsets.

    Returns:
        Tuple ``(xs, ys, composers)`` of arrays with one entry per window, in
        an order randomised by ``np.random.shuffle``.
    """
    if seq_len is None:
        seq_len = sequence_length  # notebook-level default window length

    samples = []
    # Pair songs and labels with zip instead of wrapping the pairs in np.array:
    # songs differ in length, and NumPy >= 1.24 refuses to build such a ragged array.
    for roll, composer in zip(datasets, dataset_names):
        song = roll.T
        last_full_window = len(song) // seq_len - 1
        for offset in range(0, seq_len, offset_step):
            # The offset is measured in timesteps, so it has to be removed from
            # the song length before counting windows. Subtracting it from the
            # window count dropped `offset` whole windows per pass.
            for i in range((len(song) - offset) // seq_len):
                start = offset + i * seq_len
                stop = start + seq_len
                x = song[start:stop]
                if i == last_full_window:  # Add the EOF marker if last seq of song
                    y = np.append(song[start + 1:stop], np.array([np.ones(num_keys)]), 0)
                else:
                    y = song[start + 1:stop + 1]
                samples.append((x, y, composer))

    np.random.shuffle(samples)
    xs = [sample[0] for sample in samples]
    ys = [sample[1] for sample in samples]
    composers = [sample[2] for sample in samples]
    return np.array(xs), np.array(ys), np.array(composers)

train_xs, train_ys, train_composers = transpose_and_label_more(dataset_names, datasets, num_keys)

# Hold out the last 20% of the windows for validation. The three arrays have one
# entry per window, so a single cut position serves all of them.
split_at = int(len(train_xs) * 0.8)
train_xs, test_xs = train_xs[:split_at], train_xs[split_at:]
train_ys, test_ys = train_ys[:split_at], train_ys[split_at:]
train_composers, test_composers = train_composers[:split_at], train_composers[split_at:]

Using TensorFlow backend.


In [3]:
# Composer-conditioned model: a small dense "specialist" network maps the one-hot
# composer vector to the initial hidden and cell state of the first LSTM layer.
specialist_input = Input(shape=(composer_encoding_len,))
# ReLU is used inside the network because a sigmoid saturates quickly here;
# sigmoid is still fine as the output activation.
x = Dense(64, activation="relu")(specialist_input)
x = Dropout(0.5)(x)
specialist_output_c = Dense(num_keys, activation="relu")(x)  # initial cell state (c)
specialist_output_h = Dense(num_keys, activation="relu")(x)  # initial hidden state (h)
#zero_input = Dropout(1)(specialist_output) # Hack to provide 0 as initial h.

# One window of the piano roll: sequence_length timesteps with num_keys values each.
inputs = Input(shape=(sequence_length, num_keys))

# Units = units per timestep LSTM block, i.e. the output dimensionality
# (num_keys, so every timestep yields one value per key).
lstm1 = LSTM(num_keys,
               activation='relu',
               return_sequences=True,
               name="lstm1")
# initial_state is ordered [h = previous output, c = memory]; ideally h would be None.
lstm1_outputs = lstm1(inputs, initial_state=[specialist_output_h, specialist_output_c])
normalized1 = BatchNormalization()(lstm1_outputs)
dense1 = Dense(num_keys, activation="sigmoid")(normalized1)

lstm2 = LSTM(num_keys,
               activation='relu',
               return_sequences=True)(dense1)
normalized2 = BatchNormalization()(lstm2)
# NOTE(review): dense2 is built but nothing below consumes it; `outputs` reads
# normalized2 directly. Confirm whether `outputs` was meant to take dense2.
dense2 = Dense(num_keys, activation="sigmoid")(normalized2)

# Sigmoid keeps the per-key probabilities independent of each other, while softmax does not!
outputs = TimeDistributed(Dense(num_keys, activation="sigmoid"))(normalized2)

model = Model([inputs, specialist_input], outputs)

adam = Adam(lr=0.001, amsgrad=True) 
# Why AMSGrad: with plain Adam, training reached a point where the gradients became
# really small, the denominator became really small, and the loss then exploded.
# v_t is based on the gradients at the current time step and on the previous v_t, so
# when both the gradient and v_t-1 are really small the update denominator
# (sqrt(v_t) + epsilon) gets so small that the update explodes.
# AMSGrad maintains the maximum of all v_t until the present time step and uses this
# maximum value for normalizing the running average of the gradient, instead of the
# current v_t as is done in regular Adam.

#model.save("./models/latest.h5f")

In [4]:
# Each output node (key) is penalised independently.
# Log loss for the multi-label case: sigmoid outputs + binary cross-entropy, because the
# per-key probabilities are to be treated as independent of each other.
# Combining sigmoid and cross-entropy also helps: the log counteracts the exp, which
# reduces saturation.
# NOTE(review): categorical_accuracy compares argmax positions, which is of limited
# meaning for multi-label targets. Confirm this metric is intended.
model.compile(loss='binary_crossentropy',
              optimizer=adam, # consider changing this one for others
              metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that appends a stray "None" to the output).
model.summary()

tensorboard = TensorBoard(log_dir="./logs/{}".format(time()))  # one log directory per run
early_stop = EarlyStopping(monitor="val_loss", min_delta=0, patience=5, verbose=0, mode="auto")

print(train_xs.shape, train_composers.shape, train_ys.shape)
model.fit([train_xs, train_composers], train_ys,
          epochs=50, # upper bound; early_stop ends training once val_loss stops improving
          batch_size=32,
          shuffle=True, # reshuffle every epoch (the windows were already shuffled once when they were built)
          callbacks=[tensorboard, early_stop],
          validation_data=([test_xs, test_composers], test_ys),
          )

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 4)            0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 64)           320         input_1[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 64)           0           dense_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 20, 128)      0                                            
__________________________________________________________________________________________________
dense_3 (D

InternalError: Blas GEMM launch failed : a.shape=(32, 4), b.shape=(4, 64), m=32, n=64, k=4
	 [[{{node dense_1/MatMul}} = MatMul[T=DT_FLOAT, _class=["loc:@training/Adam/gradients/dense_1/MatMul_grad/MatMul_1"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_input_1_0_1/_205, dense_1/kernel/read)]]
	 [[{{node metrics/categorical_accuracy/Mean/_275}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_4701_metrics/categorical_accuracy/Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

In [None]:
# Persist the trained model; the load_model cell further down reads this same path.
# The file name appears to encode the setup (no stateful LSTM, window 20, offset step 5,
# composer "specialist" input) - confirm and keep it in sync with the parameters above.
model.save("./models/nostate_20_offset_5_specialist.h5f")

In [None]:
# Predict every training window once and reuse the result for all plots below.
a = model.predict([train_xs, train_composers], verbose=True)

# Histogram of the strongest activation found in each predicted window.
maxes = [np.max(window) for window in a]
plt.hist(maxes)
plt.show()

# Histogram of the activations at the final timestep of every window.
plt.hist(a[:,-1])
plt.show()

b = np.max(a[1][-1])

# For a few sample windows: plot the final-timestep activations, then show the
# predicted roll next to the input roll.
# NOTE(review): the first group plots window 1 but visualises window 0, whereas the
# other groups use one index throughout. Kept as in the original; confirm intent.
for curve_idx, roll_idx in ((1, 0), (100, 100), (200, 200)):
    plt.plot(a[curve_idx][-1])
    plt.show()
    prep.visualize_piano_roll(a[roll_idx].T, fs=5)
    prep.visualize_piano_roll(train_xs[roll_idx].T, fs=5)

In [None]:
def make_song_from_predict(model, initial_data, composer, limit, threshold=0.4):
    """Generate a song frame by frame by feeding predictions back into the model.

    Starting from ``initial_data``, the model is asked for the next window. The
    final timestep of that prediction is binarised and appended to the song, and
    the input window is slid forward by one step with the new frame at its end.

    Generation stops once ``limit`` frames were produced or once a frame has
    every key active (the all-ones EOF marker used in the training targets).
    That EOF frame is still appended to the returned song.

    Args:
        model: object whose ``predict([windows, composer])`` returns an array
            indexed as (batch, timestep, key).
        initial_data: seed window, indexed as (timestep, key).
        composer: composer encoding passed through to ``model.predict`` unchanged.
        limit: maximum number of frames to generate.
        threshold: a key counts as active when its predicted value is strictly
            greater than this.

    Returns:
        Array with one binarised (0.0 / 1.0) frame per generated timestep; an
        empty array when no frame was generated.
    """
    song = []
    window = initial_data
    keep_producing = True
    while keep_producing and len(song) < limit:
        predictions = model.predict([np.array([window]), composer])[0]
        # Only the final timestep is new information, so only that one is binarised.
        last_output = np.where(predictions[-1] > threshold, 1.0, 0.0)
        # Every key active at once is the EOF marker.
        keep_producing = np.sum(last_output) != len(last_output)
        song.append(last_output)
        # Drop the oldest frame and append the new one to form the next input.
        window = np.append(window[1:], [last_output], 0)
    return np.array(song)

initial_step = 250  # index of the training window used as the seed
steps = 10          # number of windows' worth of frames to generate at most

# Generate one song per composer from the same seed window; row k of the identity
# matrix is the one-hot vector selecting composer k.
composer_one_hots = np.eye(4)
song1, song2, song3, song4 = [
    make_song_from_predict(model, train_xs[initial_step], composer_one_hots[k:k + 1], sequence_length*steps)
    for k in range(4)
]
prep.embed_play_v1(song1.T, fs=5)


In [None]:
# Song generated with the composer one-hot [0, 1, 0, 0]; transposed back to (keys, timesteps).
# Presumably embed_play_v1 embeds an audio player - confirm in the prep helper.
prep.embed_play_v1(song2.T, fs=5)

In [None]:
# Song generated with the composer one-hot [0, 0, 1, 0]; transposed back to (keys, timesteps).
prep.embed_play_v1(song3.T, fs=5)

In [None]:
# Song generated with the composer one-hot [0, 0, 0, 1]; transposed back to (keys, timesteps).
prep.embed_play_v1(song4.T, fs=5)

In [None]:
# Reference material to compare the generated songs against: the `steps` training
# windows that follow the seed window, joined along the time axis.
# NOTE(review): the windows are shuffled when they are built (np.random.shuffle in
# transpose_and_label_more), so neighbouring indices are not neighbouring parts of
# one song. Confirm that this comparison is still meaningful.
reference_windows = [train_xs[initial_step+1]]

# One-hot composer vector of the seed window, followed by the composer's name.
seed_composer = train_composers[initial_step]
print(seed_composer)
print(int_to_name[list(seed_composer).index(1)])

reference_windows.extend(train_xs[initial_step+1+i] for i in range(1, steps))
actual = np.concatenate(reference_windows, axis=0).T  # back to (keys, timesteps)

for generated in (song1, song2, song3, song4):
    prep.visualize_piano_roll(generated.T, fs=5)
prep.visualize_piano_roll(actual, fs=5)
prep.embed_play_v1(actual, fs=5)

In [None]:
from keras.models import load_model
# Restore the saved model from disk, list its layers and evaluate it on the training windows.
model = load_model("./models/nostate_20_offset_5_specialist.h5f")
for layer in model.layers:
    print(layer)
# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that appends a stray "None" to the output).
model.summary()
# evaluate() yields the loss followed by the compiled metrics.
a = model.evaluate([train_xs, train_composers], train_ys,
          batch_size=32)
print(a)