In [3]:
import os
import functools
import pickle
import sys
from typing import Iterator, Generator

# import music21
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
%matplotlib inline

In [4]:
# Load data
# score_names holds one entry per score; its length determines how many
# ./data/<index>.npy files are read in the next cell.
score_names = np.load('./data/score_names.npy')

In [5]:
# One array per entry of score_names, read from ./data/<position>.npy.
scores = [
    np.load('./data/{}.npy'.format(score_idx))
    for score_idx in tqdm(range(len(score_names)))
]

100%|██████████| 5930/5930 [00:09<00:00, 637.33it/s]


In [6]:
# Pitch range over the whole corpus. Only strictly positive entries count towards
# the minimum (make_targets treats everything else as a rest).
max_pitch = np.max([score_arr.max() for score_arr in scores])
min_pitch = np.min([score_arr[score_arr > 0].min() for score_arr in scores])
# Number of pitch values from min_pitch to max_pitch, both ends included.
n_notes = int(max_pitch - min_pitch) + 1

In [283]:
def make_targets(score, voice):
    """
    Build the one-hot targets for a single voice of a score.

    `score` is indexed as score[voice][timestep]. The result has one row per
    timestep and `n_notes + 1` columns: column 0 marks a rest (any value that
    is not > 0) and column `int(note - min_pitch) + 1` marks a sounding note.
    """
    n_timesteps = score.shape[1]
    targets = np.zeros((n_timesteps, n_notes + 1))  # all possible notes plus rest
    for step, pitch in enumerate(score[voice]):
        # rests live in column 0, pitches are shifted up by one to make room
        column = int(pitch - min_pitch) + 1 if pitch > 0 else 0
        targets[step, column] = 1
    return targets

In [284]:
def make_targets_meta(meta, voice):
    """
    Build the metadata targets (rest and slur flags) for one voice.

    `meta` is indexed as meta[voice, timestep, feature]. The result has one row
    per timestep and two columns, filled from the `idx_rest` and `idx_slur`
    features of the requested voice.

    NOTE(review): `idx_rest` and `idx_slur` are module-level names defined
    outside this cell; they are used to index both `meta` and the two-column
    output, so they presumably have to be 0 and 1 -- confirm.
    """
    n_timesteps = meta.shape[1]
    targets = np.zeros((n_timesteps, 2))  # 2 meta features: slur, rest
    for step in range(n_timesteps):
        for column in (idx_rest, idx_slur):
            targets[step, column] = meta[voice, step, column]
    return targets

In [285]:
def make_padded(score, window_size, max_voices=6):
    """
    Zero-pad a score in time and (optionally) in the number of voices.

    Parameters
    ----------
    score : 2-D array indexed as score[voice, timestep].
    window_size : int >= 1. `window_size - 1` zero columns are prepended so
        that the first sliding window ends on the first real timestep.
    max_voices : int or None. When given, zero rows are appended until the
        score has exactly `max_voices` voices. None leaves the voices as-is.

    Returns
    -------
    Float array of shape (n_voices, score.shape[1] + window_size - 1), where
    n_voices is `max_voices` (or score.shape[0] when `max_voices` is None).

    Raises
    ------
    ValueError if `window_size` is smaller than 1 or the score already has
    more voices than `max_voices`.
    """
    padding_size = window_size - 1
    if padding_size < 0:
        raise ValueError(
            'window_size must be at least 1, got {}'.format(window_size)
        )

    if max_voices is not None:
        voices_padding_size = max_voices - score.shape[0]
        if voices_padding_size < 0:
            # Previously surfaced as numpy's "negative dimensions are not allowed".
            raise ValueError(
                'score has {} voices but max_voices is {}'.format(
                    score.shape[0], max_voices
                )
            )
        # pad the voices
        voices_padding = np.zeros((voices_padding_size, score.shape[1]))
        score = np.vstack((score, voices_padding))

    # pad the beginning of the sequence (score already has its final voice count)
    score_padding = np.zeros((score.shape[0], padding_size))
    return np.hstack((score_padding, score))


In [286]:
def make_padded_meta(meta, window_size):
    """
    Prepend `window_size - 1` all-zero timesteps to a metadata array.

    `meta` is 3-D and padded along axis 1; axes 0 and 2 keep their sizes.
    """
    leading_zeros = np.zeros((meta.shape[0], window_size - 1, meta.shape[2]))
    return np.concatenate((leading_zeros, meta), axis=1)

In [287]:
def make_input_sequence(score, voice, sequence_steps=16, conv_window_size=32):
    """
    Build the model inputs for predicting one voice of a score.

    For every timestep of `score` a window of the preceding
    `sequence_steps * conv_window_size` timesteps (zero-padded at the start,
    voices padded by `make_padded`) is extracted. The entry of `voice` at the
    last position of each window is zeroed, since that is the value to predict.

    Returns an array of shape
    (score.shape[1], sequence_steps, conv_window_size, n_padded_voices, 1),
    divided by the global `max_pitch`.
    """
    window_len = sequence_steps * conv_window_size
    padded = make_padded(score, window_len)

    # One window per original timestep: row i holds padded indices i .. i + window_len - 1
    n_windows = padded.shape[1] - (window_len - 1)
    starts = np.arange(n_windows).reshape(-1, 1)
    offsets = np.arange(window_len).reshape(1, -1)
    windows = padded.T[starts + offsets][..., np.newaxis]

    # Now, mask out the target values
    windows[:, -1, voice, :] = 0

    # Split every window into consecutive chunks of conv_window_size timesteps
    chunked = windows.reshape(
        (score.shape[1], -1, conv_window_size, padded.shape[0], 1)
    )
    return chunked / max_pitch

In [288]:
def make_input_sequence_meta(meta, voice, sequence_steps=16, conv_window_size=32):
    """
    Build sliding windows over the metadata of a score for one target voice.

    `meta` is indexed as meta[voice, timestep, feature]. For every timestep a
    window of the preceding `sequence_steps * conv_window_size` timesteps is
    extracted (zero-padded at the start by `make_padded_meta`), and the
    metadata of `voice` at the last position of each window is zeroed.

    Returns an array of shape
    (meta.shape[1], sequence_steps * conv_window_size, meta.shape[0], meta.shape[2]).
    Unlike `make_input_sequence`, the windows are neither reshaped into
    sequence steps nor rescaled.
    """
    window_size = sequence_steps * conv_window_size
    padded_meta = make_padded_meta(meta, window_size)

    padding_size = window_size - 1
    # The number of windows comes from the padded *metadata* (time is axis 1).
    # This used to read `padded_score`, which is not defined in this function.
    n_windows = padded_meta.shape[1] - padding_size
    indexer = np.arange(window_size)[None, :] + np.arange(n_windows)[:, None]
    # Move time to the front so the window indexer selects timesteps
    meta_sequence = np.swapaxes(padded_meta, 0, 1)[indexer, :, :]

    # Now, mask out the target values
    meta_sequence[:, -1, voice, :] = 0

    return meta_sequence

In [289]:
# Seed numpy's global random state before the train/validation split below.
np.random.seed(25)

In [290]:
# Hold out 10% of the scores for validation. The seed is passed explicitly so the
# split is the same no matter which cells ran (or were re-run) before this one.
scores_train, scores_valid = train_test_split(scores, test_size=0.1, random_state=25)

In [291]:
# Peek at one validation score: the shape is (voices, timesteps).
scores_valid[0].shape

(5, 880)

In [292]:
def cycle(iterable):
    """
    Yield the items of `iterable` over and over again.

    `iterable` is re-iterated from scratch on every pass and nothing is cached,
    so it has to be restartable: a list, an object whose `__iter__` returns a
    fresh iterator, or a zero-argument callable returning a fresh iterable for
    each pass.

    An empty input ends the generator immediately. A one-shot iterator (e.g. a
    generator expression) is yielded once; asking for more raises ValueError.
    Both cases used to spin forever without yielding.
    """
    # Plain functions/lambdas are treated as factories; anything iterable is used as-is.
    is_factory = callable(iterable) and not hasattr(iterable, '__iter__')
    completed_passes = 0
    while True:
        source = iterable() if is_factory else iterable
        produced = False
        for item in source:
            produced = True
            yield item
        if not produced:
            if completed_passes == 0:
                return  # nothing to cycle over
            raise ValueError(
                'cycle() got no items on a repeat pass; one-shot iterators such as '
                'generators cannot be restarted - pass a list, a re-iterable object '
                'or a zero-argument factory instead'
            )
        completed_passes += 1

In [316]:
# Scratch check: build a cycling generator over a small list.
it = [1,2,3]
c = cycle(it)

In [293]:
# hyperparameters
SEQUENCE_STEPS = 4  # number of conv windows fed to the LSTM per example
# Timesteps per conv window. The generators rely on make_input_sequence's default
# conv_window_size (32), so this has to stay in sync with it.
window_size = 32
# Number of distinct pitches. The softmax has n_features + 1 outputs, which must
# equal the n_notes + 1 columns produced by make_targets, so derive it from the
# data instead of hard-coding it (it was 58).
n_features = n_notes

In [319]:
def _training_examples():
    """
    Endlessly yield one (inputs, targets) pair per voice of every training score.

    Written as a generator function that restarts its own loop: a generator
    expression is exhausted after a single pass, so wrapping one in `cycle`
    stopped producing batches after the first epoch.
    """
    while True:
        for score in scores_train:
            for voice in range(score.shape[0]):
                yield (
                    make_input_sequence(score, voice, sequence_steps=SEQUENCE_STEPS),
                    make_targets(score, voice),
                )


train_gen = _training_examples()

In [295]:
# Scratch: empty target matrix with 880 rows and 59 columns.
y = np.zeros((880, 59))

In [296]:
# Inspect the constant that make_input_sequence divides its output by.
max_pitch

88.0

In [297]:
# Scratch: pitch offsets of voice 0 relative to min_pitch, clipped at 0, as an integer column vector.
np.maximum(scores_valid[0][0] - min_pitch, 0).astype(int).reshape(-1, 1).shape

(880, 1)

In [298]:
# NOTE(review): this sets every *row* whose index equals a pitch offset to all ones;
# it does not one-hot encode each timestep (that would index y[timestep, offset]).
# The offsets also lack the +1 shift make_targets uses to reserve column 0 for rests.
y[np.maximum(scores_valid[0][0] - min_pitch, 0).astype(int), :] = 1

In [299]:
# The assignment above does not change the shape of y.
y.shape

(880, 59)

In [301]:
def _validation_examples():
    """
    Endlessly yield one (inputs, targets) pair per voice of every validation score.

    Written as a generator function that restarts its own loop: a generator
    expression is exhausted after a single pass, so wrapping one in `cycle`
    stopped producing batches once every validation voice had been seen.
    """
    while True:
        for score in scores_valid:
            for voice in range(score.shape[0]):
                yield (
                    make_input_sequence(score, voice, sequence_steps=SEQUENCE_STEPS),
                    make_targets(score, voice),
                )


valid_gen = _validation_examples()

In [302]:
# Input shape of one batch: (timesteps, sequence steps, conv window, padded voices, 1).
# Note that this consumes the first item of train_gen.
next(train_gen)[0].shape

(2160, 4, 32, 6, 1)

In [303]:
from keras.models import Sequential, Model
from keras.layers import BatchNormalization, Conv2D, TimeDistributed, Input, Activation, Flatten, LSTM, CuDNNLSTM, ConvLSTM2D, Dense, Dropout, MaxPool2D, GlobalAveragePooling2D, GlobalAveragePooling3D
from keras.activations import relu
import keras.callbacks

In [304]:
# # model 0
# model=Sequential()
# model.add(ConvLSTM2D(32, 3, padding='same', activation='relu', return_sequences=True, input_shape=(None, window_size, None, 1)))
# model.add(ConvLSTM2D(64, 3, strides=2, padding='same', activation='relu'))
# # model.add(Dropout(0.2))
# model.add(GlobalAveragePooling2D())
# model.add(Dense(100, activation='relu'))
# model.add(Dense(n_features, activation='softmax'))
# model.summary()


#### MODEL 1

# model = Sequential()
# model.add(TimeDistributed(Conv2D(32, 3, padding='same', activation='relu'), input_shape=(3, window_size, 6, 1)))
# model.add(TimeDistributed(Conv2D(32, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(Conv2D(64, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(Conv2D(64, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(Dropout(0.1)))
# model.add(TimeDistributed(GlobalAveragePooling2D()))
# model.add(LSTM(128, activation='relu'))
# model.add(Dense(200, activation='relu'))
# model.add(Dropout(0.1))

# model.add(Dense(n_features, activation='softmax'))

### MODEL 2
# model = Sequential()
# model.add(TimeDistributed(Conv2D(32, 3, padding='same', activation='relu'), input_shape=(3, window_size, 6, 1)))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(Conv2D(32, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(Conv2D(64, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(Conv2D(128, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(Dropout(0.1)))
# model.add(TimeDistributed(GlobalAveragePooling2D()))
# model.add(LSTM(128))
# model.add(Activation('relu'))
# model.add(Dense(200, activation='relu'))
# model.add(Dropout(0.1))

# model.add(Dense(n_features, activation='softmax'))


#### MODEL 3

# model = Sequential()
# model.add(TimeDistributed(Conv2D(32, 3, padding='same'), input_shape=(3, window_size, 6, 1)))
# model.add(TimeDistributed(BatchNormalization()))
# model.add(TimeDistributed(Activation('relu')))
# model.add(TimeDistributed(Conv2D(32, 3, padding='same')))
# model.add(TimeDistributed(BatchNormalization()))
# model.add(TimeDistributed(Activation('relu')))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(Conv2D(64, 3, padding='same')))
# model.add(TimeDistributed(BatchNormalization()))
# model.add(TimeDistributed(Activation('relu')))
# model.add(TimeDistributed(Conv2D(64, 3, padding='same')))
# model.add(TimeDistributed(BatchNormalization()))
# model.add(TimeDistributed(Activation('relu')))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(GlobalAveragePooling2D()))
# model.add(Dropout(0.1))
# model.add(LSTM(128, activation='relu'))
# model.add(Dense(200, activation='relu'))

# model.add(Dense(n_features, activation='softmax'))


# MODEL 4
# model=Sequential()
# model.add(ConvLSTM2D(32, 3, padding='same', activation='relu', return_sequences=True, input_shape=(3, window_size, None, 1)))
# model.add(ConvLSTM2D(64, 3, strides=2, padding='same', activation='relu'))
# model.add(GlobalAveragePooling2D())
# model.add(Dense(200, activation='relu'))
# model.add(Dropout(0.1))
# model.add(Dense(n_features, activation='softmax'))
# model.summary()

### MODEL 5
# A small CNN is applied to each of the SEQUENCE_STEPS windows, its pooled
# features are fed through two stacked LSTMs, and a softmax over
# n_features + 1 classes produces the prediction.
model = Sequential([
    TimeDistributed(
        Conv2D(32, 3, padding='same', activation='relu'),
        input_shape=(SEQUENCE_STEPS, window_size, 6, 1),
    ),
    TimeDistributed(Conv2D(32, 3, padding='same', activation='relu')),
    TimeDistributed(MaxPool2D(2, 2, padding='same')),
    # TimeDistributed(Conv2D(64, 3, padding='same', activation='relu')),
    TimeDistributed(Conv2D(64, 3, padding='same', activation='relu')),
    TimeDistributed(MaxPool2D(2, 2, padding='same')),
    TimeDistributed(Conv2D(128, 3, padding='same', activation='relu')),
    TimeDistributed(Dropout(0.1)),
    TimeDistributed(GlobalAveragePooling2D()),
    LSTM(128, return_sequences=True, unroll=True, dropout=0.1),
    LSTM(128, dropout=0.1, unroll=True),
    # Dense(200, activation='relu'),
    Dense(n_features + 1, activation='softmax'),
])


model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_80 (TimeDis (None, 4, 32, 6, 32)      320       
_________________________________________________________________
time_distributed_81 (TimeDis (None, 4, 32, 6, 32)      9248      
_________________________________________________________________
time_distributed_82 (TimeDis (None, 4, 16, 3, 32)      0         
_________________________________________________________________
time_distributed_83 (TimeDis (None, 4, 16, 3, 64)      18496     
_________________________________________________________________
time_distributed_84 (TimeDis (None, 4, 8, 2, 64)       0         
_________________________________________________________________
time_distributed_85 (TimeDis (None, 4, 8, 2, 128)      73856     
_________________________________________________________________
time_distributed_86 (TimeDis (None, 4, 8, 2, 128)      0         
__________

In [305]:
# Top-3 accuracy metric: Keras' top-k categorical accuracy with k fixed to 3.
top3_acc = functools.partial(keras.metrics.top_k_categorical_accuracy, k=3)

# A partial object has no __name__ of its own; this is the label shown in the training log.
top3_acc.__name__ = 'top3_acc'



In [306]:
# Adam optimiser, categorical cross-entropy over the one-hot targets, reporting
# plain accuracy and the top-3 accuracy defined above.
model.compile('adam', loss='categorical_crossentropy', metrics=['accuracy', top3_acc])

In [307]:
# Make sure the checkpoint directory exists before training starts, so the first
# save (which only happens after a full epoch) cannot fail on a missing directory.
os.makedirs('./models', exist_ok=True)
# Keep only the best model seen so far (save_best_only) at a fixed path.
checkpointer = keras.callbacks.ModelCheckpoint(filepath='./models/model5.hdf5', verbose=1, save_best_only=True)

In [308]:
# One step per (score, voice) pair, i.e. one epoch visits every training voice once.
# Built-in sum: passing a generator to np.sum is deprecated.
steps_per_epoch = sum(score.shape[0] for score in scores_train)
steps_per_epoch

25420

In [309]:
# One step per (score, voice) pair of the validation set.
# Built-in sum: passing a generator to np.sum is deprecated.
validation_steps = sum(score.shape[0] for score in scores_valid)
validation_steps

2835

In [310]:
# Resume from the checkpoint path used by the ModelCheckpoint callback, if it can be read.
try:
    model.load_weights('./models/model5.hdf5')
except OSError:
    # No readable checkpoint: keep the freshly initialised weights.
    print('no weights found')

no weights found


In [324]:
# Train for 5 epochs; each step consumes one (score, voice) item from train_gen.
# NOTE(review): validation_steps is hard-coded to 500 rather than the
# `validation_steps` computed above, so each epoch validates on only part of
# valid_gen (and on a different part each time, since the generator keeps
# advancing) -- confirm this is intended, as the checkpoint compares across epochs.
history = model.fit_generator(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=5,
    validation_data=valid_gen,
    validation_steps=500,
    callbacks=[checkpointer]
)

Epoch 1/5
  427/25420 [..............................] - ETA: 1:18:38 - loss: 1.1695 - acc: 0.6444 - top3_acc: 0.866

KeyboardInterrupt: 

In [None]:
# Evaluate on `validation_steps` items of valid_gen (one item per validation voice).
# valid_gen continues from wherever the training run left it.
model.evaluate_generator(valid_gen, steps=validation_steps)

In [315]:
import keras


In [None]:
keras.