In [34]:
import os
import itertools
import pickle
import sys
from typing import Iterator, Generator

# import music21
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
%matplotlib inline

In [6]:
# Load data
score_names = np.load('./data/score_names.npy')

In [7]:
scores = [
    (np.load('./data/{}.npy'.format(i)))
    for i in tqdm(range(len(score_names)))
]

100%|██████████| 5930/5930 [00:01<00:00, 3059.49it/s]


In [8]:
max_pitch = np.max([np.max(t) for t in scores])
min_pitch = np.min([np.min(t[t > 0]) for t in scores])
n_notes = int(max_pitch - min_pitch) + 1

In [9]:
def make_targets(score, voice):
    """
    Make our target variables. It is the a stream of notes and one of metadata
    for a specified voice in the score.
    """
    n_output_features = n_notes
    y = np.zeros((score.shape[1], n_output_features))  # shape: n timesteps X m features
    for i, note in enumerate(score[voice]):
        if note > 0:
            note_idx = int(note - min_pitch)
            y[i, note_idx] = 1
    return y

In [10]:
def make_targets_meta(meta, voice):
    """
    Make our target variables. It is the a stream of notes and one of metadata
    for a specified voice in the score.
    """
    y_meta = np.zeros((meta.shape[1], 2)) #  add 2 meta features: slur, rest
    for i in range(meta.shape[1]):
        y_meta[i, idx_rest] = meta[voice, i, idx_rest]
        y_meta[i, idx_slur] = meta[voice, i, idx_slur]
    return y_meta

In [71]:
def make_padded(score, window_size):
    # pad the beginning of the sequence so that our first window ends on the first timestep
    # also padd the voices
    padding_size = window_size - 1
    max_voices = 6
    voices_padding_size = max_voices - score.shape[0]
    voices_padding = np.zeros((voices_padding_size, score.shape[1]))
    voices_padded = np.vstack((score, voices_padding))
    score_padding = np.zeros((max_voices, padding_size))
    return np.hstack((score_padding, voices_padded))


In [12]:
def make_padded_meta(meta, window_size):
    padding_size = window_size - 1
    meta_padding = np.zeros((meta.shape[0], padding_size, meta.shape[2]))
    return np.hstack((meta_padding, meta))

In [85]:
def make_input_sequence(score, voice, window_size=32):
    """
    Make an input sequence for a particular voice
    """
    padded_score = make_padded(score, window_size)
    padding_size = window_size - 1
    indexer = np.arange(window_size)[None, :] + np.arange(padded_score.shape[1] - padding_size)[:, None]
    score_sequence = padded_score.T[indexer, :, None]

    # Now, mask out the target values
    score_sequence[:, -1, voice, :] = 0
    
    return score_sequence.reshape((score.shape[1], 1, window_size, padded_score.shape[0], 1)) / max_pitch

In [74]:
def make_input_sequence_meta(meta, voice, window_size=32):
    padded_meta = make_padded_meta(meta, window_size)
    
    padding_size = window_size - 1
    indexer = np.arange(window_size)[None, :] + np.arange(padded_score.shape[1] - padding_size)[:, None]
    meta_sequence = np.swapaxes(padded_meta, 0, 1)[indexer, :, :]
    
    # Now, mask out the target values
    meta_sequence[:, -1, voice, :] = 0
    
    return meta_sequence

In [75]:
np.random.seed(1)

In [76]:
scores_train, scores_valid = train_test_split(scores, test_size=0.1)

In [77]:
scores_valid[0].shape

(4, 208)

In [78]:
make_input_sequence(scores_valid[0], 0).shape

(208, 1, 32, 6, 1)

In [79]:
train_gen = itertools.cycle(
    (make_input_sequence(score, voice), make_targets(score, voice))
    for score in scores_train
    for voice in range(score.shape[0])
)

In [80]:
valid_gen = itertools.cycle(
    (make_input_sequence(score, voice), make_targets(score, voice))
    for score in scores_valid
    for voice in range(score.shape[0])
)

In [82]:
next(train_gen)[0].shape

(432, 1, 32, 6, 1)

In [144]:
from keras.models import Sequential, Model
from keras.layers import BatchNormalization, Conv2D, TimeDistributed, Input, Activation, Flatten, LSTM, ConvLSTM2D, Dense, Dropout, MaxPool2D, GlobalAveragePooling2D, GlobalAveragePooling3D
from keras.activations import relu
import keras.callbacks

In [91]:
window_size = 32
n_features = 58

In [151]:
# model=Sequential()
# model.add(ConvLSTM2D(32, 3, padding='same', activation='relu', return_sequences=True, input_shape=(1, window_size, None, 1)))
# model.add(ConvLSTM2D(64, 3, strides=2, padding='same', activation='relu'))
# # model.add(Dropout(0.2))
# model.add(GlobalAveragePooling2D())
# model.add(Dense(100, activation='relu'))
# model.add(Dense(n_features, activation='softmax'))
# model.summary()


#### MODEL 1

# model = Sequential()
# model.add(TimeDistributed(Conv2D(32, 3, padding='same', activation='relu'), input_shape=(1, window_size, 6, 1)))
# model.add(TimeDistributed(Conv2D(32, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(Conv2D(64, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(Conv2D(64, 3, padding='same', activation='relu')))
# model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
# model.add(TimeDistributed(Dropout(0.1)))
# model.add(TimeDistributed(GlobalAveragePooling2D()))
# model.add(LSTM(128, activation='relu'))
# model.add(Dense(200, activation='relu'))
# model.add(Dropout(0.1))

# model.add(Dense(n_features, activation='softmax'))

#### MODEL 2
model = Sequential()
model.add(TimeDistributed(Conv2D(32, 3, padding='same', activation='relu'), input_shape=(1, window_size, 6, 1)))
model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
model.add(TimeDistributed(Conv2D(32, 3, padding='same', activation='relu')))
model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
model.add(TimeDistributed(Conv2D(64, 3, padding='same', activation='relu')))
model.add(TimeDistributed(MaxPool2D(2, 2, padding='same')))
model.add(TimeDistributed(Conv2D(128, 3, padding='same', activation='relu')))
model.add(TimeDistributed(Dropout(0.1)))
model.add(TimeDistributed(GlobalAveragePooling2D()))
model.add(LSTM(128, activation='relu'))
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(n_features, activation='softmax'))


model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_218 (TimeDi (None, 1, 32, 6, 32)      320       
_________________________________________________________________
time_distributed_219 (TimeDi (None, 1, 16, 3, 32)      0         
_________________________________________________________________
time_distributed_220 (TimeDi (None, 1, 16, 3, 32)      9248      
_________________________________________________________________
time_distributed_221 (TimeDi (None, 1, 8, 2, 32)       0         
_________________________________________________________________
time_distributed_222 (TimeDi (None, 1, 8, 2, 64)       18496     
_________________________________________________________________
time_distributed_223 (TimeDi (None, 1, 4, 1, 64)       0         
_________________________________________________________________
time_distributed_224 (TimeDi (None, 1, 4, 1, 128)      73856     
__________

In [152]:
model.compile('adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [153]:
checkpointer = keras.callbacks.ModelCheckpoint(filepath='./models/model2.hdf5', verbose=1, save_best_only=True)

In [154]:
steps_per_epoch = np.sum(score.shape[0] for score in scores_train)

In [155]:
validation_steps = np.sum(score.shape[0] for score in scores_valid)

In [156]:
model.load_weights('./models/model2.hdf5')

OSError: Unable to open file (unable to open file: name = './models/model2.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [157]:
history = model.fit_generator(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=5,
    validation_data=valid_gen,
    validation_steps=validation_steps,
    callbacks=[checkpointer]
)

Epoch 1/5
Epoch 2/5
 2494/25419 [=>............................] - ETA: 9:20 - loss: 1.0918 - acc: 0.4839

KeyboardInterrupt: 