In [1]:
import os
import functools
import pickle
import sys
from typing import Iterator, Generator

# import music21
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
%matplotlib inline

In [2]:
# Load data: the array of score names. Its length determines how many
# per-score files the following cells read.
score_names = np.load('./data/score_names.npy')

In [3]:
# One array per score, read from ./data/<index>.npy in index order.
scores = [
    np.load('./data/{}.npy'.format(i))
    for i in tqdm(range(len(score_names)))
]

100%|██████████| 5930/5930 [00:48<00:00, 121.91it/s]


In [4]:
# One metadata array per score, read from ./data/<index>_meta.npy in the same
# index order as `scores`, so scores[i] and meta[i] describe the same piece.
meta = [
    np.load('./data/{}_meta.npy'.format(i))
    for i in tqdm(range(len(score_names)))
]

100%|██████████| 5930/5930 [09:24<00:00, 10.50it/s]


In [5]:
# Pitch range over the whole corpus. Entries that are not > 0 are left out of
# the minimum (make_targets encodes those as rests).
max_pitch = np.max([t.max() for t in scores])
min_pitch = np.min([t[t > 0].min() for t in scores])
# Number of pitch values between the two extremes, inclusive.
n_notes = int(max_pitch - min_pitch) + 1
# Positions along the last axis of each metadata array: the slur entry, and
# the first beat entry (make_input_beat takes everything from idx_beat onward).
idx_slur = 0
idx_beat = 2

In [6]:
# hyperparameters
SEQUENCE_STEPS = 4  # The number of windows to look at; 4 * 32 means we look at an 8-bar window
window_size = 32  # each measure is 16, so 32 is a two-measure window
# One slot per pitch plus one extra slot; same width as the one-hot rows
# built by make_targets.
n_features = n_notes + 1

In [7]:
def make_targets(score, voice):
    """
    Build the one-hot note targets for one voice of a score.

    Returns an array of shape (score.shape[1], n_notes + 1) holding exactly
    one 1 per row: column 0 marks a rest (any value that is not > 0) and
    column k + 1 marks the pitch lying k steps above `min_pitch`.
    """
    n_timesteps = score.shape[1]
    voice_notes = np.asarray(score[voice])
    sounding = voice_notes > 0

    # Rests stay on column 0; sounding notes are shifted past the rest column.
    columns = np.zeros(n_timesteps, dtype=int)
    columns[sounding] = (voice_notes[sounding] - min_pitch).astype(int) + 1

    y = np.zeros((n_timesteps, n_notes + 1))  # shape: n timesteps X m features
    y[np.arange(n_timesteps), columns] = 1
    return y

In [8]:
def make_targets_slur(meta, voice):
    """
    Build the slur targets for one voice.

    Selects, for every timestep of the requested voice, the `idx_slur`
    entry along the last axis of `meta`.
    """
    voice_meta = meta[voice]
    return voice_meta[:, idx_slur]

In [9]:
def make_padded(score, window_size, max_voices=None):
    """
    Left-pad a 2-D array with window_size - 1 zero columns, so that the first
    full window of length `window_size` ends on the first real timestep.

    `max_voices` is accepted but not used: the first axis is returned unchanged.
    """
    n_rows = score.shape[0]
    leading_zeros = np.zeros((n_rows, window_size - 1))
    return np.concatenate((leading_zeros, score), axis=1)


In [10]:
def make_input_beat(meta, voice):
    """
    Return the beat features of one voice: for every timestep, all entries of
    the last axis of `meta` from position `idx_beat` onward.
    """
    voice_meta = meta[voice]
    return voice_meta[:, idx_beat:]

In [28]:
def make_input_sequence(score, meta, voice, sequence_steps=16, conv_window_size=32):
    """
    Make the windowed model input for a particular voice.

    For every timestep t of `score`, builds a window holding the
    sequence_steps * conv_window_size most recent timesteps (zero-padded
    before the start of the piece) and ending on t itself. Every position in
    a window carries three channels per voice:

      0. the note value divided by the global `max_pitch`
      1. the slur entry (`idx_slur` along the last axis of `meta`)
      2. a flag that is 1 only for `voice` at the window's final timestep

    Channels 0 and 1 of `voice` are zeroed at the final timestep of each
    window, because that is the value being predicted.

    Returns an array of shape
    (score.shape[1], sequence_steps, conv_window_size, score.shape[0], 3).
    """
    window_size = sequence_steps * conv_window_size
    n_voices, n_timesteps = score.shape

    # Notes channel, scaled by the corpus-wide maximum pitch.
    padded_score = make_padded(score, window_size) / max_pitch

    # Slurs channel. Uses the shared idx_slur constant (as make_targets_slur
    # does) instead of a literal 0, so inputs and targets cannot drift apart.
    padded_meta = make_padded(meta[:, :, idx_slur], window_size)

    # A mask showing which voice to predict; filled in per window below.
    voice_mask = np.zeros(padded_meta.shape)

    # (voices, padded timesteps, 3 channels)
    stacked = np.stack((padded_score, padded_meta, voice_mask), axis=-1)

    # indexer[t] lists the padded columns of the window that ends on timestep t.
    indexer = np.arange(n_timesteps)[:, None] + np.arange(window_size)[None, :]

    # Fancy indexing copies, so the in-place writes below leave `stacked` alone.
    # Result: (timesteps, window_size, voices, 3).
    sequence = stacked.swapaxes(0, 1)[indexer, :, :]

    # Mask out the target values, then flag the voice that is to be predicted.
    sequence[:, -1, voice, :2] = 0
    sequence[:, -1, voice, 2] = 1

    return sequence.reshape((n_timesteps, -1, conv_window_size, n_voices, 3))

In [29]:
# Seed NumPy's global RNG. The split below is called without random_state and
# BatchSequence draws voices with np.random.randint, so both presumably depend
# on this global state (and on the cells being run in order).
np.random.seed(25)

In [30]:
# Hold out 10% of the scores for validation. `scores` and `meta` are split in
# the same call so that each score stays paired with its metadata.
scores_train, scores_valid, meta_train, meta_valid = train_test_split(scores, meta, test_size=0.1)

In [31]:
# Sanity check on one validation score, voice 0, with the default window
# settings. Axes: (timesteps, sequence_steps, conv_window_size, voices, channels).
make_input_sequence(scores_valid[0], meta_valid[0], 0).shape

(880, 16, 32, 5, 3)

In [32]:
from keras.utils import Sequence

In [33]:
class BatchSequence(Sequence):
    """
    Keras Sequence in which every batch is one (score, voice) pair.

    A batch contains all timesteps of that voice: the windowed note/slur input
    and the beat features as inputs, and the one-hot note targets and slur
    targets as outputs.

    With `subsample_voices=True` a single voice is drawn at random (via
    np.random.randint) for each score when the object is constructed;
    otherwise every voice of every score gets its own batch.
    """

    def __init__(self, scores, meta, subsample_voices=False):
        self.scores = scores
        self.meta = meta
        if not subsample_voices:
            # Every (score, voice) combination, grouped by score.
            self.indices = [
                (score_idx, voice_idx)
                for score_idx, score in enumerate(scores)
                for voice_idx in range(score.shape[0])
            ]
        else:
            # One randomly sampled voice per score, drawn in score order.
            self.indices = [
                (score_idx, np.random.randint(score.shape[0]))
                for score_idx, score in enumerate(scores)
            ]

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        score_idx, voice = self.indices[idx]
        score = self.scores[score_idx]
        meta = self.meta[score_idx]

        inputs = [
            make_input_sequence(
                score,
                meta,
                voice,
                sequence_steps=SEQUENCE_STEPS,
                conv_window_size=window_size,
            ),
            make_input_beat(meta, voice),
        ]
        targets = [
            make_targets(score, voice),
            make_targets_slur(meta, voice),
        ]
        return (inputs, targets)

In [17]:
# Validation batches: one randomly chosen voice per held-out score.
valid_sequence = BatchSequence(scores_valid, meta_valid, subsample_voices=True)

In [18]:
# Training batches: every voice of every training score.
train_sequence = BatchSequence(scores_train, meta_train)

In [19]:
from keras.models import Sequential, Model
from keras import layers
from keras.activations import relu
import keras.callbacks

In [44]:
# Notes branch: two ConvLSTM2D layers over the windowed input. Per sample the
# input is (sequence steps, window_size, voices, 3 channels); the steps and
# voices axes are left as None so their sizes may vary.
notes_model = Sequential()
notes_model.add(layers.ConvLSTM2D(32, 3, return_sequences=True, padding='same', input_shape=(None, window_size, None, 3)))
notes_model.add(layers.MaxPool3D(2, 2))
notes_model.add(layers.ConvLSTM2D(64, 3, padding='same'))
notes_model.add(layers.GlobalAveragePooling2D())

# Beat features for the timestep being predicted.
# NOTE(review): 16 presumably equals the number of metadata entries from
# idx_beat onward (what make_input_beat returns) - confirm against the data.
beats_input = layers.Input(shape=(16,))

# Merge the pooled notes features with the beat features.
features = layers.concatenate([notes_model.output, beats_input])
dropout1 = layers.Dropout(0.2)(features)

fc_1 = layers.Dense(100, activation='relu')(dropout1)
dropout2 = layers.Dropout(0.2)(fc_1)

# Two heads: a softmax over rest + pitches, and a single sigmoid for the slur.
# NOTE(review): the slur head reads fc_1 directly and so bypasses dropout2,
# unlike the notes head - confirm this is intended.
output_notes = layers.Dense(n_notes + 1, activation='softmax')(dropout2)
output_slur = layers.Dense(1, activation='sigmoid')(fc_1)

model = Model(inputs=[notes_model.input, beats_input], outputs=[output_notes, output_slur])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
conv_lst_m2d_5_input (InputLaye (None, None, 32, Non 0                                            
__________________________________________________________________________________________________
conv_lst_m2d_5 (ConvLSTM2D)     (None, None, 32, Non 40448       conv_lst_m2d_5_input[0][0]       
__________________________________________________________________________________________________
max_pooling3d_3 (MaxPooling3D)  (None, None, 16, Non 0           conv_lst_m2d_5[0][0]             
__________________________________________________________________________________________________
conv_lst_m2d_6 (ConvLSTM2D)     (None, 16, None, 64) 221440      max_pooling3d_3[0][0]            
__________________________________________________________________________________________________
global_ave

In [45]:
# Top-3 variant of categorical accuracy.
top3_acc = functools.partial(keras.metrics.top_k_categorical_accuracy, k=3)

# functools.partial objects carry no __name__; set one explicitly, presumably
# so that Keras can label the metric in its logs.
top3_acc.__name__ = 'top3_acc'


In [46]:
# One loss per output, in output order: categorical cross-entropy for the
# notes softmax, binary cross-entropy for the slur sigmoid.
# NOTE(review): the flat metrics list is presumably applied to every output,
# including the 1-unit slur output where top-3 accuracy carries no meaning -
# confirm, and consider per-output metrics.
model.compile('adam', loss=['categorical_crossentropy', 'binary_crossentropy'], metrics=['accuracy', top3_acc])

In [47]:
# Checkpoint to ./models/model6.hdf5, keeping only the best model seen so far
# (save_best_only=True).
checkpointer = keras.callbacks.ModelCheckpoint(filepath='./models/model6.hdf5', verbose=1, save_best_only=True)

In [48]:
# Number of training batches, i.e. one per (score, voice) pair in train_sequence.
steps_per_epoch = len(train_sequence)
steps_per_epoch

25420

In [49]:
# Number of validation batches, i.e. one per validation score (one sampled voice each).
validation_steps = len(valid_sequence)
validation_steps

593

In [50]:
# Resume from the checkpoint file when one can be loaded; otherwise keep the
# freshly initialised weights and report it.
try:
    model.load_weights('./models/model6.hdf5')
except (OSError, ValueError):
    print('no compatible weights found')

In [40]:
# Continue training from previously loaded weights: initial_epoch=2 together
# with epochs=5 makes this run report epochs 3 through 5.
history = model.fit_generator(
    train_sequence,
    steps_per_epoch=len(train_sequence),
    epochs=5,
    validation_data=valid_sequence,
    validation_steps=len(valid_sequence),
    callbacks=[checkpointer],
    use_multiprocessing=True,
    initial_epoch=2
)

Epoch 3/5
Epoch 4/5
Epoch 5/5

Process ForkPoolWorker-28:
Process ForkPoolWorker-24:
Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Process ForkPoolWorker-25:
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/multiprocessing/pool.py", line 

KeyboardInterrupt: 

In [41]:
# Timed evaluation over the first 250 validation batches.
# NOTE(review): per the Keras docs evaluate_generator returns loss/metric
# values rather than per-sample predictions, so despite its name `pred` holds
# evaluation scores - confirm for the installed Keras version.
%time pred = model.evaluate_generator(valid_sequence, steps=250, use_multiprocessing=True)

CPU times: user 9.24 s, sys: 1.5 s, total: 10.7 s
Wall time: 27.9 s


In [None]:
# A further 5 epochs over the full training sequence, validating on
# valid_sequence and checkpointing the best model after each epoch.
history2 = model.fit_generator(
    train_sequence,
    steps_per_epoch=len(train_sequence),
    epochs=5,
    validation_data=valid_sequence,
    validation_steps=len(valid_sequence),
    callbacks=[checkpointer],
    use_multiprocessing=True,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

In [None]:
# Persist the current weights.
# NOTE(review): this is the same path the ModelCheckpoint callback writes with
# save_best_only=True, so the best checkpoint is replaced by the weights from
# the end of training - confirm that is intended.
model.save_weights('./models/model6.hdf5')

In [None]:
# `pred` was produced by evaluate_generator, which returns loss/metric scalars
# rather than per-sample outputs, so it cannot be indexed and arg-maxed.
# Predict on the first validation batch instead: [0] of the batch selects its
# inputs, and output 0 of the model is the per-timestep note distribution.
notes_pred = np.argmax(model.predict(valid_sequence[0][0])[0], axis=1)

In [None]:
# Ground-truth note classes for the first validation batch: [1] selects the
# batch's targets and [0] the one-hot note targets; argmax recovers the class
# index per timestep.
notes = np.argmax(valid_sequence[0][1][0], axis=1)

In [None]:
# Map class indices back to pitches. make_targets puts rests on class 0 and
# pitch p on class (p - min_pitch) + 1, so class 0 stays 0 and every class
# k >= 1 decodes to k + min_pitch - 1. The comparison must be `> 0`: with
# `> 1`, class 1 (the lowest pitch, min_pitch) would be left as 1.
notes + (notes > 0) * (min_pitch - 1)

In [None]:
# Round-trip check for the first validation batch: raw score minus decoded
# targets, expected to be all zeros when the pitches are integral.
# The score and voice are taken from valid_sequence.indices because the
# validation voices are sampled at random, so a hard-coded voice may not be
# the one `notes` was built from. Decoding uses `notes > 0` (class 0 is the
# rest; class k >= 1 is pitch k + min_pitch - 1).
score_idx, voice = valid_sequence.indices[0]
scores_valid[score_idx][voice] - (notes + (notes > 0) * (min_pitch - 1))