#### README

Resources:
1. Tutorial on the Keras `TimeDistributed` layer: [machinelearningmastery blog](https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/)
2. Hints on [StackOverflow](https://stackoverflow.com/questions/47671732/keras-input-a-3-channel-image-into-lstm) about how to deal with image-LSTMs.

#### Imports and user variables

In [13]:
import os, shutil
import random
import sys
import numpy as np
import pypianoroll
from matplotlib import pyplot as plt
import cPickle as pickle
import pianoroll_utils
import IPython
import h5py

from keras.layers import Input, Dense, Conv2D, Conv2DTranspose, BatchNormalization, MaxPooling2D, UpSampling2D
from keras.models import Model, Sequential
from keras.callbacks import TensorBoard
from keras.models import load_model
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.layers import Activation, LSTM, RNN, Concatenate, concatenate, Dropout
from keras import optimizers
from keras.layers import TimeDistributed, Flatten, Reshape

from __future__ import print_function
from keras.models import Sequential
from keras import layers
from six.moves import range

NUM_FILES = 1000  # Song count; only used here to pick the matching SEQ_FILE name
WINDOW_LENGTH = 4  # Number of consecutive units (timesteps) in each sequence window
SEQ_FILE = './pickle_jar/seq_{}_songs_clipped96.h5'.format(NUM_FILES)  # HDF5 file holding the sequence data

# Music shape
# NOTE(review): the original note on MIN_PITCH said "C-1", but under the
# convention used on the next line (C7 = MIDI 108) C-1 is MIDI 12, so 13 would
# be C#-1. The value 13 is what gives NUM_PITCHES == 96 -- confirm the intent.
MIN_PITCH = 13 # Lowest MIDI pitch kept (inclusive)
MAX_PITCH = 108 # C7 (MIDI 108), highest MIDI pitch kept (inclusive)
BEATS_PER_UNIT = 4
NUM_TRANSPOSITIONS = 3 # Number of transpositions to perform (maximum 12)

# Don't change unless you know what you're doing
BEAT_RESOLUTION = 24 # This is set by the encoding of the lpd-5 dataset, corresponds to number of ticks per beat
PARTITION_NOTE = 60 # Break into left- and right-accompaniments at middle C
# Derived sizes of one unit: pitch rows (both bounds inclusive) x tick columns.
NUM_PITCHES = MAX_PITCH - MIN_PITCH + 1
NUM_TICKS = BEATS_PER_UNIT * BEAT_RESOLUTION

#### Load dataset

In [2]:
from keras.utils import Sequence

class TimeseriesGeneratorTwoInputs(Sequence):
    """Keras data reading class for two aligned input sequences.

    Utility class for generating batches of temporal data.
    This class takes in two sequences of data-points gathered at
    equal intervals (`data1` and `data2`, which must have the same
    length), along with time series parameters such as stride, length
    of history, etc., to produce batches for training/validation.

    Every batch has the form `([samples1, samples2], targets)`. For a
    chosen row `r`, the sample is the window
    `data[r - length:r:sampling_rate]` taken from each input and the
    target is `targets[r]`.

    Adapted from the TimeseriesGenerator class:
    https://github.com/keras-team/keras/blob/master/keras/utils/data_utils.py#L302

    # Arguments
        data1: First indexable input (e.g. Numpy array or HDF5 dataset);
            axis 0 is the time dimension.
        data2: Second indexable input, same length as `data1`. Its
            per-timestep shape may differ from that of `data1`.
        targets: Targets indexed by the same row numbers as the inputs.
        length: Span (in timesteps) of history covered by each window.
        sampling_rate: Step between successive timesteps inside a window.
        stride: Step between successive windows (unshuffled mode only).
        start_index: First usable timestep; windows start no earlier.
        end_index: Last row (inclusive) that may be used as a target.
            Defaults to `len(data1) - 1`.
        shuffle: Draw `batch_size` random rows per batch instead of
            walking the data in order.
        reverse: Return each window in reverse chronological order.
        batch_size: Maximum number of samples per batch.

    # Raises
        ValueError: If `data1` and `data2` differ in length, or if
            `start_index + length > end_index`.
    """

    def __init__(self, data1, data2, targets, length,
                 sampling_rate=1,
                 stride=1,
                 start_index=0,
                 end_index=None,
                 shuffle=False,
                 reverse=False,
                 batch_size=128):
        # Explicit check rather than `assert`, which is stripped under -O.
        if len(data1) != len(data2):
            raise ValueError('`data1` and `data2` must have the same '
                             'length, got %i and %i.'
                             % (len(data1), len(data2)))
        self.data1 = data1
        self.data2 = data2
        self.targets = targets
        self.length = length
        self.sampling_rate = sampling_rate
        self.stride = stride
        # The first usable target row needs `length` timesteps of history.
        self.start_index = start_index + length
        if end_index is None:
            end_index = len(data1) - 1
        self.end_index = end_index
        self.shuffle = shuffle
        self.reverse = reverse
        self.batch_size = batch_size

        if self.start_index > self.end_index:
            raise ValueError('`start_index+length=%i > end_index=%i` '
                             'is disallowed, as no part of the sequence '
                             'would be left to be used as current step.'
                             % (self.start_index, self.end_index))

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        rows_per_batch = self.batch_size * self.stride
        # Integer ceiling division. `np.ceil(a / b)` silently floors on
        # Python 2 (no `from __future__ import division` in this file),
        # which dropped the last partial batch.
        return int((self.end_index - self.start_index + rows_per_batch)
                   // rows_per_batch)

    def _empty_batch(self, num_rows):
        """Allocate uninitialised `(samples1, samples2, targets)` buffers."""
        # Number of timesteps selected by a step of `sampling_rate` over a
        # span of `length`, i.e. ceil(length / sampling_rate).
        num_steps = ((self.length + self.sampling_rate - 1)
                     // self.sampling_rate)
        # Each input keeps its own per-timestep shape.
        samples1_shape = [num_rows, num_steps] + list(self.data1.shape[1:])
        samples2_shape = [num_rows, num_steps] + list(self.data2.shape[1:])
        targets_shape = [num_rows] + list(self.targets.shape[1:])
        return (np.empty(samples1_shape),
                np.empty(samples2_shape),
                np.empty(targets_shape))

    def __getitem__(self, index):
        """Build batch number `index` as `([samples1, samples2], targets)`."""
        if self.shuffle:
            # Random rows, drawn with replacement; `index` is ignored.
            rows = np.random.randint(
                self.start_index, self.end_index + 1, size=self.batch_size)
        else:
            first = self.start_index + self.batch_size * self.stride * index
            last = min(first + self.batch_size * self.stride,
                       self.end_index + 1)
            rows = np.arange(first, last, self.stride)

        samples1, samples2, targets = self._empty_batch(len(rows))
        for j, row in enumerate(rows):
            row = int(row)
            # A slice selects the same timesteps as the equivalent index
            # list but is read as one contiguous range.
            window = slice(row - self.length, row, self.sampling_rate)
            samples1[j] = self.data1[window]
            samples2[j] = self.data2[window]
            targets[j] = self.targets[row]
        if self.reverse:
            return [samples1[:, ::-1, ...], samples2[:, ::-1, ...]], targets
        return [samples1, samples2], targets

In [3]:
# Load dataset
# The file handle stays open on purpose: the datasets below are only indexed
# later (e.g. inside the generators' __getitem__), so closing it here would
# break them.
f = h5py.File(SEQ_FILE, 'r')

# Test split: raw units and their embeddings.
seq_units_input, seq_units_comp = f['input_test'], f['comp_test']
seq_embed_input, seq_embed_comp = f['input_embed_test'], f['comp_embed_test']

# Both generators window over (input embedding, comp embedding) and use the
# comp embedding as the target as well.
data_gen_train = TimeseriesGeneratorTwoInputs(
    f['input_embed_train'], f['comp_embed_train'], f['comp_embed_train'],
    length=WINDOW_LENGTH, sampling_rate=1, batch_size=128)
data_gen_test = TimeseriesGeneratorTwoInputs(
    seq_embed_input, seq_embed_comp, seq_embed_comp,
    length=WINDOW_LENGTH, sampling_rate=1, batch_size=128)

# Report dataset shapes, then the number of batches per generator.
print('\n'.join(str(dataset.shape) for dataset in
                (seq_units_input, seq_units_comp,
                 seq_embed_input, seq_embed_comp)))
print(len(data_gen_train))
print(len(data_gen_test))

IOError: Unable to open file (unable to open file: name = './pickle_jar/seq_1000_songs_clipped96.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

## V1: Direct LSTM

This model skips any convolutional steps: each whole unit is flattened and fed straight into an LSTM.

In [18]:
# V1 model: two parallel LSTM branches (one per input stream) whose outputs are
# concatenated and decoded by dense layers into a single unit-shaped output.
# data_dim = latent_dim # Our model predicts embeddings, so our data is the size of the embedding
recurrent_dim = 800  # Units in each LSTM
dense_hidden_dim = 1000  # Units in the hidden dense layer after the merge

# Inputs
# Each input is a window of WINDOW_LENGTH units; one unit is a
# NUM_PITCHES x NUM_TICKS grid with a single trailing channel.
input1 = Input(shape=(WINDOW_LENGTH, NUM_PITCHES, NUM_TICKS, 1))
input2 = Input(shape=(WINDOW_LENGTH, NUM_PITCHES, NUM_TICKS, 1))
# For each time slice
# Flatten every unit into one vector so the LSTM receives
# (WINDOW_LENGTH, NUM_PITCHES * NUM_TICKS) per sample.
timeDist1 = TimeDistributed(Flatten())(input1)
timeDist2 = TimeDistributed(Flatten())(input2)
# LSTM
# Separate (non-shared) LSTM per branch. No `return_sequences` is passed, so
# with the Keras default each LSTM is expected to emit one vector per window.
lstm1 = LSTM(recurrent_dim)(timeDist1)
lstm2 = LSTM(recurrent_dim)(timeDist2)
# Dense
merged = concatenate([lstm1, lstm2])
hidden = Dense(dense_hidden_dim, activation='relu')(merged)
# Output
# One sigmoid activation per (pitch, tick) cell, reshaped back to unit layout.
flat_output = Dense(NUM_PITCHES*NUM_TICKS, activation='sigmoid')(hidden)
output = Reshape((NUM_PITCHES, NUM_TICKS, 1))(flat_output)

model = Model(inputs=[input1, input2], outputs=output)
# Mean-squared-error loss with RMSprop; the accuracy metric is left disabled.
model.compile(loss='mse', optimizer='rmsprop') #, metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_20 (InputLayer)           (None, 4, 96, 96, 1) 0                                            
__________________________________________________________________________________________________
input_21 (InputLayer)           (None, 4, 96, 96, 1) 0                                            
__________________________________________________________________________________________________
time_distributed_15 (TimeDistri (None, 4, 9216)      0           input_20[0][0]                   
__________________________________________________________________________________________________
time_distributed_16 (TimeDistri (None, 4, 9216)      0           input_21[0][0]                   
__________________________________________________________________________________________________
lstm_16 (L

## V2: CVAE + LSTM

This model builds directly on VAE_V7 and RLSTM_V5, simply joining them together so that the whole system is trained at once. Let's see how it goes.