In [None]:
import numpy as np
import tensorflow as tf
import os, shutil
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

from MusicRnnData import MusicRnnData

# ---------------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------------

# Data windowing: x_len is the number of timesteps fed to the network and
# y_len the number of values in each training target; n_samples is the
# number of (input, target) pairs requested from the data loader.
x_len, y_len = 80, 40
n_samples = 4096
batch_size = 32

# Network size: one entry of lstm_size per LSTM layer.
# NOTE(review): num_layers is not referenced by the cells visible in this
# notebook -- confirm whether it is still needed.
lstm_size = [50, 100]
num_layers = 2

# Training settings.
# NOTE(review): epoch_size, verbose and moving_avg_length are not referenced
# by the visible cells, and display_interval is reassigned in the prediction
# cell -- confirm before relying on the values set here.
num_epochs, epoch_size = 100, 4096
dropout_prob = 0.2
verbose = True
display_interval = 500
moving_avg_length = 100

# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(7)

## Load data

In [None]:
# Audio files to train on. 'a2002011001-e02.wav' was used previously as an
# alternative input; swap it in here to train on that recording instead.
filelist = ['sine.wav']
music_data = MusicRnnData(filelist)

# Draw n_samples (input, target) window pairs from the loaded audio.
x_windows, y_windows = music_data.batch(x_len, y_len, n_samples)

# The LSTM expects input laid out as (samples, timesteps, features); the
# targets are laid out as (samples, y_len).
x_train = np.reshape(x_windows, (n_samples, x_len, 1))
y_train = np.reshape(y_windows, (n_samples, y_len))

# Sanity check: show the final array shapes, inputs first.
for split in (x_train, y_train):
    print(split.shape)

## Build network

In [None]:
def build_model(x_len, lstm_size, dropout_prob, output_len=None):
    """Build and compile a two-layer LSTM sequence regressor.

    The network is LSTM -> Dropout -> LSTM -> Dropout -> Dense -> linear
    activation, compiled with mean-squared-error loss and the RMSprop
    optimizer.

    Args:
        x_len: Number of timesteps in each input sequence; the model input
            shape is ``(x_len, 1)``.
        lstm_size: Sequence whose first two entries give the number of units
            in the first and second LSTM layers respectively.
        dropout_prob: Rate passed to the Dropout layer that follows each
            LSTM layer.
        output_len: Number of units in the final Dense layer, i.e. how many
            values the model emits per input sequence. When omitted, the
            notebook-level ``y_len`` is used, which preserves the behaviour
            of existing three-argument calls.

    Returns:
        The compiled ``Sequential`` model.
    """
    if output_len is None:
        # Backward-compatible fallback to the notebook-level setting.
        output_len = y_len

    model = Sequential()
    # The first LSTM returns the full sequence so the second LSTM receives
    # one vector per timestep.
    model.add(LSTM(units=lstm_size[0], input_shape=(x_len, 1), return_sequences=True))
    model.add(Dropout(dropout_prob))
    # The second LSTM returns only its final output, which feeds the Dense layer.
    model.add(LSTM(units=lstm_size[1], return_sequences=False))
    model.add(Dropout(dropout_prob))
    model.add(Dense(units=output_len))
    model.add(Activation("linear"))
    model.compile(loss='mean_squared_error', optimizer='rmsprop')
    return model


model = build_model(x_len, lstm_size, dropout_prob, output_len=y_len)

## Train model

In [None]:
# Fit on the prepared windows; verbose=2 selects Keras' one-line-per-epoch log.
fit_options = dict(epochs=num_epochs, batch_size=batch_size, verbose=2)
model.fit(x_train, y_train, **fit_options)

## Predict on sequence

In [None]:
from __future__ import division

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# How often (in prediction steps) progress is printed by the loop below.
display_interval = 1000

original = music_data.tracks[0]
orig_len = original.shape[0]

# Number of whole y_len-sized chunks that fit after the initial x_len window.
num_predictions = max((orig_len - x_len) // y_len, 0)

# Start from the first x_len values of the original track, then repeatedly
# predict the next y_len values and slide the input window forward by y_len
# so that later predictions are fed by earlier ones.
x_batch = original[0:x_len]
predicted_chunks = [x_batch]
predicted_len = x_len
for i in range(num_predictions):
    # A single sample is enough: the previous version tiled the window
    # batch_size times and then used only the first row of the result.
    feed_pred = np.reshape(x_batch, (1, x_len, 1))
    new_y = model.predict(feed_pred, batch_size=1)[0, :]

    # Collect chunks in a list and join once after the loop, instead of
    # re-copying the whole prediction array on every step.
    predicted_chunks.append(new_y)
    predicted_len += len(new_y)
    x_batch = np.append(x_batch[y_len:], new_y, axis=0)

    if i % display_interval == 0:
        # %d keeps large sample counts readable (%g would switch to
        # scientific notation once they exceed six significant digits).
        print('Iteration: %d / %d, len(prediction) = %d / %d'
              % (i, num_predictions, predicted_len, orig_len))

prediction = np.concatenate(predicted_chunks, axis=0)

## Plot output

In [None]:
converted_original = music_data.convert_to_wav(original)
converted_prediction = music_data.convert_to_wav(prediction)
pred_len = converted_prediction.shape[0]

# Time axis: sample k sits at k / sample_rate. (np.linspace with its default
# endpoint=True would stretch the spacing to n / ((n - 1) * sample_rate).)
# float() guards against integer division if sample_rate is an int.
# NOTE(review): the axis label below assumes sample_rate is in Hz -- confirm.
sample_rate = float(music_data.sample_rate)
x_orig = np.arange(orig_len) / sample_rate
x_conv = np.arange(pred_len) / sample_rate

# Only the first n_plot samples are drawn so individual cycles stay visible.
n_plot = 300

fig, (ax_orig, ax_pred) = plt.subplots(2, 1)
ax_orig.plot(x_orig[:n_plot], converted_original[:n_plot])
ax_orig.set(title='Original', ylabel='Sample value')
ax_pred.plot(x_conv[:n_plot], converted_prediction[:n_plot])
ax_pred.set(title='Prediction', xlabel='Time (s)', ylabel='Sample value')
fig.tight_layout();