In [8]:
import numpy as np
import tensorflow as tf
import os, shutil

from MusicRnnData import MusicRnnData

# input parameters
x_len = 10       # number of input samples fed to the network per example
y_len = 1        # number of samples predicted per example
batch_size = 1
# LSTM parameters
num_layers = 1
lstm_size = 8    # units per LSTM layer
hidden_size = 8  # input width of the output layer; keep equal to lstm_size
# training parameters
# NOTE: despite its name, this value is forwarded to DropoutWrapper as
# `output_keep_prob`, i.e. it is the probability of KEEPING an activation.
dropout_prob = 0.5
learning_rate = 1e-1
num_steps = 50000
verbose = True
display_interval = 500
moving_avg_length = 100

# fix random seeds for reproducibility
random_seed = 7
np.random.seed(random_seed)
# NumPy's seed does not affect TensorFlow: weight initialisation and dropout
# masks draw from TF's own generator, so the graph-level seed must be set too
# (and before any op is added to the graph).
tf.set_random_seed(random_seed)

## Helper functions

In [2]:
from collections import deque

class MovingAverager(object):
    """Mean of the most recent ``filter_length`` inserted values.

    Only values that were actually inserted take part in the average, so the
    result is not biased toward zero while the window is still filling up.
    Once ``filter_length`` values have been inserted, each new value evicts
    the oldest one.

    Args:
        filter_length: size of the averaging window; must be >= 1.

    Raises:
        ValueError: if ``filter_length`` is smaller than 1.
    """

    def __init__(self, filter_length):
        if filter_length < 1:
            raise ValueError(
                'filter_length must be a positive integer, got %r' % (filter_length,))
        # A bounded deque drops its oldest entry automatically on append.
        self.filter = deque(maxlen=filter_length)

    def insert(self, num):
        """Add ``num`` to the window, evicting the oldest value if it is full."""
        self.filter.append(num)

    def average(self):
        """Return the mean of the stored values, or 0.0 if nothing was inserted."""
        if not self.filter:
            return 0.0
        return sum(self.filter) / float(len(self.filter))
    
def build_lstm_stack(num_layers, lstm_size, dropout_prob=1.0):
    """Build a multi-layer LSTM cell with dropout on each layer's output.

    Args:
        num_layers: number of LSTM layers to stack.
        lstm_size: number of units in every LSTM layer.
        dropout_prob: forwarded to ``DropoutWrapper`` as ``output_keep_prob``
            (so 1.0, the default, passes the value for "keep everything").

    Returns:
        A ``tf.contrib.rnn.MultiRNNCell`` wrapping ``num_layers`` cells.
    """
    def make_cell():
        # One BasicLSTMCell wrapped so that its outputs go through dropout.
        cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=dropout_prob)

    cells = []
    for _ in range(num_layers):
        cells.append(make_cell())
    return tf.contrib.rnn.MultiRNNCell(cells)

## Load the data

In [3]:
# Audio files handed to MusicRnnData (imported from the MusicRnnData module).
# Alternative input kept for reference:
# filelist = ['a2002011001-e02.wav']
filelist = ['sine.wav']
music_data = MusicRnnData(filelist)

## Construct model

In [4]:
# input: one window of x_len samples per example, target of y_len samples
x = tf.placeholder(tf.float32, [batch_size, x_len])
y = tf.placeholder(tf.float32, [batch_size, y_len])

# RNN cell stack.
# NOTE: the keep probability is a Python constant baked into the graph, so
# dropout is also applied whenever y_ is evaluated outside of training.
lstm = build_lstm_stack(num_layers, lstm_size, dropout_prob=dropout_prob)

# Passing [x] as the input list runs the stack for a single time step whose
# features are the whole x_len window.
output, states = tf.contrib.rnn.static_rnn(lstm, [x], dtype=tf.float32)

# output layer: the stack emits lstm_size features per example, so the weight
# matrix has to be [lstm_size, y_len] (it was sized with hidden_size, which
# only worked while hidden_size == lstm_size).
fc_weights = tf.Variable(tf.random_normal([lstm_size, y_len]))
fc_bias = tf.Variable(tf.random_normal([y_len]))

y_ = tf.matmul(output[-1], fc_weights) + fc_bias

## Set cost function and optimizer

In [5]:
# Mean squared error between the prediction y_ and the target y.
# Earlier formulation kept for reference:
# cost = tf.divide(tf.nn.l2_loss(y_ - y), y_len)
cost = tf.reduce_mean(tf.squared_difference(y_, y))
# NOTE: `optimizer` holds the training op returned by minimize(), not the
# RMSPropOptimizer object itself.
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

## Set summary variables

In [10]:
# directory that receives the TensorBoard event files
dir_save = './Logs'

# define summary for tensorboard
# The logged tensor is the training cost, so tag it as such (it used to be
# mislabelled 'learning_rate', which made the TensorBoard chart misleading).
tf.summary.scalar('cost', cost)
summary_merged = tf.summary.merge_all()

## Run training session

In [11]:
# initialize moving averager for loss
if verbose:
    moving_avg = MovingAverager(moving_avg_length)

# create session (left open on purpose: later cells reuse `sess` for prediction)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# start from an empty log directory so old runs do not mix into TensorBoard
if os.path.exists(dir_save):
    shutil.rmtree(dir_save)
train_writer = tf.summary.FileWriter(dir_save, sess.graph)
# define saver
saver = tf.train.Saver()

# and begin
try:
    for i in range(num_steps):
        x_batch, y_batch = music_data.batch(x_len, y_len, batch_size)
        _, loss, summary = sess.run([optimizer, cost, summary_merged],
                                    feed_dict={x: x_batch, y: y_batch})

        train_writer.add_summary(summary, i)

        if verbose:
            moving_avg.insert(loss)
            # print progress
            if (i % display_interval == 0):
                print('step: %d, training loss: %g' % (i, loss))

                # only report the average once the window is completely filled
                if i > moving_avg_length:
                    print('moving average loss: %g' % (moving_avg.average()))
finally:
    # Flush and close the event file even if training is interrupted;
    # otherwise the most recent summaries may never reach disk.
    train_writer.close()

step: 0, training loss: 0.999412
step: 500, training loss: 0.074743
moving average loss: 0.100703
step: 1000, training loss: 0.233639
moving average loss: 0.0828682
step: 1500, training loss: 0.0200707
moving average loss: 0.0711575
step: 2000, training loss: 0.0170109
moving average loss: 0.0822004
step: 2500, training loss: 0.00715887
moving average loss: 0.0721346
step: 3000, training loss: 0.0825668
moving average loss: 0.0610811
step: 3500, training loss: 0.000798302
moving average loss: 0.0620597
step: 4000, training loss: 0.101032
moving average loss: 0.0774145
step: 4500, training loss: 0.000439913
moving average loss: 0.0852644
step: 5000, training loss: 0.00661538
moving average loss: 0.0668656
step: 5500, training loss: 0.220353
moving average loss: 0.0542021
step: 6000, training loss: 0.0232836
moving average loss: 0.0703216
step: 6500, training loss: 0.0873569
moving average loss: 0.0733865
step: 7000, training loss: 0.0620765
moving average loss: 0.088769
step: 7500, trai

## Predict on sequence

In [None]:
from __future__ import division

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# NOTE: this overrides the display_interval set in the configuration cell.
display_interval = 1000

original = music_data.convert_to_wav(music_data.tracks[0])
orig_len = original.shape[0]

# Seed the generator with the first x_len samples, then repeatedly predict the
# next y_len samples and slide the input window forward by y_len.
# NOTE(review): the seed window is taken from the convert_to_wav() output while
# training batches come from music_data.batch(); confirm both are in the same
# domain/scale.
# NOTE: dropout is built into the graph with a constant keep probability, so
# these predictions are computed with dropout still active.
num_predictions = (orig_len - x_len) // y_len
x_batch = original[0:x_len]
chunks = [x_batch]            # collected pieces, joined once after the loop
num_samples = len(x_batch)    # running length of the generated sequence
for i in range(num_predictions):
    feed_pred = np.expand_dims(x_batch, axis=0)
    feed_pred = np.repeat(feed_pred, batch_size, axis=0)
    # y_ depends only on x, so no target needs to be fed; this also removes
    # the dependency on a y_batch left over from the training cell.
    new_y = sess.run(y_, feed_dict={x: feed_pred})[0, :]
    chunks.append(new_y)
    num_samples += len(new_y)
    x_batch = np.append(x_batch[y_len:], new_y, axis=0)

    if (i % display_interval == 0):
        print('Iteration: %d, len(prediction) = %g' % (i, num_samples))

# Joining once avoids re-copying the whole sequence on every step.
prediction = np.concatenate(chunks, axis=0)
    

## Plot predicted output

In [None]:
converted = music_data.convert_to_wav(prediction)
pred_len = converted.shape[0]

# Time axis in seconds: sample k occurs at k / sample_rate. (linspace with its
# default endpoint=True stretched the axis by one sample.) The explicit float
# keeps this a true division regardless of any __future__ import.
# assumes music_data.sample_rate is a scalar number of samples per second -- TODO confirm
x_orig = np.arange(orig_len) / float(music_data.sample_rate)
x_conv = np.arange(pred_len) / float(music_data.sample_rate)

num_shown = 300  # only the first samples are plotted so the waveform is readable
fig, (ax_orig, ax_pred) = plt.subplots(2, 1)
ax_orig.plot(x_orig[0:num_shown], original[0:num_shown])
ax_orig.set(title='Original', ylabel='amplitude')
ax_pred.plot(x_conv[0:num_shown], converted[0:num_shown])
ax_pred.set(title='Predicted', xlabel='time (s)', ylabel='amplitude')
fig.tight_layout()
plt.show()