In [1]:
import numpy as np
import tensorflow as tf

from MusicRnnData import MusicRnnData

# input parameters
x_len = 50
y_len = 1
batch_size = 64
# LSTM parameters
num_layers = 2
lstm_size = 128
hidden_size = 128
# training parameters
dropout_prob = 0.5
learning_rate = 1e-1
num_steps = 50000
verbose = True
display_interval = 500
moving_avg_length = 100

# fix random seed for reproducibility
np.random.seed(7)

## Helper functions

In [2]:
from collections import deque

class MovingAverager(object):
    def __init__(self, filter_length):
        self.filter = deque([0 for _ in range(filter_length)])
        
    def insert(self, num):
        self.filter.popleft()
        self.filter.append(num)
        
    def average(self):
        return sum(self.filter)/float(len(self.filter))
    
def build_lstm_stack(num_layers, lstm_size, dropout_prob=1.0):
    def lstm_layer(lstm_size, dropout_prob=1.0):
        lstm_layer = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        return tf.contrib.rnn.DropoutWrapper(lstm_layer, output_keep_prob=dropout_prob)
    
    lstm_stack = [lstm_layer(lstm_size, dropout_prob=dropout_prob) for _ in range(num_layers)]
    return tf.contrib.rnn.MultiRNNCell(lstm_stack)

## Load the data

In [3]:
# filelist = ['a2002011001-e02.wav']
filelist = ['sine.wav']
music_data = MusicRnnData(filelist)

## Construct model

In [4]:
# input
x = tf.placeholder(tf.float32, [batch_size, x_len])
y = tf.placeholder(tf.float32, [batch_size, y_len])

# RNN Cell
lstm = build_lstm_stack(num_layers, lstm_size, dropout_prob=dropout_prob)
# add dropout here

# output layer
output, states = tf.contrib.rnn.static_rnn(lstm, [x], dtype=tf.float32)
fc_weights = tf.Variable(tf.random_normal([hidden_size, y_len]))
fc_bias = tf.Variable(tf.random_normal([y_len]))

y_ = tf.matmul(output[-1], fc_weights) + fc_bias

## Set cost function and optimizer

In [5]:
cost = tf.nn.l2_loss(y_ - y)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

## Run training session

In [None]:
moving_avg = MovingAverager(moving_avg_length)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(num_steps):
        x_batch, y_batch = music_data.batch(x_len, y_len, batch_size)
        _, loss = sess.run([optimizer, cost], feed_dict={x: x_batch, y: y_batch})

        # print progress
        if verbose and (i % display_interval == 0):
            print('step: %d, training loss: %g' % (i, loss))
            
            # moving averager
            moving_avg.insert(loss)
            if i > moving_avg_length:
                print('moving average: %g' % (moving_avg.average()))

step: 0, training loss: 0.480391
step: 500, training loss: 0.0847758
moving average: 0.00565167
step: 1000, training loss: 0.0415556
moving average: 0.00606723
step: 1500, training loss: 0.132123
moving average: 0.00738846
step: 2000, training loss: 0.362011
moving average: 0.0110086
step: 2500, training loss: 0.197169
moving average: 0.0129803
step: 3000, training loss: 0.115876
moving average: 0.014139
