## Homework: Deep Jazz

In [1]:
import numpy as np
from music21 import stream, midi, tempo, note 

In [2]:
from preprocess import get_musical_data, get_corpus_data
from grammar import unparse_grammar

from qa import prune_grammar, prune_notes, clean_up_notes
from generator import __sample, __generate_grammar, __predict

import tflearn

import tensorflow as tf
from tensorflow.contrib import rnn

import tqdm
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM

  return f(*args, **kwds)
Using TensorFlow backend.


In [3]:
# Number of training epochs; also embedded in the output file name below.
N_epochs = 128  # default
# Input MIDI file the grammar/corpus is extracted from.
data_fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
# Output MIDI path. The suffix is required so the saved file matches the name
# the playback cell at the end of the notebook opens
# ('midi/deepjazz_on_metheny...128_epochs.midi' for the default N_epochs).
out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)
out_fn += '_epoch.midi' if N_epochs == 1 else '_epochs.midi'

In [4]:
# generation settings
# max_len is the window length handed to build_model and __generate_grammar;
# max_tries and diversity are forwarded unchanged to __generate_grammar.
max_len = 20
max_tries = 1000
diversity = 0.5

# musical settings
# bpm is written into the output stream through tempo.MetronomeMark.
bpm = 130

# get data
# chords is indexed per loop iteration by the generation cell; abstract_grammars
# feeds get_corpus_data, which yields the corpus plus the value <-> index lookups.
chords, abstract_grammars = get_musical_data(data_fn)
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
print('corpus length:', len(corpus))
print('total # of values:', len(values))

corpus length: 193
total # of values: 78


In [5]:
# Inspect the first entry of `chords`; the generation loop further down starts
# at index 1 and never reads this entry.
chords[0]

[<music21.instrument.Piano Piano>,
 <music21.tempo.MetronomeMark Quarter=112.0>,
 <music21.key.Key of G major>,
 <music21.meter.TimeSignature 4/4>]

In [6]:
def get_keras_model(max_len, N_values):
    """Build and compile the reference Keras network: two stacked LSTMs and a softmax head.

    Args:
        max_len: number of timesteps in each input window.
        N_values: width of each timestep vector and number of output classes.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy and RMSprop.
    """
    layer_stack = [
        # The first LSTM emits the full sequence so the second one can consume it.
        LSTM(128, return_sequences=True, input_shape=(max_len, N_values)),
        Dropout(0.2),
        # The second LSTM keeps only its final output.
        LSTM(128, return_sequences=False),
        Dropout(0.2),
        Dense(N_values),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

## Task

Replace the previous model with an equivalent one written in prettytensor or tf.slim.

Try to make your code as compact as possible.

In [7]:
class DeepJazz(object):
    """Two stacked LSTM layers followed by a softmax layer, in raw TensorFlow 1.x.

    The model owns its own graph and a single long-lived session, so weights
    learned in ``fit`` are the ones used by ``predict``.  Call ``close`` to
    release the session when the model is no longer needed.

    NOTE(review): unlike ``get_keras_model`` this network applies no dropout,
    and ``forget_bias`` / ``learning_rate`` are stored but not wired into the
    graph (the step size actually used is the ``lr`` argument of ``fit``).
    """

    def __init__(self, max_len, N_values, lstm_size=128, forget_bias=0.2, stddev=1e-3, learning_rate=1e-4, batch_size=32):
        """Store hyper-parameters, build the graph and initialise its variables.

        Args:
            max_len: number of timesteps per input window.
            N_values: width of each timestep vector and number of output classes.
            lstm_size: number of units in each of the two LSTM layers.
            forget_bias: stored only (see class note).
            stddev: standard deviation used to initialise the output layer.
            learning_rate: stored only (see class note).
            batch_size: default mini-batch size used by ``batch_gen``.
        """
        self.max_len = max_len
        self.N_values = N_values
        self.lstm_size = lstm_size
        self.forget_bias = forget_bias
        self.stddev = stddev
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        # A private graph lets several instances coexist without variable-scope
        # name collisions in the process-wide default graph.
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.X = tf.placeholder(tf.float32, shape=(None, max_len, N_values))
            self.y = tf.placeholder(tf.float32, (None, self.N_values))
        self._create_network()

        # One session for the lifetime of the model: training and prediction
        # must share it, otherwise predictions come from freshly initialised weights.
        self.sess = tf.Session(graph=self.graph)
        self.sess.run(self._init_op)

    def _create_network(self):
        """Add the LSTM stack, softmax head, loss and training op to ``self.graph``."""
        with self.graph.as_default():
            cells = [rnn.BasicLSTMCell(self.lstm_size) for _ in range(2)]
            outputs, _ = tf.nn.dynamic_rnn(rnn.MultiRNNCell(cells), self.X, dtype=tf.float32)
            # Keep only the output of the last timestep: (batch, lstm_size).
            lstm_out = outputs[:, -1, :]

            weights = tf.Variable(tf.truncated_normal([self.lstm_size, self.N_values], stddev=self.stddev), trainable=True)
            biases = tf.Variable(tf.truncated_normal(shape=[self.N_values], stddev=self.stddev))
            logits = tf.matmul(lstm_out, weights) + biases
            self.predict_y = tf.nn.softmax(logits)

            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.y)
            self.loss = tf.reduce_mean(cross_entropy)

            # Build the optimizer once; the step size is fed per call so that
            # repeated ``fit`` calls do not keep adding ops to the graph.
            self._lr = tf.placeholder(tf.float32, shape=())
            self._train_op = tf.train.RMSPropOptimizer(learning_rate=self._lr).minimize(self.loss)
            # Created last so it also covers the optimizer's slot variables.
            self._init_op = tf.global_variables_initializer()

    def batch_gen(self, X, y, batch_size=None):
        """Draw one random mini-batch without replacement.

        Args:
            X, y: arrays indexed along their first axis by the same sample index.
            batch_size: requested batch size; defaults to ``self.batch_size``.
                It is clamped to ``len(X)`` because sampling without
                replacement cannot return more rows than exist.

        Returns:
            ``(x_batch, y_batch)`` with rows kept aligned.
        """
        n_samples = len(X)
        requested = self.batch_size if batch_size is None else batch_size
        idx = np.random.choice(n_samples, size=min(requested, n_samples), replace=False)
        return X[idx], y[idx]

    def fit(self, X, y, batch_size=None, epochs=None, lr=1e-3, n_epoch=None):
        """Train with RMSProp, running one random mini-batch per iteration.

        Args:
            X, y: training inputs and one-hot targets.
            batch_size: mini-batch size; defaults to ``self.batch_size``.
            epochs: number of training iterations.
            lr: RMSProp learning rate.
            n_epoch: tflearn-style alias for ``epochs`` (used by ``build_model``).

        Raises:
            TypeError: if neither ``epochs`` nor ``n_epoch`` is given.
        """
        if epochs is None:
            epochs = n_epoch
        if epochs is None:
            raise TypeError("fit() requires 'epochs' (or its alias 'n_epoch')")

        # build_model produces boolean one-hot arrays; the placeholders are float32.
        X = np.asarray(X, dtype=np.float32)
        y = np.asarray(y, dtype=np.float32)

        for _ in tqdm.tqdm_notebook(range(epochs), desc='Epoch'):
            x_batch, y_batch = self.batch_gen(X, y, batch_size)
            feed_dict_train = {self.X: x_batch, self.y: y_batch, self._lr: lr}
            self.sess.run(self._train_op, feed_dict=feed_dict_train)

    def predict(self, X, verbose=0):
        """Return softmax probabilities for ``X`` using the trained weights.

        ``verbose`` is accepted for Keras-style call compatibility and ignored.
        """
        feed_dict = {self.X: np.asarray(X, dtype=np.float32)}
        return self.sess.run(self.predict_y, feed_dict=feed_dict)

    def close(self):
        """Release the TensorFlow session held by this model."""
        self.sess.close()

In [8]:
def get_pretty_tensor_model(max_len, N_values):
    """Factory wrapper: build the raw-TensorFlow ``DeepJazz`` network for the given sizes."""
    network = DeepJazz(max_len, N_values)
    return network

## Another way with TFlearn
http://tflearn.org


In [9]:
def get_pretty_tensor_model(max_len, N_values):
    """Build the tflearn counterpart of ``get_keras_model`` (2 LSTMs, softmax, RMSprop).

    Args:
        max_len: number of timesteps in each input window.
        N_values: width of each timestep vector and number of output classes.

    Returns:
        A ``tflearn.DNN`` whose ``predict`` tolerates extra keyword arguments;
        the unwrapped method stays available as ``rpredict``.
    """
    tflearn.init_graph()

    net = tflearn.input_data(shape=[None, max_len, N_values])
    net = tflearn.lstm(net, 128, return_seq=True)
    # tflearn.dropout takes the probability of KEEPING a unit, whereas Keras'
    # Dropout(0.2) drops 20% of them, so the equivalent keep probability is 0.8.
    net = tflearn.dropout(net, 0.8)
    net = tflearn.lstm(net, 128, return_seq=False)
    net = tflearn.dropout(net, 0.8)
    net = tflearn.fully_connected(net, N_values, activation='softmax')
    net = tflearn.regression(net, optimizer='rmsprop', loss='categorical_crossentropy')

    model = tflearn.DNN(net)
    # Callers may pass Keras-style keyword arguments to predict; drop them
    # before delegating to the original tflearn method.
    model.rpredict = model.predict
    model.predict = lambda x, **kwargs: model.rpredict(x)
    return model

In [19]:
def get_pretty_tensor_model(max_len, N_values):
    """Build the tflearn model with dropout applied inside the LSTM layers (Adam optimizer).

    Args:
        max_len: number of timesteps in each input window.
        N_values: width of each timestep vector and number of output classes.

    Returns:
        A ``tflearn.DNN`` whose ``predict`` ignores extra keyword arguments;
        the original method is kept as ``rpredict``.
    """
    tflearn.init_graph(gpu_memory_fraction=0.33)

    network = tflearn.input_data(shape=[None, max_len, N_values])
    # Two identical LSTM layers; only the first one returns the whole sequence.
    for emit_sequence in (True, False):
        network = tflearn.lstm(network, 128, dropout=(1.0, 0.8), return_seq=emit_sequence)
    network = tflearn.fully_connected(network, N_values, activation='softmax')
    network = tflearn.regression(network, optimizer='adam', loss='categorical_crossentropy')

    model = tflearn.DNN(network)
    model.rpredict = model.predict

    def predict_ignoring_kwargs(x, **kwargs):
        # Extra keyword arguments are accepted and discarded.
        return model.rpredict(x)

    model.predict = predict_ignoring_kwargs
    return model

In [20]:
# Factory used by build_model. get_pretty_tensor_model is defined several times
# in this notebook, so this binds whichever definition was executed last.
get_model = get_pretty_tensor_model

In [21]:
import numpy as np

''' Build a 2-layer LSTM from a training corpus '''


def build_model(corpus, val_indices, max_len, N_epochs=128):
    """Vectorise the corpus into one-hot windows and train a model on them.

    Args:
        corpus: sequence of values (words) to learn from.
        val_indices: mapping from each corpus value to its one-hot column index.
        max_len: number of consecutive values in each training window.
        N_epochs: number of epochs passed to ``model.fit`` as ``n_epoch``.

    Returns:
        The model produced by ``get_model`` after fitting.

    Raises:
        ValueError: if the corpus is not longer than ``max_len``, so that no
            (window, next value) pair can be formed.
    """
    # number of different values or words in corpus
    N_values = len(set(corpus))

    # cut the corpus into semi-redundant sequences of max_len values
    step = 3
    starts = range(0, len(corpus) - max_len, step)
    sentences = [corpus[i: i + max_len] for i in starts]
    next_values = [corpus[i + max_len] for i in starts]
    print('nb sequences:', len(sentences))
    if not sentences:
        raise ValueError('corpus must contain more than max_len (%d) values, got %d'
                         % (max_len, len(corpus)))

    # transform data into binary matrices
    # (the builtin bool replaces the np.bool alias, which newer NumPy releases removed)
    X = np.zeros((len(sentences), max_len, N_values), dtype=bool)
    y = np.zeros((len(sentences), N_values), dtype=bool)
    for i, sentence in enumerate(sentences):
        for t, val in enumerate(sentence):
            X[i, t, val_indices[val]] = 1
        y[i, val_indices[next_values[i]]] = 1

    model = get_model(max_len, N_values)
    print(max_len)
    print(N_values)
    print(X.shape)
    model.fit(X, y, batch_size=128, n_epoch=N_epochs)

    return model

In [10]:
# build model
model = build_model(corpus=corpus, val_indices=val_indices,
                    max_len=max_len, N_epochs=N_epochs)

# set up audio stream
out_stream = stream.Stream()

# generation loop: one iteration per entry of `chords`, skipping index 0
curr_offset = 0.0
loopEnd = len(chords)
for loopIndex in range(1, loopEnd):
    # get chords from file, re-based so offsets fall within a 4-beat window
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)

    # generate grammar
    curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                      abstract_grammars=abstract_grammars,
                                      values=values, val_indices=val_indices,
                                      indices_val=indices_val,
                                      max_len=max_len, max_tries=max_tries,
                                      diversity=diversity)

    # rewrite ' A' and ' X' grammar tokens to ' C' before unparsing
    curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')

    # Pruning #1: smoothing measure
    curr_grammar = prune_grammar(curr_grammar)

    # Get notes from grammar and chords
    curr_notes = unparse_grammar(curr_grammar, curr_chords)

    # Pruning #2: removing repeated and too close together notes
    curr_notes = prune_notes(curr_notes)

    # quality assurance: clean up notes
    curr_notes = clean_up_notes(curr_notes)

    # print # of notes in curr_notes
    n_notes = sum(1 for element in curr_notes if isinstance(element, note.Note))
    print('After pruning: %s notes' % n_notes)

    # insert into the output stream
    for m in curr_notes:
        out_stream.insert(curr_offset + m.offset, m)
    for mc in curr_chords:
        out_stream.insert(curr_offset + mc.offset, mc)

    # each iteration occupies 4 offset units in the output stream
    curr_offset += 4.0

out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

# Play the final stream through output (see 'play' lambda function above)
#play = lambda x: midi.realtime.StreamPlayer(x).play()
#play(out_stream)

# save stream; close the file even if writing fails so the handle is not leaked
mf = midi.translate.streamToMidiFile(out_stream)
mf.open(out_fn, 'wb')
try:
    mf.write()
finally:
    mf.close()

nb sequences: 58
20
78
(58, 20, 78)
---------------------------------
Run id: XE8TKU
Log directory: /tmp/tflearn_logs/
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
---------------------------------
Training samples: 58
Validation samples: 0
--
Training Step: 1  | time: 0.831s
| Adam | epoch: 001 | loss: 0.00000 -- iter: 58/58
--
Training Step: 2  | total loss: [1m[32m3.92147[0m[0m | time: 0.103s
| Adam | epoch: 002 | loss: 3.92147 -- iter: 58/58
--
Training Step: 3  | total loss: [1m[32m4.27428[0m[0m | time: 0.104s
| Adam | epoch: 003 | loss: 4.27428 -- iter: 58/58
--
Training Step: 4  | total loss: [1m[32m4.32995[0m[0m | time: 0.076s
| Adam | epoch: 004 | loss: 4.32995 -- iter: 58/58
--
Training Step: 5  | total loss: [1m[32m4.33837[0m[0m | time: 0.077s
| Adam | epoch: 005 | loss: 4.33837 -- iter: 58/58
--
Training Step: 6  | total loss: [1m[32m4.33531[0m[0m | time: 0.078s
| Adam | epo

Training Step: 63  | total loss: [1m[32m3.46248[0m[0m | time: 0.080s
| Adam | epoch: 063 | loss: 3.46248 -- iter: 58/58
--
Training Step: 64  | total loss: [1m[32m3.46279[0m[0m | time: 0.076s
| Adam | epoch: 064 | loss: 3.46279 -- iter: 58/58
--
Training Step: 65  | total loss: [1m[32m3.46369[0m[0m | time: 0.080s
| Adam | epoch: 065 | loss: 3.46369 -- iter: 58/58
--
Training Step: 66  | total loss: [1m[32m3.46550[0m[0m | time: 0.083s
| Adam | epoch: 066 | loss: 3.46550 -- iter: 58/58
--
Training Step: 67  | total loss: [1m[32m3.46686[0m[0m | time: 0.090s
| Adam | epoch: 067 | loss: 3.46686 -- iter: 58/58
--
Training Step: 68  | total loss: [1m[32m3.46673[0m[0m | time: 0.076s
| Adam | epoch: 068 | loss: 3.46673 -- iter: 58/58
--
Training Step: 69  | total loss: [1m[32m3.46682[0m[0m | time: 0.074s
| Adam | epoch: 069 | loss: 3.46682 -- iter: 58/58
--
Training Step: 70  | total loss: [1m[32m3.46507[0m[0m | time: 0.080s
| Adam | epoch: 070 | loss: 3.46507 --

Training Step: 128  | total loss: [1m[32m3.46176[0m[0m | time: 0.126s
| Adam | epoch: 128 | loss: 3.46176 -- iter: 58/58
--
After pruning: 14 notes
After pruning: 14 notes
After pruning: 12 notes
After pruning: 13 notes
After pruning: 10 notes
After pruning: 12 notes
After pruning: 15 notes
After pruning: 13 notes
After pruning: 14 notes
After pruning: 14 notes
After pruning: 15 notes
After pruning: 12 notes
After pruning: 11 notes
After pruning: 13 notes
After pruning: 15 notes
After pruning: 16 notes
After pruning: 15 notes
After pruning: 14 notes


You can play the generated sample using any MIDI player.

Under Linux I prefer timidity.

In [None]:
# The file name is hard-coded for the default N_epochs = 128; adjust it if N_epochs changes.
!! timidity midi/deepjazz_on_metheny...128_epochs.midi