## Homework: Deep Jazz

In [1]:
import numpy as np
from music21 import stream, midi, tempo, note

from grammar import unparse_grammar
from preprocess import get_musical_data, get_corpus_data

from qa import prune_grammar, prune_notes, clean_up_notes
from generator import __sample, __generate_grammar, __predict

In [2]:
N_epochs = 128  # default
data_fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny

# The output name encodes the epoch count and carries the MIDI extension,
# e.g. 'midi/deepjazz_on_metheny...128_epochs.midi' (the path the player cell uses).
epoch_suffix = '_epoch.midi' if N_epochs == 1 else '_epochs.midi'
out_fn = 'midi/' + 'deepjazz_on_metheny...' + str(N_epochs) + epoch_suffix

In [3]:
# model / generation settings: max_len is passed to build_model and
# __generate_grammar; max_tries and diversity are passed to __generate_grammar
max_len = 20
max_tries = 1000
diversity = 0.5

# musical settings
bpm = 130  # used as the MetronomeMark number inserted into the output stream

# get data: parse the source MIDI file, then derive the training corpus and the
# value <-> index lookup tables from the abstract grammars
chords, abstract_grammars = get_musical_data(data_fn)
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
print('corpus length:', len(corpus))
print('total # of values:', len(values))

corpus length: 193
total # of values: 78


In [4]:
# Inspect the first entry of `chords`; the generation loop further down starts at index 1.
chords[0]

[<music21.instrument.Piano Piano>,
 <music21.tempo.MetronomeMark Quarter=112.0>,
 <music21.key.Key of G major>,
 <music21.meter.TimeSignature 4/4>]

In [5]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM

def get_keras_model(max_len, N_values):
    """Return a compiled Keras model made of two stacked LSTM layers.

    Architecture: LSTM(128, sequence output) -> Dropout(0.2) -> LSTM(128) ->
    Dropout(0.2) -> Dense(N_values) -> softmax.

    Args:
        max_len: number of time steps in each input sequence.
        N_values: size of the one-hot vector at every time step; also the
            number of output classes.

    Returns:
        The model compiled with categorical cross-entropy loss and RMSprop.
    """
    layers = [
        LSTM(128, return_sequences=True, input_shape=(max_len, N_values)),
        Dropout(0.2),
        LSTM(128, return_sequences=False),
        Dropout(0.2),
        Dense(N_values),
        Activation('softmax'),
    ]
    network = Sequential(layers)
    network.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    return network

Using TensorFlow backend.


## Task

Replace the previous Keras model with an equivalent one written in prettytensor or tf.slim.

Try to make your code as compact as possible.

In [6]:
class DeepJazz:
    """Two-layer stacked LSTM next-value classifier on the TensorFlow 1.x graph API.

    Mirrors ``get_keras_model``: LSTM(128) -> Dropout(0.2) -> LSTM(128) ->
    Dropout(0.2) -> Dense(N_values) -> softmax, trained with RMSProp on
    categorical cross-entropy.

    NOTE: the methods reference TensorFlow through the name ``tf``; it must be
    available in the notebook namespace (``import tensorflow as tf``).
    """

    def __init__(self, max_len, N_values):
        """Build the graph and open a session bound to it.

        Args:
            max_len: number of time steps per input sequence.
            N_values: one-hot width of every time step and number of classes.
        """
        self.max_len = max_len
        self.N_values = N_values
        # Fraction of units DROPPED during training (same meaning as Keras Dropout(0.2)).
        self.dropout = 0.2

        # A private graph keeps repeated instantiation from piling ops onto the
        # default graph, so no global graph reset is needed.
        self.graph = tf.Graph()
        self.build()
        self.sess = tf.Session(graph=self.graph)

    def build(self):
        """Create placeholders, the network, the loss and the training op."""
        with self.graph.as_default():
            self.x = tf.placeholder(
                tf.float32, shape=[None, self.max_len, self.N_values], name='x')
            self.y = tf.placeholder(tf.float32, [None, self.N_values], name='y')
            # Defaults to 1.0 (no dropout) so predict() is deterministic;
            # fit() feeds 1 - self.dropout.
            self.keep_prob = tf.placeholder_with_default(1.0, shape=[], name='keep_prob')

            # Dropout on the OUTPUT of each LSTM layer matches the Keras layout.
            layers = [
                tf.contrib.rnn.DropoutWrapper(
                    tf.contrib.rnn.BasicLSTMCell(128),
                    output_keep_prob=self.keep_prob)
                for _ in range(2)
            ]
            cell = tf.contrib.rnn.MultiRNNCell(layers)

            # dtype lets dynamic_rnn create the zero initial state for any batch size.
            rnn_outputs, _ = tf.nn.dynamic_rnn(cell, self.x, dtype=tf.float32)
            # Only the last time step feeds the classifier (return_sequences=False).
            last_output = rnn_outputs[:, -1, :]

            # activation_fn=None: fully_connected defaults to ReLU, which would
            # clip the logits before the softmax.
            logits = tf.contrib.layers.fully_connected(
                last_output, self.N_values, activation_fn=None,
                biases_initializer=tf.constant_initializer(0.0))
            self.predictions = tf.nn.softmax(logits)

            # Targets are one-hot rows, so use the dense (non-sparse) cross-entropy.
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=logits))

            # Built once here so repeated fit() calls do not add new ops.
            self.global_step = tf.Variable(
                initial_value=0, name='global_step', trainable=False)
            self.train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(
                self.loss, global_step=self.global_step)
            self.init_op = tf.global_variables_initializer()

    def create_batches(self, X, y, batch_size, epochs):
        """Yield ``epochs`` random mini-batches drawn without replacement.

        Args:
            X: array whose first axis indexes samples.
            y: array of targets aligned with ``X`` on the first axis.
            batch_size: requested batch size; capped at the number of samples
                because sampling without replacement cannot exceed it.
            epochs: number of batches to yield.

        Yields:
            ``(X_batch, y_batch)`` tuples selected with the same row indices.
        """
        n_samples = X.shape[0]
        batch_size = min(batch_size, n_samples)
        for _ in range(epochs):
            picked = np.random.choice(n_samples, batch_size, replace=False)
            yield X[picked], y[picked]

    def fit(self, X, y, batch_size, epochs=None, n_epoch=None):
        """Train from freshly initialised weights, one random batch per step.

        Args:
            X: input array, one sequence per row.
            y: one-hot target array aligned with ``X``.
            batch_size: samples per training step.
            epochs: number of training steps (one random batch each).
            n_epoch: alias for ``epochs`` matching the keyword ``build_model`` uses.

        Raises:
            ValueError: if neither ``epochs`` nor ``n_epoch`` is given.
        """
        if epochs is None:
            epochs = n_epoch
        if epochs is None:
            raise ValueError('fit() needs the number of epochs (epochs or n_epoch)')

        self.sess.run(self.init_op)
        for x_batch, y_batch in self.create_batches(X, y, batch_size, epochs):
            dict_ = {self.x: x_batch, self.y: y_batch,
                     self.keep_prob: 1.0 - self.dropout}
            _, loss, num = self.sess.run(
                [self.train_op, self.loss, self.global_step], dict_)
            print('num: {}; loss: {}'.format(num, loss))

    def predict(self, x, verbose=0):
        """Return the softmax probabilities for ``x``.

        Args:
            x: batch of input sequences.
            verbose: accepted for Keras-style call compatibility; unused.

        Returns:
            A one-element list holding the predictions array (the result of
            running ``[self.predictions]``).
        """
        dict_ = {self.x: x}
        return self.sess.run([self.predictions], dict_)
        

In [7]:
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
import tflearn

def get_pretty_tensor_model(max_len, N_values):
    """Build the TFLearn counterpart of ``get_keras_model``.

    Despite its name, this function uses TFLearn: two stacked 128-unit LSTM
    layers, each followed by dropout, then a softmax layer over ``N_values``,
    trained with RMSProp on categorical cross-entropy.

    Args:
        max_len: number of time steps per input sequence.
        N_values: one-hot width of every time step and number of classes.

    Returns:
        A ``tflearn.DNN`` whose ``predict`` ignores extra keyword arguments, so
        callers written for another API can pass them; the unwrapped method
        stays available as ``model.rpredict``.
    """
    # TFLearn's dropout() takes the probability of KEEPING a unit, whereas Keras
    # Dropout(0.2) drops 20% of them, so the equivalent keep probability is 0.8.
    keep_prob = 0.8

    in_ = input_data(shape=[None, max_len, N_values], name='input')
    layer1 = tflearn.layers.recurrent.lstm(in_, 128, return_seq=True)
    dropout1 = dropout(layer1, keep_prob)
    layer2 = tflearn.layers.recurrent.lstm(dropout1, 128)
    dropout2 = dropout(layer2, keep_prob)
    fc = fully_connected(dropout2, N_values, activation='softmax')

    reg = regression(fc, optimizer='rmsprop', loss='categorical_crossentropy')
    model = tflearn.DNN(reg)

    # Keep the original bound method and expose a wrapper that swallows
    # extra keyword arguments.
    model.rpredict = model.predict

    def predict_ignoring_kwargs(x, **kwargs):
        return model.rpredict(x)

    model.predict = predict_ignoring_kwargs
    return model

hdf5 is not supported on this machine (please install/reinstall h5py for optimal experience)


In [8]:
# Model factory used by build_model, called as get_model(max_len, N_values).
# NOTE(review): build_model calls fit(..., n_epoch=...); confirm that keyword is
# accepted before switching this to get_keras_model.
get_model = get_pretty_tensor_model # alternative: get_keras_model

In [9]:
import numpy as np

''' Build a 2-layer LSTM from a training corpus '''


def build_model(corpus, val_indices, max_len, N_epochs=128):
    """Vectorise the corpus into one-hot windows and train a model on them.

    Args:
        corpus: sequence of values (the training text).
        val_indices: mapping from each corpus value to its integer index.
        max_len: length of every input window.
        N_epochs: number of epochs forwarded to ``model.fit`` as ``n_epoch``.

    Returns:
        The model produced by the module-level ``get_model`` factory after
        ``fit`` has been called on it.
    """
    # number of different values or words in corpus
    N_values = len(set(corpus))

    # cut the corpus into semi-redundant sequences of max_len values;
    # each window is paired with the value that immediately follows it
    step = 3
    starts = range(0, len(corpus) - max_len, step)
    sentences = [corpus[i: i + max_len] for i in starts]
    next_values = [corpus[i + max_len] for i in starts]
    print('nb sequences:', len(sentences))

    # transform data into binary matrices
    # (builtin bool: the np.bool alias was removed in NumPy 1.24)
    X = np.zeros((len(sentences), max_len, N_values), dtype=bool)
    y = np.zeros((len(sentences), N_values), dtype=bool)
    for i, sentence in enumerate(sentences):
        for t, val in enumerate(sentence):
            X[i, t, val_indices[val]] = True
        y[i, val_indices[next_values[i]]] = True

    model = get_model(max_len, N_values)
    model.fit(X, y, batch_size=128, n_epoch=N_epochs)

    return model


In [10]:
# build model
model = build_model(corpus=corpus, val_indices=val_indices,
                    max_len=max_len, N_epochs=N_epochs)

# set up audio stream
out_stream = stream.Stream()

# generation loop: iterate over chords[1:], advancing the output offset by 4.0
# after each chord group
curr_offset = 0.0
loopEnd = len(chords)
for loopIndex in range(1, loopEnd):
    # get chords from file, re-based to offsets within [0, 4)
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)

    # generate grammar
    curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                      abstract_grammars=abstract_grammars,
                                      values=values, val_indices=val_indices,
                                      indices_val=indices_val,
                                      max_len=max_len, max_tries=max_tries,
                                      diversity=diversity)

    # rewrite ' A' and ' X' grammar tokens to ' C'
    curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')

    # Pruning #1: smoothing measure
    curr_grammar = prune_grammar(curr_grammar)

    # Get notes from grammar and chords
    curr_notes = unparse_grammar(curr_grammar, curr_chords)

    # Pruning #2: removing repeated and too close together notes
    curr_notes = prune_notes(curr_notes)

    # quality assurance: clean up notes
    curr_notes = clean_up_notes(curr_notes)

    # print # of notes in curr_notes (only note.Note instances are counted)
    n_notes = sum(1 for elem in curr_notes if isinstance(elem, note.Note))
    print('After pruning: %s notes' % n_notes)

    # insert into the output stream
    for m in curr_notes:
        out_stream.insert(curr_offset + m.offset, m)
    for mc in curr_chords:
        out_stream.insert(curr_offset + mc.offset, mc)

    curr_offset += 4.0

out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))


# Play the final stream through the real-time MIDI player
def play(x):
    return midi.realtime.StreamPlayer(x).play()


play(out_stream)

# save stream; close the file even if writing fails so the handle is not leaked
mf = midi.translate.streamToMidiFile(out_stream)
mf.open(out_fn, 'wb')
try:
    mf.write()
finally:
    mf.close()

nb sequences: 58
---------------------------------
Run id: P8D9YJ
Log directory: /tmp/tflearn_logs/
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
---------------------------------
Training samples: 58
Validation samples: 0
--
Training Step: 1  | time: 0.452s
| RMSProp | epoch: 001 | loss: 0.00000 -- iter: 58/58
--
Training Step: 2  | total loss: [1m[32m3.92221[0m[0m | time: 0.097s
| RMSProp | epoch: 002 | loss: 3.92221 -- iter: 58/58
--
Training Step: 3  | total loss: [1m[32m4.27740[0m[0m | time: 0.086s
| RMSProp | epoch: 003 | loss: 4.27740 -- iter: 58/58
--
Training Step: 4  | total loss: [1m[32m4.33599[0m[0m | time: 0.093s
| RMSProp | epoch: 004 | loss: 4.33599 -- iter: 58/58
--
Training Step: 5  | total loss: [1m[32m4.35001[0m[0m | time: 0.092s
| RMSProp | epoch: 

Training Step: 60  | total loss: [1m[32m4.34944[0m[0m | time: 0.095s
| RMSProp | epoch: 060 | loss: 4.34944 -- iter: 58/58
--
Training Step: 61  | total loss: [1m[32m4.34900[0m[0m | time: 0.089s
| RMSProp | epoch: 061 | loss: 4.34900 -- iter: 58/58
--
Training Step: 62  | total loss: [1m[32m4.34871[0m[0m | time: 0.090s
| RMSProp | epoch: 062 | loss: 4.34871 -- iter: 58/58
--
Training Step: 63  | total loss: [1m[32m4.34825[0m[0m | time: 0.087s
| RMSProp | epoch: 063 | loss: 4.34825 -- iter: 58/58
--
Training Step: 64  | total loss: [1m[32m4.34791[0m[0m | time: 0.087s
| RMSProp | epoch: 064 | loss: 4.34791 -- iter: 58/58
--
Training Step: 65  | total loss: [1m[32m4.34751[0m[0m | time: 0.088s
| RMSProp | epoch: 065 | loss: 4.34751 -- iter: 58/58
--
Training Step: 66  | total loss: [1m[32m4.34717[0m[0m | time: 0.114s
| RMSProp | epoch: 066 | loss: 4.34717 -- iter: 58/58
--
Training Step: 67  | total loss: [1m[32m4.34657[0m[0m | time: 0.120s
| RMSProp | epoch

Training Step: 123  | total loss: [1m[32m4.23295[0m[0m | time: 0.143s
| RMSProp | epoch: 123 | loss: 4.23295 -- iter: 58/58
--
Training Step: 124  | total loss: [1m[32m4.21211[0m[0m | time: 0.111s
| RMSProp | epoch: 124 | loss: 4.21211 -- iter: 58/58
--
Training Step: 125  | total loss: [1m[32m4.19223[0m[0m | time: 0.100s
| RMSProp | epoch: 125 | loss: 4.19223 -- iter: 58/58
--
Training Step: 126  | total loss: [1m[32m4.16782[0m[0m | time: 0.092s
| RMSProp | epoch: 126 | loss: 4.16782 -- iter: 58/58
--
Training Step: 127  | total loss: [1m[32m4.14222[0m[0m | time: 0.094s
| RMSProp | epoch: 127 | loss: 4.14222 -- iter: 58/58
--
Training Step: 128  | total loss: [1m[32m4.11545[0m[0m | time: 0.137s
| RMSProp | epoch: 128 | loss: 4.11545 -- iter: 58/58
--
After pruning: 16 notes
After pruning: 12 notes
After pruning: 14 notes
After pruning: 13 notes
After pruning: 12 notes
After pruning: 9 notes
After pruning: 15 notes
After pruning: 12 notes
After pruning: 14 notes

You can play the generated sample with any MIDI player.

On Linux I prefer timidity.

In [11]:
!! timidity midi/deepjazz_on_metheny...128_epochs.midi

['Check URL type=7',
 'Check URL type=2',
 'Check URL type=1',
 'open url (type=1, name=/usr/local/Cellar/timidity/2.14.0/share/timidity/timidity.cfg)',
 'url_file_open(/usr/local/Cellar/timidity/2.14.0/share/timidity/timidity.cfg)',
 'mmap - success. size=4625',
 'Check URL type=7',
 'Check URL type=2',
 'Check URL type=1',
 'open url (type=1, name=~/.timidity.cfg)',
 'url_file_open(~/.timidity.cfg)',
 'Playing midi/deepjazz_on_metheny...128_epochs.midi',
 'MIDI file: midi/deepjazz_on_metheny...128_epochs.midi',
 'Check URL type=7',
 'Check URL type=2',
 'Check URL type=1',
 'open url (type=1, name=midi/deepjazz_on_metheny...128_epochs.midi)',
 'url_file_open(midi/deepjazz_on_metheny...128_epochs.midi)',
 'Check URL type=7',
 'Check URL type=2',
 'Check URL type=1',
 'open url (type=1, name=/usr/local/Cellar/timidity/2.14.0/share/timidity/midi/deepjazz_on_metheny...128_epochs.midi)',
 'url_file_open(/usr/local/Cellar/timidity/2.14.0/share/timidity/midi/deepjazz_on_metheny...128_epochs