In [41]:
import numpy as np
import re
import pickle
import h5py
from random import sample
import tensorflow as tf
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from keras.layers import Input
from keras.layers import LSTM, Embedding
from tensorflow.keras import layers
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras import backend as K
from feature_funcs import *
from keras.utils import to_categorical

### Prepare Training Data

In [53]:
# Create dictionary
with open("../data/Jigs.txt") as my_file:
    abc_text = my_file.read()

# Cut out unnecessary backslashes
abc_text = re.sub('\\\\+\n', '\n', abc_text)

# Find starting index of the data we care about
start_ind = abc_text.find("X:")
abc_text = abc_text[start_ind:]

# Encode data
num_to_char, char_to_num = create_dictionaries(abc_text)

In [45]:
# Open pickled training data so you don't have to re-run create_training
x_file_pickle = open('../data/x_train_pickle.obj', 'rb')
y_file_pickle = open('../data/y_train_pickle.obj', 'rb')

x_train = pickle.load(x_file_pickle)
y_train = pickle.load(y_file_pickle)

x_file_pickle.close()
y_file_pickle.close()

vocab_length = x_train.shape[2]
vocab_length

88

In [46]:
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)
print(vocab_length)

(90871, 200, 88) (38746, 200, 88)
(90871, 88) (38746, 88)
88


### Build & Compile RNN

Sources: <br />
https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5  <br />
https://github.com/aamini/introtodeeplearning/blob/master/lab1/Part2_Music_Generation.ipynb  <br /> https://medium.com/datadriveninvestor/music-generation-using-deep-learning-85010fb982e2 
<br /> https://keras.io/examples/lstm_text_generation/

In [47]:
model = tf.keras.Sequential()
model.add(layers.LSTM(128, input_shape=(x_train.shape[1], x_train.shape[2]), return_sequences=True))
model.add(layers.LSTM(256, return_sequences=True))
model.add(layers.LSTM(512))
model.add(layers.Dense(vocab_length, activation='softmax'))

model.summary()


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 200, 128)          111104    
_________________________________________________________________
lstm_10 (LSTM)               (None, 200, 256)          394240    
_________________________________________________________________
lstm_11 (LSTM)               (None, 512)               1574912   
_________________________________________________________________
dense_3 (Dense)              (None, 88)                45144     
Total params: 2,125,400
Trainable params: 2,125,400
Non-trainable params: 0
_________________________________________________________________


In [48]:
# rmsprop: Divide the learning rate for a weight by a running average of the
# magnitues of the recent gradients for that weight
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

### Train RNN
Epoch: When the Neural Network sees all of the training data <br />
Batch: Subset of the data <br />
i.e. If you have 1000 data points, your batch size is 500 and you want 1 epoch, then the NN will do 2 iterations.

In [49]:
# Use checkpoints to save training weights before the model finishes training
# Using this file path, the model checkpoints will be saved with the epoch number 
# and the validation loss in the filename.
weights_filepath = "weights.hdf5"

checkpoint = ModelCheckpoint(
    weights_filepath, monitor="loss", verbose=0,
    save_best_only=True, mode="min")

callbacks_list = [checkpoint]

# Fit model
model.fit(x_train, y_train, epochs=1, batch_size=400, callbacks=callbacks_list)

Train on 90871 samples


<tensorflow.python.keras.callbacks.History at 0xa483b1f50>

In [None]:
# Continue training model
model.load_weights("weights.hdf5")
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# Fit model
model.fit(x_train, y_train, epochs=3, batch_size=200, callbacks=callbacks_list)

Train on 90871 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

In [50]:
# Open pickled test data so you don't have to re-run create_training
x_file_pickle = open('../data/x_test_pickle.obj', 'rb')
y_file_pickle = open('../data/y_test_pickle.obj', 'rb')

x_test = pickle.load(x_file_pickle)
y_test = pickle.load(y_file_pickle)

x_file_pickle.close()
y_file_pickle.close()

In [51]:
predictions = model.predict(x_test)
predictions

array([[8.49601493e-05, 1.05421723e-05, 5.40875364e-04, ...,
        1.23440474e-01, 7.98791880e-05, 6.57669501e-04],
       [1.06582935e-04, 1.03671200e-05, 5.20085916e-04, ...,
        1.17945693e-01, 9.87423264e-05, 8.57702398e-04],
       [1.13948816e-04, 1.03730554e-05, 3.18021193e-04, ...,
        6.06991574e-02, 1.13425369e-04, 8.91434145e-04],
       ...,
       [1.08138185e-04, 9.71624831e-06, 2.94623896e-04, ...,
        2.74599660e-02, 1.23158650e-04, 6.97774871e-04],
       [9.29513844e-05, 1.06994094e-05, 2.85862305e-04, ...,
        8.73242319e-03, 1.42791905e-04, 4.19296703e-04],
       [8.33730301e-05, 1.04457440e-05, 4.07621934e-04, ...,
        6.61629252e-03, 1.50311811e-04, 3.17616941e-04]], dtype=float32)

In [54]:
text_predictions = decoder(predictions, num_to_char)
print(text_predictions)

ddddddd"eeddd""dldddd""edRRd"""dddRdd"eedddd""eddddddddddddddd""eeddddddddd""eeedddddddddd"eedddddddd"eeddddddddd""eeddddsdd""edddddddddddddd""eeddddddd""eedd"""dddddddddddRdd"eeddddd"dlddddd""eedddd""dddddd"eeddddNNNNNNNeeeeeeeeeeeeeee[[[[[[[[[HHH)rrrreezzzXXXffffffcc&&&&eeeeeeeeeeeeeeee))))eeeeee~~BBBBBB4555NNNNddddd""eeddd""dldddd"eddddddddd""edRdddddd""eedddddddd""eddd"""ddddddd"eedddddddd"eedddddddd""eedddddddd""edddd""dddddd"eedddddddd"eedRddddsddd"eeddddddd"eeddd""dldddd"eeddddddddd"eedddd""dddddd"edddd""ddddddsdd""eeddddddd""eddddddddd"eedddddddddddd"eedddddddd""edddd""ddddddd"eedddddddd""eeddddddddd""eedddddddd"eedddddddd"eedddddddddd"eeddddd"dldddd""eedddddddddd""eeddddd"dldRdd"eeddddddddd"eeddddddddddddd"eedddNNNNNNNNeeeeeeeeeeeeee[[[[[[[[[[[[H)))rrrrezzzzXXXffffffcc&&&&feeeeeeee)))eeBBBBB555NNNNdddddd""ee"dd""dldRdd"eedRd"""dddddd"eeRRd"""dddRdd"edRdd""dldddd"edRRd"""ddddsdd"eeddddddd""eddd""dldddd"eddddRddd"eedddddddd"eedddddsddd"eedddddddd""eddddddd""edddddddd""eddddddd""