In [None]:
# LSTM Ticket to Write
# This notebook trains an LSTM neural network on an input corpus read from a text file.
# Training has been done on 2 corpora:
# 1) the complete set of Beatles lyrics and 2) a collection of sheet music of folk tunes in .abc file format.
# Once the model is trained, its architecture is saved to a .yaml file and its weights to a .h5 file.
# The model is run by providing it with a seeding text (read from a file), from which it generates an output.
# Due to the structured nature of sheet music,
# the model trained more than 2x as fast (100 epochs vs 250) on a corpus 2x larger for sheet music vs lyrics.

# Basics - importing standard libraries
# Please note: the imports that are key to this project are the ones under the "# keras" comment below
from __future__ import print_function, division
import pandas as pd 
import numpy as np
import random
from matplotlib import pyplot as plt
%matplotlib inline

# nltk
from nltk.corpus import gutenberg

# keras
np.random.seed(13)
from keras.models import Sequential
from keras.layers import Dense, Embedding, Reshape, Activation, SimpleRNN, GRU, LSTM, Convolution1D, MaxPooling1D, Merge, Dropout
from IPython.display import SVG
from keras.utils import np_utils
from keras.utils.data_utils import get_file
from keras.preprocessing.text import Tokenizer
#import pydot
#from keras.utils.visualize_util import plot,model_to_dot
from keras.utils import plot_model
from keras.datasets import imdb, reuters
from keras.preprocessing import sequence
from keras.optimizers import SGD, RMSprop

# Configure the root logger: INFO level, messages formatted as "time : level : message".
# NOTE(review): the original comment said this was for gensim, but gensim is not
# imported in the visible cells - confirm whether this setup is still needed.
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [None]:
! THEANO_FLAGS=floatX=float32,device=gpu0,lib.cnmem=0.8,nvcc.flags=-D_FORCE_INLINES,dnn.enabled=True

In [2]:
import theano

In [None]:
# Echo the active Theano settings that THEANO_FLAGS is meant to control
theano_cfg = theano.config
print(theano_cfg.floatX)
print(theano_cfg.device)
print(theano_cfg.lib.cnmem)
print(theano_cfg.nvcc)
print(theano_cfg.dnn)

In [None]:
# Create a Keras Sequential model
# NOTE: this `model` is only a warm-up example; the name is rebound to the LSTM
# network when the training model is built further down.
model = Sequential()

# Specify the network architecture: 100 inputs -> 64 units + ReLU -> 10 units + softmax.
# The layer width is passed positionally (as the LSTM/Dense layers further down
# already do) instead of via the `output_dim` keyword, which Keras 2 renamed to
# `units` and only accepts with a deprecation warning.
model.add(Dense(64, input_dim=100))
model.add(Activation("relu"))
model.add(Dense(10))
model.add(Activation("softmax"))

In [None]:
import sys
import re

# Load the whole training corpus into memory as one string
with open('/home/ubuntu/input_text/hc-bd8-pp.abc',encoding="latin-1") as f:
    text = f.read()

print('corpus length:', len(text))

# Vocabulary: every distinct character of the corpus, in sorted order
chars = sorted(set(text))
print('total chars:', len(chars))
# Lookup tables in both directions: vocabulary index -> character, character -> index
indices_char = dict(enumerate(chars))
char_indices = {c: i for i, c in indices_char.items()}

# Slice the corpus into overlapping windows of maxlen characters, one window every
# `step` characters; the character right after each window is its prediction target.
# maxlen is tuned per corpus: 32 for the Beatles lyrics, 64 for the .abc sheet music.
maxlen = 64
step = 3
window_starts = range(0, len(text) - maxlen, step)
# sentences[k] is the k-th window, next_chars[k] the character that follows it
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode the training data:
#   X[i, t, k] is set when character t of window i has vocabulary index k
#   y[i, k]    is set when the character following window i has vocabulary index k
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and is missing from NumPy 1.24-1.26, where it raises AttributeError.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        # X : num_sentences x num_chars_in_sentence x 'vocab'(chars)
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# Network definition: one 128-unit LSTM reading the one-hot windows, then a dense
# layer with one output per vocabulary character, normalised by a softmax
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# Train with RMSprop against the one-hot next-character targets
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [None]:
# Sanity check: show the first two training windows, each followed by its target character

for idx in (0, 1):
    print("+", sentences[idx], "+", next_chars[idx])



In [21]:
def sample(preds, temperature=1.0):
    """Sample one index from a probability array after temperature re-scaling.

    The probabilities are raised to the power ``1 / temperature`` and
    re-normalised, then a single index is drawn from the resulting
    distribution. Temperatures below 1 sharpen the distribution towards its
    most likely entry; temperatures above 1 flatten it.

    Parameters
    ----------
    preds : array-like of float
        1-D array of probabilities. Entries equal to 0 are never drawn.
    temperature : float, optional
        Positive scaling factor (default 1.0, which leaves ``preds`` unchanged).

    Returns
    -------
    numpy integer
        Index of the sampled entry.

    Raises
    ------
    ValueError
        If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive, got %r" % (temperature,))

    preds = np.asarray(preds).astype('float64')
    # log(0) = -inf is the intended result for zero-probability entries
    # (they end up with probability 0 again), so silence the warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0: at low temperatures every exp() could
    # otherwise underflow to 0 and the normalisation would become 0 / 0 = NaN.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial trial gives a one-hot row; argmax recovers the drawn index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


In [None]:
# Run a test model fit with 1 epoch
# `epochs` is the Keras 2 name of this argument; the old `nb_epoch` spelling is
# deprecated (this notebook already relies on Keras 2 via `keras.utils.plot_model`).
model.fit(X, y, batch_size=128, epochs=1)

In [None]:
from keras.models import model_from_yaml
# We'll save our model down in a yaml file

# Run the model
# `epochs` is the Keras 2 name of this argument; the old `nb_epoch` spelling is deprecated.
# NOTE: `model` is the same object the 1-epoch test fit was run on, so if that
# cell was executed these 100 epochs continue from its weights.
model.fit(X, y, batch_size=128, epochs=100)

# save model as YAML file (architecture only)
model_yaml = model.to_yaml()
with open("/home/ubuntu/64-3-128-E100-run1_model.yaml", "w") as yaml_file:
    yaml_file.write(model_yaml)
# save weights (stored separately from the architecture)
model.save_weights("/home/ubuntu/64-3-128-E100-run1_model.h5")
print("Saved 64-3-128-music model run1 to disk")

In [None]:
# Open the file with the seeding text
start_index=1
print ("starting index at", start_index)
# NOTE(review): the training corpus was read as latin-1, while the seed file is read
# with the platform default encoding - confirm the two agree for non-ASCII characters.
with open('/home/ubuntu/input_music/seedfile.abc') as f:
    input_text = f.read()

print("read input text",input_text)

outputfile=('/home/ubuntu/outputmusic-1.abc')

# Run model for range of diversity values.
# The diversity dials up the "creativity" of the model output
# The output file is opened in a `with` block so the handle is always closed,
# even when generation fails part-way through (it was previously never closed).
with open(outputfile, 'w') as target:
    for diversity in [0.1,0.14,0.2,0.5,0.8]:
        target.write("\n")
        output_text='----- diversity:' + str(diversity)
        target.write(output_text)
        generated = ''
        # Seed window: up to maxlen characters of the seed text, starting at start_index
        sentence = input_text[start_index: start_index + maxlen]

        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        output_text='----- Generating with seed: "' + sentence + '"'
        target.write(output_text)
        print('generated',generated)
        target.write(generated)

        # Generate maxlen*8 characters, one model prediction per character
        for i in range(maxlen*8):
            # One-hot encode the current window; a character that is not in the
            # training vocabulary raises KeyError here
            x = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x[0, t, char_indices[char]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            # Slide the window forward by one character
            sentence = sentence[1:] + next_char

            target.write(next_char)
            # Flush after every character so the file on disk stays current
            target.flush()
        target.write("\n")