In [1]:
import os
import json
from keras.models import Sequential
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding
import numpy as np
import pandas as pd
from matplotlib import pyplot

Using TensorFlow backend.


In [8]:
# Initialize paths for data input and weights output
data_dir = "../Data/"
data_file = "Nottingham_Jigs_Hornpipes.txt"
save_weights_dir = '../Trained_Weights/Weights_Model_final/'
log_dir = "../Data/log.csv"
charToIndex_json = "char_to_index.json"

transfer_weights_path = "../Trained_Weights/Weights_Model1/Weights_70.h5"
# Parameters
BATCH_SIZE = 16
SEQ_LENGTH = 64

In [9]:
# Function used to create the batches
def get_batches(chars, unique_chars):
    char_no = chars.shape[0] # number of characters in the data
    batch_chars = int(char_no / BATCH_SIZE)
    
    # outer loop iterates every time a new batch is created
    for start in range(0, batch_chars - SEQ_LENGTH, SEQ_LENGTH):
        # number of batches wil be char_no/(BATCH_SIZE * SEQ_LENGTH)
        X = np.zeros((BATCH_SIZE, SEQ_LENGTH))  
        Y = np.zeros((BATCH_SIZE, SEQ_LENGTH, unique_chars))
        # iterates over rows in a batch
        for batch_row in range(0, BATCH_SIZE):             # iterates over columns in a batch
            for i in range(0, SEQ_LENGTH):  #it denotes each column in a batch. Each column represents each character means 
                #each time-step character in a sequence.
                X[batch_row, i] = chars[batch_row * batch_chars + start + i]
                Y[batch_row, i, chars[batch_row * batch_chars + start + i + 1]] = 1 
                    # by 1 we mark that the next character in the sequence is the correct one
        yield X, Y

SyntaxError: invalid syntax (<ipython-input-9-e5ca90ff46e1>, line 12)

In [None]:
# added 2 more LSTM layers
# loading previously computed weights - transfer learning
def build_model(batch_size, seq_length, unique_chars):
    model = Sequential()
    
    # inputs have to be the same length which is achieved when creating batches
    # input dimension will be the number of unique characters in the training data
    # output-dimention needs more validation - 8?
    model.add(Embedding(input_dim = unique_chars, output_dim = 512, batch_input_shape = (batch_size, seq_length), name = "embd_1")) 
    
    model.add(LSTM(256, return_sequences = True, stateful = True, name = "lstm_first"))
    model.add(Dropout(0.2, name = "drp_1"))
    
    model.add(LSTM(256, return_sequences = True, stateful = True))
    model.add(Dropout(0.2))
    
    model.add(LSTM(256, return_sequences = True, stateful = True))
    model.add(Dropout(0.2))
    
    model.add(TimeDistributed(Dense(unique_chars)))
    model.add(Activation("softmax"))
    
    model.load_weights(transfer_weights_path, by_name = True)
    
    return model

In [13]:
def train_model(data, epochs = 70):
    
    # Mapping all unique characters to an index
    char_to_index = {char: in for (in, char) in enumerate(sorted(list(set(data))))}
    print("Unique characters in the training data = {}".format(len(char_to_index)))  
    # Saved the mapping in a json file
    with open(os.path.join(data_dir, charToIndex_json), mode = "w") as f:
        json.dump(char_to_index, f)
        
    index_to_char = {i: ch for (ch, i) in char_to_index.items()}
    unique_chars = len(char_to_index)
    
    # Build the model
    model = build_model(BATCH_SIZE, SEQ_LENGTH, unique_chars)
    model.summary()
    # multi-class classification problem - using Categorical Cross entropy as loss function
    model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])
    
    characters = np.asarray([char_to_index[c] for c in data], dtype = np.int32)
    print("Total number of characters = "+str(characters.shape[0])) #155222
    
    epoch_number, loss, accuracy = [], [], []
    
    for epoch in range(epochs):
        print("Epoch {}/{}".format(epoch+1, epochs))
        final_epoch_loss, final_epoch_accuracy = 0, 0
        epoch_number.append(epoch+1)
        
        for i, (x, y) in enumerate(get_batches(characters, unique_chars)):
            final_epoch_loss, final_epoch_accuracy = model.train_on_batch(x, y) #check documentation of train_on_batch here: https://keras.io/models/sequential/
            print("Batch: {}, Loss: {}, Accuracy: {}".format(i+1, final_epoch_loss, final_epoch_accuracy))
            #here, above we are reading the batches one-by-one and train our model on each batch one-by-one.
        loss.append(final_epoch_loss)
        accuracy.append(final_epoch_accuracy)
        
        #saving weights after every 10 epochs
        if (epoch + 1) % 10 == 0:
            if not os.path.exists(save_weights_dir):
                os.makedirs(save_weights_dir)
            model.save_weights(os.path.join(save_weights_dir, "Weights_{}.h5".format(epoch+1)))
            print('Saved Weights at epoch {} to file Weights_{}.h5'.format(epoch+1, epoch+1))
    
    #creating dataframe and record all the losses and accuracies at each epoch
    log_frame = pd.DataFrame(columns = ["Epoch", "Loss", "Accuracy"])
    log_frame["Epoch"] = epoch_number
    log_frame["Loss"] = loss
    log_frame["Accuracy"] = accuracy
    log_frame.to_csv(log_dir, index = False)
    
    # Accuracy Plot
    pyplot.plot(accuracy, epoch_number)
    pyplot.show()

In [None]:
file = open(os.path.join(data_dir, data_file), mode = 'r')
data = file.read()
file.close()
if __name__ == "__main__":
    training_model(data)

Number of unique characters in our whole tunes database = 87
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (16, 64, 512)             44544     
_________________________________________________________________
lstm_5 (LSTM)                (16, 64, 256)             787456    
_________________________________________________________________
dropout_5 (Dropout)          (16, 64, 256)             0         
_________________________________________________________________
lstm_6 (LSTM)                (16, 64, 128)             197120    
_________________________________________________________________
dropout_6 (Dropout)          (16, 64, 128)             0         
_________________________________________________________________
time_distributed_3 (TimeDist (16, 64, 87)              11223     
_________________________________________________________________
activation_3 (A

In [15]:
log = pd.read_csv(log_dir)
log

Unnamed: 0,Epoch,Loss,Accuracy
0,1,2.669647,0.284180
1,2,1.846643,0.505859
2,3,1.550365,0.552734
3,4,1.423734,0.578125
4,5,1.316762,0.601562
5,6,1.283447,0.610352
6,7,1.196602,0.627930
7,8,1.147893,0.645508
8,9,1.083460,0.652344
9,10,1.070172,0.669922
