In [46]:
import json
import os
import numpy as np
import pandas as pd
from keras.layers import Activation, TimeDistributed, LSTM, Dropout, Dense, Embedding
from keras.models import Sequential

In [53]:
# Locations: the input text and the character-index JSON are opened relative
# to data_dir; weight checkpoints are written into weights_dir.
data_dir, weights_dir = "../Data2/", "../Data2/weights"
data_file, json_file = "music_input.txt", "char_index.json"

# Batch geometry: sequences per batch, and characters per sequence.
batch_size, seq_len = 16, 64

# Dividing Data into Batches

In [48]:
def batches(all_chars, unique_chars) :
    """Yield successive (x, y) training batches cut from an encoded text.

    The array is treated as `batch_size` contiguous rows of equal length
    (characters left over after the last full row are ignored). Every yielded
    batch takes the next `seq_len` characters from each row, so row r of one
    batch continues exactly where row r of the previous batch stopped.

    The module-level `batch_size` and `seq_len` are read at call time.

    Parameters
    ----------
    all_chars : numpy.ndarray
        1-D integer array of character indices (values are used to index the
        one-hot axis, so they must lie in [0, unique_chars)).
    unique_chars : int
        Size of the one-hot axis of `y`.

    Yields
    ------
    x : numpy.ndarray, float64, shape (batch_size, seq_len)
        Input character indices.
    y : numpy.ndarray, float64, shape (batch_size, seq_len, unique_chars)
        One-hot encoding of the character that follows each input position.
    """
    total_chars = all_chars.shape[0]
    # Number of characters assigned to each of the batch_size rows.
    chars_per_row = total_chars // batch_size

    row_ids = np.arange(batch_size)[:, None]   # shape (batch_size, 1)
    col_ids = np.arange(seq_len)[None, :]      # shape (1, seq_len)
    row_starts = row_ids * chars_per_row

    # Stopping at chars_per_row - seq_len keeps the "+ 1" target lookup of the
    # last row inside the array.
    for start in range(0, chars_per_row - seq_len, seq_len) :
        positions = row_starts + start + col_ids   # (batch_size, seq_len)

        x = all_chars[positions].astype(np.float64)

        y = np.zeros((batch_size, seq_len, unique_chars))
        # The target at each position is the next character in the text.
        y[row_ids, col_ids, all_chars[positions + 1]] = 1

        yield x, y
    

# Model Building

In [49]:
def model_build(batch_size, seq_len, unique_chars, pretrained_weights = "../Data1/weights/Weights_80.h5") :
    """Build the 3-layer stateful LSTM character model, optionally warm-started.

    Architecture: Embedding(512) -> 3 x [LSTM(256) + Dropout(0.2)] ->
    TimeDistributed(Dense(unique_chars)) -> softmax.

    Parameters
    ----------
    batch_size : int
        Fixed batch size; passed in `batch_input_shape` because the LSTM
        layers are created with `stateful = True`.
    seq_len : int
        Number of time steps per input sequence.
    unique_chars : int
        Vocabulary size (embedding input dimension and output width).
    pretrained_weights : str or None, optional
        Path of an HDF5 weights file loaded with `by_name = True`. Pass None
        to start from freshly initialised weights. The default is the
        checkpoint this notebook originally hard-coded.

    Returns
    -------
    keras.models.Sequential
        The uncompiled model.
    """
    model = Sequential()

    # The explicitly named layers ("embd_1", "lstm_first", "drp_1") are the
    # ones a by-name weight load can match against the checkpoint.
    model.add(Embedding(input_dim = unique_chars, output_dim = 512, batch_input_shape = (batch_size, seq_len), name = "embd_1"))

    model.add(LSTM(256, return_sequences = True, stateful = True, name = "lstm_first"))
    model.add(Dropout(0.2, name = "drp_1"))

    # Two further LSTM/Dropout pairs with auto-generated names, created in the
    # same order as before.
    for _ in range(2) :
        model.add(LSTM(256, return_sequences = True, stateful = True))
        model.add(Dropout(0.2))

    model.add(TimeDistributed(Dense(unique_chars)))
    model.add(Activation("softmax"))

    if pretrained_weights is not None :
        model.load_weights(pretrained_weights, by_name = True)

    return model

# Training Model

In [50]:
def train(data, epochs = 100) :
    """Train the character-level model on `data` and log per-epoch metrics.

    Steps:
      1. Build a character -> index mapping from the distinct symbols in
         `data` and save it as JSON (`json_file` inside `data_dir`).
      2. Build the model with `model_build` and compile it.
      3. For every epoch, feed all batches from `batches` through
         `train_on_batch`; the loss and accuracy of the epoch's last batch
         are recorded.
      4. Every 10th epoch, save the weights into `weights_dir`.
      5. Write the epoch log to `log.csv` inside `data_dir`.

    Parameters
    ----------
    data : str
        The full training text.
    epochs : int, optional
        Number of passes over the data (default 100).

    Raises
    ------
    ValueError
        If `batches` yields nothing for `data` (text too short for the
        configured `batch_size` and `seq_len`).
    """
    char_ind = {ch : i for (i, ch) in enumerate(sorted(set(data)))}

    with open(os.path.join(data_dir, json_file), mode = "w") as f :
        json.dump(char_ind, f)

    unique_chars = len(char_ind)

    model = model_build(batch_size, seq_len, unique_chars)
    model.summary()
    model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])

    all_chars = np.asarray([char_ind[ch] for ch in data], dtype = np.int32)

    epoch_no, loss, accuracy = [], [], []

    for index in range(epochs) :
        # Each epoch restarts from the beginning of the text, so the state the
        # stateful LSTMs carried over from the end of the previous epoch no
        # longer matches the input and is cleared.
        model.reset_states()

        final_loss, final_accuracy = None, None
        for x, y in batches(all_chars, unique_chars) :
            final_loss, final_accuracy = model.train_on_batch(x, y)

        if final_loss is None :
            # Previously this surfaced as a NameError when logging the loss.
            raise ValueError(
                "No training batches were produced: {} characters is too short "
                "for batch_size={} and seq_len={}".format(len(data), batch_size, seq_len)
            )

        epoch_no.append(index + 1)
        loss.append(final_loss)
        accuracy.append(final_accuracy)

        if (index + 1) % 10 == 0 :
            os.makedirs(weights_dir, exist_ok = True)
            model.save_weights(os.path.join(weights_dir, "Weights_{}.h5".format(index+1)))
            print('Saved Weights at epoch {} to file Weights_{}.h5'.format(index+1, index+1))

    log_frame = pd.DataFrame(
        {"Epoch" : epoch_no, "Loss" : loss, "Accuracy" : accuracy},
        columns = ["Epoch", "Loss", "Accuracy"],
    )
    # Written next to the input data so it can be found again through data_dir.
    log_frame.to_csv(os.path.join(data_dir, "log.csv"), index = False)
            

In [51]:
# Read the whole training text into memory. The context manager closes the
# handle even if reading raises, which the manual open/close pair did not.
with open(os.path.join(data_dir, data_file), mode = "r") as file :
    data = file.read()

# Kick off training with the default number of epochs.
train(data)

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embd_1 (Embedding)           (16, 64, 512)             44544     
_________________________________________________________________
lstm_first (LSTM)            (16, 64, 256)             787456    
_________________________________________________________________
drp_1 (Dropout)              (16, 64, 256)             0         
_________________________________________________________________
lstm_12 (LSTM)               (16, 64, 256)             525312    
_________________________________________________________________
dropout_12 (Dropout)         (16, 64, 256)             0         
_________________________________________________________________
lstm_13 (LSTM)               (16, 64, 256)             525312    
_________________________________________________________________
dropout_13 (Dropout)         (16, 64, 256)            

In [52]:
# Load the per-epoch training log from the data directory and display it as
# the cell's output.
log = pd.read_csv(
    filepath_or_buffer = os.path.join(data_dir, "log.csv"),
)
log

Unnamed: 0,Epoch,Loss,Accuracy
0,1,2.471921,0.320312
1,2,1.887096,0.472656
2,3,1.622462,0.528320
3,4,1.449580,0.561523
4,5,1.323585,0.586914
...,...,...,...
95,96,0.234081,0.920898
96,97,0.251157,0.918945
97,98,0.283458,0.906250
98,99,0.255545,0.924805
