In [77]:
import pandas as pd
import numpy as np
import os
import json
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Activation, Embedding, Dense, TimeDistributed

In [78]:
# Notebook-wide configuration.
# Every artefact lives under data_dir; weights_dir is derived from it so the
# base path is spelled out in exactly one place.
data_dir = "../Data1/"
data_file = "music_input.txt"      # training text, read from data_dir
json_file = "char_index.json"      # character -> index map, written to data_dir
weights_dir = os.path.join(data_dir, "weights/")
batch_size = 16                    # sequences per training batch
seq_len = 64                       # time steps per sequence


# Dividing Data into Batches

In [79]:
def batches(all_chars, unique_chars, rows = None, steps = None) :
  """Yield consecutive (x, y) training batches cut from an encoded character stream.

  The stream is split into `rows` equal, contiguous slices (one per batch
  row). Every yielded batch advances each row by `steps` characters, so row r
  of one batch continues exactly where row r of the previous batch stopped.

  Args:
    all_chars: 1-D NumPy integer array of character indices.
    unique_chars: vocabulary size; width of the one-hot target axis.
    rows: sequences per batch. Defaults to the module-level `batch_size`.
    steps: time steps per sequence. Defaults to the module-level `seq_len`.

  Yields:
    x: float array of shape (rows, steps) holding the input character indices.
    y: float array of shape (rows, steps, unique_chars); one-hot encoding of
       the character that follows each input character.
  """
  # Fall back to the notebook configuration only when the caller gave no size.
  rows = batch_size if rows is None else rows
  steps = seq_len if steps is None else steps

  chars_per_row = all_chars.shape[0] // rows

  # Index grids reused by every batch; broadcasting them gives (rows, steps).
  row_idx = np.arange(rows)[:, None]
  step_idx = np.arange(steps)[None, :]
  row_starts = chars_per_row * row_idx

  # Stopping before chars_per_row - steps keeps the "+ 1" target lookup of the
  # last row inside the array.
  for offset in range(0, chars_per_row - steps, steps) :
    positions = row_starts + offset + step_idx

    x = np.zeros((rows, steps))
    y = np.zeros((rows, steps, unique_chars))

    x[:] = all_chars[positions]
    # The target for each position is the next character in the stream.
    y[row_idx, step_idx, all_chars[positions + 1]] = 1

    yield x, y

# Model Building

In [80]:
def model_build(batch_size, seq_len, unique_chars) :
  """Assemble the two-layer stateful LSTM network for next-character prediction.

  Layer stack, in order: Embedding (unique_chars -> 512), LSTM(256),
  Dropout(0.2), LSTM(128), Dropout(0.2), a Dense projection to unique_chars
  applied at every time step, and a softmax over the vocabulary.

  Args:
    batch_size: fixed number of sequences per batch; baked into the input
      shape because the LSTM layers are stateful.
    seq_len: number of time steps per sequence.
    unique_chars: vocabulary size (embedding input size and output width).

  Returns:
    The un-compiled Sequential model.
  """
  layers = [
    Embedding(input_dim = unique_chars, output_dim = 512, batch_input_shape = (batch_size, seq_len)),
    LSTM(256, return_sequences = True, stateful = True),
    Dropout(0.2),
    LSTM(128, return_sequences = True, stateful = True),
    Dropout(0.2),
    TimeDistributed(Dense(unique_chars)),
    # Keras activation identifiers are lower-case; 'Softmax' only resolves on
    # some Keras versions, 'softmax' is the documented name.
    Activation('softmax'),
  ]

  model = Sequential()
  for layer in layers :
    model.add(layer)

  return model
  

# Training Model

In [81]:
def train(data, epochs = 90) :
  """Train the character-level LSTM on `data` and persist its artefacts.

  Side effects:
    * writes the character -> index mapping to `json_file` inside `data_dir`;
    * saves the model weights into `weights_dir` after every 10th epoch;
    * writes a per-epoch log (Epoch, Loss, Accuracy) to log.csv in `data_dir`
      and prints its first rows.

  Args:
    data: the training text; each distinct element becomes one vocabulary entry.
    epochs: number of full passes over the data.
  """
  # Sorting makes the character -> index assignment deterministic.
  char_ind = {ch : idx for (idx, ch) in enumerate(sorted(set(data)))}

  with open(os.path.join(data_dir, json_file), mode = "w") as f :
    json.dump(char_ind, f)

  unique_chars = len(char_ind)

  model = model_build(batch_size, seq_len, unique_chars)
  model.summary()

  model.compile(loss="categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])

  all_chars = np.asarray([char_ind[ch] for ch in data], dtype = np.int32 )

  epoch_no, loss, accuracy = [], [], []

  for index in range(epochs) :
    # The LSTM layers are stateful and every epoch starts again at the
    # beginning of the data, so the state carried over from the end of the
    # previous epoch belongs to a different position and must be discarded.
    model.reset_states()

    # Only the metrics of the epoch's last batch are logged (0 if no batches).
    final_epoch_loss, final_epoch_acc = 0, 0
    epoch_no.append(index+1)

    for x, y in batches(all_chars, unique_chars) :
      final_epoch_loss, final_epoch_acc = model.train_on_batch(x,y)

    loss.append(final_epoch_loss)
    accuracy.append(final_epoch_acc)

    if (index+1) % 10 == 0 :
      # exist_ok avoids the check-then-create race of a separate exists() test.
      os.makedirs(weights_dir, exist_ok = True)
      model.save_weights(os.path.join(weights_dir, "Weights_{}.h5".format(index+1)))
      print('Saved weights at epoch {} to file Weights_{}.h5'.format(index+1,index+1))

  log_frame = pd.DataFrame({"Epoch" : epoch_no, "Loss" : loss, "Accuracy" : accuracy})
  print(log_frame.head())

  # Build the path from data_dir so it always matches where the log is read back.
  log_frame.to_csv(os.path.join(data_dir, "log.csv"), index = False)



In [82]:
# Read the whole training text, then train with the default number of epochs.
# The context manager closes the file even if read() raises.
with open(os.path.join(data_dir, data_file), mode = 'r') as file :
  data = file.read()
train(data)

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_16 (Embedding)     (16, 64, 512)             44544     
_________________________________________________________________
lstm_32 (LSTM)               (16, 64, 256)             787456    
_________________________________________________________________
dropout_32 (Dropout)         (16, 64, 256)             0         
_________________________________________________________________
lstm_33 (LSTM)               (16, 64, 128)             197120    
_________________________________________________________________
dropout_33 (Dropout)         (16, 64, 128)             0         
_________________________________________________________________
time_distributed_16 (TimeDis (16, 64, 87)              11223     
_________________________________________________________________
activation_16 (Activation)   (16, 64, 87)            

In [83]:
# Load log.csv from data_dir again and print it.
log = pd.read_csv(
  os.path.join(data_dir, "log.csv")
)
print(log)

    Epoch      Loss  Accuracy
0       1  2.913778  0.245117
1       2  2.268776  0.374023
2       3  1.889107  0.472656
3       4  1.651544  0.528320
4       5  1.534033  0.544922
..    ...       ...       ...
85     86  0.508480  0.830078
86     87  0.476736  0.846680
87     88  0.477651  0.851562
88     89  0.484024  0.847656
89     90  0.464463  0.846680

[90 rows x 3 columns]
