In [1]:
import os 
import json
import argparse
import numpy as np

In [2]:
data_dir = '/content/drive/MyDrive/Colab_Notebooks/Case_Studies/Music_Generation/data'
log_dir = '/content/drive/MyDrive/Colab_Notebooks/Case_Studies/Music_Generation/logs'


In [3]:
batch_size = 16
seq_length = 64

In [11]:
class TrainLogger(object):
  def __init__(self,file):
    self.file = os.path.join(log_dir,file)
    self.epoch =0
    with open(self.file,'w')as f:
      f.write("epoch,loss,acc\n")

  def add_entry(self,loss,acc):
    self.epoch +=1
    s = '{},{},{}\n'.format(self.epoch,loss,acc)
    with open(self.file,'a') as f:
      f.write(s)    

In [19]:
#from numpy.core.fromnumeric import size
def read_batches(T,vocab_size):
  length = T.shape[0] #129665
  batch_chars = int(length/batch_size) #8104

  for start in range(0,batch_chars-seq_length,seq_length):#(0, 8040, 64)

      X = np.zeros((batch_size,seq_length))#16*64
      Y = np.zeros((batch_size,seq_length,vocab_size))# 16X64X86

      for batch_idx in range(0,batch_size):# (0,16)
        for i in range(0,seq_length): #(0,64)
          X[batch_idx,i] = T[batch_chars * batch_idx + start + i] 
          Y[batch_idx, i, T[batch_chars * batch_idx + start + i + 1]] = 1

      yield X,Y   
            





In [6]:
import os 
from keras import Sequential
from keras import layers
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding


model_dir = '/content/drive/MyDrive/Colab_Notebooks/Case_Studies/Music_Generation/model'

def save_weights(epoch,model):
  if not os.path.exists(model_dir):
    os.makedirs(model_dir)
  model.save_weights(os.path.join(model_dir,'weights.{}.h5'.format(epoch)))  

def load_weights(epoch,model):
  model.load_weights(os.path.join(model_dir,'weights.{}.h5'.format(epoch)))

def build_model(batch_size,seq_length,vocab_size):
  model = Sequential()
  model.add(Embedding(vocab_size, 512, batch_input_shape=(batch_size, seq_length)))
  for i in range(3):

    model.add(LSTM(256, return_sequences=True, stateful=True))
    model.add(Dropout(0.2))

  model.add(TimeDistributed(Dense(vocab_size))) 
  model.add(Activation('softmax'))
  return model

if __name__ == '__main__':

  model = build_model(16, 64, 50)
  model.summary()  

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (16, 64, 512)             25600     
                                                                 
 lstm (LSTM)                 (16, 64, 256)             787456    
                                                                 
 dropout (Dropout)           (16, 64, 256)             0         
                                                                 
 lstm_1 (LSTM)               (16, 64, 256)             525312    
                                                                 
 dropout_1 (Dropout)         (16, 64, 256)             0         
                                                                 
 lstm_2 (LSTM)               (16, 64, 256)             525312    
                                                                 
 dropout_2 (Dropout)         (16, 64, 256)             0

In [13]:
def train(text,epochs =100,save_freq=10):
  #character to index and vice_vesa mapping
  char_to_idx = {ch:i for (i,ch) in enumerate(sorted(list(set(text)))) }
  print("Number of unique characters: " + str(len(char_to_idx))) #86

  with open(os.path.join(data_dir,'char_to_idx.json'),'w')as f:
    json.dump(char_to_idx,f)

  idx_to_char = { i: ch for (ch, i) in char_to_idx.items() }
  vocab_size = len(char_to_idx)

  #model_architecture
  model = build_model(batch_size, seq_length, vocab_size)
  model.summary()
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  #Train data generation
  T = np.asarray([char_to_idx[c] for c in text],dtype=np.int32) #convert complete text into numerical indices)
  print("Length of text:" + str(T.size)) #129,665

  steps_per_epoch = (len(text)/batch_size-1)/seq_length

  log = TrainLogger('training_log.csv')

  for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch+1,epochs))

    losses,accs = [],[]

    for i,(X,Y) in enumerate(read_batches(T,vocab_size)):
      print(X)
      #loss,acc = model.train_on_batch(X,Y)
      loss, acc = model.train_on_batch(X, Y)
      print('Batch {}: loss = {}, acc = {}'.format(i + 1, loss, acc))
      losses.append(loss)
      accs.append(acc)

    log.add_entry(np.average(losses), np.average(accs))

    if (epoch + 1) % save_freq == 0:
            save_weights(epoch + 1, model)
            print('Saved checkpoint to', 'weights.{}.h5'.format(epoch + 1))






In [None]:
"""x = input("Enter name of file input.txt \n")
y = int(input("Enter No of epoch default=100\n"))
z = int(input("Enter frequency default=10\n"))
train(open(os.path.join(data_dir, x)).read(), y, z)"""

train("/content/drive/MyDrive/Colab_Notebooks/Case_Studies/Music_Generation/data/input.txt")

In [20]:
if __name__ == '__main__':
    """parser = argparse.ArgumentParser(description='Train the model on some text.')
    parser.add_argument('--input', default='input.txt', help='name of the text file to train from')
    parser.add_argument('--epochs', type=int, default=100, help='number of epochs to train for')
    parser.add_argument('--freq', type=int, default=10, help='checkpoint save frequency')
    args = parser.parse_args()"""

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    
    train(open(os.path.join(data_dir,"input.txt" )).read())


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 [62. 17. 60. ... 29. 60. 84.]
 [62. 17. 61. ... 62.  1. 35.]
 ...
 [84.  3. 32. ... 28. 70.  3.]
 [ 3. 31.  3. ... 58. 77. 58.]
 [60. 28. 84. ... 31. 17. 29.]]
Batch 7: loss = 0.19818806648254395, acc = 0.9248046875
[[28. 84.  3. ...  1.  3. 34.]
 [ 3. 31.  3. ... 29. 60. 84.]
 [78. 71. 77. ... 25. 21. 14.]
 ...
 [28. 18.  1. ... 77. 77. 66.]
 [59. 58. 76. ... 29.  1. 28.]
 [ 1. 60. 17. ...  3. 28. 60.]]
Batch 8: loss = 0.2676050066947937, acc = 0.908203125
[[ 3. 34. 18. ...  3. 34.  3.]
 [ 3. 31.  3. ...  1. 61. 17.]
 [23.  0. 38. ... 32. 70.  3.]
 ...
 [71. 64. 65. ... 30.  3. 58.]
 [34. 33. 84. ... 17. 25. 84.]
 [62.  1. 61. ...  1. 40. 78.]]
Batch 9: loss = 0.21951881051063538, acc = 0.921875
[[64. 17. 64. ...  1. 34. 17.]
 [25. 84.  0. ... 38. 62. 79.]
 [62. 60. 28. ... 62. 61. 60.]
 ...
 [18.  1. 64. ... 33.  3. 58.]
 [ 0. 43. 25. ...  3. 29. 29.]
 [76. 66. 60. ...  3. 61. 17.]]
Batch 10: loss = 0.20435941219329834

In [35]:
import argparse
import os
import json

import numpy as np

#from model import build_model, load_weights

from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding

DATA_DIR = './data'
MODEL_DIR = './model'

def build_sample_model(vocab_size):
    model = Sequential()
    model.add(Embedding(vocab_size, 512, batch_input_shape=(1, 1)))
    for i in range(3):
        model.add(LSTM(256, return_sequences=(i != 2), stateful=True))
        model.add(Dropout(0.2))

    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))
    return model

def sample(epoch, header, num_chars):
    with open(os.path.join(data_dir, 'char_to_idx.json')) as f:
        char_to_idx = json.load(f)
    idx_to_char = { i: ch for (ch, i) in char_to_idx.items() }
    vocab_size = len(char_to_idx)

    model = build_sample_model(vocab_size)
    load_weights(epoch, model)
    model.save(os.path.join(model_dir, 'model.{}.h5'.format(epoch)))

    sampled = [char_to_idx[c] for c in header]
    print(sampled)
    

    for i in range(num_chars):
        batch = np.zeros((1, 1))
        if sampled:
            batch[0, 0] = sampled[-1]
        else:
            batch[0, 0] = np.random.randint(vocab_size)
        result = model.predict_on_batch(batch).ravel()
        sample = np.random.choice(range(vocab_size), p=result)
        sampled.append(sample)

    return ''.join(idx_to_char[c] for c in sampled)

if __name__ == '__main__':
  x = int(input("Enter epoch"))
  z = input("Enter seed value default null")
  y= int(input("Enter no of character to sample default=512"))
  print(sample(x,z,y))



  


Enter epoch100
Enter seed value default null
Enter no of character to sample default=512512
[]
c AGF||


X: 59
T:Jig For Che
% Nottingham Music Database
S:Trad, arr Phil Rowe
M:6/8
K:D
A|"D"dfe d2A|"D"dfe d2A|"D"d2d d2d|"A7"dcc A2G|"D"FGF "A7"EFG|
"D"ABA AFA|"D"dcd f2f|"A"e2c ABc|"Em"d2B "A"cBc|
"Bm"B3 "F#7"F2A|"Bm"dcB "D"A2F|"G"GAB "D"Adf|"A"e3 def|
"G"g2d Bcd|"A"e2c ABc|"D"dcB "A7"A2G|"D"F2E DFG|"D"F2A d2f|"A7"gfe "G"dcB|"D"A3 -A2:|


X: 273
T:Shafhond Salk
% Nottingham Music Database
S:AA, via EF
Y:AB
M:6/8
K:D
P:A
A|"D"dAA FGA|"D"ded cBA|"G"Bcd "A7"edc|"D"d3 d2:|
P:B
g/2f/2|"G"g2g gfe|"D"f2f faf|
