In [0]:
import os
import json
import argparse

import numpy as np

#from model import build_model, save_weights


# Directory names for input data and logs; neither is referenced by the cells visible below.
DATA_DIR = './data'
LOG_DIR = './logs'

# Batch geometry shared by read_batches() and train():
# BATCH_SIZE rows per batch, SEQ_LENGTH characters per row.
BATCH_SIZE = 16
SEQ_LENGTH = 64


In [0]:
def read_batches(T, vocab_size, batch_size=None, seq_length=None):
    """Yield successive ``(X, Y)`` training batches cut from an encoded text.

    The text is divided into ``batch_size`` contiguous rows of
    ``len(T) // batch_size`` characters each. Every yielded batch takes the
    next ``seq_length`` characters from each row, so row ``b`` of batch ``k+1``
    continues exactly where row ``b`` of batch ``k`` stopped.

    Args:
        T: 1-D NumPy integer array of character indices.
        vocab_size: number of distinct character indices (depth of the one-hot
            targets).
        batch_size: rows per batch; defaults to the module-level ``BATCH_SIZE``.
        seq_length: characters per row; defaults to the module-level
            ``SEQ_LENGTH``.

    Yields:
        Tuples ``(X, Y)`` where ``X`` is a float64 array of shape
        ``(batch_size, seq_length)`` holding input character indices and ``Y``
        is a float64 array of shape ``(batch_size, seq_length, vocab_size)``
        holding the one-hot encoding of the character that follows each input.
        Nothing is yielded when a row is not longer than ``seq_length``.
    """
    # Resolve defaults at call time so the module constants are only needed
    # when the caller does not pass explicit sizes.
    if batch_size is None:
        batch_size = BATCH_SIZE
    if seq_length is None:
        seq_length = SEQ_LENGTH

    length = T.shape[0]
    batch_chars = length // batch_size  # characters available to each row

    # Index grids: rows is (batch_size, 1), cols is (1, seq_length); together
    # they broadcast to the (batch_size, seq_length) positions of one batch.
    rows = np.arange(batch_size)[:, np.newaxis]
    cols = np.arange(seq_length)[np.newaxis, :]
    base = batch_chars * rows + cols

    # Stopping before batch_chars - seq_length guarantees that the "+ 1"
    # target lookup below never runs past the end of a row's slice.
    for start in range(0, batch_chars - seq_length, seq_length):
        positions = base + start
        X = T[positions].astype(np.float64)
        Y = np.zeros((batch_size, seq_length, vocab_size))
        # Target for every input position is the next character in the text.
        Y[rows, cols, T[positions + 1]] = 1
        yield X, Y


In [0]:
import os

from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding


# Directory where save_weights() writes and load_weights() reads weight files.
MODEL_DIR = './model'

def save_weights(epoch, model,name):
    """Write the model's weights to ``MODEL_DIR/weights_<name>.<epoch>.h5``.

    Args:
        epoch: value embedded in the file name to identify the checkpoint.
        model: object exposing ``save_weights(path)``.
        name: run/dataset label embedded in the file name.
    """
    # exist_ok=True creates the directory when missing and is a no-op when it
    # is already there, avoiding the race in a separate exists() check.
    os.makedirs(MODEL_DIR, exist_ok=True)
    model.save_weights(os.path.join(MODEL_DIR, 'weights_{}.{}.h5'.format(name, epoch)))

def load_weights(epoch, model,name):
    """Load weights from ``MODEL_DIR/weights_<name>.<epoch>.h5`` into ``model``.

    Args:
        epoch: checkpoint identifier that appears in the file name.
        model: object exposing ``load_weights(path)``.
        name: run/dataset label that appears in the file name.
    """
    weights_file = 'weights_{}.{}.h5'.format(name, epoch)
    weights_path = os.path.join(MODEL_DIR, weights_file)
    model.load_weights(weights_path)

def build_model(batch_size, seq_len, vocab_size):
    """Assemble the training network: embedding, three stateful LSTMs, per-step softmax.

    Args:
        batch_size: fixed number of sequences per batch (stateful layers need
            a fixed batch input shape).
        seq_len: fixed number of time steps per sequence.
        vocab_size: number of distinct tokens; sets both the embedding input
            size and the width of the output layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    net = Sequential()

    # 512-dimensional embedding with a fixed (batch, time) input shape.
    input_shape = (batch_size, seq_len)
    net.add(Embedding(vocab_size, 512, batch_input_shape=input_shape))

    # Three identical recurrent stages, each followed by 20% dropout.
    for units in (256, 256, 256):
        net.add(LSTM(units, return_sequences=True, stateful=True))
        net.add(Dropout(0.2))

    # One vocab-sized softmax per time step.
    net.add(TimeDistributed(Dense(vocab_size)))
    net.add(Activation('softmax'))
    return net


In [0]:
def train(name,text, epochs=100, save_freq=10):
    """Train the character-level model on ``text`` and checkpoint its weights.

    Side effects: writes the character-to-index mapping to
    ``char_to_idx_<name>.json`` in the current directory, prints per-batch and
    per-epoch progress, and calls ``save_weights`` every ``save_freq`` epochs.

    Args:
        name: label used in the mapping and checkpoint file names.
        text: training corpus as a single string.
        epochs: number of passes over the corpus.
        save_freq: a checkpoint is saved whenever the 1-based epoch number is
            a multiple of this value.
    """
    # Character -> index mapping; indices follow sorted character order.
    char_to_idx = {ch: i for (i, ch) in enumerate(sorted(set(text)))}
    print("Number of unique characters: " + str(len(char_to_idx)))

    # Persist the mapping so sampling can decode with the same vocabulary.
    with open('char_to_idx_{}.json'.format(name), 'w') as f:
        json.dump(char_to_idx, f)

    vocab_size = len(char_to_idx)

    # Model architecture
    model = build_model(BATCH_SIZE, SEQ_LENGTH, vocab_size)
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Encode the complete text as integer indices.
    T = np.asarray([char_to_idx[c] for c in text], dtype=np.int32)
    print("Length of text:" + str(T.size))

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))

        # The LSTM layers are stateful and every epoch restarts each batch row
        # at the beginning of its slice of the text, so the state left over
        # from the end of the previous pass must be cleared first.
        model.reset_states()

        losses, accs = [], []

        for i, (X, Y) in enumerate(read_batches(T, vocab_size)):
            loss, acc = model.train_on_batch(X, Y)
            print('Batch {}: loss = {}, acc = {}'.format(i + 1, loss, acc))
            losses.append(loss)
            accs.append(acc)

        # Epoch summary; skipped when the text was too short to yield a batch.
        if losses:
            print('Epoch {} average: loss = {}, acc = {}'.format(
                epoch + 1, np.average(losses), np.average(accs)))

        if (epoch + 1) % save_freq == 0:
            save_weights(epoch + 1, model, name)
            print('Saved checkpoint to',
                  os.path.join(MODEL_DIR, 'weights_{}.{}.h5'.format(name, epoch + 1)))

In [0]:
def file_name(name, lowercase=True, max_chars=100000):
    """Read a UTF-8 text file and return its (optionally lowercased) leading part.

    Prints the length of the text after optional lowercasing and before
    truncation.

    Args:
        name: path of the file to read.
        lowercase: when True (default) the text is lowercased; pass False for
            case-sensitive corpora such as ABC music notation.
        max_chars: maximum number of characters returned; ``None`` returns the
            whole text.

    Returns:
        The first ``max_chars`` characters of the processed text.
    """
    # Context manager closes the file handle even if read() raises.
    with open(name, 'r', encoding='utf-8') as f:
        raw_text = f.read()
    if lowercase:
        raw_text = raw_text.lower()
    print(len(raw_text))
    return raw_text[:max_chars]

In [45]:
# Train for 100 epochs on the text returned by file_name() (lowercased, capped at
# 100,000 characters); a weights checkpoint is saved every 10 epochs.
train('eminem_input.txt',file_name('eminem_input.txt'), epochs=100, save_freq=10)

42533
Number of unique characters: 58
Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_15 (Embedding)     (16, 64, 512)             29696     
_________________________________________________________________
lstm_43 (LSTM)               (16, 64, 256)             787456    
_________________________________________________________________
dropout_43 (Dropout)         (16, 64, 256)             0         
_________________________________________________________________
lstm_44 (LSTM)               (16, 64, 256)             525312    
_________________________________________________________________
dropout_44 (Dropout)         (16, 64, 256)             0         
_________________________________________________________________
lstm_45 (LSTM)               (16, 64, 256)             525312    
_________________________________________________________________
dropout_45 (Dro

In [46]:
#for text
def build_sample_model(vocab_size):
    """Assemble the single-step generation network.

    Uses the same layer sizes as ``build_model`` but with a fixed input of one
    sequence of one time step; the last LSTM returns only its final output,
    which feeds a plain ``Dense`` softmax over the vocabulary.

    Args:
        vocab_size: number of distinct tokens.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    net = Sequential()
    net.add(Embedding(vocab_size, 512, batch_input_shape=(1, 1)))

    depth = 3
    for layer_no in range(depth):
        # Every LSTM except the final one passes its full sequence onward.
        is_last = layer_no == depth - 1
        net.add(LSTM(256, return_sequences=not is_last, stateful=True))
        net.add(Dropout(0.2))

    net.add(Dense(vocab_size))
    net.add(Activation('softmax'))
    return net
    
def sample( header, num_chars,name, epoch=100):
    """Generate text character by character from a saved checkpoint.

    Loads ``char_to_idx_<name>.json`` from the current directory, builds the
    single-step model, loads the weights saved for ``epoch`` and also saves the
    full model to ``MODEL_DIR/model_<name>.<epoch>.h5``.

    Args:
        header: seed string; every character must be in the saved vocabulary.
            May be empty, in which case the first input is a random character.
        num_chars: number of characters to generate after the header.
        name: label used in the mapping and weight file names.
        epoch: checkpoint to load; defaults to 100.

    Returns:
        The header followed by ``num_chars`` generated characters.

    Raises:
        KeyError: if ``header`` contains a character missing from the mapping.
    """
    with open('char_to_idx_{}.json'.format(name)) as f:
        char_to_idx = json.load(f)
    idx_to_char = {i: ch for (ch, i) in char_to_idx.items()}
    vocab_size = len(char_to_idx)

    model = build_sample_model(vocab_size)
    load_weights(epoch, model, name)
    model.save(os.path.join(MODEL_DIR, 'model_{}.{}.h5'.format(name, epoch)))

    sampled = [char_to_idx[c] for c in header]
    print(sampled)

    # Prime the stateful model with all header characters except the last
    # (the generation loop feeds the last one itself). Without this, only the
    # final header character would influence the generated text.
    for idx in sampled[:-1]:
        warmup = np.zeros((1, 1))
        warmup[0, 0] = idx
        model.predict_on_batch(warmup)

    for _ in range(num_chars):
        batch = np.zeros((1, 1))
        if sampled:
            batch[0, 0] = sampled[-1]
        else:
            # No header: start from a uniformly random character.
            batch[0, 0] = np.random.randint(vocab_size)

        # Float32 softmax output may not sum to 1 within np.random.choice's
        # tolerance; renormalize in float64 before sampling.
        probs = np.asarray(model.predict_on_batch(batch), dtype=np.float64).ravel()
        probs /= probs.sum()
        next_idx = int(np.random.choice(vocab_size, p=probs))
        sampled.append(next_idx)

    return ''.join(idx_to_char[c] for c in sampled)


# Generate 1000 characters with an empty seed header, using the mapping and
# epoch-100 weights saved under the name 'eminem_input.txt'.
print(sample( '', 1000,'eminem_input.txt'))


[]
9
feek in the wrong feels right, i'm getin' for you
so escapin' me is ampice
mander asgard

[chorus]
i’m not afraid (i’m not afraid)
to take a stand (to take a stand)
everybody (everybody)
come take my hand (come take my hand)
we’ll walk this road together, through the storm
whatever weather, cold or warm
just lettin' you know that you’re not alone
holla if you feel like you’ve been down the same road

[dintro]
yeah, it's a figa
and i just bought a when i wasn't, then who was there and you better never let it go
you only get one shot, do not miss your chance to blow
this opportunity comes once in a lifetime, yo
you better…

[verse 2]
his soul's escaping the mover, the whole crowd goes so loud
he opens his mouth, but the words won't cook" who show the fuck can him
but it's my hand (come take my hand)
we’ll walk this road together, through the storm
whatever weather, cold or nam!
well, that's all right because i love the way you lie
i love the way you lie

[verse 1: eminem]
i can't te

In [47]:
# For music: generate 20000 characters with an empty seed header.
# Requires char_to_idx_TV_Movie_abc_input.txt.json and the epoch-100 weights for
# 'TV_Movie_abc_input.txt'; the train() run that produces them is not among the visible cells.
print(sample( '', 20000,'TV_Movie_abc_input.txt'))


[]
/8
[=d/4z/8] [b/4z/8] [b/8z/8] [b/8z/8] [^d/8^D,7/8] z/8 [=G,/4G/8] z/8
[b3/4^D,3/4=D/8] z/4 [^D,/4^G,/4^A,/8^D/8] z/4 [^D,/8^G,/8^A,/8] z/8 [^G/8^g/8]
[^D,/8^G/8^g/8] [^G,/8^D,/8] [^A,/8^G,/8] z/8 [=G,/8^D,5/4^D5/4^A5/4] z/8
[^G,3/8^D,/2z3/8] [c/8^d/8] z/4 [^A,/8^D,/8F,/8] [^A,/4=D,/4F,/4] z/8 [^A,/4D,/4F,/4] z/8
^d/8 z/4 [c'/8^D,/4] z/8 [^A,/4^D,/4] z/8 [^a5/8^A,5/8] z/4
[^d/8c/8C/8^D,5/8] z5/8 [^D,/2^G,/2^A,/2z/2] c/4 z/8 [^a5/8z/8]
[c'/2z/8] [^D,/4^G,/4^C,/4] z/8 [^D,/8^G,/8^A,/8] z/8 [^d/8^D,/4F,/4^A,/4^D/4] z/8 [^D,/8^G,/8^A,/8^D/8] z/8 [^D,/8^G,/8^A,/8^D/8] [^A,/4^G,/4^D,/4^D/4]
z/8 [f9/8^A,/4=D,/4F,/4] z/8 [^A,/4D,/4F,/4] z/8
[^a3/8^A,23/8F,3/8D3/8] z/4 [^A,/4D,/4F,/4^A,/4D/8] z/8 [^A,/4D,/4] z/8 ^D/4 z/8 c'/8 [F,/8f/8] z/4 [=A,/8=D,/8] 
X: 1
T: DragonBallZ (3:49)
Z: Transcribed using LotRO MIDI Player: http://lotro.acasylum.com/midi
%  Original file: StarTre/2Nik..4_widBid
%  Transpose: -11
L: 1/4
Q: 120
K: C
z5/4 [D13z11/4] [^F/4B,/4] B,/4 [G/4B,/8] z/8 [^F/4B,/8] z/8 [^F/