In [1]:
import sys
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from time import time

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

INFO (theano.gof.compilelock): Waiting for existing lock by process '18252' (I am process '23734')
INFO (theano.gof.compilelock): To manually release the lock, delete /home/ec2-user/.theano/compiledir_Linux-4.4-amzn1.x86_64-x86_64-with-glibc2.2.5-x86_64-3.6.1-64/lock_dir
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
from keras.optimizers import Adam

In [3]:
# Wall-clock start of the whole notebook run; the final cell passes this to print_time.
start_time_0 = time()

### Load file

In [4]:
# Read the whole lyrics corpus and lower-case it so the character vocabulary
# stays small. The context manager closes the file handle deterministically
# instead of leaving it to the garbage collector.
with open('BSB.txt') as lyrics_file:
    lyrics = lyrics_file.read().lower()

### Convert characters to numbers

In [5]:
# Sorted list of the distinct characters that occur in the lyrics.
chars = sorted(set(lyrics))

In [6]:
# Two-way lookup tables between characters and their integer codes.
char_num = {char: num for num, char in enumerate(chars)}
num_char = dict(enumerate(chars))

In [7]:
# The full lyrics text encoded as a list of integer character codes.
lyrics_num = [char_num[ch] for ch in lyrics]


In [8]:
# Vocabulary size: number of distinct characters in the lower-cased lyrics.
alphabet = len(char_num)

### Create sequences

In [9]:
def chop_to_sequence(seq, lyrics_num):
    """
    Chop lyrics_num into overlapping windows of length seq.

    Each window is paired with the character code that immediately follows
    it, which is the prediction target for that window.

    Parameters
    ----------
    seq : int
        Number of characters per window.
    lyrics_num : sequence of int
        Text encoded as integer character codes. Codes are expected to lie
        in [0, alphabet), where ``alphabet`` is the notebook-level
        vocabulary size.

    Returns
    -------
    segments : numpy.ndarray, shape (len(lyrics_num) - seq, seq, 1)
        Windows of codes divided by ``alphabet``.
    next_char : numpy.ndarray, shape (len(lyrics_num) - seq, alphabet)
        One-hot encoding of the code that follows each window.

    Raises
    ------
    ValueError
        If lyrics_num is not longer than seq, so no window can be built.
    """
    n_windows = len(lyrics_num) - seq
    if n_windows <= 0:
        raise ValueError(
            "lyrics_num has %d items; need more than seq=%d to build a window"
            % (len(lyrics_num), seq))

    lyrics_arr = np.asarray(lyrics_num) / float(alphabet)
    segments = [lyrics_arr[i:i + seq] for i in range(n_windows)]
    # The target of window i is the item at position i + seq.
    next_char = lyrics_num[seq:]

    print("segment length:", seq)
    print('number of segments:', len(segments))
    # Report the length of the argument rather than a notebook global.
    print("chars in lyrics:", len(lyrics_num))
    print("")

    segments = np.reshape(segments, (len(segments), seq, 1))
    # Pin the one-hot width to the vocabulary size so the target shape does
    # not depend on which codes happen to appear among the targets.
    next_char = np_utils.to_categorical(next_char, alphabet)

    return segments, next_char
    

In [10]:
def print_time(start_time):
    """Print the number of minutes elapsed since start_time (a time() timestamp)."""
    elapsed_seconds = time() - start_time
    print(elapsed_seconds / 60)

In [11]:
# Build the training set from 30-character windows and report how long it took.
start_time = time()
X_all, y_all = chop_to_sequence(seq=30, lyrics_num=lyrics_num)
print_time(start_time)


segment length: 30
number of segments: 36488
chars in lyrics: 36518

0.0007930159568786621


### LSTM model

In [12]:
def predict_next_n(n):
    """
    Generate and print n characters, starting from a randomly chosen window.

    Reads the notebook globals X, lyrics, model, alphabet and num_char.
    Each predicted character code is scaled and fed back in as the newest
    element of the sliding input window. Prints the seed text and the
    generated text; returns None.
    """
    seed_idx = np.random.randint(0, len(X))
    seed = lyrics[seed_idx:seed_idx + X.shape[1]]
    window = X[seed_idx]
    scale = float(alphabet)

    generated = []
    for _ in range(n):
        model_input = np.reshape(window, (1, window.shape[0], 1))
        pred_num = model.predict_classes(model_input, verbose=False)
        generated.append(num_char[pred_num[0]])
        # np.append flattens its inputs, so the window is 1-D from here on;
        # dropping the oldest entry keeps its length constant.
        window = np.append(window, pred_num / scale)[1:]

    print("Seed:", seed)
    print("Generated:", "".join(generated))
    

### All data

In [13]:
# Train on the complete set of windows and targets.
X, y = X_all, y_all

In [14]:
# Inspect the first scaled input window and its one-hot target.
X[0], y[0]

(array([[ 0.61538462],
        [ 0.94871795],
        [ 0.76923077],
        [ 0.53846154],
        [ 0.38461538],
        [ 0.79487179],
        [ 0.        ],
        [ 0.30769231],
        [ 0.30769231],
        [ 0.30769231],
        [ 0.30769231],
        [ 0.        ],
        [ 0.07692308],
        [ 0.53846154],
        [ 0.02564103],
        [ 0.8974359 ],
        [ 0.33333333],
        [ 0.66666667],
        [ 0.66666667],
        [ 0.33333333],
        [ 0.02564103],
        [ 0.35897436],
        [ 0.43589744],
        [ 0.02564103],
        [ 0.8974359 ],
        [ 0.53846154],
        [ 0.82051282],
        [ 0.51282051],
        [ 0.02564103],
        [ 0.94871795]]),
 array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]))

In [27]:
# One-hot target of the second window.
y[1]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.])

### Create Model

In [20]:
# Two stacked LSTM layers, each followed by 50% dropout, feeding a softmax
# over the character classes. The first LSTM returns the full sequence so
# the second one receives a time series rather than a single vector.
model = Sequential([
    LSTM(alphabet, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.5),
    LSTM(alphabet),
    Dropout(0.5),
    Dense(y.shape[1], activation='softmax'),
])

In [21]:
# Show each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 30, 39)            6396      
_________________________________________________________________
dropout_2 (Dropout)          (None, 30, 39)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 39)                12324     
_________________________________________________________________
dropout_3 (Dropout)          (None, 39)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 39)                1560      
Total params: 20,280.0
Trainable params: 20,280.0
Non-trainable params: 0.0
_________________________________________________________________


In [23]:
# Compile with an explicit Adam optimizer, then run a short 2-epoch fit to
# see how long an epoch takes.
adam_lr = Adam(lr=0.001)
model.compile(optimizer=adam_lr,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

start_time = time()
model.fit(X, y, epochs=2, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f1e6ff57d68>

### Training takes too long! Reduce the data to BSB lyrics only

In [None]:
# Estimated time to finish 2000 epochs, in hours
# (the 20 is presumably seconds per epoch — TODO confirm).
(20*2000)/(60*60)

In [None]:
# Train for up to 500 epochs, holding out 20% of the samples for validation.
model.fit(X, y, epochs=500, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/50

In [29]:
### save model in between
# The checkpoint file name embeds the epoch index and the training loss.
filepath="P2_weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"

# With save_best_only=True and monitor='loss', weights are written only when
# the training loss improves on the best value this callback has seen.
# NOTE(review): validation_split is in use, so 'val_loss' may be the better
# quantity to monitor — confirm the intent.
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
# The model is already compiled with the same loss, metrics and an Adam
# optimizer. Re-compiling here would swap in a fresh optimizer and throw away
# Adam's accumulated state mid-training, so training simply continues.
model.fit(X, y, epochs=1, batch_size=512, callbacks=callbacks_list, verbose=True, validation_split=0.2)

Train on 29190 samples, validate on 7298 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e71df68d0>

In [None]:
# Continue training the same model for up to 300 more epochs (no checkpoint callback).
model.fit(X, y, epochs=300, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/50

In [31]:
### save model in between
# One extra epoch with the checkpoint callback attached; weights are written
# only if the monitored training loss improved.
model.fit(X, y, epochs=1, batch_size=512, validation_split=0.2,
          verbose=True, callbacks=callbacks_list)

Train on 29190 samples, validate on 7298 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e6da6efd0>

In [32]:
# Continue training the same model for up to 300 more epochs (no checkpoint callback).
model.fit(X, y, epochs=300, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/30

<keras.callbacks.History at 0x7f1e736cd320>

In [33]:
### save model in between
# One extra epoch with the checkpoint callback attached; weights are written
# only if the monitored training loss improved.
model.fit(X, y, epochs=1, batch_size=512, validation_split=0.2,
          verbose=True, callbacks=callbacks_list)

Train on 29190 samples, validate on 7298 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e71df6d68>

In [34]:
# Continue training the same model for up to 500 more epochs (no checkpoint callback).
model.fit(X, y, epochs=500, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/50

<keras.callbacks.History at 0x7f1e7504e978>

In [35]:
# One extra epoch with the checkpoint callback attached; weights are written
# only if the monitored training loss improved.
model.fit(X, y, epochs=1, batch_size=512, validation_split=0.2,
          verbose=True, callbacks=callbacks_list)

Train on 29190 samples, validate on 7298 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e736cdc50>

In [36]:
# Continue training the same model for up to 300 more epochs (no checkpoint callback).
model.fit(X, y, epochs=300, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/30

<keras.callbacks.History at 0x7f1e6ed57390>

In [37]:
# One extra epoch with the checkpoint callback attached; weights are written
# only if the monitored training loss improved.
model.fit(X, y, epochs=1, batch_size=512, validation_split=0.2,
          verbose=True, callbacks=callbacks_list)

Train on 29190 samples, validate on 7298 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e6db4b128>

In [38]:
# Continue training the same model for up to 300 more epochs (no checkpoint callback).
model.fit(X, y, epochs=300, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/30

<keras.callbacks.History at 0x7f1e6db4bb70>

In [39]:
# One extra epoch with the checkpoint callback attached; weights are written
# only if the monitored training loss improved.
model.fit(X, y, epochs=1, batch_size=512, validation_split=0.2,
          verbose=True, callbacks=callbacks_list)

Train on 29190 samples, validate on 7298 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e6ed2e908>

In [40]:
# Print 100 generated characters from a random seed using the current weights.
predict_next_n(100)

Seed:  me...""
but i called my girl 
Generated: tee the tay i wanta be with you what i wanta be wiet you the way i like it 
averybody wou teet me th


In [41]:
# Continue training the same model for up to 300 more epochs (no checkpoint callback).
model.fit(X, y, epochs=300, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/30

<keras.callbacks.History at 0x7f1e6ff87828>

In [42]:
# One extra epoch with the checkpoint callback attached; weights are written
# only if the monitored training loss improved.
model.fit(X, y, epochs=1, batch_size=512, validation_split=0.2,
          verbose=True, callbacks=callbacks_list)

Train on 29190 samples, validate on 7298 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e6d4b3630>

In [43]:
# Print 100 generated characters from a random seed using the current weights.
predict_next_n(100)

Seed: can run to make it alright
i'l
Generated:  sou you aan toe that i wanta be with you what i wanta be with you what i wanta be with you what i w


In [None]:
# Continue training the same model for up to 300 more epochs (no checkpoint callback).
model.fit(X, y, epochs=300, batch_size=512,
          validation_split=0.2, verbose=True)

Train on 29190 samples, validate on 7298 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300

In [None]:
# Print 100 generated characters from a random seed using the current weights.
predict_next_n(100)

In [None]:
# Continue training the same model for up to 300 more epochs (no checkpoint callback).
model.fit(X, y, epochs=300, batch_size=512,
          validation_split=0.2, verbose=True)

In [None]:
# One extra epoch with the checkpoint callback attached; weights are written
# only if the monitored training loss improved.
model.fit(X, y, epochs=1, batch_size=512, validation_split=0.2,
          verbose=True, callbacks=callbacks_list)

In [None]:
# Print 100 generated characters from a random seed using the current weights.
predict_next_n(100)

In [None]:
# NOTE(review): this scores the model on all of X/y, which includes the
# samples it was trained on, so it does not measure generalization.
model.evaluate(X,y)



In [None]:
# Total minutes elapsed since start_time_0 was recorded near the top of the notebook.
print_time(start_time_0)