In [1]:
import sys
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from time import time

Using TensorFlow backend.


In [2]:
start_time_0 = time()

### Load file

In [3]:
lyrics = open('BSB.txt').read()
lyrics = lyrics.lower()

In [4]:
len(lyrics)

36518

### Convert char to num

In [5]:
chars = [i for i in sorted(set(lyrics))]

In [6]:
char_num = dict((char, num) for num, char in enumerate(chars))
num_char = dict((num, char) for num, char in enumerate(chars))

In [7]:
lyrics_num = [char_num[i] for i in lyrics]


In [8]:
alphabet = len(char_num)

### create sequence

In [9]:
def chop_to_sequence(seq, lyrics_num):
    """
    chop lyrics_num into segments with length seq
    return list of segments
    """
    lyrics_arr = np.asarray(lyrics_num)
    lyrics_arr = lyrics_arr / float(alphabet)
    segments = []
    next_char = []
    for i in range(0, len(lyrics_num)-seq):
        segment = lyrics_arr[i:i+seq]
        segments.append(segment)
        next_char.append(lyrics_num[i+seq])

    print("segment length:", seq)
    print('number of segments:', len(segments))
    print("chars in lyrics:", len(lyrics))
    print("")
    
    segments = np.reshape(segments, (len(segments),seq,1))
    next_char = np_utils.to_categorical(next_char)
    
    return segments, next_char
    

In [10]:
def print_time(start_time):
    print((time()-start_time)/60)

In [11]:
start_time = time()
X_all, y_all = chop_to_sequence(10, lyrics_num)
print_time(start_time)


segment length: 10
number of segments: 36508
chars in lyrics: 36518

0.001930367946624756


### LSTM model

In [12]:
def predict_next_n(n):
    """
    predict next n char from random seed
    """
    start = np.random.randint(0, len(X))
    seed = lyrics[start:start+X.shape[1]]
    pattern = X[start]
    
    chars=[]
    for i in range(n):
        pred_num = model.predict_classes(np.reshape(pattern,(1,pattern.shape[0],1)), verbose=False)
        #pred_num = np.argmax(pred_arr)
        pred_char = num_char[pred_num[0]]
        
        chars.append(pred_char)
        pattern = np.append(pattern, pred_num/float(alphabet))
        pattern = pattern[1:]
        #print(pred_arr, pred_num, pred_char, pattern)
    print("Seed:", seed)
    print("Generated:", "".join(chars))
    

### All data

In [13]:
X=X_all[:1000]
y=y_all[:1000]

In [14]:
model = Sequential()
model.add(LSTM(alphabet, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(alphabet))
model.add(Dropout(0.5))
model.add(Dense(y.shape[1], activation='softmax'))

In [15]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10, 39)            6396      
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 39)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 39)                12324     
_________________________________________________________________
dropout_2 (Dropout)          (None, 39)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 39)                1560      
Total params: 20,280.0
Trainable params: 20,280.0
Non-trainable params: 0.0
_________________________________________________________________


In [16]:
filepath="weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]


In [17]:
from keras.optimizers import Adam

In [18]:
adam_lr = Adam(lr=0.01)

In [19]:
model.compile(loss='categorical_crossentropy', optimizer=adam_lr, metrics=['accuracy'], )
start_time = time()
model.fit(X, y, epochs=10, batch_size=64, 
          #callbacks= callbacks_list, 
          verbose= True, validation_split=0.2)

Train on 800 samples, validate on 200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x119cc9a58>

In [20]:
model.compile(loss='categorical_crossentropy', optimizer=adam_lr, metrics=['accuracy'], )
start_time = time()
model.fit(X, y, epochs=600, batch_size=64, 
          #callbacks= callbacks_list, 
          verbose= True, validation_split=0.2)

Train on 800 samples, validate on 200 samples
Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600
Epoch 11/600
Epoch 12/600
Epoch 13/600
Epoch 14/600
Epoch 15/600
Epoch 16/600
Epoch 17/600
Epoch 18/600
Epoch 19/600
Epoch 20/600
Epoch 21/600
Epoch 22/600
Epoch 23/600
Epoch 24/600
Epoch 25/600
Epoch 26/600
Epoch 27/600
Epoch 28/600
Epoch 29/600
Epoch 30/600
Epoch 31/600
Epoch 32/600
Epoch 33/600
Epoch 34/600
Epoch 35/600
Epoch 36/600
Epoch 37/600
Epoch 38/600
Epoch 39/600
Epoch 40/600
Epoch 41/600
Epoch 42/600
Epoch 43/600
Epoch 44/600
Epoch 45/600
Epoch 46/600
Epoch 47/600
Epoch 48/600
Epoch 49/600
Epoch 50/600
Epoch 51/600
Epoch 52/600
Epoch 53/600
Epoch 54/600
Epoch 55/600
Epoch 56/600
Epoch 57/600
Epoch 58/600
Epoch 59/600
Epoch 60/600
Epoch 61/600
Epoch 62/600
Epoch 63/600
Epoch 64/600
Epoch 65/600
Epoch 66/600
Epoch 67/600
Epoch 68/600
Epoch 69/600
Epoch 70/600
Epoch 71/600
Epoch 72/600
Epoch 73/600
Epoch 74/600
E

<keras.callbacks.History at 0x11bb60c50>

In [23]:
model.fit(X, y, epochs=20, batch_size=64, 
          #callbacks= callbacks_list, 
          verbose= True, validation_split=0.2)

Train on 800 samples, validate on 200 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x11b091390>

In [24]:
model.fit(X, y, epochs=1, batch_size=64, callbacks= callbacks_list, verbose= True, validation_split=0.2)

Train on 800 samples, validate on 200 samples
Epoch 1/1


<keras.callbacks.History at 0x11c6cbba8>

In [25]:
model.fit(X, y, epochs=300, batch_size=64, 
          #callbacks= callbacks_list, 
          verbose= True, validation_split=0.2)

Train on 800 samples, validate on 200 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
E

<keras.callbacks.History at 0x11c6cbb38>

In [30]:
model.fit(X, y, epochs=1, batch_size=64, 
          #callbacks= callbacks_list, 
          verbose= True, validation_split=0.2)

Train on 800 samples, validate on 200 samples
Epoch 1/1


<keras.callbacks.History at 0x11c6e2208>

In [31]:
predict_next_n(100)

Seed: erybody
ye
Generated: ah
everybody swing
yeah
everybody swing
yeah
everybody swing
yeah
everybody swing
yeah
everybody swi


In [32]:
model.fit(X, y, epochs=300, batch_size=64, 
          #callbacks= callbacks_list, 
          verbose= True, validation_split=0.2)

Train on 800 samples, validate on 200 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
E

<keras.callbacks.History at 0x119cf2438>

In [34]:
model.fit(X, y, epochs=1, batch_size=64, 
          callbacks= callbacks_list, 
          verbose= True, validation_split=0.2)

Train on 800 samples, validate on 200 samples
Epoch 1/1


<keras.callbacks.History at 0x11c6e2160>

In [None]:
print_time(start_time)

In [37]:
predict_next_n(100)

Seed:  you baby)
Generated: 
i wanna be with you

and i'll be waiting
until you face the truth
whan
t wisn you 
and i'll be wait


In [54]:
predict_next_n(1000)

Seed: ght is fading
you know what i 
Generated: wanna do

i wanna be with you
it's crazy but it's true
(you know it's true)
and everything i do
i wanna be with you

i'd like to know your policy
ihl i monn  ndwit  you 
thee be eeee
yo tou inye iou iewe ever to sei leena you
you yeu saue you it's erun you it's taaere
yo 
thei the thanh
in
so i segna yoiiio ihai i w iamen th whet t t di watr you
know what's wauh 
you 
and tre thu iths 
n's wanna you

ane whe true

nder toe than shin s saueueatg ind i iiliang yoath toat's mamenn that i can kie to whe te t's erany you
yntn i's lanna be with you

and w'wr yo
you

and i'likenbgyhwhaadi i'lign
iosr toen s me
ief seeth yoat saat tre you ieee
youe se i w era be wiaaa yo t's laeth you 
acd w waat  tha er wa 
ah fne yeu bnoa the tiue (ad  l'wiee oo fn wi t eeery but tt what s tageues yaai aail foe to she that'seseneos
s d lig nn th whth you ith irar so ses femza yea co i wlinng wou fo i wanna be with you

and i'll be waiting
until you face the tru

In [38]:
### Human-input seed:

def predict_next(seed, n):
    """
    predict next n char from human input seed
    """
    #seed = lyrics[start:start+X.shape[1]]
    seed = seed.lower()
    seed_num = [char_num[i] for i in seed]
    seq = X[0].shape[0]
    
    if len(seed) < seq:
        print("Error: ")
    else:
        seed_arr = chop_to_sequence(seq=seq, lyrics_num=seed_num)
        pattern = seed_arr[0][0]
        
        chars=[]
        for i in range(n):
            pred_num = model.predict_classes(np.reshape(pattern,(1,pattern.shape[0],1)), verbose=False)
            #pred_num = np.argmax(pred_arr)
            pred_char = num_char[pred_num[0]]

            chars.append(pred_char)
            pattern = np.append(pattern, pred_num/float(alphabet))
            pattern = pattern[1:]
            #print(pred_arr, pred_num, pred_char, pattern)
        print("Seed:", seed)
        print("Generated:", "".join(chars))


In [39]:
seed = """iWe've got it goin' on for years
Jam on 'cause Backstreets' got it
Come on now, everybody 
We've got it goin' on for years """


In [42]:
predict_next(seed, 30)

segment length: 10
number of segments: 113
chars in lyrics: 36518

Seed: iwe've got it goin' on for years
jam on 'cause backstreets' got it
come on now, everybody 
we've got it goin' on for years 
Generated: e
wou bace the truth
whan
t wi


In [43]:
seed = """
Am I original? (Yeah)
Am I the only one? (Yeah)
Am I sexual? (Yeah)
Am I everything you need?
You'd better rock your body now
"""

In [45]:
predict_next(seed, 100)

segment length: 10
number of segments: 117
chars in lyrics: 36518

Seed: 
am i original? (yeah)
am i the only one? (yeah)
am i sexual? (yeah)
am i everything you need?
you'd better rock your body now

Generated:  bnd sht me free
so that i wanna be with you

and i'll be waiting
until you face the truth
whan
t wi
