In [7]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.utils import np_utils

In [8]:
rhcp_songs = pd.read_excel('rhcp_songs.xlsx')
rhcp_songs.head(10)

Unnamed: 0,Album,Song,Lyrics
0,I'm With You,Monarchy of Roses,The crimson tide is flowing through your finge...
1,I'm With You,Factory of Faith,"All my life I was swinging for the fence,\nI w..."
2,I'm With You,Brendan's Death Song,If I die before I get it done\nWill you decide...
3,I'm With You,Ethiopa,"We're rolling everybody, it starts with bass""\..."
4,I'm With You,Annie Wants A Baby,"Lucy Rebar, she's a friend of mine\nLater she'..."
5,I'm With You,Look Around,"Stiff club, it's my nature,\nCustom love is th..."
6,I'm With You,The Adventures of Raindance Maggie,Lipstick junkie\nDebunked the all in one\nShe ...
7,I'm With You,Did I Let You Know,m comin' for you 'cause I adore you and\nI'd l...
8,I'm With You,Goodbye Hooray,"Wooh, wooh, ha\nJunior paints that old cafe\nH..."
9,I'm With You,Happiness Loves Company,Stop marching 'cause you think you shot to num...


In [9]:
raw_text = rhcp_songs['Lyrics'].iloc[0]

In [10]:
all_text = ""
for i in range(len(rhcp_songs)):
    all_text = all_text + rhcp_songs['Lyrics'].iloc[i]

In [11]:
all_text = all_text.lower()
chars = sorted(list(set(all_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

In [12]:
n_chars = len(all_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  98370
Total Vocab:  47


In [13]:
seq_length = 100
dataX = []
dataY = []
for n in range(len(rhcp_songs)):
    raw_text = rhcp_songs['Lyrics'].iloc[n].lower()
    n_chars = len(raw_text)
    for i in range(0, n_chars - seq_length, 1):
        seq_in = raw_text[i:i + seq_length]
        seq_out = raw_text[i + seq_length]
        dataX.append([char_to_int[char] for char in seq_in])
        dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print('Number of total patterns', n_patterns)

Number of total patterns 91270


In [14]:
from keras.utils import to_categorical
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = to_categorical(dataY)

In [15]:
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
filepath="weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='loss',
                              min_delta=0,
                              patience=3,
                              verbose=0, mode='auto')
callbacks_list = [checkpoint, early_stopping]

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [None]:
model.fit(X, y, epochs=5, batch_size=128, callbacks=callbacks_list)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20

In [58]:
int_to_char = dict((i, c) for i, c in enumerate(chars))

In [64]:
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_word[value] for value in pattern]), "\"")
# generate characters
for i in range(100):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    print(index)
    result = int_to_word[index]
    seq_in = [int_to_word[value] for value in pattern]
    print(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]
print("\nDone.")

Seed:
" toldyoui'ddoitallagainthisisto "
2243
to
2243
to
2243
to
2243
to
2243
to
1042
i
1042
i
1042
i
1042
i
1042
i
1042
i
1042
i
1042
i
1042
i
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let's
1199
let'