In [None]:
import pandas as pd
import numpy as np
from keras.utils import np_utils
import re
import sys 
from keras.models import Sequential
from keras.layers import LSTM, Activation, Flatten, Dropout, Dense, Embedding, TimeDistributed, LSTM
from keras.callbacks import LambdaCallback, ModelCheckpoint
import tensorflow as tf
from keras.optimizers import RMSprop, Adam
import random
import os
!pip install pyarrow==2.0.0

Collecting pyarrow==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/d7/e1/27958a70848f8f7089bff8d6ebe42519daf01f976d28b481e1bfd52c8097/pyarrow-2.0.0-cp36-cp36m-manylinux2014_x86_64.whl (17.7MB)
[K     |████████████████████████████████| 17.7MB 208kB/s 
Installing collected packages: pyarrow
  Found existing installation: pyarrow 0.14.1
    Uninstalling pyarrow-0.14.1:
      Successfully uninstalled pyarrow-0.14.1
Successfully installed pyarrow-2.0.0


In [None]:
import requests
import io
# Lyrics corpus, stored as a Feather file in the NARV GitHub repository.
lyrics_url = 'https://github.com/Scytheface/NARV/raw/main/lyrics/lyrics.ft'
data = pd.read_feather(lyrics_url)

In [None]:
NoneType = type(None)  # alias defined by the original cell; kept so the name stays available

# Build the training corpus: every lyric, lower-cased and concatenated in
# row order.
# Only genuine strings are kept, so missing entries are skipped whether they
# are stored as None or as NaN (the previous None-only check would have
# crashed on a NaN when calling `.lower()`).
# A single join is linear in the corpus size; repeated `text +=` inside an
# `iterrows()` loop re-copies the growing string on every row.
text = "".join(
    lyric.lower() for lyric in data['lyrics'] if isinstance(lyric, str)
)

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
# Lookup tables between a character and its position in `chars`.
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = {idx: ch for idx, ch in enumerate(chars)}

In [None]:
maxlen = 50  # number of characters in each input window
step = 3     # distance between the starts of consecutive windows

# Slide a fixed-size window over the corpus; each window is paired with the
# single character that immediately follows it.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('Number of Sequences:', len(sentences))

Number of Sequences: 594117


In [None]:
# One-hot encode the windows and their target characters.
#   x[i, t, k] is True when character t of window i is chars[k]
#   y[i, k]    is True when the character following window i is chars[k]
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and is missing from NumPy 1.24-1.26, where it raises
# AttributeError. `bool` yields the same boolean arrays on every version.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = True
    y[i, char_indices[next_chars[i]]] = True

In [None]:
# Character-level model: five stacked LSTM(64) layers, each followed by 20%
# dropout, then a dense softmax over the vocabulary.
model = Sequential()

# Only the first layer declares the input shape (one window of one-hot
# characters). The later layers take their input size from the layer before
# them, so the `input_shape` previously repeated on them was redundant.
model.add(LSTM(64, input_shape=(maxlen, len(chars)), return_sequences=True))
model.add(Dropout(0.2))

# Intermediate layers return the full sequence so that the next LSTM
# receives one vector per time step.
for _ in range(3):
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.2))

# The last LSTM returns only its final output, which feeds the classifier.
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

# `learning_rate` is the current keyword; `lr` is a deprecated alias.
optimizer = RMSprop(learning_rate=0.001, momentum=0.1)

In [None]:
# To continue from previously saved weights instead of starting from scratch,
# uncomment the two lines below before compiling:
#weights_file = '/content/lyrics-weights.hdf5'
#model.load_weights(weights_file)

# Attach the loss and the optimizer configured earlier, then print the
# layer-by-layer summary.
model.compile(optimizer=optimizer, loss='categorical_crossentropy')
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 50, 64)            51200     
_________________________________________________________________
dropout_5 (Dropout)          (None, 50, 64)            0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 50, 64)            33024     
_________________________________________________________________
dropout_6 (Dropout)          (None, 50, 64)            0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 50, 64)            33024     
_________________________________________________________________
dropout_7 (Dropout)          (None, 50, 64)            0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 50, 64)           

In [None]:
# Checkpoint callback: writes to `filepath` only when the monitored
# validation loss reaches a new minimum, and reports each save.
filepath = "lyrics-weights.hdf5"
checkpoint_options = dict(
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
checkpoint = ModelCheckpoint(filepath, **checkpoint_options)

In [None]:
def sample(preds, diversity):
    """Draw one index from `preds` after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / diversity)`` and
    renormalised, then a single index is drawn from the result.

    Args:
        preds: 1-D array-like of non-negative probabilities (for example the
            softmax output of the model for one input window).
        diversity: Sampling temperature. Values below 1 sharpen the
            distribution, values above 1 flatten it, and 0 selects the most
            probable index deterministically.

    Returns:
        The sampled index into `preds`, as returned by ``np.argmax``.

    Raises:
        ValueError: If `diversity` is negative.
    """
    if diversity < 0:
        raise ValueError("diversity must be non-negative, got %r" % (diversity,))

    preds = np.asarray(preds).astype('float64')

    # Temperature 0 is the limit of ever-sharper sampling: pick the mode
    # directly instead of dividing by zero.
    if diversity == 0:
        return np.argmax(preds)

    # log(0) is -inf, which is the correct weight for an impossible
    # character; silence only the divide-by-zero warning it triggers.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / diversity

    # Shift so the largest entry is 0 before exponentiating. Without this, a
    # small diversity makes every exp() underflow to 0 and the normalisation
    # becomes 0/0 = NaN.
    scaled -= np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial trial yields a one-hot row; its argmax is the draw.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
# Train on the one-hot windows. A 4% validation split is requested, which
# provides the `val_loss` that the checkpoint callback monitors.
fit_options = dict(
    batch_size=64,
    epochs=25,
    validation_split=0.04,
    verbose=1,
    callbacks=[checkpoint],
)
model.fit(x, y, **fit_options)

Epoch 1/25

In [None]:
# Reload the weights written by the checkpoint callback. The path is taken
# from `filepath` (the location the callback saves to) rather than a
# hard-coded '/content/...' path, which only pointed at the same file when
# the working directory happened to be /content.
weights_file = filepath
model.load_weights(weights_file)
# Recompiles with the string-named 'rmsprop' optimizer rather than the
# `optimizer` object configured earlier.
model.compile(loss = 'categorical_crossentropy', optimizer = 'rmsprop')

In [None]:
# Seed the generator with a random window taken from the corpus.
start_index = random.randint(0, len(text) - maxlen - 1)
sentence = text[start_index: start_index + maxlen]
generated = sentence
print('***** Generating with: "' + sentence + '"')
print(generated, end='')

# Produce 500 characters, one at a time. After each prediction the window
# drops its first character and gains the newly sampled one.
for _ in range(500):
    # One-hot encode the current window as a batch of size one.
    x_pred = np.zeros((1, maxlen, len(chars)))
    for pos, ch in enumerate(sentence):
        x_pred[0, pos, char_indices[ch]] = 1.

    preds = model.predict(x_pred, verbose=0)[0]
    next_index = sample(preds, 0.5)
    next_char = indices_char[next_index]

    generated += next_char
    sentence = sentence[1:] + next_char

    # Stream the character immediately so the text appears as it is generated.
    print(next_char, end='', flush=True)