In [30]:
import re
import os
import random

import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.optimizers import RMSprop

In [10]:
# Directory holding the lyric text files, and a listing of its contents.
path = 'data'
files = os.listdir(path)

# The corpus is accumulated into one string.
data = ''

# Alternative: build the corpus from every file in `path`, lowercased.
# Note that this variant lowercases the text, while the single-file load
# below keeps the original casing.
# for file in files:
#     if os.path.isfile(os.path.join(path, file)):
#         with open(os.path.join(path, file), 'r', encoding='utf-8') as fh:
#             data += fh.read().lower()

# Load a single file. The context manager closes the handle deterministically
# instead of leaving it to the garbage collector.
with open(os.path.join(path, 'drake.txt'), 'r', encoding='utf-8') as fh:
    file_content = fh.read()
data += file_content

In [12]:
# Sanity check: report the corpus size in characters, then show its opening
# 200 characters.
print('Length of corpus:', len(data))
print(data[0:200])

Length of corpus: 199038
[Hook]
I've been down so long, it look like up to me
They look up to me
I got fake people showin' fake love to me
Straight up to my face, straight up to my face
I've been down so long, it look like up


In [13]:
# Alphabet size before cleaning.
print('Number of unique characters before:', len(set(data)))

# Drop every character outside the 7-bit ASCII range (0x00-0x7F), then
# report the alphabet size again.
data = re.sub(pattern=r'[^\x00-\x7F]', repl=r'', string=data)
print('Number of unique characters after:', len(set(data)))

Number of unique characters before: 82
Number of unique characters after: 80


In [14]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(data))
print('Total chars:', len(chars))

# Two-way lookup tables between a character and its position in `chars`.
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

Total chars: 80


In [16]:
# Length of each input window (in characters) and stride between the start
# of consecutive windows.
maxlen = 40
step = 3
sentences = []
next_chars = []

# Slide a window of `maxlen` characters over the corpus; the character that
# immediately follows each window is the prediction target.
for i in range(0, len(data) - maxlen, step):
    sentences.append(data[i: i + maxlen])
    next_chars.append(data[i + maxlen])

print('Number of sequences:', len(sentences))

# One-hot encode inputs as (sequence, timestep, char) and targets as
# (sequence, char). The builtin `bool` is used as dtype because the `np.bool`
# alias was deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sent in enumerate(sentences):
    for t, char in enumerate(sent):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 66332


In [22]:
# Character-level model: a single LSTM layer over one-hot windows of shape
# (maxlen, len(chars)), followed by a softmax over the vocabulary.
model = Sequential()
model.add(Input(shape=(maxlen, len(chars))))
model.add(LSTM(units=128, activation='relu'))
model.add(Dense(units=len(chars), activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               107008    
_________________________________________________________________
dense (Dense)                (None, 80)                10320     
Total params: 117,328
Trainable params: 117,328
Non-trainable params: 0
_________________________________________________________________


In [41]:
# RMSprop with an initial learning rate of 1e-3 and a decay of 1e-5.
# NOTE(review): the `decay` keyword may be rejected by newer Keras optimizer
# implementations -- confirm against the installed TensorFlow version.
optimizer = RMSprop(decay=1e-5, learning_rate=1e-3)

# The targets are one-hot vectors, hence categorical cross-entropy.
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

In [42]:
# Training hyperparameters: number of passes over the data and mini-batch size.
epochs = 1
batch_size = 128

# Left as the cell's last expression, so the returned History object is
# displayed as the cell output.
model.fit(x=x, y=y, epochs=epochs, batch_size=batch_size)



<tensorflow.python.keras.callbacks.History at 0x1ed44b1b670>

In [48]:
def sample(preds, temperature=1.0):
    """Sample one index from a probability array, with optional temperature.

    The probabilities are moved to log space, divided by ``temperature``,
    and re-normalised with a softmax before a single multinomial draw.
    With the default ``temperature=1.0`` the distribution is unchanged;
    values below 1 sharpen it (more conservative picks) and values above 1
    flatten it (more surprising picks).

    Args:
        preds: 1-D array-like of non-negative probabilities.
        temperature: Positive scaling factor applied in log space.

    Returns:
        The sampled index into ``preds``.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Work in float64 so the re-normalised probabilities sum to 1 closely
    # enough for np.random.multinomial's validation.
    preds = np.asarray(preds).astype("float64")

    # log(0) legitimately yields -inf (probability stays 0 after exp), so
    # silence the divide-by-zero warning rather than clipping.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature

    # Subtracting the maximum keeps exp() from underflowing to all zeros at
    # low temperatures; it cancels out in the normalisation.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)

    # One draw from the categorical distribution; the result is one-hot.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [49]:
# Number of characters to generate after the seed.
gen_length = 400

# Pick a random window of the corpus as the seed text. random.randint is
# inclusive on both ends, so the slice below always has `maxlen` characters.
start_index = random.randint(0, len(data) - maxlen - 1)

generated = ""
sentence = data[start_index : start_index + maxlen]
print('Generating with seed: "' + sentence + '"')

for i in range(gen_length):
    # One-hot encode the current window as a batch of one.
    x_pred = np.zeros((1, maxlen, len(chars)))
    for t, char in enumerate(sentence):
        x_pred[0, t, char_indices[char]] = 1.0

    # verbose=0: predict is called once per generated character, and recent
    # Keras versions otherwise print a progress bar for every call.
    preds = model.predict(x_pred, verbose=0)[0]

    # Draw the next character and slide the window forward by one.
    next_index = sample(preds)
    next_char = indices_char[next_index]
    sentence = sentence[1:] + next_char
    generated += next_char

print(generated)

Generating with seed: "e bills fall
All over your skin
I got mo"
se, up treaty every dight me time
On, I take ain't gettin' it and Cnuls I do raid a bourn Cing,
All it's me pot Bhuse troud yeah the yough the so, that up, it
I amand yeah, I say forttorsing But and it roop
I'm on bright me, all may when you the Mamed my dippin' Mance
Call, think noghtake, get of ah, drigo
Now I ward wlo nigga got, ing
Starty eight like you winke tell a with debe

Cout used brubd 
