In [74]:
import random
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

In [61]:
data_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'

# Fetch the corpus through the Keras download helper, which returns the
# local path of the downloaded file.
data_path = tf.keras.utils.get_file("dataset.txt", data_URL)

# Read the raw bytes inside a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector, then
# decode as UTF-8 and lowercase everything.
with open(data_path, "rb") as corpus_file:
    text = corpus_file.read().decode("utf-8").lower()

# Keep only the characters in [300000, 800000) of the lowercased corpus.
text = text[300000:800000]

In [62]:
# Sorted list of the distinct characters that occur in the corpus slice.
vocab = sorted(set(text))

# Two-way lookup between a character and its position in `vocab`.
char_to_index = {char: index for index, char in enumerate(vocab)}
index_to_char = dict(enumerate(vocab))

In [63]:
sequenceSize = 40
stepSize = 3

# Slide a window of `sequenceSize` characters over the text, advancing
# `stepSize` characters at a time; every window is paired with the single
# character that immediately follows it.
sequences = [
    text[start : start + sequenceSize]
    for start in range(0, len(text) - sequenceSize, stepSize)
]
next_char = [
    text[start + sequenceSize]
    for start in range(0, len(text) - sequenceSize, stepSize)
]
print(len(sequences))

166654


In [64]:
# One-hot tensors: x is (number of windows, sequenceSize, len(vocab)) and
# y is (number of windows, len(vocab)); both start out all-False.
x = np.zeros((len(sequences), sequenceSize, len(vocab)), dtype="bool")
y = np.zeros((len(sequences), len(vocab)), dtype="bool")

# Flag the vocabulary slot of every character in each window, and the slot
# of the character that follows that window.
for row, (window, target) in enumerate(zip(sequences, next_char)):
    for pos, symbol in enumerate(window):
        x[row, pos, char_to_index[symbol]] = True
    y[row, char_to_index[target]] = True

In [70]:
# The layers are passed as a list, not a set literal: a set has no defined
# iteration order, so the LSTM -> Dense stacking would not be guaranteed.
model = Sequential([
    # Consumes one-hot windows of shape (sequenceSize, len(vocab)).
    LSTM(128, input_shape=(sequenceSize, len(vocab))),
    # One softmax output per vocabulary entry.
    Dense(len(vocab), activation="softmax"),
])

model.compile(loss="categorical_crossentropy", optimizer=RMSprop(learning_rate=0.01))

In [None]:
# Train on the one-hot windows (x) against their following characters (y).
model.fit(x, y, epochs=10, batch_size=256)

# Write the trained model to disk. The file name is kept exactly as it was
# so anything that loads it by this name keeps working.
model.save("text_genrator.h5")

In [76]:
def sample(preds, temperature=1.0):
    """Randomly pick one index from `preds` after temperature rescaling.

    The values in `preds` are converted to float64, their logarithm is
    divided by `temperature`, and the result is exponentiated and
    normalized so it sums to one. A single multinomial trial is then drawn
    from that distribution.

    Args:
        preds: Array-like of values to sample from.
        temperature: Divisor applied to the log of `preds`. Defaults to 1.0.

    Returns:
        The index selected by the multinomial draw.
    """
    scaled_log = np.log(np.asarray(preds).astype("float64")) / temperature
    weights = np.exp(scaled_log)
    distribution = weights / np.sum(weights)
    # One experiment with one trial yields a one-hot row; argmax recovers
    # the position of the drawn index.
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)

def generate_text(length, temp):
    """Generate text by repeatedly sampling the model's next-character output.

    A random window of `sequenceSize` characters from `text` is used as the
    seed. At each step the current window is one-hot encoded, passed to
    `model.predict`, and the next character is drawn with `sample` using
    `temp` as the temperature. The window then slides forward by one
    character.

    Args:
        length: Number of characters to generate after the seed.
        temp: Sampling temperature forwarded to `sample`.

    Returns:
        The seed window followed by the `length` generated characters.
    """
    starting_index = random.randint(0, len(text) - sequenceSize - 1)
    sentence = text[starting_index : starting_index + sequenceSize]
    pieces = [sentence]

    for _ in range(length):
        # Batch of exactly one window, so the batch index is always 0.
        encoded = np.zeros((1, sequenceSize, len(vocab)))
        for t, char in enumerate(sentence):
            encoded[0, t, char_to_index[char]] = 1
        # verbose=0 keeps predict from printing a progress bar per character.
        preds = model.predict(encoded, verbose=0)[0]
        # Forward the caller's temperature; otherwise `temp` has no effect.
        predicted_index = sample(preds, temp)
        predicted_char = index_to_char[predicted_index]
        pieces.append(predicted_char)
        # Drop the oldest character and append the new one.
        sentence = sentence[1:] + predicted_char
    return "".join(pieces)
        