In [1]:
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop


In [2]:
filepath = tf.keras.utils.get_file('shakespeare.txt',
        'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
text = open(filepath, 'rb').read().decode(encoding='utf-8').lower()

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
# convert text to unique numerical represenataion

text = text[300000:800000]
characters = sorted(set(text))

char_to_index = dict((c, i) for i, c in enumerate(characters))
index_to_char = dict((i, c) for i, c in enumerate(characters))

seq_length = 40
step_length = 3

#create mapping
sentences = []
next_chars = []


for i in range(0, len(text) - seq_length, step_length):
    sentences.append(text[i: i + seq_length])
    next_chars.append(text[i + seq_length])



In [4]:
x = np.zeros((len(sentences), seq_length, len(characters)), dtype=bool) # Use bool instead of np.bool
y = np.zeros((len(sentences),len(characters)), dtype=bool) # Use bool instead of np.bool


for i, sentence in enumerate(sentences):
    for t, character in enumerate(sentence):
        x[i, t, char_to_index[character]] = 1
        y[i, char_to_index[next_chars[i]]] = 1


# Define the model
model = Sequential()
model.add(LSTM(128, input_shape=(seq_length, len(characters))))
model.add(Dense(len(characters)))
model.add(Activation('softmax'))

# Compile the model
optimizer = RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

# Train the model (this may take a while)
model.fit(x, y, batch_size=128, epochs=5)

# Save the model
model.save('textgenerator.model')

def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def generate_text(length, temperature=1.0):
    start_index = random.randint(0, len(text) - seq_length - 1)
    generated_text= ''
    generated_text = text[start_index: start_index + seq_length]
    generated_text += sentence
    for i in range(length):
        x = np.zeros((1, seq_length, len(characters)))
        for t, character in enumerate(generated_text):
          x[0, t, char_to_index[character]] = 1

        predictions = model.predict(x, verbose=0)[0]
        next_index = sample(predictions, temperature)
        next_char = index_to_char[next_index]

        generated_text += next_char
        sentence = sentence[1:] + next_char
    return generated_text

    print('--------0.2--------')
    print(generated_text(300,0.2))
    print('--------0.4--------')
    print(generated_text(300,0.4))
    print('--------0.6--------')
    print(generated_text(300,0.6))
    print('--------0.8--------')
    print(generated_text(300,0.8))
    print('--------1--------')
    print(generated_text(300,1))



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
