In [9]:
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop


In [10]:
 filepath = tf.keras.utils.get_file("shakespeare.txt", "https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt")

In [11]:
text = open(filepath, "rb").read().decode(encoding="utf-8").lower()

In [12]:
text = text[300000:800000]
text



In [13]:
characters = sorted(set(text))
characters

['\n',
 ' ',
 '!',
 '$',
 '&',
 "'",
 ',',
 '-',
 '.',
 '3',
 ':',
 ';',
 '?',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

***This is what we use to convert text into numerical format!***

In [14]:
char_to_index = dict((c,i) for i,c in enumerate(characters))
index_to_char = dict((i,c) for i,c in enumerate(characters))

Predicting what the next character should be.How many letters are you going to use as feature data?

In [15]:
SEQ_LENGTH = 40
STEP_SIZE = 3
sentences = []
next_characters = []
for i in range(0, len(text)-SEQ_LENGTH, STEP_SIZE):
    sentences.append(text[i:i+SEQ_LENGTH])
    next_characters.append(text[i+SEQ_LENGTH])

print(sentences)
print(len(sentences))
print(next_characters)




166654
['t', 'r', ';', 'n', 'm', 'y', 'o', ' ', ' ', 'b', ' ', 'm', 'a', ' ', 'r', ':', 'n', 't', 'a', 's', 'o', 'o', 't', 'y', 'o', 'e', ' ', 'e', ' ', 'u', 'e', 'i', 'b', 't', ' ', 'y', 'h', ' ', ' ', 't', 'o', 'h', 'w', 'h', '.', 'd', 'b', '\n', 't', 'n', 'n', ' ', 'y', 'o', ';', 'o', 'e', ' ', ' ', ' ', 'm', 't', 'l', 'i', 't', ' ', 'e', ' ', 't', 'h', 'r', 'l', 'c', 's', 't', '\n', ' ', 'a', ' ', 'p', 's', 'e', 'z', 'e', ' ', 'r', 'a', 'h', 'r', 't', 's', 'l', 't', 's', 'i', ' ', 's', 'v', 'h', ' ', ' ', ' ', 'n', ' ', 'r', 'e', '.', 'b', 'k', 'g', 'm', 'w', 'l', 'o', 'k', 'g', 'i', 'a', ' ', 't', 'e', 'p', 'k', 'i', ' ', 'm', '\n', 'e', 'f', '\n', ',', 'y', 'o', ' ', 'r', ' ', 'e', 'f', 'e', 'e', 'a', 'e', '.', 'b', 'k', 'g', 'm', 'h', 't', 'g', ' ', 'd', 'd', 'r', 's', 'h', 'd', 'n', 'r', 'e', ',', 'r', ',', 'o', ' ', 'n', 'h', 'r', ' ', 'd', 'h', 'f', 'r', 'o', 'e', 'a', ',', 'a', 'h', ',', 'n', 'a', ' ', 'a', 'h', 'e', 'i', 'a', 'i', '\n', ' ', 'd', 'h', 'd', 'o', 'u', 'e', 'f

Setting up the matrices so that we have 1 hot encoding for all the sentences of size SEQ_LENGTH x len(characters).

and then we have the actual matrix predictions of what letter comes next for each sentence.

In [18]:
x = np.zeros((len(sentences), SEQ_LENGTH, len(characters)), dtype = np.bool_)
y = np.zeros((len(sentences), len(characters)), dtype = np.bool_)

for i, sentence in enumerate(sentences):
    for t, character in enumerate(sentence):
        x[i,t,char_to_index[character]] = 1
    y[i, char_to_index[next_characters[i]]] = 1



In [20]:
model = Sequential()
model.add(LSTM(128, input_shape= (SEQ_LENGTH , len(characters))))
model.add(Dense(len(characters)))
model.add(Activation('softmax'))

In [23]:
model.compile(loss = "categorical_crossentropy", optimizer = RMSprop(learning_rate=0.01))

model.fit(x,y, batch_size = 256, epochs = 4)

model.save("textgenerator.keras")

Epoch 1/4
[1m651/651[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 156ms/step - loss: 1.4761
Epoch 2/4
[1m651/651[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 176ms/step - loss: 1.4377
Epoch 3/4
[1m651/651[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 170ms/step - loss: 1.4090
Epoch 4/4
[1m651/651[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 157ms/step - loss: 1.3784


In [25]:
model = tf.keras.models.load_model("textgenerator.keras")

In [26]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


In [29]:
def generate_text(length, temperature):
    start_index = random.randint(0,len(text) - SEQ_LENGTH-1)
    generated = ""
    sentence = text[start_index: start_index+SEQ_LENGTH]
    generated += sentence
    for i in range(length):
        x = np.zeros((1, SEQ_LENGTH, len(characters)))
        for t, character in enumerate(sentence):
            x[0, t, char_to_index[character]] = 1
        predictions = model.predict(x, verbose = 0)[0]
        next_index = sample(predictions, temperature)
        next_character = index_to_char[next_index]

        generated += next_character
        sentence = sentence[1:] + next_character
    return generated


In [30]:
print('-------0.2-------')
print(generate_text(300, 0.2))

print('-------0.4-------')
print(generate_text(300, 0.4))

print('-------0.6-------')
print(generate_text(300, 0.6))

print('-------0.8-------')
print(generate_text(300, 0.8))

print('-------1.0-------')
print(generate_text(300, 1.0))


-------0.2-------
t love in death!

capulet:
despised, dissorm the sentent the world with thee
than the breathe than the world than the sentent
that the brother than the best than the trust
that thou shalt be the count the counter soul.

benvolio:
therefore the count up this son the world.

king richard ii:
i can this the love and there than the bosing.

r
-------0.4-------
must disguise ourselves.

autolycus:
where is the brother is a bold them and their son,
and if thou not stand their strengling their sented to thy friend.

king richard ii:
my lord her both gone and ston she soul in death,
and that thou she that thought the field.

lord ross:
that doth my lord hence that her a man and best.

king henry vi
-------0.6-------
ove all true?

ratcliff:
no doubt, my lords, is the band to france:
i send to arming the hold to not stand and theme,
o means than thy ground and untiled with the
grant of this stall what he see look him,
and in this sile no chound up unto angrest.

romeo:
in this