In [2]:
import numpy as np
import random
import re
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

In [3]:
!ls

sample_data  shakespeare_model.h5  shakespeare.txt


In [4]:
text = open("shakespeare.txt", "r").read()

In [5]:
cleaned_text = re.sub(r'\d', '', text.replace('\n', ' ').replace('\t', ' ').replace('  ', ''))

In [6]:
cleaned_text[0:56]

'This is the th Etext file presented by Project Gutenberg'

In [7]:
characters = sorted(set(cleaned_text))

In [8]:
len(characters)

80

In [9]:
char_to_index = {char:index for index, char in enumerate(characters)}

In [10]:
index_to_char = {index:char for index,char in enumerate(characters)}

In [11]:
seq_length = 40

In [12]:
step_size = 5

In [13]:
sentences = []
next_characters = []

In [39]:
for i in range(0,len(cleaned_text)-seq_length, step_size):
  sentences.append(cleaned_text[i:i+seq_length])
  next_characters.append(cleaned_text[i+seq_length])

In [40]:
x = np.zeros((len(sentences),seq_length,len(characters)),dtype = bool)

In [41]:
y = np.zeros((len(sentences),len(characters)),dtype = bool)

In [16]:
for i, sentence in enumerate(sentences):
  for t, character in enumerate(sentence):
    x[i, t, char_to_index[character]] = 1
  y[i,char_to_index[next_characters[i]]] = 1

In [17]:
x.shape

(985343, 40, 80)

In [18]:
y.shape

(985343, 80)

In [19]:
model = Sequential()

In [20]:
model.add(LSTM(128, input_shape = (seq_length, len(characters))))

  super().__init__(**kwargs)


In [21]:
model.add(Dense(len(characters)))

In [22]:
model.add(Activation('softmax'))

In [None]:
model.compile(loss = 'categorical_crossentropy', optimizer = RMSprop(learning_rate = 0.01))

In [None]:
model.fit(x,y,batch_size = 256, epochs = 5)

Epoch 1/5
[1m3849/3849[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m771s[0m 200ms/step - loss: 2.1399
Epoch 2/5
[1m3849/3849[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m807s[0m 201ms/step - loss: 1.6184
Epoch 3/5
[1m3849/3849[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m797s[0m 200ms/step - loss: 1.5465
Epoch 4/5
[1m3849/3849[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m803s[0m 200ms/step - loss: 1.5264
Epoch 5/5
[1m3849/3849[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m771s[0m 200ms/step - loss: 1.4957


<keras.src.callbacks.history.History at 0x78393a0f6140>

In [None]:
model.save('shakespeare_model.h5')




In [14]:
model = tf.keras.models.load_model('shakespeare_model.h5')



In [15]:
def sample(preds,temperature = 1.0):
  preds = np.asarray(preds).astype('float64')
  preds = np.log(preds)/temperature
  exp_preds = np.exp(preds)
  preds = exp_preds/np.sum(exp_preds)
  probas = np.random.multinomial(1,preds,1)
  return np.argmax(probas)

In [20]:
def generate_text(length, temperature):
  start_index = random.randint(0,len(cleaned_text)-seq_length-1)
  generated = ''
  sentence = cleaned_text[start_index:start_index+seq_length]
  generated += sentence
  print(generated)
  for i in range(length):
    x = np.zeros((1,seq_length,len(characters)))
    for t, character in enumerate(sentence):
      x[0,t,char_to_index[character]] = 1
    predictions = model.predict(x,verbose = 0)[0]
    next_index = sample(predictions,temperature)
    next_character = index_to_char[next_index]
    generated += next_character
    sentence = sentence[1:] + next_character
  return generated

In [22]:
print(generate_text(120,0.2))

r lead me, like a firebrand, in the dark
r lead me, like a firebrand, in the dark of the fear. If you say the world and the treather to the present of the tent. If you say thee to the man in his part and the world and bear the heart, And the heart of the world to the truth of the companion of the truth, That the world the house o
