<a href="https://colab.research.google.com/github/13-ghantarohith/ATM-Interface/blob/main/LSTM_Text_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import string

In [4]:
from google.colab import files
uploaded = files.upload()

Saving shakespeare.txt to shakespeare (1).txt


In [5]:
with open('shakespeare.txt', 'r', encoding='utf-8') as f:
  text = f.read()
text = text.lower()
text = text.translate(str.maketrans('', '', string.punctuation))

In [8]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])


vocab_size = len(tokenizer.word_index) + 1


sequences = []
for line in text.split(' '):
  token_list = tokenizer.texts_to_sequences([line])[0]
  for i in range(1, len(token_list)):
    sequences.append(token_list[:i+1])


max_seq_len = max(len(seq) for seq in sequences)
sequences = pad_sequences(sequences, maxlen=max_seq_len, padding='pre')


X = sequences[:, :-1]
y = sequences[:, -1]

In [10]:
model = Sequential([
Embedding(vocab_size, 100, input_length=max_seq_len - 1),
LSTM(150, return_sequences=True),
Dropout(0.2),
LSTM(100),
Dense(vocab_size, activation='softmax')
])


model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)


model.summary()




In [11]:
early_stop = EarlyStopping(monitor='loss', patience=3)


history = model.fit(
X, y,
epochs=20,
batch_size=128,
callbacks=[early_stop]
)

Epoch 1/20
[1m871/871[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 15ms/step - accuracy: 0.0607 - loss: 6.6032
Epoch 2/20
[1m871/871[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.0655 - loss: 5.8128
Epoch 3/20
[1m871/871[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.0702 - loss: 5.7388
Epoch 4/20
[1m871/871[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.0729 - loss: 5.7138
Epoch 5/20
[1m871/871[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.0758 - loss: 5.6677
Epoch 6/20
[1m871/871[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.0778 - loss: 5.6247
Epoch 7/20
[1m871/871[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 15ms/step - accuracy: 0.0781 - loss: 5.6075
Epoch 8/20
[1m871/871[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.0812 - loss: 5.5751
Epoch 9/20
[1m871/871[

In [12]:
def generate_text(seed_text, next_words=30):
    for _ in range(next_words):
      token_list = tokenizer.texts_to_sequences([seed_text])[0]
      token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
      predicted = np.argmax(model.predict(token_list, verbose=0))
      output_word = ''
      for word, index in tokenizer.word_index.items():
        if index == predicted:
          output_word = word
          break
      seed_text += ' ' + output_word
    return seed_text

In [13]:
print(generate_text("to be or not to be", 25))
print(generate_text("love looks not with the eyes", 25))

to be or not to be asmath porter o the show show show charity abated asmath and i i show show asmath porter asmath and the recantation show show asmath asmath
love looks not with the eyes o show show show show asmath asmath yellowness i i i i asmath porter asmath and the recantation show show asmath asmath behaviors that i
