In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import random

# Step 1: Load and preprocess the dataset
def load_text_data():
    # Sample Shakespeare text dataset
    text = """
    Shall I compare thee to a summer's day?
    Thou art more lovely and more temperate:
    Rough winds do shake the darling buds of May,
    And summer's lease hath all too short a date:
    Sometime too hot the eye of heaven shines,
    And often is his gold complexion dimm'd;
    And every fair from fair sometime declines,
    By chance, or nature's changing course, untrimm'd;
    But thy eternal summer shall not fade,
    Nor lose possession of that fair thou owest;
    Nor shall Death brag thou wanderest in his shade,
    When in eternal lines to time thou growest:
    So long as men can breathe, or eyes can see,
    So long lives this, and this gives life to thee.
    """
    return text


In [6]:
text = load_text_data().lower()

In [7]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

In [8]:
vocab_size = len(tokenizer.word_index) +1

In [9]:
sequence_length = 5
input_sequences = []
output_words = []

In [10]:
# Convert text to sequences of tokens
sequences = tokenizer.texts_to_sequences([text])[0]

In [11]:
for i in range(len(sequences) - sequence_length):
    input_sequences.append(sequences[i:i + sequence_length])  # Input sequence
    output_words.append(sequences[i + sequence_length])  # Target word

input_sequences = np.array(input_sequences)
output_words = np.array(output_words)

In [12]:
# Step 3: Build the LSTM model
model = Sequential([
    Embedding(vocab_size, 50, input_length=sequence_length),  # Embedding layer
    LSTM(100, return_sequences=False),  # LSTM layer with 100 units
    Dense(vocab_size, activation="softmax")  # Output layer
])



In [13]:
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()

In [14]:
# Step 4: Train the model
print("Training the model...")
history = model.fit(input_sequences, output_words, epochs=100, batch_size=32, verbose=1)


Training the model...
Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - accuracy: 0.0058 - loss: 4.4317   
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.0616 - loss: 4.4247    
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.1373 - loss: 4.4198
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.0940 - loss: 4.4142
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.0961 - loss: 4.4064
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.1320 - loss: 4.3977
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.1070 - loss: 4.3888
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.0721 - loss: 4.3808
Epoch 9/100
[1m4/4[0m [3

In [15]:
# Step 5: Generate text
def generate_text(seed_text, num_words_to_generate):
    for _ in range(num_words_to_generate):
        # Tokenize seed text
        tokenized_sequence = tokenizer.texts_to_sequences([seed_text])[0]
        tokenized_sequence = pad_sequences([tokenized_sequence], maxlen=sequence_length, padding="pre")

        # Predict next word
        predicted_word_index = np.argmax(model.predict(tokenized_sequence), axis=-1)[0]
        predicted_word = tokenizer.index_word[predicted_word_index]

        # Append predicted word to seed text
        seed_text += " " + predicted_word
    return seed_text

# Test text generation
seed_text = "shall i compare thee"
generated_text = generate_text(seed_text, num_words_to_generate=10)
print("Generated Text: ", generated_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
Generated Text:  shall i compare thee wanderest a summer's day thou death more lovely and in


In [16]:
import pickle

# Load the model from the file
with open("model.pkl", "wb") as file:
    loaded_model = pickle.dump(model, file)
