<a href="https://colab.research.google.com/github/KhatijaBegum27/AI-ML-Projects/blob/main/TextPredictionLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow



In [2]:
!pip install gradio



In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import gradio as gr

In [4]:
!unzip '/content/ptb.train.txt.zip'

Archive:  /content/ptb.train.txt.zip
  inflating: ptb.train.txt           


In [5]:
with open('/content/ptb.train.txt', 'r') as f:
    text = f.read()

In [8]:
df = text[:len(text)//3]

In [9]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df)
char_index = tokenizer.word_index
index_char = tokenizer.index_word
vocab_size = len(char_index) + 1

In [10]:
sequences = tokenizer.texts_to_sequences(df)

In [11]:
sequences = [item for sublist in sequences for item in sublist]

In [12]:
seq_length = 10
X, y = [], []
for i in range(len(sequences) - seq_length):
    X.append(sequences[i:i + seq_length])
    y.append(sequences[i + seq_length])

X = np.array(X)
y = to_categorical(y, num_classes=vocab_size)

In [13]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=50, input_length=seq_length),
    LSTM(150, return_sequences=False),
    Dense(vocab_size, activation='softmax')#Activation tanh
])



In [14]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [15]:
model.fit(X, y, epochs=6, batch_size=128)

Epoch 1/6
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 6ms/step - accuracy: 0.3111 - loss: 2.3155
Epoch 2/6
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 6ms/step - accuracy: 0.4819 - loss: 1.7527
Epoch 3/6
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 6ms/step - accuracy: 0.5119 - loss: 1.6442
Epoch 4/6
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 6ms/step - accuracy: 0.5261 - loss: 1.5925
Epoch 5/6
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 6ms/step - accuracy: 0.5349 - loss: 1.5599
Epoch 6/6
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 6ms/step - accuracy: 0.5414 - loss: 1.5348


<keras.src.callbacks.history.History at 0x7ccd2c0eb350>

In [16]:
# Continue training
print("Continuing training for more epochs...")
model.fit(X, y, epochs=4, batch_size=128)

Continuing training for more epochs...
Epoch 1/4
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 6ms/step - accuracy: 0.5451 - loss: 1.5201
Epoch 2/4
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 6ms/step - accuracy: 0.5485 - loss: 1.5085
Epoch 3/4
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 6ms/step - accuracy: 0.5520 - loss: 1.4978
Epoch 4/4
[1m10456/10456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 6ms/step - accuracy: 0.5535 - loss: 1.4888


<keras.src.callbacks.history.History at 0x7ccd2bf056d0>

In [17]:
model.summary()

In [18]:
def generate_text(seed_text, num_chars):
    for _ in range(num_chars):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=seq_length, truncating='pre')
        predicted_probs = model.predict(token_list, verbose=0)
        predicted_char_index = np.argmax(predicted_probs)
        predicted_char = index_char[predicted_char_index]
        seed_text += predicted_char
    return seed_text

In [19]:
def gradio_generate(seed_text, num_chars):
    return generate_text(seed_text, int(num_chars))

In [22]:
interface = gr.Interface(
    fn=gradio_generate,
    inputs=[
        gr.Textbox(label="Seed Text", placeholder="Enter seed text, e.g., 'Once upon a time'"),
        gr.Slider(minimum=50, maximum=500, step=10, value=200, label="Number of Characters")
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="LSTM-Based Text Generator",
    description="Enter a seed text and specify the number of characters to generate new text using a GRU-based neural network."
)

In [23]:
interface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2c6ad894734aa5e867.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


