In [1]:
# 📦 Step 1: Install Dependencies
# !pip install keras tensorflow

In [2]:
# pip install streamlit


In [15]:
# 🧹 Step 2: Import Libraries
import streamlit as st

import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense


In [16]:
# # 📄 Step 3: Sample Corpus (replace this with your own dataset!)
# corpus = [
#     "I am going to the store",
#     "I am going to the park",
#     "He is reading a book",
#     "She is playing in the garden",
#     "They are going for a walk"
# ]


# Read the whole dataset in one go and lower-case it
with open("stories.txt.utf-8", "r", encoding="utf-8") as story_file:
    text = story_file.read().lower()

# Every non-blank line becomes one training example, trimmed of
# surrounding whitespace; blank / whitespace-only lines are dropped.
corpus = [
    trimmed
    for trimmed in (raw_line.strip() for raw_line in text.split("\n"))
    if trimmed
]

In [17]:
# 🧼 Step 4: Tokenization and Sequence Creation
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
# Vocabulary size + 1 (Keras word indices start at 1)
total_words = 1 + len(tokenizer.word_index)

# Encode every line once, then expand each encoded line into all of its
# prefixes of length >= 2; the last token of a prefix is the word to predict.
encoded_lines = tokenizer.texts_to_sequences(corpus)
input_sequences = [
    encoded[:stop]
    for encoded in encoded_lines
    for stop in range(2, len(encoded) + 1)
]

# Left-pad so that every prefix is as long as the longest one
max_sequence_len = max(len(prefix) for prefix in input_sequences)
padded = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
input_sequences = np.array(padded)

# The final column is the label; everything before it is the model input
X, labels = input_sequences[:, :-1], input_sequences[:, -1]
y = to_categorical(labels, num_classes=total_words)

In [21]:
# 🧠 Step 5: Define the LSTM Model
model = Sequential()
# Inputs are the padded prefixes built above, i.e. max_sequence_len - 1 tokens each.
# NOTE(review): `input_length` appears to be deprecated in Keras 3 — confirm
# against the installed version before removing it.
model.add(Embedding(total_words, 50, input_length=max_sequence_len - 1))
model.add(LSTM(100))
# One softmax output per vocabulary index, matching the one-hot labels in y
model.add(Dense(total_words, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
model.summary()

None


In [None]:
# 🏋️ Step 6: Train the Model
# Number of passes over the training data, named so it is easy to find and tune
EPOCHS = 200

# Keep the object returned by fit() instead of discarding it, so the training
# run can still be inspected after this cell has finished.
history = model.fit(X, y, epochs=EPOCHS, verbose=1)

Epoch 1/200
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 19ms/step - accuracy: 0.0750 - loss: 7.1041
Epoch 2/200
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.0979 - loss: 6.3461
Epoch 3/200
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.1014 - loss: 6.2096
Epoch 4/200
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.1167 - loss: 5.9443
Epoch 5/200
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.1274 - loss: 5.7847
Epoch 6/200
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.1433 - loss: 5.6206
Epoch 7/200
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.1447 - loss: 5.4690
Epoch 8/200
[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.1485 - loss: 5.3463
Epoch 9/200
[1m

<keras.src.callbacks.history.History at 0xffffae11c1a0>

In [19]:
# 🔮 Step 7: Prediction Function
def predict_next_word(seed_text):
    """Return the word the model considers most likely to follow ``seed_text``.

    Args:
        seed_text: The phrase to continue. It is encoded with the fitted
            ``tokenizer`` and left-padded to ``max_sequence_len - 1`` tokens,
            the same input width the model was trained on.

    Returns:
        The predicted word, or ``""`` when the winning index has no entry in
        the tokenizer's vocabulary.
    """
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
    predicted = model.predict(token_list, verbose=0)
    # Exactly one sequence was submitted, so only the first row is relevant.
    predicted_word_index = int(np.argmax(predicted[0]))
    # Direct lookup in the tokenizer's reverse map instead of scanning every
    # (word, index) pair of word_index.
    return tokenizer.index_word.get(predicted_word_index, "")



In [9]:
# # 🎯 Step 8: Try it!
# seed_text = "I am going"
# next_word = predict_next_word(seed_text)
# print(f"{seed_text} → {next_word}")

In [20]:
# Streamlit UI
st.title("Ramayan AI")
user_input = st.text_input("Enter your phrase:", "What is The Ramayana about?")

if st.button("Predict"):
    if not user_input.strip():
        # Nothing to condition the model on; ask for input instead of predicting
        st.warning("Please enter a phrase first.")
    else:
        next_word = predict_next_word(user_input)
        if next_word:
            st.success(f"Next word prediction: **{next_word}**")
        else:
            # predict_next_word returns "" when no vocabulary word matches;
            # showing that through st.success would render an empty "****".
            st.warning("No prediction available for this phrase.")

