In [None]:
import pandas as pd
import numpy as np
import re
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split

# Load Dataset
df = pd.read_csv("Roman-Urdu-Poetry.csv", usecols=["Poetry"])

# Clean Text Data
def clean_poetry_text(text):
    text = text.lower()  # Convert text to lowercase
    text = re.sub(r'[^a-zA-ZāčēğīñōūṣṭẓḳḌ -]', '', text)  # Remove unwanted characters
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra spaces
    return text

df["Poetry"] = df["Poetry"].apply(clean_poetry_text)

# Tokenizer Setup
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df["Poetry"].tolist())

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(df["Poetry"].tolist())

# Create input-output sequences
input_sequences = []
for seq in sequences:
    for i in range(1, len(seq)):
        input_sequences.append(seq[:i + 1])

# Padding Sequences
max_sequence_length = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='post')

# Split into X and Y
X, Y = input_sequences[:, :-1], input_sequences[:, -1]

# Train-Test Split
X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.2, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42)

# LSTM Model Definition
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 100

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=X_train.shape[1]),
    LSTM(150, return_sequences=True), #long term dependencies 
    LSTM(150),
    Dense(150, activation="relu"),  
    Dense(vocab_size, activation="softmax")  #Predicts the next word using Softmax over vocabulary.
])

model.compile(loss="sparse_categorical_crossentropy", optimizer=Adam(learning_rate=0.001), metrics=["accuracy"])

# Callbacks (disable EarlyStopping for all epochs to train fully)
checkpoint = ModelCheckpoint("poetry_lstm_model.h5", save_best_only=True, monitor="val_loss")

# Train the Model for 50 epochs
history = model.fit(X_train, Y_train, epochs=50, batch_size=128, validation_data=(X_val, Y_val), callbacks=[checkpoint])

# Save the Model
model.save("lstm_poetry_model.h5")

# Generate Poetry Function
def generate_poetry(seed_text, next_words=50, temperature=0.8):
    generated_text = seed_text.lower()
    for _ in range(next_words):
        tokenized_input = tokenizer.texts_to_sequences([generated_text])
        tokenized_input = pad_sequences(tokenized_input, maxlen=max_sequence_length, padding='post')

        predicted_probs = model.predict(tokenized_input, verbose=0)[0]

        # Apply temperature scaling
        predicted_probs = np.log(predicted_probs + 1e-8) / temperature
        predicted_probs = np.exp(predicted_probs) / np.sum(np.exp(predicted_probs))  # Normalize

        # Sample a word based on the probabilities
        predicted_word_index = np.random.choice(len(predicted_probs), p=predicted_probs)
        predicted_word = tokenizer.index_word.get(predicted_word_index, "")

        if not predicted_word:
            break

        generated_text += " " + predicted_word

    return generated_text

# Example Usage
seed_text = "muj se pehli se mohabbat"
generated_poetry = generate_poetry(seed_text)
print("📝 Generated Poetry:\n", generated_poetry)




Epoch 1/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - accuracy: 0.0539 - loss: 6.9861



[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 112ms/step - accuracy: 0.0539 - loss: 6.9859 - val_accuracy: 0.0736 - val_loss: 6.5051
Epoch 2/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.0753 - loss: 6.3865



[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 111ms/step - accuracy: 0.0754 - loss: 6.3864 - val_accuracy: 0.0810 - val_loss: 6.4333
Epoch 3/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.0883 - loss: 6.1958



[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 110ms/step - accuracy: 0.0883 - loss: 6.1958 - val_accuracy: 0.0953 - val_loss: 6.3887
Epoch 4/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.0995 - loss: 6.0382



[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 110ms/step - accuracy: 0.0995 - loss: 6.0382 - val_accuracy: 0.0991 - val_loss: 6.3878
Epoch 5/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 111ms/step - accuracy: 0.1075 - loss: 5.9236 - val_accuracy: 0.1042 - val_loss: 6.3920
Epoch 6/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 111ms/step - accuracy: 0.1147 - loss: 5.7884 - val_accuracy: 0.1067 - val_loss: 6.4155
Epoch 7/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 111ms/step - accuracy: 0.1179 - loss: 5.6926 - val_accuracy: 0.1100 - val_loss: 6.4426
Epoch 8/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 111ms/step - accuracy: 0.1228 - loss: 5.5877 - val_accuracy: 0.1107 - val_loss: 6.4895
Epoch 9/50
[1m1124/1124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 110ms/step - accuracy: 0.1268 - loss: 5.4704 - val_accuracy: 0.1126 - val_loss: 6.5480
Epo



📝 Generated Poetry:
 muj se pehli se mohabbat churā kar chale gayā bastiyo ab shahr e ruḳhsat lagā rakkhā hai miyāñ tire kūchoñ meñ mahbūb e dīdār kar diyā aur kyā raushan ham par jalva rau sahte jā rahā huuñ maiñ kaun thā vo batlāo ki mujh ko chhupā ke luut liyā duhāī hai tire kaash yaañ ki


In [8]:
seed_text = "awal ki dosti hai"
generated_poetry = generate_poetry(seed_text)
print("📝 Generated Poetry:\n", generated_poetry)

📝 Generated Poetry:
 awal ki dosti hai hāl e aadā kahūñ aur jo dushvār thā patthar ko to sabāt ho yahī kyā moāmla kyā ho ki zindagī ki vo chilman bhī hai lekin aap aaī maiñ aur bhī kyā mire iḳhtiyār pe pahrā chāhiye us meñ baḳhshish e vafā se kyā kiije usī kī ḳhair ab bhī
