In [8]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import pandas as pd
import numpy as np
import re  
from tensorflow.keras.preprocessing.text import Tokenizer

In [2]:
df = pd.read_csv('/kaggle/input/jughygt/Roman-Urdu-Poetry.csv')
def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r"[^a-zA-Zñḳḍāī\s]", "", text)  # Keep letters, diacritics, apostrophes
    text = re.sub(r'(\n)(\S)', r'\1 \2', text)
    return text
df['Poetry'] = df['Poetry'].apply(clean_text)

In [3]:
tokenizer = Tokenizer(num_words=5000, filters='')
tokenizer.fit_on_texts(df['Poetry'])
sequences = tokenizer.texts_to_sequences(df['Poetry'])

In [5]:
max_sequence_len = max([len(seq) for seq in sequences])  
max_sequence_len = min(max_sequence_len, 225) 
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_len, padding='pre')
K.clear_session()

In [9]:
input_sequences = []
output_words = []

for seq in padded_sequences:
    for i in range(1, len(seq)):
        input_sequences.append(seq[:i])
        output_words.append(seq[i])

input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
output_words = np.array(output_words)
total_words = len(tokenizer.word_index) + 1

In [11]:
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = Sequential()
    model.add(Embedding(input_dim=total_words, output_dim=256, input_shape=(max_sequence_len-1,)))
    model.add(LSTM(256))
    model.add(Dense(total_words, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])



In [13]:
history = model.fit(input_sequences, output_words, epochs=60, batch_size=128)


Epoch 1/60
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 45ms/step - accuracy: 0.5347 - loss: 3.2456
Epoch 2/60
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 45ms/step - accuracy: 0.5561 - loss: 2.9081
Epoch 3/60
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 46ms/step - accuracy: 0.5683 - loss: 2.7441
Epoch 4/60
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 45ms/step - accuracy: 0.5782 - loss: 2.5935
Epoch 5/60
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 45ms/step - accuracy: 0.5888 - loss: 2.4336
Epoch 6/60
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 45ms/step - accuracy: 0.5974 - loss: 2.3003
Epoch 7/60
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 45ms/step - accuracy: 0.6108 - loss: 2.1419
Epoch 8/60
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 45ms/step - accuracy: 0.6253 - loss: 2.0038


In [14]:
model.save('poetry_model.h5')

In [18]:
from tensorflow.keras.models import load_model

model = load_model('poetry_model.h5')


In [27]:
def generate_poem(seed_text, next_words, max_sequence_len, temperature=1.0):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')

        predictions = model.predict(token_list, verbose=0)[0]
        
        # Apply temperature scaling
        predictions = np.log(predictions + 1e-10) / temperature
        exp_preds = np.exp(predictions)
        predictions = exp_preds / np.sum(exp_preds)

        # Sample the next word using the adjusted probabilities
        predicted_word_index = np.random.choice(len(predictions), p=predictions)
        predicted_word = tokenizer.index_word.get(predicted_word_index, '')

        if predicted_word:  
            seed_text += " " + predicted_word  # Append predicted word

    return seed_text

# Example usage
seed_text = input("Enter Seed Text: ")
words = int(input("Enter number of words to generate: "))
temperature = float(input("Enter temperature (default 1.0, lower = predictable, higher = creative): "))

generated_poem = generate_poem(seed_text, words, max_sequence_len=max_sequence_len, temperature=temperature)
print("\nGenerated Poem:\n", generated_poem)


Enter Seed Text:  jo ho ik baar vo har baar ho aisā nahīñ hotā
Enter number of words to generate:  80
Enter temperature (default 1.0, lower = predictable, higher = creative):  0.8



Generated Poem:
 jo ho ik baar vo har baar ho aisā nahīñ hotā 
 hamesha ek hī se pyaar ho aisā nahīñ hotā 
 har ik kashtī kā apnā tajraba hotā hai dariyā meñ 
 magar charāh āñkhoñ se zarrī hai 
 na kisī baat pe na jā aaj ye hulām hotā 
 tumhāre dushman koī allāh kā hamgusār nahīñ hotā 
 jo biit gae haiñ vo ahd to phir na kabhī hotā na mujh ko 
 dikhāī na de kisī ko na jaañ t ne ye haal 
 na vo samjhe apne
