In [3]:
import pandas as pd
import numpy as np
import re
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
import random
import unicodedata

In [4]:


# Read the Roman Urdu poetry corpus into a DataFrame.
# Point dataset_path at the correct file if the CSV lives elsewhere.
dataset_path = "/content/Roman-Urdu-Poetry.csv"
df = pd.read_csv(dataset_path)

def clean_text(text):
    """
    Normalise one poetry entry into lower-case ASCII text.

    Missing values (anything pd.isna reports as NA) become an empty string.
    Otherwise the text is NFKD-decomposed and its combining marks are dropped,
    then three regex passes run in order before the result is stripped and
    lower-cased.
    """
    if pd.isna(text):
        return ""

    # Split accented characters into base + combining mark, keep only the base
    decomposed = unicodedata.normalize('NFKD', text)
    cleaned = ''.join(filter(lambda ch: not unicodedata.combining(ch), decomposed))

    # Order matters: the dot rule relies on the character filter having run first
    substitutions = (
        (r"[^a-zA-Z0-9\s.,?!]", ""),  # keep letters, digits, whitespace and . , ? !
        (r"\.(?=\w)", ""),            # drop dots glued to a following word char (ja.ega -> jaega)
        (r"\s+", " "),                # collapse whitespace runs (incl. newlines) to one space
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip().lower()

# Run clean_text over every entry of the "Poetry" column and store the result back
cleaned_poetry = df["Poetry"].apply(clean_text)
df["Poetry"] = cleaned_poetry


In [5]:
# Word-level vocabulary fitted on the cleaned poems
max_sequence_length = 20
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(df['Poetry'])
sequences = tokenizer.texts_to_sequences(df['Poetry'])

# One training sample per prefix of length 2..max_sequence_length of each poem;
# tokens beyond the first max_sequence_length of a poem are not used.
input_sequences = [
    seq[:end]
    for seq in sequences
    for end in range(2, min(len(seq), max_sequence_length) + 1)
]
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# Last column is the word to predict, everything before it is the context
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=len(tokenizer.word_index) + 1)

In [7]:
# Size of the output layer / embedding table: every index in tokenizer.word_index
# plus one extra slot for index 0, which the padded sequences use as filler.
vocab_size = len(tokenizer.word_index) + 1

# Next-word predictor: context of (max_sequence_length - 1) token ids -> softmax over the vocabulary.
model = Sequential([
    # An explicit Input layer fixes the context length. It replaces the
    # `input_length` argument of Embedding, which newer Keras releases deprecate.
    tf.keras.Input(shape=(max_sequence_length - 1,)),
    Embedding(input_dim=vocab_size, output_dim=128),
    Bidirectional(LSTM(128, return_sequences=True)),
    LSTM(128),
    Dense(128, activation='relu'),
    Dense(vocab_size, activation='softmax')
])

# Targets are one-hot encoded, hence categorical (not sparse) cross-entropy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [14]:

# Train on the (context, next-word) pairs, then write the fitted model to an HDF5 file
model.fit(x=X, y=y, epochs=100, verbose=1)
model.save(filepath="Poetry_model2.h5")

Epoch 1/100
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 13ms/step - accuracy: 0.6174 - loss: 1.6716
Epoch 2/100
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.6256 - loss: 1.6366
Epoch 3/100
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.6404 - loss: 1.5572
Epoch 4/100
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 13ms/step - accuracy: 0.6496 - loss: 1.5156
Epoch 5/100
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.6583 - loss: 1.4895
Epoch 6/100
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - accuracy: 0.6732 - loss: 1.4055
Epoch 7/100
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.6823 - loss: 1.3681
Epoch 8/100
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 13ms/step - accuracy: 0.6858 - loss: 1.3369
Epoch 9/100
[1m



In [16]:
import numpy as np
import random
from tensorflow.keras.preprocessing.sequence import pad_sequences

def generate_poem(prompt, num_lines, words_per_line, temperature):
    """
    Generates a poem based on the given prompt.

    The first line is seeded with the lower-cased prompt; every later line is
    seeded with the last word generated on the previous line. Each new word is
    sampled from the (at most) five most probable words predicted by the global
    `model`, after temperature scaling of its output distribution.

    Parameters:
    - prompt (str): The initial word or phrase to start the poem.
    - num_lines (int): Number of lines in the generated poem. Float values
      (e.g. from a UI slider) are converted with int().
    - words_per_line (int): Each line is its seed plus up to
      words_per_line - 1 generated words. Converted with int() as well.
    - temperature (float): Controls the randomness of predictions; must be > 0.

    Returns:
    - str: The generated lines, each capitalized, joined by newlines.

    Raises:
    - ValueError: If temperature is not greater than zero.
    """
    if temperature <= 0:
        # Dividing the log-probabilities by zero (or a negative value) would
        # produce NaN/inf weights instead of a usable distribution.
        raise ValueError("temperature must be greater than 0")

    # range() rejects floats, so normalise the counts up front
    num_lines = int(num_lines)
    words_per_line = int(words_per_line)

    poem = []
    current_word = prompt.lower()

    for _ in range(num_lines):
        # First line starts with the prompt; later lines continue from the
        # last word produced so far.
        line = current_word

        for _ in range(words_per_line - 1):
            token_list = tokenizer.texts_to_sequences([line])[0]
            token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')

            # Temperature-scale the predicted distribution and renormalise it
            predictions = model.predict(token_list, verbose=0)[0]
            predictions = np.log(predictions + 1e-10) / temperature
            exp_preds = np.exp(predictions)
            predictions = exp_preds / np.sum(exp_preds)

            top_indices = np.argsort(predictions)[-5:]  # Top 5 words

            # Keep each word paired with its own probability so that dropping an
            # index without a vocabulary entry (e.g. padding index 0) cannot
            # leave the weights list longer than the word list.
            candidates = [
                (tokenizer.index_word[idx], predictions[idx])
                for idx in top_indices
                if idx in tokenizer.index_word
            ]
            if not candidates:
                break

            words, weights = zip(*candidates)
            word = random.choices(words, weights=weights)[0]

            line += " " + word
            current_word = word

        poem.append(line.capitalize())

    return "\n".join(poem)

# Collect the seed text and generation settings from the user
prompt = input("Enter the prompt for the poem: ")
num_lines = int(input("Enter the number of lines: "))
words_per_line = int(input("Enter the number of words per line: "))
temperature = float(input("Enter the temperature (e.g., 0.8 for creativity): "))

# Build the poem and show it under a heading
poem = generate_poem(
    prompt=prompt,
    num_lines=num_lines,
    words_per_line=words_per_line,
    temperature=temperature,
)
print("\nGenerated Poem:\n", poem, sep="\n")

KeyboardInterrupt: Interrupted by user

In [27]:
import gradio as gr
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
import pandas as pd
import unicodedata
import re

# Restore the trained network from its HDF5 file
model = load_model(filepath="Poetry_model2.h5")

# Re-read the raw poetry corpus
df = pd.read_csv(filepath_or_buffer="/content/Roman-Urdu-Poetry.csv")

# Text cleaning function
def clean_text(text):
    """
    Return a lower-cased, ASCII-only version of one poetry entry.

    NA values map to "". Accents are removed via NFKD decomposition, characters
    other than letters, digits, whitespace and . , ? ! are dropped, dots that
    directly precede a word character are removed, and whitespace runs are
    collapsed to single spaces.
    """
    if pd.isna(text):
        return ""

    # Decompose accented characters, then discard the combining marks
    base_chars = [ch for ch in unicodedata.normalize('NFKD', text) if not unicodedata.combining(ch)]

    result = re.sub(r"[^a-zA-Z0-9\s.,?!]", "", ''.join(base_chars))
    result = re.sub(r"\.(?=\w)", "", result)  # ja.ega -> jaega
    collapsed = re.sub(r"\s+", " ", result)

    return collapsed.strip().lower()

# Clean the "Poetry" column with clean_text and store the result back
poetry_cleaned = df["Poetry"].apply(clean_text)
df["Poetry"] = poetry_cleaned

# Tokenization: word-level vocabulary fitted on the cleaned poems
max_sequence_length = 20
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(df['Poetry'])
sequences = tokenizer.texts_to_sequences(df['Poetry'])

# Padded prefix sequences: one sample per prefix of length
# 2..max_sequence_length of each poem.
# NOTE(review): X and y are not referenced by the code that follows in this
# cell (generation only reads tokenizer and max_sequence_length) - confirm
# whether they are still needed here.
input_sequences = pad_sequences(
    [
        seq[:end]
        for seq in sequences
        for end in range(2, min(len(seq), max_sequence_length) + 1)
    ],
    maxlen=max_sequence_length,
    padding='pre',
)
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=len(tokenizer.word_index) + 1)

# Poetry generation function
def generate_poem(prompt, num_lines, words_per_line, temperature):
    """
    Generate a multi-line poem starting from `prompt`.

    The first line is seeded with the lower-cased prompt; every later line is
    seeded with the last word generated on the previous line. Each new word is
    sampled from the (at most) five most probable words predicted by the global
    `model`, after temperature scaling of its output distribution.

    Parameters:
    - prompt (str): The initial word or phrase to start the poem.
    - num_lines (int): Number of lines to generate. Float values (e.g. from a
      UI slider) are converted with int().
    - words_per_line (int): Each line is its seed plus up to
      words_per_line - 1 generated words. Converted with int() as well.
    - temperature (float): Controls the randomness of predictions; must be > 0.

    Returns:
    - str: The generated lines, each capitalized, joined by newlines.

    Raises:
    - ValueError: If temperature is not greater than zero.
    """
    if temperature <= 0:
        # Dividing the log-probabilities by zero (or a negative value) would
        # produce NaN/inf weights instead of a usable distribution.
        raise ValueError("temperature must be greater than 0")

    # range() rejects floats, so normalise the counts up front
    num_lines = int(num_lines)
    words_per_line = int(words_per_line)

    poem = []
    current_word = prompt.lower()

    for _ in range(num_lines):
        # First line starts with the prompt; later lines continue from the
        # last word produced so far.
        line = current_word
        for _ in range(words_per_line - 1):
            token_list = tokenizer.texts_to_sequences([line])[0]
            token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')

            # Temperature-scale the predicted distribution and renormalise it
            predictions = model.predict(token_list, verbose=0)[0]
            predictions = np.log(predictions + 1e-10) / temperature
            exp_preds = np.exp(predictions)
            predictions = exp_preds / np.sum(exp_preds)

            top_indices = np.argsort(predictions)[-5:]

            # Keep each word paired with its own probability so that dropping an
            # index without a vocabulary entry (e.g. padding index 0) cannot
            # leave the weights list longer than the word list.
            candidates = [
                (tokenizer.index_word[idx], predictions[idx])
                for idx in top_indices
                if idx in tokenizer.index_word
            ]
            if not candidates:
                break

            words, weights = zip(*candidates)
            word = random.choices(words, weights=weights)[0]

            line += " " + word
            current_word = word

        poem.append(line.capitalize())

    return "\n".join(poem)

# Custom CSS for a full-page, centred layout. It is handed to gr.Blocks through
# its `css` argument instead of being assigned to the instance after creation.
CUSTOM_CSS = """
    .gradio-container {
        height: 85vh;
        display: flex;
        flex-direction: column;
        justify-content: center;
    }
    #main-row {
        height: 80%;
        display: flex;
        flex-direction: column;
        justify-content: center;
        align-items: center;
    }
    #inputs-column {
        display: flex;
        flex-direction: column;
        justify-content: center;
        align-items: center;
        width: 100%;
    }
    #output-column {
        display: flex;
        flex-direction: column;
        justify-content: center;
        align-items: center;
        width: 100%;
    }
    .gr-button-primary {
        padding-left: 20px;
        padding-right: 20px;
        font-size: 1rem;
    }
    """

# Gradio UI with custom CSS for full-page view
with gr.Blocks(theme="soft", css=CUSTOM_CSS) as iface:
    # Page heading and short usage hint
    gr.Markdown(
        """
        <h1 style="text-align: center; font-size: 2rem; margin-top: 20px;">✨ Roman Urdu Poetry Generator ✨</h1>
        <p style="text-align: center; font-size: 1rem; margin-bottom: 30px;">Enter a word or phrase, and let the AI generate poetic lines in Roman Urdu. 🎶</p>
        """
    )

    # Full-page layout with centered content: inputs on one column, result on the other.
    # The elem_id values are the hooks the CSS selectors above attach to.
    with gr.Row(elem_id="main-row", scale=1):
        with gr.Column(elem_id="inputs-column", scale=1):
            prompt = gr.Textbox(label="Enter Prompt", placeholder="e.g., Ishq, Dard, Khwab...", interactive=True, lines=1)
            num_lines = gr.Slider(1, 10, value=4, step=1, label="Number of Lines")
            words_per_line = gr.Slider(3, 10, value=5, step=1, label="Words per Line")
            temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Creativity Level (Temperature)")
            generate_button = gr.Button("Generate Poetry 🎤", variant="primary", size="sm")  # Default small button

        with gr.Column(elem_id="output-column", scale=1):
            output_poem = gr.Textbox(label="Generated Poem", interactive=False, lines=6)

    # Clicking the button feeds the four inputs to generate_poem and shows its return value
    generate_button.click(generate_poem, inputs=[prompt, num_lines, words_per_line, temperature], outputs=output_poem)

# Launch app
iface.launch()




Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7a3be70a2b88d3d2de.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


