In [2]:
!pip install gradio
!pip install tensorflow

Collecting gradio
  Downloading gradio-5.16.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.28.1 (from gradio)
  Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Co

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.regularizers import l2
import pickle

# Load dataset: one poem per row in the 'Poetry' column; rows without text are dropped.
df = pd.read_csv('Roman-Urdu-Poetry.csv')
poetry = df['Poetry'].dropna().astype(str).tolist()

# Tokenization (word level).
# Keras Tokenizer word indices start at 1, so the vocabulary size needs +1 to
# leave room for index 0, which pad_sequences uses as the padding value.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(poetry)
total_words = len(tokenizer.word_index) + 1

# Create input sequences: for each poem, every prefix of length >= 2.
# The last token of a prefix is the word to predict; the tokens before it are the context.
input_sequences = []
for token_list in tokenizer.texts_to_sequences(poetry):
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i+1])

if not input_sequences:
    raise ValueError("No training sequences could be built: every poem has fewer than two known words.")

# Padding sequences (left-padded so the target word is always the final column).
max_sequence_length = max(len(seq) for seq in input_sequences)
padded_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# Split context from target. The target column must NOT be part of X, otherwise
# the network sees the answer in its input and merely learns to copy it.
# X therefore has max_sequence_length - 1 columns.
X = padded_sequences[:, :-1]
y = padded_sequences[:, -1]

# Build Model: embedding -> single LSTM -> small dense head -> softmax over the vocabulary.
poet_layers = [
    # 100-dimensional word embeddings; input length follows the training matrix.
    Embedding(total_words, 100, input_length=X.shape[1]),
    # 64 LSTM units with 30% input dropout; only the final state is passed on.
    LSTM(64, dropout=0.3, return_sequences=False),
    # 64-unit ReLU layer with L2 weight regularization.
    Dense(64, kernel_regularizer=l2(0.001), activation='relu'),
    # One output per vocabulary index, also L2-regularized.
    Dense(total_words, kernel_regularizer=l2(0.001), activation='softmax'),
]
model = Sequential(poet_layers)

# Compile Model: integer targets, hence the sparse cross-entropy loss; Adam at 5e-4.
adam_optimizer = Adam(learning_rate=0.0005)
model.compile(
    optimizer=adam_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train Model for a fixed 20 epochs; no callbacks (no checkpointing or early stopping).
history = model.fit(X, y, batch_size=32, epochs=20, verbose=1)

# Plot Loss & Accuracy: both training curves share one set of axes.
fig, ax = plt.subplots()
for metric_key, legend_label in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    ax.plot(history.history[metric_key], label=legend_label)
ax.set_xlabel('Epochs')
ax.set_ylabel('Value')
ax.legend()
plt.show()

# Save Model & Tokenizer: the network goes to an HDF5 file, the fitted tokenizer to a
# pickle; the generation step needs both.
model.save('poet.h5')
with open('poet.pkl', 'wb') as tokenizer_file:
    tokenizer_file.write(pickle.dumps(tokenizer))

print("Model and tokenizer saved successfully!")


In [8]:
import numpy as np
import gradio as gr
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

# Load the saved model and tokenizer
model = load_model('/content/poet.h5')  # Load the saved model
# NOTE: unpickling executes code stored in the file. Only load a tokenizer pickle
# produced by this notebook or another trusted source.
with open('/content/poet.pkl', 'rb') as f:
    tokenizer = pickle.load(f)  # Load the saved tokenizer

# Set the maximum sequence length so it matches the training data.
# generate_poetry pads its input to `max_sequence_length - 1` tokens, so the value
# is taken from the loaded model's own input shape instead of a hand-maintained
# constant. The previous placeholder (20) is kept only as a fallback for models
# that do not expose a fixed sequence length.
try:
    model_input_length = model.input_shape[1]
except (AttributeError, IndexError, TypeError, ValueError):
    model_input_length = None
max_sequence_length = model_input_length + 1 if model_input_length else 20

# Helper: temperature sampling over one softmax output vector
def _sample_word_index(probs, temperature):
    """Pick a vocabulary index from ``probs`` after temperature scaling.

    Args:
        probs: 1-D array-like of non-negative probabilities, one per vocabulary index.
        temperature: values below 1 sharpen the distribution, values above 1
            flatten it; ``<= 0`` selects the most likely index deterministically.

    Returns:
        The chosen index as a plain ``int``.
    """
    # Work in float64: renormalized float32 vectors can miss np.random.choice's
    # "probabilities sum to 1" tolerance and raise ValueError.
    weights = np.array(probs, dtype=np.float64)
    if weights.size > 1:
        # Index 0 is the padding slot and has no entry in tokenizer.index_word,
        # so it must never be emitted as a word.
        weights[0] = 0.0
    if temperature <= 0 or weights.sum() <= 0:
        return int(np.argmax(weights))
    with np.errstate(divide='ignore'):  # log(0) -> -inf is intended: zero stays zero
        scaled_logits = np.log(weights) / temperature
    # Shift by the maximum so at least one term is exp(0) == 1 (no all-zero underflow).
    scaled_logits -= scaled_logits.max()
    weights = np.exp(scaled_logits)
    return int(np.random.choice(weights.size, p=weights / weights.sum()))


# Function to generate poetry
def generate_poetry(seed_text, next_words=50, temperature=0.7):
    """Extend ``seed_text`` word by word using the loaded model.

    Relies on the module-level ``tokenizer``, ``model`` and ``max_sequence_length``.

    Args:
        seed_text: starting text; it is re-tokenized on every step.
        next_words: number of words to append. Floats are truncated to int,
            because UI sliders may deliver non-integer numbers.
        temperature: sampling temperature (see ``_sample_word_index``).

    Returns:
        The seed text followed by the generated words, separated by single spaces.
    """
    for _ in range(int(next_words)):
        # Convert the current text to token ids and left-pad to the model's input length
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_length-1, padding='pre')
        # Probabilities for every vocabulary index given the current context
        predicted_probs = model.predict(token_list, verbose=0)[0]
        predicted_index = _sample_word_index(predicted_probs, temperature)
        # Convert the index to the corresponding word
        predicted_word = tokenizer.index_word.get(predicted_index, '')
        if predicted_word:  # skip unknown indices instead of appending a bare space
            seed_text += " " + predicted_word
    return seed_text

# Gradio callback: adapts the three UI inputs to generate_poetry
def poetry_generator(seed_text: str, next_words: int, temperature: float):
    """Return generated poetry for the given seed text, word count and temperature."""
    generated_text = generate_poetry(seed_text, next_words=next_words, temperature=temperature)
    return generated_text

# Create the Gradio app with updated syntax
with gr.Blocks() as iface:
    gr.Markdown("# Roman Urdu Poetry Generator")
    gr.Markdown("Generate Roman Urdu poetry using an LSTM model. Enter a seed word and adjust the settings!")

    # Input components
    seed_input = gr.Textbox(label="Seed Text", placeholder="Enter a seed word (e.g., 'dil')")
    # step=1 keeps the word count an integer; without it Gradio derives a
    # fractional step from the range and the callback receives values like 50.3.
    words_input = gr.Slider(minimum=10, maximum=100, value=50, step=1, label="Number of Words to Generate")
    temperature_input = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature (Creativity)")

    # Output component
    output_text = gr.Textbox(label="Generated Poetry")

    # Create the button and wire it to the generator callback
    generate_btn = gr.Button("Generate Poetry")
    generate_btn.click(poetry_generator, inputs=[seed_input, words_input, temperature_input], outputs=output_text)

# Launch the Gradio app
iface.launch()




Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1232d7d76737e113d8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


