In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

# Read the poetry CSV; the relative path is resolved against the working directory
df = pd.read_csv(filepath_or_buffer="urdu_poetry.csv")

# Remove label rows and blank rows
def is_valid_row(text):
    """Return True when ``text`` is a usable line of poetry.

    A row is rejected when it is not a string, when it is empty or
    whitespace-only, or when it starts with "###" once surrounding
    whitespace is stripped.

    The result is always a real ``bool`` (never the empty string), so the
    values produced by ``Series.apply(is_valid_row)`` form a proper boolean
    mask.
    """
    if not isinstance(text, str):
        return False
    stripped = text.strip()
    return bool(stripped) and not stripped.startswith("###")

# Keep only the rows accepted by is_valid_row and glue them into one corpus string
poetry_lines = df.loc[df["poetry"].apply(is_valid_row), "poetry"].tolist()
text_corpus = "\n".join(poetry_lines)

print(f"Total characters in dataset: {len(text_corpus)}")

# Character-level vocabulary: every distinct character, in sorted order
chars = sorted(set(text_corpus))
vocab_size = len(chars)
print(f"Vocabulary size: {vocab_size}")

# Two-way lookup tables between characters and their integer ids
char_to_idx = dict(zip(chars, range(vocab_size)))
idx_to_char = dict(enumerate(chars))

# Integer-encode the whole corpus, one id per character
encoded_text = np.fromiter(
    (char_to_idx[ch] for ch in text_corpus),
    dtype=np.int32,
    count=len(text_corpus),
)


Total characters in dataset: 17681
Vocabulary size: 56


In [2]:
class PoetryDataset(Dataset):
    """Sliding-window dataset over an encoded character sequence.

    Item ``idx`` is a pair ``(x, y)`` of ``torch.long`` tensors, each
    ``seq_length`` elements long, where ``y`` is ``x`` shifted one position
    to the right (the next-character targets).
    """

    def __init__(self, encoded_text, seq_length=100):
        """Store the encoded sequence and the window length."""
        self.data = encoded_text
        self.seq_length = seq_length

    def __len__(self):
        """Number of complete (input, target) windows available."""
        # Every window needs seq_length inputs plus one extra element for the
        # last target. Clamp at zero so a corpus shorter than that yields an
        # empty dataset rather than a negative length, which len() rejects.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, idx):
        """Return the window starting at ``idx`` and its shifted targets.

        Raises:
            IndexError: if ``idx`` is outside ``[0, len(self))``. Without this
                check slicing would silently return truncated windows and
                plain iteration over the dataset would never terminate.
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} out of range for dataset of size {len(self)}")
        stop = idx + self.seq_length
        x = self.data[idx:stop]
        y = self.data[idx + 1 : stop + 1]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

# Hyperparameters: window length and mini-batch size
seq_length, batch_size = 100, 64

# Wrap the encoded corpus in a dataset and serve it in shuffled mini-batches
dataset = PoetryDataset(encoded_text, seq_length=seq_length)
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

print(f"Dataset size: {len(dataset)}, Batches per epoch: {len(dataloader)}")


Dataset size: 17581, Batches per epoch: 275


In [3]:
import torch.nn as nn

class StackedLSTM(nn.Module):
    """Character-level language model: embedding -> stacked LSTM -> linear head."""

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        """Build the layers.

        Args:
            vocab_size: number of distinct tokens (embedding rows and output logits).
            embedding_dim: size of each token embedding.
            hidden_dim: LSTM hidden-state size.
            num_layers: number of stacked LSTM layers.
        """
        super().__init__()
        # Keep the recurrent geometry on the instance so init_hidden does not
        # depend on module-level variables that merely share these names.
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        """Run one forward pass.

        Args:
            x: integer token ids, batch-first (the LSTM is built with
                ``batch_first=True``).
            hidden: ``(h, c)`` tuple as produced by ``init_hidden`` or by a
                previous call.

        Returns:
            ``(logits, hidden)``: per-position scores over the vocabulary and
            the updated ``(h, c)`` state.
        """
        embedded = self.embedding(x)
        out, hidden = self.lstm(embedded, hidden)
        return self.fc(out), hidden

    def init_hidden(self, batch_size, device):
        """Return a zeroed ``(h, c)`` state sized for this model.

        Each tensor has shape ``(num_layers, batch_size, hidden_dim)`` and is
        allocated directly on ``device``.
        """
        shape = (self.num_layers, batch_size, self.hidden_dim)
        return (torch.zeros(shape, device=device),
                torch.zeros(shape, device=device))

# Hyperparameters: embedding width, LSTM hidden width, number of stacked layers
embedding_dim, hidden_dim, num_layers = 128, 256, 2

# Prefer the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network and move its parameters onto the chosen device
model = StackedLSTM(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)
model = model.to(device)


In [12]:
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm  # Progress bar for better monitoring
import os

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.002, weight_decay=1e-5)

# Mixed precision only applies on CUDA. Passing enabled=False elsewhere turns
# autocast and GradScaler into explicit no-ops.
use_amp = device.type == "cuda"
scaler = GradScaler(enabled=use_amp)

epochs = 3
clip_value = 5  # Maximum gradient norm, to avoid exploding gradients

# Resolve the checkpoint path before the loop so it is defined even when the
# loop body never runs.
# NOTE(review): the file name says "epoch_20" while `epochs` is 3 - confirm the intended name.
save_path = os.path.join(os.getcwd(), "poetry_generator_epoch_20.pth")

for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}", leave=False)

    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)

        # The last batch can be smaller than the rest. Use a local name so the
        # `batch_size` hyperparameter is not overwritten.
        current_batch_size = inputs.shape[0]
        # Each batch holds independent, shuffled windows, so start from a fresh zero state.
        hidden = model.init_hidden(current_batch_size, device)

        optimizer.zero_grad()

        with autocast(enabled=use_amp):
            output, hidden = model(inputs, hidden)
            loss = criterion(output.view(-1, vocab_size), targets.view(-1))

        scaler.scale(loss).backward()
        # Gradients are still multiplied by the loss scale at this point.
        # Unscale them first so clip_value is compared against true gradient norms.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)

        scaler.step(optimizer)  # Skips the step if gradients contain inf/NaN
        scaler.update()

        batch_loss = loss.item()  # Single host sync, reused below
        total_loss += batch_loss
        progress_bar.set_postfix(loss=f"{batch_loss:.4f}")

    # max(..., 1) keeps an empty dataloader from raising ZeroDivisionError.
    avg_loss = total_loss / max(len(dataloader), 1)
    print(f"Epoch {epoch+1}/{epochs} - Avg Loss: {avg_loss:.4f}")

# Save the model's state dictionary
torch.save(model.state_dict(), save_path)

print(f"Model training complete! Model saved to: {save_path}")



  scaler = GradScaler()  # For mixed-precision training (faster on GPUs)
Epoch 1/3:   0%|          | 0/275 [00:00<?, ?it/s]

  with autocast():  # Use mixed precision for speed
                                                                         

Epoch 1/3 - Avg Loss: 0.0834


                                                                         

Epoch 2/3 - Avg Loss: 0.0822


                                                                         

Epoch 3/3 - Avg Loss: 0.0812
Model training complete! Model saved to: d:\Programming stuff\GenAi A1\poetry_generator_epoch_20.pth


In [9]:
def generate_text(model, start_text="Mohabbat", length=300, temperature=1.0):
    """Sample ``length`` characters from ``model`` as a continuation of ``start_text``.

    The prompt is fed once to prime the recurrent state. After that only the
    newly sampled character is fed at each step, because the carried hidden
    state already summarizes everything seen so far. Re-feeding a window of
    earlier characters on top of that state would show the model the same
    context twice.

    Args:
        model: network exposing ``init_hidden(batch_size, device)`` and
            ``model(input, hidden) -> (logits, hidden)``.
        start_text: non-empty prompt; every character must be a key of the
            module-level ``char_to_idx`` mapping.
        length: number of characters to generate.
        temperature: positive divisor applied to the logits before softmax.
            Values below 1 sharpen the distribution; values above 1 flatten it.

    Returns:
        ``start_text`` followed by the generated characters.

    Raises:
        ValueError: if ``start_text`` is empty or ``temperature`` is not positive.
        KeyError: if ``start_text`` contains a character missing from ``char_to_idx``.
    """
    if not start_text:
        raise ValueError("start_text must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    model.eval()
    # Use the device the model actually lives on instead of a global.
    model_device = next(model.parameters()).device
    input_seq = torch.tensor(
        [[char_to_idx[ch] for ch in start_text]], dtype=torch.long, device=model_device
    )
    hidden = model.init_hidden(1, model_device)
    pieces = [start_text]  # Joined once at the end instead of repeated concatenation

    with torch.no_grad():  # Inference only: no autograd graph needed
        for _ in range(length):
            output, hidden = model(input_seq, hidden)
            logits = output[:, -1, :] / temperature
            probs = torch.softmax(logits, dim=-1).cpu().numpy().ravel()
            next_idx = int(np.random.choice(len(probs), p=probs))
            pieces.append(idx_to_char[next_idx])
            # Feed only the new character; `hidden` carries the earlier context.
            input_seq = torch.tensor([[next_idx]], dtype=torch.long, device=model_device)

    return "".join(pieces)

# Demo: print a 300-character sample seeded with the prompt "Mohabbat"
print(generate_text(model, "Mohabbat", length=300))


Mohabbat
raushnī aadhī idhar aadhi udhar ek diya rakkha hai aaine mein chehre mein mera na itni tez laan bhi dhok ke thi fasamkin lekin un ki tah par malbus KHush-numā hai na itne ki hum kar-e-mohabbat rahu nakaam mohabbat Romantic
havā ke se phaloñ kī dukān bas tarah meri tanhai mein shamil ho dekhne aaya 
