In [None]:
!pip install numpy pandas



In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import random

In [None]:
data_path = '/content/updated_ghazals.txt'

# Open and read the .txt file
with open(data_path, 'r', encoding='utf-8') as file:
    poetry_texts = file.readlines()

# Remove any leading/trailing whitespaces or newlines
poetry_texts = [line.strip() for line in poetry_texts if line.strip()]

# Join all texts into a single string (if needed for further processing)
all_text = "\n".join(poetry_texts)


In [None]:
chars = sorted(list(set(all_text)))
vocab_size = len(chars)

char2idx = {ch: idx for idx, ch in enumerate(chars)}
idx2char = {idx: ch for idx, ch in enumerate(chars)}

def text_to_int(text):
    return [char2idx[ch] for ch in text]

def int_to_text(indices):
    return ''.join([idx2char[idx] for idx in indices])

all_data = text_to_int(all_text)

In [None]:
class PoetryDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        x = self.data[idx : idx + self.seq_length]
        y = self.data[idx + 1 : idx + self.seq_length + 1]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

SEQ_LENGTH = 256  # you can adjust this
BATCH_SIZE = 256

full_dataset = PoetryDataset(all_data, SEQ_LENGTH)

In [None]:
dataset_size = len(full_dataset)
train_size = int(0.9 * dataset_size)
test_size = dataset_size - train_size

train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

In [None]:
class CharRNN(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers=1):
        super(CharRNN, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden=None):
        x = self.embed(x)
        output, hidden = self.lstm(x, hidden)
        output = output.contiguous().view(-1, output.shape[2])
        logits = self.fc(output)
        return logits, hidden

EMBED_SIZE = 128
HIDDEN_SIZE = 256
NUM_LAYERS = 2
NUM_EPOCHS = 10
LEARNING_RATE = 0.003

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CharRNN(vocab_size, EMBED_SIZE, HIDDEN_SIZE, NUM_LAYERS).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

In [None]:
model.train()
for epoch in range(1, NUM_EPOCHS + 1):
    epoch_loss = 0.0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        logits, _ = model(inputs)
        loss = criterion(logits, targets.view(-1))

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_loader)
    print(f"Epoch [{epoch}/{NUM_EPOCHS}], Loss: {avg_loss:.4f}")

Epoch [1/10], Loss: 0.8518
Epoch [2/10], Loss: 0.1527
Epoch [3/10], Loss: 0.1177
Epoch [4/10], Loss: 0.1068
Epoch [5/10], Loss: 0.0998
Epoch [6/10], Loss: 0.0952
Epoch [7/10], Loss: 0.0918
Epoch [8/10], Loss: 0.0891
Epoch [9/10], Loss: 0.0870
Epoch [10/10], Loss: 0.0851


In [None]:
model_path = 'char_rnn_model.pth'
torch.save({
    'model_state_dict': model.state_dict(),
    'vocab_size': vocab_size,
    'embed_size': EMBED_SIZE,
    'hidden_size': HIDDEN_SIZE,
    'num_layers': NUM_LAYERS,
    'char2idx': char2idx,
    'idx2char': idx2char,
}, model_path)
print(f"Model saved to {model_path}")

Model saved to char_rnn_model.pth


In [None]:
def load_model(model_path, device):
    checkpoint = torch.load(model_path, map_location=device)
    loaded_model = CharRNN(checkpoint['vocab_size'],
                           checkpoint['embed_size'],
                           checkpoint['hidden_size'],
                           checkpoint['num_layers']).to(device)
    loaded_model.load_state_dict(checkpoint['model_state_dict'])
    loaded_model.eval()
    char2idx = checkpoint['char2idx']
    idx2char = checkpoint['idx2char']
    return loaded_model, char2idx, idx2char

loaded_model, loaded_char2idx, loaded_idx2char = load_model(model_path, device)
print("Model loaded for testing or generation.")

Model loaded for testing or generation.


  checkpoint = torch.load(model_path, map_location=device)


In [None]:
def generate_text(model, start_text, char2idx, idx2char, generation_length=200, temperature=0.8):
    model.eval()
    input_indices = [char2idx.get(ch, 0) for ch in start_text]
    input_tensor = torch.tensor(input_indices, dtype=torch.long).unsqueeze(0).to(device)

    hidden = None
    generated_text = start_text

    for _ in range(generation_length):
        logits, hidden = model(input_tensor, hidden)
        logits = logits[-1] / temperature
        probabilities = torch.softmax(logits, dim=0).detach().cpu().numpy()
        next_char_idx = np.random.choice(len(probabilities), p=probabilities)

        next_char = idx2char[next_char_idx]
        generated_text += next_char

        input_tensor = torch.tensor([[next_char_idx]], dtype=torch.long).to(device)

    return generated_text

prompt = "pyar"
generated_poetry = generate_text(loaded_model, prompt, loaded_char2idx, loaded_idx2char)
print("Generated Poetry:\n", generated_poetry)

Generated Poetry:
 pyar shahi mein wo patthar ho
'ais hatheli mein ek rishte main ne dekhte hain
roz maidan-e-jang lagta hai
meri aankhon mein ye nami kyon hai
jis ko aankho se dur rakhna tha
aaj qurbat mein phir wahi kyon 


In [None]:
!pip install numpy pandas torch

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import random

# Data Loading and Preprocessing
data_path = '/content/updated_ghazals.txt'

with open(data_path, 'r', encoding='utf-8') as file:
    poetry_texts = file.readlines()

poetry_texts = [line.strip() for line in poetry_texts if line.strip()]

# Replace newlines with a special token
NEWLINE_TOKEN = "<NEWLINE>"
all_text = NEWLINE_TOKEN.join(poetry_texts)

chars = sorted(list(set(all_text)))
vocab_size = len(chars)

char2idx = {ch: idx for idx, ch in enumerate(chars)}
idx2char = {idx: ch for idx, ch in enumerate(chars)}

def text_to_int(text):
    return [char2idx[ch] for ch in text]

def int_to_text(indices):
    return ''.join([idx2char[idx] for idx in indices])

all_data = text_to_int(all_text)

# Dataset and DataLoader
class PoetryDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        x = self.data[idx : idx + self.seq_length]
        y = self.data[idx + 1 : idx + self.seq_length + 1]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

SEQ_LENGTH = 256
BATCH_SIZE = 256

full_dataset = PoetryDataset(all_data, SEQ_LENGTH)

# Split into training and validation sets
dataset_size = len(full_dataset)
train_size = int(0.8 * dataset_size)  # Reduced training size
val_size = dataset_size - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)


# Model Definition with Dropout
class CharRNN(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers=2, dropout_rate=0.3):
        super(CharRNN, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size, vocab_size)
        self.dropout_rate = dropout_rate # Keep track of dropout rate

    def forward(self, x, hidden=None):
        x = self.embed(x)
        output, hidden = self.lstm(x, hidden)
        output = output.contiguous().view(-1, output.shape[2])
        logits = self.fc(output)
        return logits, hidden

# Hyperparameters
EMBED_SIZE = 128
HIDDEN_SIZE = 256
NUM_LAYERS = 2
NUM_EPOCHS = 50  # Increased epochs, but early stopping will prevent overfitting
LEARNING_RATE = 0.003
WEIGHT_DECAY = 1e-5 # L2 regularization

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CharRNN(vocab_size, EMBED_SIZE, HIDDEN_SIZE, NUM_LAYERS).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)  # Added weight decay
criterion = nn.CrossEntropyLoss()

# Learning Rate Scheduler (Optional)
from torch.optim.lr_scheduler import ReduceLROnPlateau
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5) # Reduce LR if validation loss plateaus


# Training Loop with Validation and Early Stopping
best_val_loss = float('inf')
patience = 5  # Patience for early stopping
counter = 0

model.train()
for epoch in range(1, NUM_EPOCHS + 1):
    epoch_loss = 0.0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        logits, _ = model(inputs)
        loss = criterion(logits, targets.view(-1))

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_loader)

    # Validation Loop
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for val_inputs, val_targets in val_loader:
            val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)
            val_logits, _ = model(val_inputs)
            loss = criterion(val_logits, val_targets.view(-1))
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)
    scheduler.step(avg_val_loss)  # Step the scheduler

    print(f"Epoch [{epoch}/{NUM_EPOCHS}], Train Loss: {avg_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

    # Early Stopping Check
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        # Save the best model
        torch.save({
            'model_state_dict': model.state_dict(),
            'vocab_size': vocab_size,
            'embed_size': EMBED_SIZE,
            'hidden_size': HIDDEN_SIZE,
            'num_layers': NUM_LAYERS,
            'dropout_rate': model.dropout_rate,
            'char2idx': char2idx,
            'idx2char': idx2char,
        }, 'best_char_rnn_model.pth')
        print("Best model saved.")
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered!")
            break

    model.train() # Set back to training mode

# Saving the model (save the last model, but ideally use the 'best' one)
model_path = 'char_rnn_model.pth'
torch.save({
    'model_state_dict': model.state_dict(),
    'vocab_size': vocab_size,
    'embed_size': EMBED_SIZE,
    'hidden_size': HIDDEN_SIZE,
    'num_layers': NUM_LAYERS,
    'dropout_rate': model.dropout_rate,
    'char2idx': char2idx,
    'idx2char': idx2char,
}, model_path)
print(f"Model saved to {model_path}")




Epoch [1/50], Train Loss: 0.9605, Validation Loss: 0.4213
Best model saved.
Epoch [2/50], Train Loss: 0.4612, Validation Loss: 0.2184
Best model saved.
Epoch [3/50], Train Loss: 0.3529, Validation Loss: 0.1695
Best model saved.
Epoch [4/50], Train Loss: 0.3112, Validation Loss: 0.1493
Best model saved.
Epoch [5/50], Train Loss: 0.2896, Validation Loss: 0.1408
Best model saved.
Epoch [6/50], Train Loss: 0.2750, Validation Loss: 0.1358
Best model saved.
Epoch [7/50], Train Loss: 0.2650, Validation Loss: 0.1299
Best model saved.
Epoch [8/50], Train Loss: 0.2575, Validation Loss: 0.1264
Best model saved.
Epoch [9/50], Train Loss: 0.2513, Validation Loss: 0.1246
Best model saved.
Epoch [10/50], Train Loss: 0.2469, Validation Loss: 0.1238
Best model saved.
Epoch [11/50], Train Loss: 0.2422, Validation Loss: 0.1228
Best model saved.
Epoch [12/50], Train Loss: 0.2386, Validation Loss: 0.1201
Best model saved.
Epoch [13/50], Train Loss: 0.2352, Validation Loss: 0.1212
Epoch [14/50], Train Loss:

KeyboardInterrupt: 

In [None]:
# Loading the model
def load_model(model_path, device):
    checkpoint = torch.load(model_path, map_location=device)
    loaded_model = CharRNN(checkpoint['vocab_size'],
                            checkpoint['embed_size'],
                            checkpoint['hidden_size'],
                            checkpoint['num_layers'],
                            dropout_rate=checkpoint['dropout_rate']).to(device)
    loaded_model.load_state_dict(checkpoint['model_state_dict'])
    loaded_model.eval()
    char2idx = checkpoint['char2idx']
    idx2char = checkpoint['idx2char']
    return loaded_model, char2idx, idx2char

# Load the *best* model
loaded_model, loaded_char2idx, loaded_idx2char = load_model('best_char_rnn_model.pth', device)
print("Model loaded for testing or generation.")


# Text Generation
def generate_text(model, start_text, char2idx, idx2char, generation_length=200, temperature=0.8):
    model.eval()
    input_indices = [char2idx.get(ch, 0) for ch in start_text]
    input_tensor = torch.tensor(input_indices, dtype=torch.long).unsqueeze(0).to(device)

    hidden = None
    generated_text = start_text

    with torch.no_grad():
        for _ in range(generation_length):
            logits, hidden = model(input_tensor, hidden)
            logits = logits[-1] / temperature
            probabilities = torch.softmax(logits, dim=0).detach().cpu().numpy()
            next_char_idx = np.random.choice(len(probabilities), p=probabilities)

            next_char = idx2char[next_char_idx]
            generated_text += next_char

            input_tensor = torch.tensor([[next_char_idx]], dtype=torch.long).to(device)

    return generated_text

prompt = "pyar"
generated_poetry = generate_text(loaded_model, loaded_char2idx, loaded_idx2char, temperature=0.7)
print("Generated Poetry:\n", generated_poetry)
