Step 1: Environment Setup
Import dependencies (they must already be installed in the kernel's environment) and fix the random seeds

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
import matplotlib.pyplot as plt

# Seed every RNG this notebook relies on so that "Restart Kernel -> Run All"
# reproduces the same data shuffling and weight initialisation.
SEED = 42
random.seed(SEED)         # Python's built-in RNG
np.random.seed(SEED)      # NumPy's legacy global RNG
torch.manual_seed(SEED);  # PyTorch RNG; the trailing ';' keeps the Generator repr out of the cell output


<torch._C.Generator at 0x1c4dfdc1ed0>

Setting a random seed for reproducibility

In [12]:
# Load the corpus. A copy of the text file in the current working directory is
# preferred; if it is not there, fall back to the original absolute location.
from pathlib import Path

DATA_FILENAME = 'Pride_and_Prejudice-Jane_Austen.txt'
p = Path(DATA_FILENAME)
if not p.is_file():
    # Machine-specific fallback, kept so the notebook still runs unchanged on
    # the setup it was originally written on.
    p = Path(r'E:\slfi\Assignment\assignment-2\Assignment2\slf2\Assign-2\dataset\Pride_and_Prejudice-Jane_Austen.txt')
text = p.read_text(encoding='utf-8')
if not text:
    # An empty corpus would give vocab_size == 0 and fail much later, inside the model.
    raise ValueError(f"{p} is empty; cannot build a character vocabulary from it")

# Character-level vocabulary: every distinct character gets an integer id,
# assigned in sorted order so the mapping is deterministic across runs.
chars = sorted(set(text))
vocab_size = len(chars)
char2idx = {ch: idx for idx, ch in enumerate(chars)}
idx2char = dict(enumerate(chars))

# The whole corpus as one 1-D array of character ids.
encoded_text = np.array([char2idx[c] for c in text], dtype=np.int32)


Step 2: Data Preprocessing
Read data, build vocabulary, encode the dataset (cell above), then split it 90/10 into training and validation sets (cell below)

In [13]:
# Chronological hold-out (no shuffling): the leading 90% of the encoded
# characters are used for training, the trailing 10% for validation.
split = int(0.9 * len(encoded_text))
train_encoded, val_encoded = encoded_text[:split], encoded_text[split:]


Step 3: Batch Preparation
Define PyTorch Dataset for sequential batching

In [14]:
class CharDataset(Dataset):
    """Sliding-window dataset for next-token prediction.

    Item ``i`` is the pair ``(x, y)`` where ``x`` holds ``seq_len`` consecutive
    token ids starting at position ``i`` and ``y`` is the same window shifted
    one position to the right, so ``y[t]`` is the prediction target for ``x[t]``.

    Args:
        encoded_text: 1-D sequence of integer token ids that supports ``len()``
            and slicing (the notebook passes a NumPy array).
        seq_len: Number of tokens in each input/target window.
    """

    def __init__(self, encoded_text, seq_len):
        self.data = encoded_text
        self.seq_len = seq_len

    def __len__(self):
        # Every window needs one extra token after it for the shifted target.
        # Clamp at zero so a corpus shorter than seq_len yields an empty
        # dataset instead of an invalid negative length.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair of ``torch.long`` tensors at ``idx``.

        Negative indices count from the end, as for a list.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx = idx + size
        if not 0 <= idx < size:
            # Without this check an out-of-range index silently produced
            # truncated or empty windows, and plain iteration over the dataset
            # (which stops on IndexError) never terminated.
            raise IndexError(f"index {idx} is out of range for a dataset of size {size}")
        start, stop = idx, idx + self.seq_len
        x = self.data[start:stop]
        y = self.data[start + 1:stop + 1]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

SEQ_LEN = 100     # tokens per input/target window
BATCH_SIZE = 64   # windows per mini-batch

# Training pipeline: windows are drawn in a freshly shuffled order each epoch.
train_dataset = CharDataset(train_encoded, SEQ_LEN)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Validation pipeline: windows are visited in order (DataLoader's default,
# spelled out here for clarity).
val_dataset = CharDataset(val_encoded, SEQ_LEN)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

Step 4: Model Design
Implement a simple LSTM language model

In [15]:
class LSTMLanguageModel(nn.Module):
    """Language model built as embedding -> stacked LSTM -> linear output head.

    Args:
        vocab_size: Number of distinct token ids; also the size of the output
            logits' last dimension.
        embed_dim: Width of the token embeddings fed to the LSTM.
        hidden_dim: Hidden-state width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,  # tensors are laid out as (batch, seq, feature)
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x, hidden=None):
        """Map token ids to next-token logits.

        Args:
            x: Integer tensor of token ids, batch dimension first.
            hidden: Optional LSTM state to continue from; ``None`` lets the
                LSTM start from its default initial state.

        Returns:
            ``(logits, hidden)``: one logit vector of size ``vocab_size`` per
            input position, and the LSTM state after the last position.
        """
        embedded = self.embedding(x)
        lstm_out, next_hidden = self.lstm(embedded, hidden)
        return self.fc(lstm_out), next_hidden

# Instantiate the network: 128-dimensional embeddings feeding a 2-layer LSTM
# with 256 hidden units per layer.
model = LSTMLanguageModel(
    vocab_size=vocab_size,
    embed_dim=128,
    hidden_dim=256,
    num_layers=2,
)


Step 5: Training Loop
Train for a fixed number of epochs, saving losses

In [16]:
# Use the GPU when one is visible to PyTorch; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model = model.to(device)

# Cross-entropy over the vocabulary, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.002)

def train_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean training loss.

    Args:
        model: Module called as ``model(x)`` that returns ``(logits, hidden)``;
            the last axis of ``logits`` is the class dimension.
        loader: Iterable of ``(x, y)`` tensor batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called with flattened logits and flattened targets.
            Assumed to be mean-reduced over targets (the default for
            ``nn.CrossEntropyLoss``); the per-token averaging below relies on it.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Loss averaged over every target token seen in the epoch, so a
        smaller final batch is not over-weighted.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.train()
    loss_sum = 0.0
    token_count = 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        logits, _ = model(x)
        # Read the class count off the logits rather than from a notebook-level
        # global, so the function only depends on its arguments.
        loss = criterion(logits.reshape(-1, logits.size(-1)), y.reshape(-1))
        loss.backward()
        optimizer.step()
        batch_tokens = y.numel()
        loss_sum += loss.item() * batch_tokens
        token_count += batch_tokens
    if token_count == 0:
        raise ValueError("loader produced no batches; cannot compute an average loss")
    return loss_sum / token_count


In [17]:
def evaluate(model, loader, criterion, device):
    """Compute the mean loss and perplexity of ``model`` over ``loader``.

    The model is switched to eval mode and gradients are disabled for the pass.

    Args:
        model: Module called as ``model(x)`` that returns ``(logits, hidden)``;
            the last axis of ``logits`` is the class dimension.
        loader: Iterable of ``(x, y)`` tensor batches.
        criterion: Loss called with flattened logits and flattened targets.
            Assumed to be mean-reduced over targets (the default for
            ``nn.CrossEntropyLoss``); the per-token averaging below relies on it.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple: ``(avg_loss, perplexity)`` where ``avg_loss`` is the loss
        averaged over every target token and ``perplexity`` is
        ``exp(avg_loss)``.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.eval()
    loss_sum = 0.0
    token_count = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits, _ = model(x)
            # Read the class count off the logits rather than from a
            # notebook-level global.
            loss = criterion(logits.reshape(-1, logits.size(-1)), y.reshape(-1))
            # Weight each batch by its token count: perplexity is defined on
            # the per-token mean, and the last batch may be smaller.
            batch_tokens = y.numel()
            loss_sum += loss.item() * batch_tokens
            token_count += batch_tokens
    if token_count == 0:
        raise ValueError("loader produced no batches; cannot compute an average loss")
    avg_loss = loss_sum / token_count
    perplexity = np.exp(avg_loss)
    return avg_loss, perplexity


In [None]:
NUM_EPOCHS = 3

# Per-epoch metric histories; the plotting cell reads these lists.
train_losses, val_losses, val_perplexities = [], [], []

for epoch in range(NUM_EPOCHS):
    # One optimisation pass over the training split, then a pass over the
    # held-out split to measure loss and perplexity.
    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_perplexity = evaluate(model, val_loader, criterion, device)

    print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, Val Perplexity={val_perplexity:.2f}")

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_perplexities.append(val_perplexity)

In [None]:
# Loss curves. Epochs are numbered from 1 so the x-axis agrees with the
# "Epoch N" lines printed during training (plotting the bare lists would
# start the axis at 0).
loss_epochs = range(1, len(train_losses) + 1)
fig, ax = plt.subplots()
ax.plot(loss_epochs, train_losses, marker='o', label='Training Loss')
ax.plot(loss_epochs, val_losses, marker='o', label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss')
ax.set_xticks(list(loss_epochs))  # integer ticks only: one per epoch
ax.legend()
plt.show()

# Validation perplexity, on the same 1-based epoch axis.
ppl_epochs = range(1, len(val_perplexities) + 1)
fig, ax = plt.subplots()
ax.plot(ppl_epochs, val_perplexities, marker='o', label='Validation Perplexity')
ax.set_xlabel('Epoch')
ax.set_ylabel('Perplexity')
ax.set_title('Validation Perplexity')
ax.set_xticks(list(ppl_epochs))
ax.legend()
plt.show()
