In [4]:
import os
import torch
import numpy as np
from torch.utils.data import DataLoader
from tqdm import tqdm, trange

from src.config import DCNConfig
from src.squad import SquadDataset
from src.model import CoattentionModel
from src.glove import GloVeEmbeddings


config = DCNConfig()

glove = GloVeEmbeddings(embedding_dim=config.glove_dim)
glove.load_glove_embeddings(config.glove_path)

train_dataset = SquadDataset(glove.word_to_idx, split="train")
train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)

eval_dataset = SquadDataset(glove.word_to_idx, split="validation")
eval_dataloader = DataLoader(eval_dataset, batch_size=config.batch_size, shuffle=False)

model = CoattentionModel(config.hidden_dim, config.maxout_pool_size, glove.get_embedding_matrix(), config.max_dec_steps, config.dropout_ratio)
use_cuda = True if torch.cuda.is_available() else False
if use_cuda:
    model = model.cuda()

opt = torch.optim.Adam(model.parameters(), lr=config.lr)

INFO:src.glove:Loading GloVe embeddings from glove_embeddings/glove.6B.300d.txt
INFO:src.glove:Processed 0 lines
INFO:src.glove:Processed 100000 lines
INFO:src.glove:Processed 200000 lines
INFO:src.glove:Processed 300000 lines
INFO:src.glove:Loaded 400004 words with 300d embeddings


In [24]:
print(f"Embedding matrix shape: {glove.get_embedding_matrix().shape}") # Should be (vocab_size, embedding_dim)
print(f"Vocabulary size: {len(glove.word_to_idx)}")
print(f"Index of 'the': {glove.word_to_idx.get('the', 'Not found')}")
print(f"Index of 'McDonald': {glove.word_to_idx.get('McDonald', 'Not found')}")
print(f"Index of '<PAD>': {glove.word_to_idx.get('<PAD>', 'Not found')}")
print(f"Index of '<UNK>': {glove.word_to_idx.get('<UNK>', 'Not found')}")
print(f"Total training samples: {len(train_dataset)}")
print(f"Total validation samples: {len(eval_dataset)}")
print("")

sample_idx = 33
print(f"\nSample #{sample_idx}")
print(f"Context: {eval_dataset.context_data[sample_idx][:5]}...")
print(f"Question: {eval_dataset.question_data[sample_idx]}")
print(f"Answer: {eval_dataset.answer_span_data[sample_idx]}")
start_pos, end_pos = eval_dataset.answer_span_data[sample_idx]
answer_text = eval_dataset.context_data[sample_idx][start_pos:end_pos]
print(f"Answer: '{answer_text}'")
print("")

print("Embedded view:")
context_ids, context_len, question_ids, question_len, answer_span = eval_dataset[sample_idx]

print(f"Context IDs: {context_ids}")
print(f"Context length: {context_len}")
print(f"Question IDs: {question_ids}")
print(f"Question length: {question_len}")
print(f"Answer span: {answer_span}")


Embedding matrix shape: (400004, 300)
Vocabulary size: 400004
Index of 'the': 4
Index of 'McDonald': Not found
Index of '<PAD>': 0
Index of '<UNK>': 1
Total training samples: 87599
Total validation samples: 10570


Sample #33
Context: The P...
Question: What team did the Panthers defeat?
Answer: (152, 169)
Answer: 'Arizona Cardinals'

Embedded view:
Context IDs: tensor([     4,   9003,   1030,      4,   1389,    199,     21,     11,    408,
           221,    180,    388,      5,      9,   3680,  15638,   8115,     19,
           568,      4,   2656,    100,   4644,    652,     27,  10047,     28,
             6,     43,   1957,      4,   2207,   5508,   3838,    221,    408,
            10,      4,  13055,    883,    190,      9,   1981,      8,     48,
           130,   1823,   1995,   1886,    112,      4,   4272,     19,   1301,
            10,   1014,      6,      4,   7720,   1030,      4,   1389,    199,
            21,     11,    425,    221,    413,    388,      5,      9,   1

## Train

In [3]:
# Training tracking
best_eval_loss = float('inf')
train_losses = []
eval_losses = []

print("Training started!")
for epoch in trange(config.num_epochs, desc="Epoch"):

    for iteration, batch in enumerate(tqdm(train_dataloader)):
        # Unpack the batch
        context, context_lens, question, question_lens, answer_spans = batch

        if use_cuda:
            context = context.cuda()
            context_lens = context_lens.view(-1).cuda()
            question = question.cuda()
            question_lens = question_lens.view(-1).cuda()
            answer_spans = answer_spans.cuda()
        else:
            context_lens = context_lens.view(-1)
            question_lens = question_lens.view(-1)
        
        # Reset gradients
        opt.zero_grad()
        
        # === Forward pass ===
        loss = model(context, context_lens, question, question_lens, answer_spans)
        
        # === Backpropagation ===
        loss.backward()
        opt.step()

        total_loss = loss.item()

        if (iteration+1) % config.print_frequency == 0:
            print(
                f"Epoch: {epoch+1} Iteration: {iteration+1} loss: {total_loss}")

    # === Save model checkpoint ===
    print("Saving model checkpoint...")
    os.makedirs(config.model_save_path, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(config.model_save_path, f'model_epoch_{epoch+1}.pt'))

print("Training completed!!!")

Training started!


Epoch:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch: 1 Iteration: 100 loss: 5.90866231918335




Epoch: 1 Iteration: 200 loss: 7.000124454498291


 10%|█         | 287/2738 [02:15<19:13,  2.13it/s]
Epoch:   0%|          | 0/50 [02:15<?, ?it/s]


KeyboardInterrupt: 

## Plots

In [None]:
# Optional: Plot training curves
# import matplotlib.pyplot as plt

# plt.figure(figsize=(10, 6))
# plt.plot(train_losses, label='Training Loss')
# plt.plot(eval_losses, label='Evaluation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.title('Training and Evaluation Loss')
# plt.legend()
# plt.grid(True)
# plt.savefig('training_curves.png')
# plt.show()