In [1]:
import os
import torch
import numpy as np
from torch.utils.data import DataLoader
from tqdm import tqdm, trange


from src.config import DCNConfig
from src.squad import SquadDataset
from src.model import CoattentionModel
from src.glove import GloVeEmbeddings


# --- Run configuration -------------------------------------------------------
config = DCNConfig()

# Seed the global RNGs so weight initialisation and DataLoader shuffling are
# repeatable after a kernel restart. (GPU kernels may still introduce some
# nondeterminism; this only fixes the random streams.)
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# `is_available()` already returns a bool; no conditional expression needed.
use_cuda = torch.cuda.is_available()

# --- Pre-trained word vectors ------------------------------------------------
# The dimension is declared once and reused for both the embedding object and
# the file name, so the two can never drift apart.
EMBEDDING_DIM = 50
GLOVE_PATH = f"glove_embeddings/glove.6B.{EMBEDDING_DIM}d.txt"

glove = GloVeEmbeddings(embedding_dim=EMBEDDING_DIM)
glove.load_glove_embeddings(GLOVE_PATH)

# --- Model -------------------------------------------------------------------
# Positional argument order is kept exactly as the constructor is called in the
# original cell: hidden size, maxout pool size, embedding matrix, maximum
# decoder steps, dropout ratio.
model = CoattentionModel(
    config.hidden_dim,
    config.maxout_pool_size,
    glove.get_embedding_matrix(),
    config.max_dec_steps,
    config.dropout_ratio,
)




INFO:src.glove:Loading GloVe embeddings from glove_embeddings/glove.6B.50d.txt
INFO:src.glove:Processed 0 lines
INFO:src.glove:Processed 100000 lines
INFO:src.glove:Processed 200000 lines
INFO:src.glove:Processed 300000 lines
INFO:src.glove:Loaded 400004 words with 50d embeddings


In [2]:
# Place the network on the GPU when one was detected during setup; otherwise
# keep the existing (CPU) instance untouched.
model = model.cuda() if use_cuda else model

# Datasets for both splits are indexed with the GloVe word -> index mapping.
train_dataset = SquadDataset(glove.word_to_idx, split="train")
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=config.batch_size,
    shuffle=True,   # reshuffle training examples every epoch
)

eval_dataset = SquadDataset(glove.word_to_idx, split="eval")
eval_dataloader = DataLoader(
    dataset=eval_dataset,
    batch_size=config.batch_size,
    shuffle=False,  # keep evaluation order fixed
)

# Adam over every model parameter, learning rate taken from the config.
opt = torch.optim.Adam(params=model.parameters(), lr=config.lr)

# Bookkeeping for loss curves and best-checkpoint selection.
best_eval_loss = float("inf")
train_losses, eval_losses = [], []


In [3]:
# Train for `config.epochs` epochs, logging the batch loss every
# `config.print_frequency` iterations, recording the mean training loss of each
# epoch in `train_losses`, and writing one checkpoint per epoch.
print("Training started!")

# Create the checkpoint directory once, up front: the call is loop-invariant,
# and an invalid path now fails before any training time has been spent.
os.makedirs(config.model_save_path, exist_ok=True)

for epoch in trange(config.epochs, desc="Epoch"):
    # The model is built with a dropout ratio, so make sure it is in training
    # mode even if another cell switched it to eval mode earlier.
    model.train()

    epoch_loss_sum = 0.0
    num_batches = 0

    for iteration, batch in enumerate(tqdm(train_dataloader)):
        # Unpack the batch
        context, context_lens, question, question_lens, answer_spans = batch

        # Flatten the length tensors exactly once (the original flattened them
        # here and then again at the model call).
        context_lens = context_lens.view(-1)
        question_lens = question_lens.view(-1)

        if use_cuda:
            context = context.cuda()
            context_lens = context_lens.cuda()
            question = question.cuda()
            question_lens = question_lens.cuda()
            answer_spans = answer_spans.cuda()

        # Reset gradients
        opt.zero_grad()

        # === Forward pass ===
        # Called with the gold answer spans, the model returns the loss.
        loss = model(context, context_lens, question, question_lens, answer_spans)

        # === Backpropagation ===
        loss.backward()
        opt.step()

        # Loss of this single batch as a Python float (not a running total).
        batch_loss = loss.item()
        epoch_loss_sum += batch_loss
        num_batches += 1

        if (iteration+1) % config.print_frequency == 0:
            print(
                f"Epoch: {epoch+1} Iteration: {iteration+1} loss: {batch_loss}")

    # Record the epoch's mean training loss so the loss-curve plot has data.
    # Guarded so an empty dataloader does not divide by zero.
    if num_batches:
        train_losses.append(epoch_loss_sum / num_batches)

    # === Save model checkpoint ===
    print("Saving model checkpoint...")
    torch.save(model.state_dict(), os.path.join(config.model_save_path, f'model_epoch_{epoch+1}.pt'))

    # NOTE(review): eval_dataloader, eval_losses and best_eval_loss are still
    # unused. An evaluation pass belongs here, but the model's behaviour in
    # eval mode is not visible from this notebook - confirm before adding it.

print("Training completed!!!")

# Optional: Plot training curves
# import matplotlib.pyplot as plt

# plt.figure(figsize=(10, 6))
# plt.plot(train_losses, label='Training Loss')
# plt.plot(eval_losses, label='Evaluation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.title('Training and Evaluation Loss')
# plt.legend()
# plt.grid(True)
# plt.savefig('training_curves.png')
# plt.show()

Training started!


Epoch:   0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 1 Iteration: 100 loss: 5.985013484954834




Epoch: 1 Iteration: 200 loss: 3.02683687210083




Epoch: 1 Iteration: 300 loss: 2.783381462097168




Epoch: 1 Iteration: 400 loss: 2.8168649673461914




Epoch: 1 Iteration: 500 loss: 2.997023105621338




Epoch: 1 Iteration: 600 loss: 2.957282781600952




Epoch: 1 Iteration: 700 loss: 2.904247999191284




Epoch: 1 Iteration: 800 loss: 3.1266651153564453




Epoch: 1 Iteration: 900 loss: 3.152479887008667




Epoch: 1 Iteration: 1000 loss: 2.916602373123169




Epoch: 1 Iteration: 1100 loss: 3.5630900859832764


 42%|████▏     | 1150/2738 [09:02<12:28,  2.12it/s]
Epoch:   0%|          | 0/30 [09:02<?, ?it/s]


KeyboardInterrupt: 