In [4]:
import os
import torch
import numpy as np
from torch.utils.data import DataLoader
from tqdm import tqdm, trange

from src.config import DCNConfig
from src.squad import SquadDataset
from src.model import Encoder, BRNN, CoattentionEncoder, DynamicDecoder, CoattentionModel
from src.glove import GloVeEmbeddings


config = DCNConfig()
use_cuda = True if torch.cuda.is_available() else False

glove = GloVeEmbeddings(embedding_dim=300)
glove.load_glove_embeddings("glove_embeddings/glove.6B.50d.txt")

model = CoattentionModel(config.hidden_dim, config.maxout_pool_size, glove.get_embedding_matrix(), config.max_dec_steps, config.dropout_ratio)

if use_cuda:
    model = model.cuda()

INFO:src.glove:Loading GloVe embeddings from glove_embeddings/glove.6B.50d.txt
INFO:src.glove:Processed 0 lines
INFO:src.glove:Processed 100000 lines
INFO:src.glove:Processed 200000 lines
INFO:src.glove:Processed 300000 lines
INFO:src.glove:Loaded 4 words with 300d embeddings


In [5]:


#=============== Define dataloader ================#
# train_dataset = SquadDataset(glove.word_to_idx, split="train")
# train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)

eval_dataset = SquadDataset(glove.word_to_idx, split="eval")
eval_dataloader = DataLoader(eval_dataset, batch_size=config.batch_size, shuffle=False)

#============== Define optimizer ================#
opt = torch.optim.Adam(model.parameters(), lr=config.lr)

# Define loss function
criterion = torch.nn.CrossEntropyLoss()

# Training tracking
best_eval_loss = float('inf')
train_losses = []
eval_losses = []


In [6]:
print("Training started!")
for epoch in trange(config.epochs, desc="Epoch"):

    for iteration, batch in enumerate(tqdm(eval_dataloader)):
        # unpack the batch
        context, context_lens, question, question_lens, answer_spans = batch

        if use_cuda:
            context = context.cuda()
            context_lens = context_lens.cuda()
            question = question.cuda()
            question_lens = question_lens.cuda()
            answer_spans = answer_spans.cuda()
        # Zero out the gradients before forward pass
        opt.zero_grad()
        loss = model(context, context_lens.view(-1), question, question_lens.view(-1), answer_spans)
        loss.backward()
        opt.step()
        total_loss = loss.item()

        if (iteration+1) % config.print_frequency == 0:
            print(
                f"Epoch: {epoch+1} Iteration: {iteration+1} loss: {total_loss}")

    print("Saving states")
    #================ Saving states ==========================#
    if not os.path.exists(config.model_save_path):
        os.mkdir(config.model_save_path)
    # save model state
    torch.save(model.state_dict(), config.model_save_path +
                f'/model_epoch_{epoch+1}.pt')
print("Training completed!!!")

# Optional: Plot training curves
# import matplotlib.pyplot as plt

# plt.figure(figsize=(10, 6))
# plt.plot(train_losses, label='Training Loss')
# plt.plot(eval_losses, label='Evaluation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.title('Training and Evaluation Loss')
# plt.legend()
# plt.grid(True)
# plt.savefig('training_curves.png')
# plt.show()

Training started!


Epoch:   0%|          | 0/30 [00:00<?, ?it/s]

tensor([[53],
        [53],
        [36],
        [34],
        [37],
        [33],
        [27],
        [37],
        [25],
        [39],
        [23],
        [44],
        [43],
        [73],
        [56],
        [79],
        [43],
        [46],
        [43],
        [56],
        [29],
        [32],
        [32],
        [23],
        [59],
        [30],
        [30],
        [42],
        [40],
        [63],
        [55],
        [35]])


  0%|          | 0/327 [00:00<?, ?it/s]
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]


RuntimeError: start (19200) + length (32) exceeds dimension size (19200).