# Training Notebook

In this notebook, we will run models, also this notebook can be a template to run other models with different hyperparameters.

## Import libraries

In [1]:
from get_loader import get_loader
from models import Encoder, Decoder
import torch
import torch.nn as nn
import torch.optim as optim
from utils import *
from pathlib import Path

## Load train and validation loaders

In [2]:
#image_path = '../../CW/Data/train2017'
#captions_path = '../../CW/Data/annotations_trainval2017/annotations/captions_train2017.json'
IMAGE_PATH = '../Datasets/coco/images/train2017'
CAPTIONS_PATH = '../Datasets/coco/annotations/captions_train2017.json'
FREQ_THRESHOLD = 5
CAPS_PER_IMAGE = 5
BATCH_SIZE = 32
SHUFFLE = True

# for encoder and decoder
EMBED_SIZE = 512  # dimension of vocab embedding vector
HIDDEN_SIZE = 512
NUM_LAYERS = 1  # hidden layers in LTSM

# training parameters
TOTAL_EPOCH = 10
CHECKPOINT = '../model/model_v1'

PRINT_EVERY = 500 # run print_every batches and then

In [3]:
train_loader_params = {
    'images_path': IMAGE_PATH,
    'captions_path': CAPTIONS_PATH,
    'freq_threshold': FREQ_THRESHOLD,
    'caps_per_image': 5,
    'batch_size': BATCH_SIZE,
    'shuffle': SHUFFLE,
    'mode': 'train',
    'idx2word': None,
    'word2idx': None
}

train_loader, train_dataset = get_loader(**train_loader_params)

val_loader_params = {
    'images_path': IMAGE_PATH,
    'captions_path': CAPTIONS_PATH,
    'freq_threshold': FREQ_THRESHOLD,
    'caps_per_image': 3,
    'batch_size': BATCH_SIZE,
    'shuffle': SHUFFLE,
    'mode': 'validation',
    'idx2word': train_dataset.vocab.idx2word,
    'word2idx': train_dataset.vocab.word2idx
}

val_loader, val_dataset = get_loader(**val_loader_params)

print(f"Length of training dataloader: {len(train_dataset)}, Length of testing dataloader: {len(val_loader)}")
print(f"Length of vocabulary: {len(train_dataset.vocab.idx2word)}")

Lenght of training dataloader: 50000, Lenght of testing dataloader: 188
Lenght of vocabulary: 3387


In [4]:
train_dataset.vocab.export_vocab('../vocabulary')

In [8]:
vocab_size = len(train_dataset.vocab.idx2word)

## Load the model

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"We are using {device}.")

We are using cuda.


In [10]:
encoder = Encoder(embed_size=EMBED_SIZE, pretrained=True)
decoder = Decoder(embed_size=EMBED_SIZE, hidden_size=HIDDEN_SIZE, vocab_size=vocab_size, num_layers=NUM_LAYERS)

In [11]:
# the loss is a cross entropy loss and ignore the index of <PAD> since it doesn't make any difference
criterion = nn.CrossEntropyLoss(ignore_index=train_dataset.vocab.word2idx["<PAD>"]).cuda() if torch.cuda.is_available() else nn.CrossEntropyLoss(ignore_index=train_dataset.vocab.word2idx["<PAD>"])

# combine the parameters of decoder and encoder
params = list(decoder.parameters()) + list(encoder.embed.parameters())

# Adam optimizer
opt_pars = {'lr':1e-3, 'weight_decay':1e-3, 'betas':(0.9, 0.999), 'eps':1e-08}
optimizer = optim.Adam(params, **opt_pars)

In [12]:
model_params = {
    'path': CHECKPOINT,
    'batch_size': BATCH_SIZE,
    'embed_size': EMBED_SIZE,
    'hidden_size': HIDDEN_SIZE,
    'num_layers': NUM_LAYERS,
    'vocab_size': len(train_dataset.vocab.idx2word)
}

save_params(**model_params)

## Training

In [13]:
train_params = {
    'encoder': encoder,
    'decoder': decoder,
    'criterion': criterion,
    'optimizer': optimizer,
    'train_loader': train_loader,
    'val_loader': val_loader,
    'total_epoch': TOTAL_EPOCH,
    'device': device,
    'checkpoint_path': CHECKPOINT,
    'print_every': PRINT_EVERY,
    'load_checkpoint': False
}

training_loss, validation_loss = train(**train_params) 

Epoch: [0/10] || Step: [0/1563] || Average Training Loss: 8.1324
Epoch: [0/10] || Step: [500/1563] || Average Training Loss: 3.8319
Epoch: [0/10] || Step: [1000/1563] || Average Training Loss: 3.5866
Epoch: [0/10] || Step: [1500/1563] || Average Training Loss: 3.4698
Epoch: [0/10] || Step: [0/188] || Average Validation Loss: 3.2409
****************************************************************************************************
Epoch: [0/10] || Training Loss = 3.46 || Validation Loss: 3.15 || Time: 14.850750
****************************************************************************************************
Epoch: [1/10] || Step: [0/1563] || Average Training Loss: 3.0344
Epoch: [1/10] || Step: [500/1563] || Average Training Loss: 3.2017
Epoch: [1/10] || Step: [1000/1563] || Average Training Loss: 3.1969
Epoch: [1/10] || Step: [1500/1563] || Average Training Loss: 3.1911
Epoch: [1/10] || Step: [0/188] || Average Validation Loss: 2.9978
************************************************

## Try with different hyperparamters

In [None]:
# for data loader
BATCH_SIZE = 128
CAPS_PER_IMAGE = 5 # how many captions for each image to include in data set

# for encoder and decoder
EMBED_SIZE = 1024 # dimension of vocab embedding vector
HIDDEN_SIZE = 512
NUM_LAYERS = 3 #hidden layers in LTSM
vocab_size = len(train_dataset.vocab.idx2word)

# training parameters
TOTAL_EPOCH = 10
CHECKPOINT = '../model/model_v2'
PRINT_EVERY = 500 # run print_every batches and then

In [15]:
model_params = {
    'path': CHECKPOINT,
    'batch_size': BATCH_SIZE,
    'embed_size': EMBED_SIZE,
    'hidden_size': HIDDEN_SIZE,
    'num_layers': NUM_LAYERS,
    'vocab_size': len(train_dataset.vocab.idx2word)
}

save_params(**model_params)

FileNotFoundError: [Errno 2] No such file or directory: '../model/model_v2/model_v2_1_param.json'

In [None]:
encoder_ = Encoder(embed_size=EMBED_SIZE, pretrained=True)
decoder_ = Decoder(embed_size=EMBED_SIZE, hidden_size=HIDDEN_SIZE, vocab_size=vocab_size, num_layers=NUM_LAYERS)

In [None]:
# the loss is a cross entropy loss and ignore the index of <PAD> since it doesn't make any difference
criterion = nn.CrossEntropyLoss(ignore_index=train_dataset.vocab.word2idx["<PAD>"]).cuda() if torch.cuda.is_available() else nn.CrossEntropyLoss(ignore_index=train_dataset.vocab.word2idx["<PAD>"])

# combine the parameters of decoder and encoder
params = list(decoder_.parameters()) + list(encoder_.embed.parameters())

# Adam optimizer
opt_pars = {'lr':3e-4, 'weight_decay':1e-3, 'betas':(0.9, 0.999), 'eps':1e-08}
optimizer = optim.Adam(params, **opt_pars)

In [None]:
train_params = {
    'encoder': encoder_,
    'decoder': decoder_,
    'criterion': criterion,
    'optimizer': optimizer,
    'train_loader': train_loader,
    'val_loader': val_loader,
    'total_epoch': TOTAL_EPOCH,
    'device': device,
    'checkpoint_path': CHECKPOINT,
    'print_every': PRINT_EVERY,
    'load_checkpoint': False
}

training_loss, validation_loss = train(**train_params) 