In [1]:
import os

import torch
import torch.optim as optim
from torch.utils.data import random_split

from dataset import MiniFlickrDataset, get_loader
from todo import CaptioningModel
from trainer import Trainer
from lr_warmup import LRWarmup
import utils

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Args:
    def __init__(self):
        self.data_path = 'dataset.pkl'
        self.clip_model = 'openai/clip-vit-base-patch32'
        self.text_model = 'gpt2'
        self.seed = 100
        self.num_workers = 0
        self.train_size = 0.84
        self.val_size = 0.13
        self.test_size = 100
        self.epochs = 1 #10
        self.lr = 3e-3
        self.k = 0.33
        self.batch_size_exp = 6
        self.ep_len = 4
        self.num_layers = 6
        self.n_heads = 16
        self.forward_expansion = 4
        self.max_len = 40
        self.dropout = 0.1
config = Args()

In [3]:
device = utils.init_env(config.seed)

# Create data loaders
dataset = MiniFlickrDataset(config.data_path)
config.train_size = int(config.train_size * len(dataset))
config.val_size = len(dataset) - config.train_size - config.test_size
train_dataset, val_dataoet, test_dataset = random_split(dataset, [config.train_size, config.val_size, config.test_size])
train_loader = get_loader(
    train_dataset, 
    bs_exp=config.batch_size_exp, 
    shuffle=True, 
    num_workers=config.num_workers,
    pin_memory=True,
)
test_loader = get_loader(
    test_dataset,
    bs_exp=0,
    shuffle=False,
    num_workers=config.num_workers,
    pin_memory=True,
    train=False,
)

# Creat model
model = CaptioningModel(
    clip_model=config.clip_model,
    text_model=config.text_model,
    ep_len=config.ep_len,
    num_layers=config.num_layers, 
    n_heads=config.n_heads, 
    forward_expansion=config.forward_expansion, 
    dropout=config.dropout, 
    max_len=config.max_len,
    device=device
)

# Create optimizer, lr scheduler
optimizer = optim.Adam(model.parameters(), lr=config.lr)
warmup = LRWarmup(epochs=config.epochs, max_lr=config.lr, k=config.k)
scheduler = optim.lr_scheduler.LambdaLR(optimizer, warmup.lr_warmup)

# Create trainer
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    scaler=torch.cuda.amp.GradScaler(),
    scheduler=scheduler,
    train_loader=train_loader,
    test_loader=test_loader,
    device=device
)

# use _load_ckpt method of the trainer to load weights from the saved checkpoint to resume the training. Below is a sample code for the same

#trainer._load_ckp("path to .pt file")

  return torch.load(checkpoint_file, map_location="cpu")


ZeroDivisionError: float division by zero

In [4]:
# i = 0
# for img_emb, input_ids, attention_mask in train_loader:

#     print(img_emb.shape)
#     print(input_ids.shape)
#     print(attention_mask.shape)

#     loss = model.train_forward(
#         img_emb= img_emb.to(device),
#         trg_cap= input_ids.to(device),
#         att_mask= attention_mask.to(device)
#     )

#     print('\n', loss)

#     if i > 1:
#         break

#     i += 1
#     print()


# # model.train_forward(
# #     img_emb=
# # )

In [None]:
# Start training
for epoch in range(trainer.epoch, config.epochs):
    trainer.train_epoch()

    score = trainer.test_epoch()
    print("Score: {:.4f}".format(score))

    os.makedirs("checkpoints", exist_ok=True)
    if (epoch + 1) % 3 == 0:
        trainer.save_ckp(os.path.join("checkpoints", f'epoch_{epoch + 1}.pt'))