Mounting Google Drive

In [None]:
# Colab-only: expose the user's Google Drive under /content/drive.
# The following cells expect the project repository to live there.
from google.colab import drive
drive.mount('/content/drive')

Changing to the directory where the git repo is located

In [None]:
# Work from the repository root on the mounted Drive so that relative paths
# used later (e.g. requirements.txt) resolve; presumably this is also what
# makes the project-local imports (models, datasets, ...) importable.
%cd "/content/drive/MyDrive/Colab Notebooks/BanglaImageCaption"



/content/drive/MyDrive/Colab Notebooks/BanglaImageCaption


Installing necessary dependencies

In [None]:
# Use the %pip magic rather than !pip: %pip installs into the environment of
# the running kernel, whereas !pip runs whichever pip is first on the shell PATH.
%pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch
from torch.utils.data import DataLoader

import numpy as np
import time
import sys
import os

from models import utils, caption
from datasets import coco
from configuration import Config
from engine import train_one_epoch, evaluate

Training the model on the dataset

In [None]:
def _build_optimizer(model, config):
    """Create the AdamW optimizer and its StepLR scheduler for ``model``.

    Trainable parameters are split into two groups, in this order:
    parameters whose name does not contain "backbone" (using ``config.lr``)
    and parameters whose name contains "backbone" (using
    ``config.lr_backbone``). The group order is kept stable because
    optimizer state stored in a checkpoint is restored per group.

    Returns:
        Tuple ``(optimizer, lr_scheduler)``.
    """
    head_params, backbone_params = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if "backbone" in name:
            backbone_params.append(param)
        else:
            head_params.append(param)

    param_groups = [
        {"params": head_params},
        {"params": backbone_params, "lr": config.lr_backbone},
    ]
    optimizer = torch.optim.AdamW(
        param_groups, lr=config.lr, weight_decay=config.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, config.lr_drop)
    return optimizer, lr_scheduler


def _build_data_loaders(config):
    """Build the training and validation data loaders and print dataset sizes.

    Training batches are drawn in random order and the last incomplete batch
    is dropped; validation is iterated sequentially and keeps the final
    partial batch.

    Returns:
        Tuple ``(data_loader_train, data_loader_val)``.
    """
    dataset_train = coco.build_dataset(config, mode='training')
    dataset_val = coco.build_dataset(config, mode='validation')
    print(f"Train: {len(dataset_train)}")
    print(f"Valid: {len(dataset_val)}")

    sampler_train = torch.utils.data.RandomSampler(dataset_train)
    sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, config.batch_size, drop_last=True
    )

    data_loader_train = DataLoader(
        dataset_train, batch_sampler=batch_sampler_train,
        num_workers=config.num_workers)
    data_loader_val = DataLoader(
        dataset_val, config.batch_size, sampler=sampler_val,
        drop_last=False, num_workers=config.num_workers)
    return data_loader_train, data_loader_val


def _save_checkpoint(state, path):
    """Write ``state`` to ``path`` without ever leaving a half-written file.

    The data is first saved to ``<path>.tmp`` and then moved over ``path``
    with ``os.replace``. If the process is interrupted while saving, the
    previous checkpoint at ``path`` is still intact and resumable.
    """
    tmp_path = f"{path}.tmp"
    torch.save(state, tmp_path)
    os.replace(tmp_path, path)


def main(config):
    """Train the captioning model, checkpointing and validating every epoch.

    Reads from ``config``: ``device``, ``seed``, ``lr``, ``lr_backbone``,
    ``weight_decay``, ``lr_drop``, ``batch_size``, ``num_workers``,
    ``checkpoint``, ``start_epoch``, ``epochs`` and ``clip_max_norm``.

    If a file exists at ``config.checkpoint`` the model, optimizer and
    scheduler state are restored from it and ``config.start_epoch`` is
    overwritten with the epoch following the stored one.

    Args:
        config: Project configuration object (see ``configuration.Config``).

    Returns:
        A list with one dict per epoch run in this call, each holding
        ``"epoch"``, ``"train_loss"`` and ``"val_loss"``. The list is empty
        when ``config.start_epoch >= config.epochs``.
    """
    device = torch.device(config.device)
    print(f'Initializing Device: {device}')

    # Offset the seed by the rank reported by utils.get_rank()
    # (presumably the distributed process rank -- confirm in models.utils).
    seed = config.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)

    model, criterion = caption.build_model(config)
    model.to(device)

    n_parameters = sum(
        p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Number of params: {n_parameters}")

    optimizer, lr_scheduler = _build_optimizer(model, config)
    data_loader_train, data_loader_val = _build_data_loaders(config)

    if os.path.exists(config.checkpoint):
        print("Loading Checkpoint...")
        # Load onto the CPU first; load_state_dict copies the values into
        # the already-placed model / optimizer tensors.
        checkpoint = torch.load(config.checkpoint, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        config.start_epoch = checkpoint['epoch'] + 1

    print("Start Training..")
    history = []
    for epoch in range(config.start_epoch, config.epochs):
        print(f"Epoch: {epoch}")
        epoch_loss = train_one_epoch(
            model, criterion, data_loader_train, optimizer, device, epoch,
            config.clip_max_norm)
        lr_scheduler.step()
        print(f"Training Loss: {epoch_loss}")

        # Checkpoint before validating so the finished epoch is not lost if
        # the session dies during evaluation.
        _save_checkpoint({
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch,
        }, config.checkpoint)

        validation_loss = evaluate(model, criterion, data_loader_val, device)
        print(f"Validation Loss: {validation_loss}")

        history.append({
            'epoch': epoch,
            'train_loss': epoch_loss,
            'val_loss': validation_loss,
        })

        print()

    return history


# Entry point: build the project configuration and start training.
# `config` is bound at cell/module level, so it remains available afterwards.
if __name__ == "__main__":
    config = Config()
    main(config)

Initializing Device: cuda
Number of params: 83959866


  cpuset_checked))


Train: 6864
Valid: 2289
Start Training..
Epoch: 0


  dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
100%|██████████| 214/214 [10:11<00:00,  2.86s/it]


Training Loss: 1.9871663270152617


100%|██████████| 72/72 [03:39<00:00,  3.04s/it]


Validation Loss: 0.59122023690078

Epoch: 1


100%|██████████| 214/214 [06:37<00:00,  1.86s/it]


Training Loss: 0.4971315195070249


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.4260006849136617

Epoch: 2


100%|██████████| 214/214 [06:39<00:00,  1.87s/it]


Training Loss: 0.39749212862453726


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.35455930709011024

Epoch: 3


100%|██████████| 214/214 [06:36<00:00,  1.85s/it]


Training Loss: 0.3410776017982269


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.3125749994069338

Epoch: 4


100%|██████████| 214/214 [06:39<00:00,  1.87s/it]


Training Loss: 0.3054068771617435


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.2871535977141725

Epoch: 5


100%|██████████| 214/214 [06:37<00:00,  1.86s/it]


Training Loss: 0.2821021870335686


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.26825182967715794

Epoch: 6


100%|██████████| 214/214 [06:37<00:00,  1.86s/it]


Training Loss: 0.26384002603938644


100%|██████████| 72/72 [01:12<00:00,  1.00s/it]


Validation Loss: 0.25738337366945213

Epoch: 7


100%|██████████| 214/214 [06:37<00:00,  1.86s/it]


Training Loss: 0.25037309868591967


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.24673015417324173

Epoch: 8


100%|██████████| 214/214 [06:39<00:00,  1.87s/it]


Training Loss: 0.23917672281788888


100%|██████████| 72/72 [01:12<00:00,  1.00s/it]


Validation Loss: 0.24004379328754213

Epoch: 9


100%|██████████| 214/214 [06:41<00:00,  1.88s/it]


Training Loss: 0.2294786482631603


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.23201687613295185

Epoch: 10


100%|██████████| 214/214 [06:36<00:00,  1.85s/it]


Training Loss: 0.22100851141682296


100%|██████████| 72/72 [01:12<00:00,  1.01s/it]


Validation Loss: 0.22816972579393122

Epoch: 11


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.21286357953169635


100%|██████████| 72/72 [01:11<00:00,  1.01it/s]


Validation Loss: 0.22491361283593708

Epoch: 12


100%|██████████| 214/214 [06:37<00:00,  1.86s/it]


Training Loss: 0.20628477688704697


100%|██████████| 72/72 [01:11<00:00,  1.01it/s]


Validation Loss: 0.22396006228195298

Epoch: 13


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.19925589587922407


100%|██████████| 72/72 [01:12<00:00,  1.01s/it]


Validation Loss: 0.21832117003699145

Epoch: 14


100%|██████████| 214/214 [06:37<00:00,  1.86s/it]


Training Loss: 0.1928642134173451


100%|██████████| 72/72 [01:11<00:00,  1.01it/s]


Validation Loss: 0.21416924086709818

Epoch: 15


100%|██████████| 214/214 [06:36<00:00,  1.85s/it]


Training Loss: 0.187204326981696


100%|██████████| 72/72 [01:11<00:00,  1.00it/s]


Validation Loss: 0.21112528298464087

Epoch: 16


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.18094321472622524


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.2112585614538855

Epoch: 17


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.17562225676864107


100%|██████████| 72/72 [01:11<00:00,  1.01it/s]


Validation Loss: 0.20826268713507387

Epoch: 18


100%|██████████| 214/214 [06:39<00:00,  1.87s/it]


Training Loss: 0.17029696643770298


100%|██████████| 72/72 [01:12<00:00,  1.01s/it]


Validation Loss: 0.21168081565863556

Epoch: 19


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.16512522428670776


100%|██████████| 72/72 [01:10<00:00,  1.01it/s]


Validation Loss: 0.20871193706989288

Epoch: 20


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.15239263552232324


100%|██████████| 72/72 [01:12<00:00,  1.01s/it]


Validation Loss: 0.20635976021488509

Epoch: 21


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.1487436083571933


100%|██████████| 72/72 [01:11<00:00,  1.01it/s]


Validation Loss: 0.206596489995718

Epoch: 22


100%|██████████| 214/214 [06:39<00:00,  1.87s/it]


Training Loss: 0.1471847203151088


100%|██████████| 72/72 [01:11<00:00,  1.01it/s]


Validation Loss: 0.20711871691875988

Epoch: 23


100%|██████████| 214/214 [06:39<00:00,  1.87s/it]


Training Loss: 0.14596960836342562


100%|██████████| 72/72 [01:13<00:00,  1.01s/it]


Validation Loss: 0.2071727801942163

Epoch: 24


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.14458581704263376


100%|██████████| 72/72 [01:11<00:00,  1.01it/s]


Validation Loss: 0.2082079868349764

Epoch: 25


100%|██████████| 214/214 [06:37<00:00,  1.86s/it]


Training Loss: 0.14408047187411896


100%|██████████| 72/72 [01:12<00:00,  1.01s/it]


Validation Loss: 0.20895995199680328

Epoch: 26


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.1428173696535213


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.20888648803035417

Epoch: 27


100%|██████████| 214/214 [06:39<00:00,  1.87s/it]


Training Loss: 0.14198230025924255


100%|██████████| 72/72 [01:10<00:00,  1.02it/s]


Validation Loss: 0.20834517665207386

Epoch: 28


100%|██████████| 214/214 [06:38<00:00,  1.86s/it]


Training Loss: 0.14112609172257307


100%|██████████| 72/72 [01:12<00:00,  1.01s/it]


Validation Loss: 0.20927916136052874

Epoch: 29


100%|██████████| 214/214 [06:37<00:00,  1.86s/it]


Training Loss: 0.14042368148253342


100%|██████████| 72/72 [01:11<00:00,  1.01it/s]

Validation Loss: 0.21060012612077925




