In [1]:
import torch
import os
import torch.backends.cudnn as cudnn
import random
import numpy as np
import logging
import sys 

# For path issues try to add the appropriate path using sys.path.append 

from torchvision.transforms import Compose, ToTensor, Normalize

from torch.utils.data import DataLoader
from dataset.data_loader import VGDataset
from utils.utils import adjust_learning_rate
from utils.checkpoint import save_checkpoint
from todo import VGModel, train_epoch, validate_epoch

  from .autonotebook import tqdm as notebook_tqdm


Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.
Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [None]:
# Change the data_root and split_root paths according to your directory layout and the location of the dataset.
# Consider changing the checkpoint save path to a Google Drive location.

class Args:
    """Plain attribute container holding every run option used by this notebook.

    It stands in for a command-line argument namespace: each option is an
    instance attribute set to its default in ``__init__``; edit the values
    there (or on the instance) before running the later cells.
    """

    def __init__(self):
        # --- data / run options ---
        self.data_root = "./data"       # passed to VGDataset(data_root=...)
        self.gpu = 0                    # exported as CUDA_VISIBLE_DEVICES
        self.workers = 2                # DataLoader num_workers (train / val)
        self.nb_epoch = 3               # number of epochs in the training loop
        self.lr = 5e-5                  # base AdamW learning rate
        self.lr_dev = 0.1
        self.batch_size = 12            # batch size of the train / val loaders
        self.size = 640                 # passed to VGDataset(imsize=...)
        self.split_root = "data"        # passed to VGDataset(split_root=...)
        self.dataset = "gref"           # passed to VGDataset(dataset=...)
        self.time = 40                  # passed to VGDataset(max_query_len=...)
        self.print_freq = 50
        self.savename = "ckpt"          # log file name and checkpoint filename
        self.seed = 0                   # base seed for the random / numpy / torch RNGs
        self.bert_model = "bert-base-uncased"
        self.test = False
        self.w_div = 0.125
        self.tunebert = True            # when True the text model gets its own optimizer group
        self.device = "cuda"
        # --- options below are only handed to the model via ``args``;
        # --- their consumers are not visible in this notebook
        self.no_aux_loss = False
        self.backbone = "resnet50"
        self.position_embedding = "sine"
        self.enc_layers = 6
        self.dec_layers = 6
        self.dim_feedforward = 2048
        self.hidden_dim = 256
        self.dropout = 0.1
        self.nheads = 8
        self.num_queries = 441
        self.masks = False
        self.dilation = False
        self.pre_norm = False

    def __repr__(self):
        """Return ``Args(name=value, ...)`` listing every option.

        Without this, ``str(args)`` is the default ``<... object at 0x...>``
        text, so logging the configuration records nothing useful.
        """
        fields = ", ".join("%s=%r" % (name, value) for name, value in vars(self).items())
        return "Args(%s)" % fields
args = Args()

# Env settings
# Restrict the process to the configured GPU. This presumably has to run
# before CUDA is first initialised to take effect -- TODO confirm nothing
# touches CUDA earlier in the session.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
cudnn.benchmark = False
cudnn.deterministic = True
# Each RNG is seeded with a different offset from the base seed.
random.seed(args.seed)
np.random.seed(args.seed+1)
torch.manual_seed(args.seed+2)
torch.cuda.manual_seed_all(args.seed+3)

# Log settings
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: one call, and no
# failure if the directory appears between the check and the creation.
os.makedirs('./logs', exist_ok=True)
# NOTE(review): basicConfig does nothing when the root logger already has
# handlers (e.g. when this cell is re-run) -- confirm the log file is written.
logging.basicConfig(level=logging.INFO, filename="./logs/%s"%args.savename, filemode="a+",
                    format="%(asctime)-15s %(levelname)-8s %(message)s")
logging.info(str(args))

In [3]:
# Build data loaders

# Image preprocessing shared by all splits: convert to a tensor, then
# normalise each channel with fixed mean / std values.
input_transform = Compose([
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Keyword arguments that are identical for the train / val / test datasets.
_dataset_kwargs = dict(
    data_root=args.data_root,
    split_root=args.split_root,
    dataset=args.dataset,
    imsize=args.size,
    transform=input_transform,
    max_query_len=args.time,
)

# Dataset
train_dataset = VGDataset(split='train', augment=True, **_dataset_kwargs)
val_dataset = VGDataset(split='val', **_dataset_kwargs)
# The test dataset reads the same 'val' split, with testmode switched on.
test_dataset = VGDataset(split='val', testmode=True, **_dataset_kwargs)

train_loader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.workers,
    pin_memory=True,
    drop_last=True,
)
# NOTE(review): drop_last=True here discards a trailing partial batch, so
# validation does not cover the whole split. Kept as-is because
# validate_epoch may assume full batches -- confirm before changing.
val_loader = DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.workers,
    pin_memory=True,
    drop_last=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
    drop_last=True,
)

In [None]:
# Build model
model = VGModel(
    bert_model=args.bert_model,
    tunebert=args.tunebert,
    args=args,
)
model = torch.nn.DataParallel(model).cuda()

# Count once; the same number is printed and logged.
num_params = sum(param.nelement() for param in model.parameters())
print('Num of parameters:', num_params)
logging.info('Num of parameters:%d'%int(num_params))

# Split the parameters into visual / text / remaining groups.
# Membership is decided by object identity through id() sets. Testing
# `param in module.parameters()` is unsafe: parameters() returns a generator
# that is consumed while it is searched, and a non-identical element is
# compared with tensor `==`, so that test only works when both iterations
# happen to yield parameters in exactly the same order.
visu_param = list(model.module.visumodel.parameters())
text_param = list(model.module.textmodel.parameters())
visu_ids = {id(param) for param in visu_param}
text_ids = {id(param) for param in text_param}

if args.tunebert:
    # Text parameters get their own group, so exclude them from the rest.
    rest_param = [param for param in model.parameters()
                  if id(param) not in visu_ids and id(param) not in text_ids]
else:
    # Without a separate text group, the text parameters stay in rest_param.
    rest_param = [param for param in model.parameters() if id(param) not in visu_ids]

sum_visu = sum(param.nelement() for param in visu_param)
sum_text = sum(param.nelement() for param in text_param)
sum_fusion = sum(param.nelement() for param in rest_param)
if not args.tunebert:
    # rest_param still contains the text parameters here; report fusion-only.
    sum_fusion -= sum_text
print('visu, text, fusion module parameters:', sum_visu, sum_text, sum_fusion)

Num of parameters: 149685188
visu, text, fusion module parameters: 31869888 109482240 8333060


In [5]:
# Build optimizer
# Parameter groups are kept in the order rest -> visual -> text.
# The first group always uses the optimizer-wide learning rate.
param_groups = [{'params': rest_param}]
if args.tunebert:
    # Visual and text parameters are tuned at a tenth of the base rate.
    reduced_lr = args.lr/10.
    param_groups.append({'params': visu_param, 'lr': reduced_lr})
    param_groups.append({'params': text_param, 'lr': reduced_lr})
else:
    # No text group; the visual parameters use the base rate.
    param_groups.append({'params': visu_param})

optimizer = torch.optim.AdamW(param_groups, lr=args.lr, weight_decay=0.0001)

In [None]:
# Train for args.nb_epoch epochs; validate and write a checkpoint after each.
best_accu = -float('Inf')
for epoch in range(args.nb_epoch):
    adjust_learning_rate(args, optimizer, epoch)

    train_epoch(train_loader, model, optimizer, epoch, args)
    accu_new = validate_epoch(val_loader, model, args)

    # Compare against the previous record before that record is updated.
    is_best = accu_new >= best_accu
    best_accu = max(accu_new, best_accu)

    # Note: the 'best_loss' entry holds this epoch's validation accuracy.
    checkpoint_state = {
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_loss': accu_new,
        'optimizer' : optimizer.state_dict(),
    }
    save_checkpoint(checkpoint_state, is_best, args, filename=args.savename)

# Report the best validation accuracy on stdout and in the log file.
best_accu_msg = '\nBest Accu: %f\n'%best_accu
print(best_accu_msg)
logging.info(best_accu_msg)

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.
Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


  dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)


Epoch: [0][0/7122]	Loss 1.5305 (1.5305)	L1_Loss 0.1469 (0.1469)	GIoU_Loss 1.3836 (1.3836)	Accu 0.0000 (0.0000)	Mean_iu 0.0381 (0.0381)	vis_lr 0.00005000	lang_lr 0.00000500	
2024-11-11 23:46:41.550657
Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.
Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.
[0/794]	Time 7.623 (7.623)	Data Time 0.000 (0.000)	Accu 0.0000 (0.0000)	Mean_iu 0.1190 (0.1190)	
0.0 0.11902108788490295

Best Accu: 0.000000

