In [1]:
pwd

'/home/karthi/tamu/sem1/csce636/project/iSogCLR/bimodal_exps'

In [2]:
import warnings
def warn(*args, **kwargs):
    pass
warnings.warn = warn

import pickle
import argparse

import os
os.environ["TOKENIZERS_PARALLELISM"] = "true"

import numpy as np
import random
import time
import datetime
import json
from pathlib import Path

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.distributed as dist
from torch.utils.data import DataLoader, Subset
from torchvision import transforms, datasets

from models.model_clip import CLIP
from transformers import AutoTokenizer, RobertaTokenizer

import utils
import shutil
from dataset import create_train_dataset, create_val_dataset, create_sampler, create_train_loader, create_val_loader
from scheduler import create_scheduler
from optim import create_optimizer
from zeroshot_transfer.classes import CIFAR10_CLASSES, CIFAR100_CLASSES, IMAGENET_CLASSES

from clip import train, evaluation, itm_eval, zeroshot_transfer, create_zeroshot_dataloader


from tqdm import tqdm

In [3]:
class Config:
    def __init__(self, 
                 data_path=None, 
                 ann_path=None, 
                 train_file='cc3m_train_subset.json', 
                 train_image_root='cc3m_subset_100k',
                 bert_config='configs/config_bert.json',
                 image_encoder='resnet50',
                 text_encoder='distilbert-base-uncased',
                 image_res=256,
                 vision_width=768,
                 embed_dim=256,
                 opt='adamW',
                 sched='cosine',
                 lr=2e-4,
                 lr_temp_net=1e-6,
                 wd_temp_net=1e-3,
                 min_lr=1e-6,
                 warmup=True,
                 warmup_lr=1e-5,
                 weight_decay=0.02,
                 decay_rate=1,
                 epochs=30,
                 warmup_epochs=5,
                 cooldown_epochs=0,
                 use_amp=False,
                 init_model=False,
                 batch_size_train=128,
                 batch_size_test=128,
                 k_test=256,
                 evaluate=False,
                 checkpoint='',
                 device='cuda',
                 seed=42,
                 world_size=1,
                 dist_url='env://',
                 distributed=True,
                 output_dir=None, #"./output/sogclr_cc3m_g0.8_e30",
                 ita_type=None,
                 vicreg_sim_coeff=25.0,
                 vicreg_std_coeff=25.0,
                 sogclr_gamma=0.8,
                 rho_I=8.0,
                 rho_T=8.0,
                 rho_init=8.0,
                 eta_init=0.001,
                 tau_init=0.01,
                 beta_u=0.9,
                 temp=0.01,
                 learnable_temp=False,
                 personalized_tau=False,
                 max_norm=1.0,
                 store_tau=False,
                 isogclr_temp_net=False,
                 alpha=1.0,
                 train_frac=1.0,
                 check_samples_tau=False,
                 extract_data=False,
                 zs_dataset="imagenet",
                 zs_datafolder=None):
        
        # Data path
        self.data_path = data_path
        self.ann_path = ann_path
        self.train_file = os.path.join(self.ann_path, train_file)
        self.train_image_root = os.path.join(self.data_path, train_image_root)
        self.val_coco_file = os.path.join(self.ann_path, 'coco_val.json')
        self.coco_image_root = os.path.join(self.data_path, 'mscoco_val/mscoco_val2014_subset_5k')

        # Model config
        self.bert_config = bert_config
        self.image_encoder = image_encoder
        self.text_encoder = text_encoder
        self.image_res = image_res
        self.vision_width = vision_width
        self.embed_dim = embed_dim

        # Optimizer and scheduler
        self.opt = opt
        self.sched = sched
        self.lr = lr
        self.lr_temp_net = lr_temp_net
        self.wd_temp_net = wd_temp_net  # Weight decay for temperature network
        self.min_lr = min_lr
        self.warmup = warmup
        self.warmup_lr = warmup_lr
        self.weight_decay = weight_decay
        self.decay_rate = decay_rate
        self.epochs = epochs
        self.warmup_epochs = warmup_epochs
        self.cooldown_epochs = cooldown_epochs

        # Training & test settings
        self.use_amp = use_amp
        self.init_model = init_model
        self.batch_size_train = batch_size_train
        self.batch_size_test = batch_size_test
        self.k_test = k_test
        self.evaluate = evaluate
        self.checkpoint = checkpoint
        self.device = device
        self.seed = seed
        self.world_size = world_size  # Number of distributed processes
        self.dist_url = dist_url  # URL used to set up distributed training
        self.distributed = distributed

        # Output path
        self.output_dir = output_dir

        # Loss config
        self.ita_type = ita_type  # Should be set during runtime
        self.vicreg_sim_coeff = vicreg_sim_coeff
        self.vicreg_std_coeff = vicreg_std_coeff
        self.sogclr_gamma = sogclr_gamma
        self.rho_I = rho_I
        self.rho_T = rho_T
        self.rho_init = rho_init
        self.eta_init = eta_init
        self.tau_init = tau_init
        self.beta_u = beta_u
        self.temp = temp
        self.learnable_temp = learnable_temp
        self.personalized_tau = personalized_tau
        self.max_norm = max_norm
        self.store_tau = store_tau
        self.isogclr_temp_net = isogclr_temp_net
        self.alpha = alpha  # For isogclr_denoise

        # Fraction of data used for training
        self.train_frac = train_frac

        # Check samples with high/low temperature values
        self.check_samples_tau = check_samples_tau
        if self.check_samples_tau:
            self.evaluate = True

        # Extract data from the CC3M dataset
        self.extract_data = extract_data

        # Zero-shot transfer
        self.zs_dataset = zs_dataset  # Should be set during runtime
        self.zs_datafolder = zs_datafolder

### Training

In [4]:
def train_runner(args):
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    json.dump(args.__dict__, open(os.path.join(args.output_dir, 'args.json'), 'w'), indent=2) 

    args.gpu = 0
    device = torch.device(args.device)
    print(device)
    print(torch.cuda.get_device_name(torch.cuda.current_device()))

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    cudnn.benchmark = True

    #### Dataset #### 
    print("Creating retrieval dataset")
    train_dataset = create_train_dataset('re', args)
    print("len of train_dataset:", len(train_dataset))

    num_training = int(args.train_frac * len(train_dataset))
    train_dataset = Subset(train_dataset, list(range(num_training)))

    samplers = [None, None, None]

    train_loader = create_train_loader(train_dataset, samplers[0], args.batch_size_train, 2, None)
        
    if args.text_encoder == 'roberta-large':
        tokenizer = RobertaTokenizer.from_pretrained(args.text_encoder)
    else:
        tokenizer = AutoTokenizer.from_pretrained(args.text_encoder)


    #### Model #### 
    print("Creating model")
    model = CLIP(image_encoder=args.image_encoder, text_encoder=args.text_encoder, embed_dim=args.embed_dim, init_model=args.init_model, bsz=args.batch_size_train*args.world_size,
                    world_size=args.world_size, ita_type=args.ita_type, sogclr_gamma=args.sogclr_gamma, rho_I=args.rho_I, rho_T=args.rho_T, rho_init=args.rho_init, tau_init=args.tau_init,
                    eta_init=args.eta_init, beta_u=args.beta_u, temp=args.temp, learnable_temp=args.learnable_temp,
                    vicreg_sim_coeff=args.vicreg_sim_coeff, vicreg_std_coeff=args.vicreg_std_coeff, personalized_tau=args.personalized_tau, 
                    use_temp_net=args.isogclr_temp_net, alpha=args.alpha, distributed=args.distributed)
    model = model.to(device)


    optimizer = create_optimizer(args, model)
    lr_scheduler, _ = create_scheduler(args, optimizer)
    model_without_ddp = model
    
    if args.use_amp:
        grad_scaler = torch.cuda.amp.GradScaler()
    else:
        grad_scaler = None

    max_epoch = args.epochs
    warmup_steps = args.warmup_epochs

    print("Start training")
    start_time = time.time()    
    for epoch in range(0, max_epoch):
        train_stats = train(model, train_loader, optimizer, tokenizer, epoch, max_epoch, warmup_steps, device, lr_scheduler, 
                            grad_scaler, args)
        
        log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},     
                        'epoch': epoch,
                        'data': 'coco',
                    }
        with open(os.path.join(args.output_dir, "coco_log.txt"),"a") as f:
            f.write(json.dumps(log_stats) + "\n")

        save_obj = {
            'model': model_without_ddp.state_dict()
        }
        torch.save(save_obj, os.path.join(args.output_dir, 'checkpoint_'+str(epoch+1)+'.pth'))
            
        lr_scheduler.step(epoch+warmup_steps+1)  
        torch.cuda.empty_cache()

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str)) 

In [5]:
args_train = Config(data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='sogclr', eta_init=0.03, distributed=False)
main(args_train)

NameError: name 'main' is not defined

In [5]:
args_train_mine = Config(epochs=5, data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='sogclr_mine', eta_init=0.03, distributed=False, output_dir="./output_mine/sogclr_mine_cc3m_g0.8_e30")
train_runner(args_train_mine)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of train_dataset: 100000
Creating model
Start training
Train Epoch: [0]  [   0/1562]  eta: 6:36:42  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.1990  avg_image_tau: 0.0000  avg_text_tau: 0.0000  cur_eta: 0.0000  grad_tau_image: 0.0000  grad_tau_text: 0.0000  b_I: 0.0000  b_T: 0.0000  v: 0.0000  lamda: 0.0000  weights_image_pos: 0.0000  weights_text_pos: 0.0000  time: 15.2387  data: 0.8060  max mem: 4523
Train Epoch: [0]  [  50/1562]  eta: 0:58:13  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.0882  avg_image_tau: 0.0000  avg_text_tau: 0.0000  cur_eta: 0.0000  grad_tau_image: 0.0000  grad_tau_text: 0.0000  b_I: 0.0000  b_T: 0.0000  v: 0.0000  lamda: 0.0000  weights_image_pos: 0.0000  weights_text_pos: 0.0000  time: 2.0513  data: 0.0002  max mem: 5219
Train Epoch: [0]  [ 100/1562]  eta: 0:53:10  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.0495  avg_image_tau: 0.0000  avg_text_tau: 0.0000  cur_eta: 0

In [5]:
args_train_isog = Config(epochs=5, data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='isogclr_new', eta_init=0.03, distributed=False, output_dir="./output_isog/isogclr_cc3m")
train_runner(args_train_isog)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of train_dataset: 100000
Creating model
Start training
Train Epoch: [0]  [   0/1562]  eta: 2:04:41  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.1988  avg_image_tau: 0.0100  avg_text_tau: 0.0100  cur_eta: 0.0000  grad_tau_image: 4.8155  grad_tau_text: 4.7392  b_I: 5.0000  b_T: 3.9978  v: 0.0000  lamda: 0.0000  weights_image_pos: 0.0000  weights_text_pos: 0.0000  time: 4.7897  data: 0.6060  max mem: 4572
Train Epoch: [0]  [  50/1562]  eta: 0:10:13  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.0884  avg_image_tau: 0.0100  avg_text_tau: 0.0100  cur_eta: 0.0000  grad_tau_image: 5.0000  grad_tau_text: 4.9122  b_I: 5.0000  b_T: 5.0000  v: 0.0000  lamda: 0.0000  weights_image_pos: 0.0000  weights_text_pos: 0.0000  time: 0.3181  data: 0.0002  max mem: 5264
Train Epoch: [0]  [ 100/1562]  eta: 0:08:50  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.0494  avg_image_tau: 0.0100  avg_text_tau: 0.0100  cur_eta: 0.

In [5]:
args_train_isog = Config(epochs=5, data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='isogclr_new_v2', eta_init=0.03, distributed=False, output_dir="./output_isog_new_v2/isogclr_new_v2_cc3m")
train_runner(args_train_isog)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of train_dataset: 100000
Creating model
Start training
Train Epoch: [0]  [   0/1562]  eta: 2:01:07  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.2175  avg_image_tau: 0.0050  avg_text_tau: 0.0050  cur_eta: 0.0300  grad_tau_image: 5.4170  grad_tau_text: 4.8411  b_I: 0.0000  b_T: 0.0000  v: 0.0000  lamda: 0.0000  weights_image_pos: 0.0000  weights_text_pos: 0.0000  time: 4.6526  data: 0.5776  max mem: 4571
Train Epoch: [0]  [  50/1562]  eta: 0:10:13  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.0922  avg_image_tau: 0.0050  avg_text_tau: 0.0050  cur_eta: 0.0300  grad_tau_image: 6.8745  grad_tau_text: 5.1636  b_I: 0.0000  b_T: 0.0000  v: 0.0000  lamda: 0.0000  weights_image_pos: 0.0000  weights_text_pos: 0.0000  time: 0.3205  data: 0.0002  max mem: 5260
Train Epoch: [0]  [ 100/1562]  eta: 0:08:51  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 0.0516  avg_image_tau: 0.0050  avg_text_tau: 0.0050  cur_eta: 0.

In [5]:
args_train_mine = Config(epochs=5, data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='sogclr_v2_mine', eta_init=0.03, distributed=False, output_dir="./output_mine/sogclr_mine_v2_cc3m")
train_runner(args_train_mine)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of train_dataset: 100000
Creating model
Start training
Train Epoch: [0]  [   0/1562]  eta: 2:00:40  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 12.5186  avg_image_tau: 0.0000  avg_text_tau: 0.0000  cur_eta: 0.0000  grad_tau_image: 0.0000  grad_tau_text: 0.0000  b_I: 0.0000  b_T: 0.0000  v: 0.0000  lamda: 0.0000  weights_image_pos: 0.0000  weights_text_pos: 0.0000  time: 4.6356  data: 0.5704  max mem: 4524
Train Epoch: [0]  [  50/1562]  eta: 0:10:10  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 12.3407  avg_image_tau: 0.0000  avg_text_tau: 0.0000  cur_eta: 0.0000  grad_tau_image: 0.0000  grad_tau_text: 0.0000  b_I: 0.0000  b_T: 0.0000  v: 0.0000  lamda: 0.0000  weights_image_pos: 0.0000  weights_text_pos: 0.0000  time: 0.3194  data: 0.0002  max mem: 5216
Train Epoch: [0]  [ 100/1562]  eta: 0:08:50  lr: 0.000010  lr_temp_net: 0.00000100  loss_ita: 12.2649  avg_image_tau: 0.0000  avg_text_tau: 0.0000  cur_eta:

### Evaluation

In [4]:
def eval_runner(args):

    args.gpu = 0
    device = torch.device(args.device)
    print(device)
    print(torch.cuda.get_device_name(torch.cuda.current_device()))

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    cudnn.benchmark = True

    #### Dataset #### 
    print("Creating retrieval dataset")
    val_coco_dataset = create_val_dataset('re', args, args.val_coco_file, args.coco_image_root, None)
    print("len of coco val:", len(val_coco_dataset))

    samplers = [None, None, None]

    val_coco_loader = create_val_loader([val_coco_dataset], samplers[1:2],
                                        [args.batch_size_test], [8], [None])[0]
        
    if args.text_encoder == 'roberta-large':
        tokenizer = RobertaTokenizer.from_pretrained(args.text_encoder)
    else:
        tokenizer = AutoTokenizer.from_pretrained(args.text_encoder)

    #### Zero-shot transfer ####
    if args.zs_dataset:
        zeroshot_dataloader = create_zeroshot_dataloader(dataset_name=args.zs_dataset, data_folder=args.zs_datafolder, image_size=args.image_res)
    else:
        zeroshot_dataloader = None

    #### Model #### 
    print("Creating model")
    model = CLIP(image_encoder=args.image_encoder, text_encoder=args.text_encoder, embed_dim=args.embed_dim, init_model=args.init_model, bsz=args.batch_size_train*args.world_size,
                    world_size=args.world_size, ita_type=args.ita_type, sogclr_gamma=args.sogclr_gamma, rho_I=args.rho_I, rho_T=args.rho_T, rho_init=args.rho_init,  tau_init=args.tau_init,
                    eta_init=args.eta_init, beta_u=args.beta_u, temp=args.temp, learnable_temp=args.learnable_temp,
                    vicreg_sim_coeff=args.vicreg_sim_coeff, vicreg_std_coeff=args.vicreg_std_coeff, personalized_tau=args.personalized_tau, 
                    use_temp_net=args.isogclr_temp_net, alpha=args.alpha, distributed=args.distributed)
    model = model.to(device)

    assert len(args.checkpoint) > 0
    checkpoint = torch.load(args.checkpoint, map_location='cpu') 
    state_dict = checkpoint['model']
    model.load_state_dict(state_dict, strict=False)
    print('load checkpoint from %s' % args.checkpoint)


    print("Start Evaluation")
    start_time = time.time()   

    score_val_i2t_coco, score_val_t2i_coco = evaluation(model, val_coco_loader, tokenizer, device, args)

    val_result_coco = itm_eval(score_val_i2t_coco, score_val_t2i_coco, val_coco_loader.dataset.txt2img, val_coco_loader.dataset.img2txt)  
    print("coco val:", val_result_coco)
    
    if args.zs_dataset:
        zeroshot_results = zeroshot_transfer(model, zeroshot_dataloader, args.zs_dataset, tokenizer, device)
        print("zeroshot:", zeroshot_results)
    else:
        zeroshot_results = None  

    log_stats = {**{f'val_{k}': v for k, v in val_result_coco.items()},               
                    'epoch': 0,
                    'data': 'coco',
                }
    with open(os.path.join(args.output_dir, "coco_log.txt"),"a") as f:
        f.write(json.dumps(log_stats) + "\n")


        if zeroshot_results:
            with open(os.path.join(args.output_dir, f"zeroshot_{args.zs_dataset}_log.txt"), "a") as f:
                f.write(json.dumps(zeroshot_results) + "\n")
        
    torch.cuda.empty_cache()

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Evaluation time {}'.format(total_time_str)) 

In [5]:
args = Config(data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='sogclr', eta_init=0.03, distributed=False, evaluate=True, checkpoint="./output/sogclr_cc3m_g0.8_e30/checkpoint_30.pth", zs_datafolder='../../datasets/imagenet/val')
eval_runner(args)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of coco val: 5000
Creating model
load checkpoint from ./output/sogclr_cc3m_g0.8_e30/checkpoint_30.pth
Start Evaluation
Computing features for evaluation...
Evaluation time 0:00:37
coco val: {'txt_r1': 12.06, 'txt_r5': 29.44, 'txt_r10': 41.1, 'txt_r_mean': 27.53333333333333, 'img_r1': 9.616538046303331, 'img_r5': 25.446839137910352, 'img_r10': 36.075012995321686, 'img_r_mean': 23.712796726511794, 'r_mean': 25.623065029922564}
zeroshot: {'zeroshot_top1': 22.124, 'zeroshot_top3': 35.422, 'zeroshot_top5': 41.55, 'zeroshot_top10': 48.976}
Evaluation time 0:04:21


In [5]:
args = Config(data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='sogclr', eta_init=0.03, distributed=False, evaluate=True, checkpoint="./output/sogclr_cc3m_g0.8_e30/checkpoint_5.pth", zs_datafolder='../../datasets/imagenet/val')
eval_runner(args)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of coco val: 5000
Creating model
load checkpoint from ./output/sogclr_cc3m_g0.8_e30/checkpoint_5.pth
Start Evaluation
Computing features for evaluation...
Evaluation time 0:02:41
coco val: {'txt_r1': 9.92, 'txt_r5': 25.36, 'txt_r10': 35.84, 'txt_r_mean': 23.706666666666667, 'img_r1': 6.377704026550441, 'img_r5': 18.50933663881003, 'img_r10': 27.654044543964172, 'img_r_mean': 17.513695069774883, 'r_mean': 20.610180868220773}
zeroshot: {'zeroshot_top1': 17.49, 'zeroshot_top3': 30.356, 'zeroshot_top5': 36.746, 'zeroshot_top10': 45.488}
Evaluation time 0:18:59


In [8]:
args_eval_mine = Config(data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='sogclr_mine', eta_init=0.03, distributed=False, evaluate=True, checkpoint="./output_mine/sogclr_mine_cc3m_g0.8_e30/checkpoint_5.pth", zs_datafolder='../../datasets/imagenet/val', output_dir="./output_mine/sogclr_mine_cc3m_g0.8_e30")
eval_runner(args_eval_mine)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of coco val: 5000
Creating model
load checkpoint from ./output_mine/sogclr_mine_cc3m_g0.8_e30/checkpoint_5.pth
Start Evaluation
Computing features for evaluation...
Evaluation time 0:00:30
coco val: {'txt_r1': 12.36, 'txt_r5': 30.92, 'txt_r10': 42.7, 'txt_r_mean': 28.66, 'img_r1': 8.968771242352753, 'img_r5': 24.79107521292335, 'img_r10': 35.61517853572714, 'img_r_mean': 23.125008330334413, 'r_mean': 25.892504165167207}
zeroshot: {'zeroshot_top1': 20.54, 'zeroshot_top3': 34.106, 'zeroshot_top5': 40.97, 'zeroshot_top10': 49.67}
Evaluation time 0:04:03


In [7]:
args_eval_isog = Config(data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='isogclr_new', eta_init=0.03, distributed=False, evaluate=True, checkpoint="./output_isog/isogclr_cc3m/checkpoint_5.pth", zs_datafolder='../../datasets/imagenet/val', output_dir="./output_isog/isogclr_cc3m")
eval_runner(args_eval_isog)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of coco val: 5000
Creating model
load checkpoint from ./output_isog/isogclr_cc3m/checkpoint_5.pth
Start Evaluation
Computing features for evaluation...
Evaluation time 0:00:37
coco val: {'txt_r1': 12.48, 'txt_r5': 29.88, 'txt_r10': 41.4, 'txt_r_mean': 27.919999999999998, 'img_r1': 8.540925266903914, 'img_r5': 23.847414930624975, 'img_r10': 34.031748570514615, 'img_r_mean': 22.140029589347836, 'r_mean': 25.030014794673917}
zeroshot: {'zeroshot_top1': 19.292, 'zeroshot_top3': 32.962, 'zeroshot_top5': 39.418, 'zeroshot_top10': 48.434}
Evaluation time 0:04:12


In [6]:
args_eval_isog = Config(data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='isogclr_new_v2', eta_init=0.03, distributed=False, evaluate=True, checkpoint="./output_isog_new_v2/isogclr_new_v2_cc3m/checkpoint_5.pth", zs_datafolder='../../datasets/imagenet/val', output_dir="./output_isog_new_v2/isogclr_new_v2_cc3m")
eval_runner(args_eval_isog)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of coco val: 5000
Creating model
load checkpoint from ./output_isog_new_v2/isogclr_new_v2_cc3m/checkpoint_5.pth
Start Evaluation
Computing features for evaluation...
Evaluation time 0:00:38
coco val: {'txt_r1': 11.0, 'txt_r5': 27.24, 'txt_r10': 37.88, 'txt_r_mean': 25.373333333333335, 'img_r1': 7.917149826062618, 'img_r5': 22.371946099404216, 'img_r10': 32.0644567955536, 'img_r_mean': 20.78451757367348, 'r_mean': 23.07892545350341}
zeroshot: {'zeroshot_top1': 17.474, 'zeroshot_top3': 30.902, 'zeroshot_top5': 37.456, 'zeroshot_top10': 47.0}
Evaluation time 0:04:24


In [5]:
args_eval_mine = Config(data_path='../../datasets', ann_path='../../clip_train', batch_size_train=64, init_model=True, use_amp=True, ita_type='sogclr_v2_mine', eta_init=0.03, distributed=False, evaluate=True, checkpoint="./output_mine/sogclr_mine_v2_cc3m/checkpoint_5.pth", zs_datafolder='../../datasets/imagenet/val', output_dir="./output_mine/sogclr_mine_v2_cc3m")
eval_runner(args_eval_mine)

cuda
NVIDIA GeForce RTX 3060 Laptop GPU
Creating retrieval dataset
len of coco val: 5000
Creating model
load checkpoint from ./output_mine/sogclr_mine_v2_cc3m/checkpoint_5.pth
Start Evaluation
Computing features for evaluation...
Evaluation time 0:00:38
coco val: {'txt_r1': 12.82, 'txt_r5': 31.46, 'txt_r10': 42.74, 'txt_r_mean': 29.00666666666667, 'img_r1': 8.752848974369227, 'img_r5': 24.803070894477987, 'img_r10': 35.331280738933984, 'img_r_mean': 22.96240020259373, 'r_mean': 25.9845334346302}
zeroshot: {'zeroshot_top1': 20.336, 'zeroshot_top3': 33.984, 'zeroshot_top5': 40.72, 'zeroshot_top10': 49.614}
Evaluation time 0:04:21
