In [1]:
import numpy as np
import torch
from torch import nn
import torchvision
from torchvision.transforms import transforms
import torch.backends.cudnn as cudnn
from torchvision import models
from torch.optim.lr_scheduler import CosineAnnealingLR

import atm
import atm.simclr as simclr

## DataSet

#### Q: How do I let a dataset yield a pair of images?

#### Notes
1. A calss method doesn't get the implicit first argument 'self'. Thus, it can't modify a class instance's state.


In [2]:
import argparse 

args = argparse.Namespace()

args.data='./datasets' 
args.dataset_name='cifar10'
args.arch='resnet50'
args.workers=1
args.epochs=30 
args.batch_size=256 
args.lr=0.06 
args.wd=0.0005
args.disable_cuda=False
args.fp16_precision=True
args.out_dim=128
args.log_every_n_steps=100
args.temperature=0.07
args.n_views = 2
args.gpu_index=0
args.device='cuda' if torch.cuda.is_available() else 'cpu'


#args.bn_splits=8 
#args.cos=True 
#args.knn_k=200 
#args.knn_t=0.1 

#args.resume='' 
#args.schedule=[] 
#args.symmetric=False 

print("Using device:", args.device)

Using device: cuda


In [3]:
assert args.n_views == 2, "Only two view training is supported. Please use --n-views 2."
# check if gpu training is available
if not args.disable_cuda and torch.cuda.is_available():
    args.device = torch.device('cuda')
    cudnn.deterministic = True
    cudnn.benchmark = True
else:
    args.device = torch.device('cpu')
    args.gpu_index = -1

In [4]:
dataset = simclr.dataloader.ContrastiveLearningDataset(args.data) 
train_dataset = dataset.get_dataset(args.dataset_name, args.n_views)

Files already downloaded and verified


A custom dataset should mimic torchvision.datasets.CIFAR10's behavior (+transfromation)

In [5]:
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=args.batch_size, shuffle=True,
    num_workers=args.workers, pin_memory=True, drop_last=True)

In [6]:
model = simclr.models.ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim)

## training setting

In [7]:
optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.wd)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0,
                                                           last_epoch=-1)

In [8]:
with torch.cuda.device(args.gpu_index):
    simc = simclr.models.SimCLR(model=model, optimizer=optimizer, scheduler=scheduler, args=args)
    simc.train(train_loader) # model is saved at the end of train.

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
100%|██████████| 195/195 [01:08<00:00,  2.84it/s]
100%|██████████| 195/195 [01:06<00:00,  2.94it/s]
100%|██████████| 195/195 [01:05<00:00,  2.96it/s]
100%|██████████| 195/195 [01:07<00:00,  2.89it/s]
100%|██████████| 195/195 [01:06<00:00,  2.93it/s]
100%|██████████| 195/195 [01:06<00:00,  2.94it/s]
100%|██████████| 195/195 [01:05<00:00,  2.97it/s]
100%|██████████| 195/195 [01:05<00:00,  2.97it/s]
100%|██████████| 195/195 [01:05<00:00,  2.96it/s]
100%|██████████| 195/195 [01:05<00:00,  2.97it/s]
100%|██████████| 195/195 [01:05<00:00,  2.98it/s]
100%|██████████| 195/195 [01:05<00:00,  2.96it/s]
100%|██████████| 195/195 [01:05<00:00,  2.99it/s]
100%|██████████| 195/195 [01:04<00:00,  3.02it/s]
100%|██████████| 195/195 [01:03<00:00,  3.08it/s]
100%|██████████| 195/195 [01:05<00:00,  3.00it/s]
100%|██████████| 195/195 [01:04<00:00,  3.02it/s]
100%|██████████| 195/195 [01:05<00:00,  2.96it/s]
100%|██████████|

# representation quality check

## linear evaluation protocol, a standard way
Train a linear classifier on the fixed representations

In [7]:
simclr = simclr.models.SimCLR(model=model, optimizer=optimizer, scheduler=scheduler, args=args)

In [53]:
state_dict.keys()

odict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.bn1.num_batches_tracked', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer1.1.bn2.running_mean', 'layer1.1.bn2.running_var', 'layer1.1.bn2.num_batches_tracked', 'layer2.0.conv1.weight', 'layer2.0.bn1.weight', 'layer2.0.bn1.bias', 'layer2.0.bn1.running_mean', 'layer2.0.bn1.running_var', 'layer2.0.bn1.num_batches_tracked', 'layer2.0.conv2.weight', 'layer2.0.bn2.weight', 'layer2.0.bn2.bias', '

In [59]:
# ResNet50 
#if config.arch == 'resnet50':
#model = torchvision.models.resnet50(pretrained=False, num_classes=10).to(device)
model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim)
checkpoint = torch.load('./runs/Aug08_23-08-40_hoseung/checkpoint_0030.pth.tar', map_location=args.device)
state_dict = checkpoint['state_dict']

In [55]:
for k in list(state_dict.keys()):
    if k.startswith('backbone.'):
        if k.startswith('backbone') and not k.startswith('backbone.fc'):
            # remove prefix
            state_dict[k[len("backbone."):]] = state_dict[k]
    del state_dict[k]

In [60]:
log = model.load_state_dict(state_dict, strict=False)
#assert log.missing_keys == ['fc.weight', 'fc.bias']

RuntimeError: Error(s) in loading state_dict for ResNetSimCLR:
	size mismatch for backbone.layer1.0.conv1.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 1, 1]).
	size mismatch for backbone.layer1.1.conv1.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]).
	size mismatch for backbone.layer2.0.conv1.weight: copying a param with shape torch.Size([128, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 256, 1, 1]).
	size mismatch for backbone.layer2.0.downsample.0.weight: copying a param with shape torch.Size([128, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 256, 1, 1]).
	size mismatch for backbone.layer2.0.downsample.1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for backbone.layer2.0.downsample.1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for backbone.layer2.0.downsample.1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for backbone.layer2.0.downsample.1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for backbone.layer2.1.conv1.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]).
	size mismatch for backbone.layer3.0.conv1.weight: copying a param with shape torch.Size([256, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 512, 1, 1]).
	size mismatch for backbone.layer3.0.downsample.0.weight: copying a param with shape torch.Size([256, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 512, 1, 1]).
	size mismatch for backbone.layer3.0.downsample.1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for backbone.layer3.0.downsample.1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for backbone.layer3.0.downsample.1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for backbone.layer3.0.downsample.1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for backbone.layer3.1.conv1.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 1024, 1, 1]).
	size mismatch for backbone.layer4.0.conv1.weight: copying a param with shape torch.Size([512, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([512, 1024, 1, 1]).
	size mismatch for backbone.layer4.0.downsample.0.weight: copying a param with shape torch.Size([512, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([2048, 1024, 1, 1]).
	size mismatch for backbone.layer4.0.downsample.1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for backbone.layer4.0.downsample.1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for backbone.layer4.0.downsample.1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for backbone.layer4.0.downsample.1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for backbone.layer4.1.conv1.weight: copying a param with shape torch.Size([512, 512, 3, 3]) from checkpoint, the shape in current model is torch.Size([512, 2048, 1, 1]).
	size mismatch for backbone.fc.0.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([2048, 2048]).
	size mismatch for backbone.fc.0.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for backbone.fc.2.weight: copying a param with shape torch.Size([128, 512]) from checkpoint, the shape in current model is torch.Size([128, 2048]).