In [1]:
#--config_env configs/env.yml 
#--config_exp configs/pretext/clPcl_stl10.yml

import argparse
import os
import torch
import numpy as np

from utils.config import create_config
from utils.common_config import get_criterion, get_backbone_model , get_instance_model,get_group_model, get_train_dataset,\
                                get_val_dataset, get_train_dataloader,\
                                get_val_dataloader, get_train_transformations,\
                                get_val_transformations, get_optimizer,\
                                adjust_learning_rate, get_clustering
from utils.evaluate_utils import contrastive_evaluate
from utils.memory import MemoryBank
from utils.train_utils import pcl_cld_train
from utils.utils import fill_memory_bank
from termcolor import colored



In [2]:
    #1# Retrieve config file
p = create_config("configs/env.yml", "configs/pretext/clPcl_stl10.yml")
print(colored(p, 'red'))

[31m{'setup': 'clPcl', 'clustering': [2, 4, 8, 16], 'backbone': 'resnet18', 'model_kwargs': {'head': 'linear', 'features_dim': 128}, 'train_db_name': 'stl-10', 'val_db_name': 'stl-10', 'num_classes': 10, 'criterion': 'clPcl', 'criterion_kwargs': {'temperature': 0.1}, 'epochs': 500, 'optimizer': 'sgd', 'optimizer_kwargs': {'nesterov': False, 'weight_decay': 0.0001, 'momentum': 0.9, 'lr': 0.4}, 'scheduler': 'cosine', 'scheduler_kwargs': {'lr_decay_rate': 0.1}, 'batch_size': 64, 'num_workers': 0, 'augmentation_strategy': 'simclr', 'augmentation_kwargs': {'random_resized_crop': {'size': 96, 'scale': [0.2, 1.0]}, 'color_jitter_random_apply': {'p': 0.8}, 'color_jitter': {'brightness': 0.4, 'contrast': 0.4, 'saturation': 0.4, 'hue': 0.1}, 'random_grayscale': {'p': 0.2}, 'normalize': {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}}, 'transformation_kwargs': {'crop_size': 96, 'normalize': {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}}, 'pretext_dir': 'RESULTS\\stl

In [3]:
    print(colored('Retrieve model', 'blue'))
    
    backbone = get_backbone_model(p)
    print('Model is {}'.format(backbone.__class__.__name__))
    #print('Model parameters: {:.2f}M'.format(sum(p.numel() for p in backbone.parameters()) / 1e6))
    print(backbone)

[34mRetrieve model[0m
Model is dict
{'backbone': ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): 

In [4]:
    instance_model = get_instance_model(p, backbone)
    group_model = get_group_model(p, backbone)
    print('Model is {}'.format(instance_model.__class__.__name__))
    print('Model parameters: {:.2f}M'.format(sum(p.numel() for p in instance_model.parameters()) / 1e6))
    print(instance_model)
    print('Model is {}'.format(group_model.__class__.__name__))
    print('Model parameters: {:.2f}M'.format(sum(p.numel() for p in group_model.parameters()) / 1e6))
    print(group_model)

Model is MoCo
Model parameters: 22.47M
MoCo(
  (encoder_q): ContrastiveModel(
    (backbone): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (shortcut): Sequential()
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn

In [5]:
    #3# Dataset                                                       OK
    #A - get transformormations for the dataset
    print(colored('Retrieve dataset', 'blue'))
    train_transforms = get_train_transformations(p) 
    print('Train transforms:', train_transforms)
    val_transforms = get_val_transformations(p)
    
    #B - get Dataset from files
    print('Validation transforms:', val_transforms)
    split_ = 'train'
    if p['train_db_name'] == 'stl-10':
        split_ = 'train+unlabeled'
    train_dataset = get_train_dataset(p, train_transforms, to_augmented_dataset=True,
                                        split=split_) # Split is for stl-10
                                        
    val_dataset = get_val_dataset(p, val_transforms)
    
    #C - put the dataset to the dataloader for training purposes
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(len(train_dataset), len(val_dataset)))

[34mRetrieve dataset[0m
Train transforms: Compose(
    RandomResizedCrop(size=(96, 96), scale=(0.2, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear)
    RandomHorizontalFlip(p=0.5)
    RandomApply(
    p=0.8
    ColorJitter(brightness=[0.6, 1.4], contrast=[0.6, 1.4], saturation=[0.6, 1.4], hue=[-0.1, 0.1])
)
    RandomGrayscale(p=0.2)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
Validation transforms: Compose(
    CenterCrop(size=(96, 96))
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
Files already downloaded and verified
Files already downloaded and verified
Dataset contains 10000/8000 train/val samples


In [6]:
    #4# Memory Bank
print(colored('Build MemoryBank', 'blue'))
base_dataset = get_train_dataset(p, val_transforms, split='train') # Dataset w/o augs for knn eval
base_dataloader = get_val_dataloader(p, base_dataset) 
    
memory_bank_base = MemoryBank(len(base_dataset), 
                                p['model_kwargs']['features_dim'],
                                p['num_classes'], p['criterion_kwargs']['temperature'])

    
memory_bank_val = MemoryBank(len(val_dataset),
                                p['model_kwargs']['features_dim'],
                                p['num_classes'], p['criterion_kwargs']['temperature'])

[34mBuild MemoryBank[0m
Files already downloaded and verified


In [7]:
#5# Training Parameter                                              OK
# Criterion
print(colored('Retrieve criterion', 'blue'))
criterion = get_criterion(p)
print('Criterion is {}'.format(criterion.__class__.__name__))

[34mRetrieve criterion[0m
Criterion is PclCldLoss


In [9]:
    # Optimizer and scheduler                                       
print(colored('Retrieve optimizer', 'blue'))
optimizer = get_optimizer(p, group_model)
print(optimizer)
    ###

[34mRetrieve optimizer[0m
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.4
    momentum: 0.9
    nesterov: False
    weight_decay: 0.0001
)


In [10]:
M_num_clusters = get_clustering(p)
M_num_clusters

[2, 4, 8, 16]

In [11]:
type(M_num_clusters)

list

In [12]:
if os.path.exists( str(p.get('pretext_checkpoint_instance')) ):
    print(colored('Restart from checkpoint (instance_model) {}'.format(p['pretext_checkpoint_instance']), 'blue'))
    checkpoint = torch.load(p['pretext_checkpoint_instance'], map_location='cpu')
    optimizer.load_state_dict(checkpoint['optimizer'])
    instance_model.load_state_dict(checkpoint['model'])
    instance_model.cuda()
    start_epoch = checkpoint['epoch']

else:
    print(colored('No checkpoint file at {}'.format( str(p.get('pretext_checkpoint_instance')) ), 'blue'))
    start_epoch = 0

[34mNo checkpoint file at None[0m


In [13]:
if os.path.exists( str(p.get('pretext_checkpoint_group')) ):
    print(colored('Restart from checkpoint (group_model) {}'.format(p['pretext_checkpoint_group']), 'blue'))
    checkpoint = torch.load(p['pretext_checkpoint_group'], map_location='cpu')
    optimizer.load_state_dict(checkpoint['optimizer'])
    group_model.load_state_dict(checkpoint['model'])
    group_model.cuda()
    start_epoch = checkpoint['epoch']

In [18]:
        #7# Training
print(colored('Starting main loop', 'blue'))
#for epoch in range(start_epoch, 1):
    #print(colored('Epoch %d/%d' %(epoch, p['epochs']), 'yellow'))
    #print(colored('-'*15, 'yellow'))

        #a - Adjust lr
lr = adjust_learning_rate(p, optimizer, 1)
print('Adjusted learning rate to {:.5f}'.format(lr))
        
        #b - Train the model with the clPcl method for one epoch (iteration)
print('Train ...')
#def pcl_cld_train(args, train_loader, instance_branch, group_branch, criterion, optimizer, epoch, M_num_clusters):
import torch
import numpy as np
from spherecluster import VonMisesFisherMixture
from utils.utils import AverageMeter, ProgressMeter

losses = AverageMeter('Loss', ':.4e')
progress = ProgressMeter(len(train_dataloader),[losses],prefix="Epoch: [{}]".format(1))
        
instance_model.train()
group_model.train()
print("initialized pcl_cld_train")

i = 1
batch = next(iter(train_dataloader))
type(batch)



    

#pcl_cld_train(train_loader = train_dataloader, instance_branch = instance_model, group_branch = group_model, criterion = criterion, optimizer = optimizer, epoch = epoch, M_num_clusters = M_num_clusters)

[34mStarting main loop[0m
Adjusted learning rate to 0.40000
Train ...
initialized pcl_cld_train


dict

In [19]:
batch

{'image': tensor([[[[ 1.4440,  1.0844,  0.3823,  ..., -0.9020, -0.5767, -0.4054],
           [ 1.5982,  1.3242,  0.7933,  ..., -0.9363, -0.5596, -0.3541],
           [ 1.8893,  1.7865,  1.5810,  ..., -1.0048, -0.5253, -0.2856],
           ...,
           [-0.2684, -0.2684, -0.2513,  ..., -1.1589, -1.1589, -1.1760],
           [-0.2513, -0.2513, -0.2684,  ..., -0.9363, -0.8164, -0.7650],
           [-0.2342, -0.2513, -0.2856,  ..., -0.8164, -0.6281, -0.5424]],
 
          [[ 1.5182,  1.1155,  0.3627,  ..., -0.7927, -0.3725, -0.1625],
           [ 1.7283,  1.4307,  0.8529,  ..., -0.7752, -0.3550, -0.1275],
           [ 2.1310,  2.0084,  1.7458,  ..., -0.7577, -0.3025, -0.0574],
           ...,
           [-0.2500, -0.2675, -0.2850,  ..., -1.0553, -1.0553, -1.0378],
           [-0.2675, -0.2850, -0.3025,  ..., -1.0028, -0.8627, -0.7752],
           [-0.2675, -0.2850, -0.3025,  ..., -0.9678, -0.7577, -0.6352]],
 
          [[ 1.8557,  1.5768,  1.0539,  ..., -0.4450,  0.2522,  0.6182],
    

In [21]:
originImage_batch = batch['image']
augmentedImage_batch = batch['image_augmented']
print("batch_image_shape: "+str(originImage_batch.shape))

batch_image_shape: torch.Size([64, 3, 96, 96])


In [24]:
logits, labels = instance_model(originImage_batch,augmentedImage_batch)

AssertionError: Torch not compiled with CUDA enabled

In [None]:
"""

    #originImage_batch.cuda(args.gpu, non_blocking=True)
    #augmentedImage_batch.cuda(args.gpu, non_blocking=True)
    print("loaded batch images, forwarding to MoCo model...")

    
    instance_loss = F.cross_entropy(logits,labels)
        print("forwarding to group_branch... ")
#M_num_clusters = [2,4,8,16]
        original_view = group_branch(originImage_batch)
        augmented_view = group_branch(augmentedImage_batch)
        M_kmeans_results = []
        MI_kmeans_results = []
        concentration_matrices = []
        concentration_matrices_I = []
        M_labels = []
        M_labels_I = []
        print("start batch-local clustering...")
        for k in M_num_clusters:
        
            vmf_hard = VonMisesFisherMixture(n_clusters=k, posterior_type='hard', n_init=10)
            vmf_hard.fit(original_view)
            vmf_hard_I = VonMisesFisherMixture(n_clusters=k, posterior_type='hard', n_init=10)
            vmf_hard_I.fit(augmented_view)

            M_kmeans_results.append(torch.Tensor(vmf_hard.cluster_centers_))
            MI_kmeans_results.append(torch.Tensor(vmf_hard_I.cluster_centers_))
            concentration_matrices.append(vmf_hard.concentrations_) # not a tensor
            concentration_matrices_I.append(vmf_hard_I.concentrations_)
            M_labels.append( vmf_hard.labels_ )
            M_labels_I.append( vmf_hard_I.labels_ )
#-------------------------------------------------------------------------------------------------------
#group_loss = pcl_cld_loss(original_view,augmented_view,M_kmeans_results,MI_kmeans_results,concentration_matrices,concentration_matrices_I)
        print("local clustering done, applying group_loss...")
        group_loss = criterion( features = original_view, features_I = augmented_view, M_kmeans = M_kmeans_results , M_kmeans_I = MI_kmeans_results, concentrations = concentration_matrices, concentrations_I = concentration_matrices_I,labels = M_labels, labels_I = M_labels_I, lb = 1)
        
        loss = instance_loss + group_loss
        print("loss computed, optimization step... ")
        losses.update(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print("optimization done !")
        if i % 25 == 0:
            progress.display(i)
            
"""