In [1]:
import os
import sys
import wandb
import argparse
import numpy as np


# Put the parent directory and then the current working directory at the front
# of sys.path (the directory inserted last ends up first), presumably so the
# project-local packages imported below can be resolved.
for _relative_dir in ("../", ""):
    sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), _relative_dir)))
import torch
import torchvision.transforms as T
import torchvision

from dataloaders.dataloader_cifar10 import get_cifar10
from dataloaders.dataloader_cifar100 import get_cifar100
from utils.eval_metrics import linear_evaluation, get_t_SNE_plot
from models.linear_classifer import LinearClassifier
from models.ssl import  SimSiam, Siamese, Encoder, Predictor

from trainers.train_simsiam import train_simsiam
from trainers.train_infomax import train_infomax
from trainers.train_barlow import train_barlow

from trainers.train_PFR import train_PFR_simsiam
from trainers.train_PFR_contrastive import train_PFR_contrastive_simsiam
from trainers.train_contrastive import train_contrastive_simsiam
from trainers.train_ering import train_ering_simsiam

from torchsummary import summary
import random
from utils.lr_schedulers import LinearWarmupCosineAnnealingLR, SimSiamScheduler
from utils.eval_metrics import Knn_Validation_cont
from copy import deepcopy
from loss import invariance_loss,CovarianceLoss,ErrorCovarianceLoss
import torch.nn as nn
import time
import torch.nn.functional as F
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC


# Number CUDA devices by PCI bus id and expose GPUs 0-7 to this process.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7",
    }
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class GaussianBlur(object):
    """Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709

    Each call blurs the input with a 3x3 kernel whose standard deviation is
    drawn uniformly at random from the configured ``sigma`` range.
    """

    def __init__(self, sigma=(0.1, 2.0)):
        """Remember the range the blur strength is sampled from.

        Args:
            sigma: Two-element sequence ``(low, high)`` bounding the standard
                deviation passed to the blur. Lists and tuples are accepted.
        """
        # Snapshot the bounds into a tuple: the default is no longer a shared
        # mutable list, and mutating the caller's list afterwards cannot
        # silently change this transform's range.
        self.sigma = tuple(sigma)

    def __call__(self, x):
        """Return ``x`` blurred with a freshly sampled sigma."""
        sigma = random.uniform(self.sigma[0], self.sigma[1])
        # kernel size and sigma are open problems but right now seems ok!
        return torchvision.transforms.functional.gaussian_blur(x, kernel_size=[3, 3], sigma=sigma)


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar', best_filename='model_best.pth.tar'):
    """Serialize ``state`` to ``filename`` and optionally keep a "best" copy.

    Args:
        state: Object handed to ``torch.save``.
        is_best: When truthy, the file just written is also copied to
            ``best_filename``.
        filename: Destination path of the checkpoint.
        best_filename: Destination path of the copy made when ``is_best`` is
            truthy. Defaults to the previously hard-coded 'model_best.pth.tar'.
    """
    # Imported locally because the notebook's import cell does not import
    # shutil; without this the is_best branch raised NameError.
    import shutil

    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)

In [3]:
class Args:
    """Settings holder for this notebook; every option is a class attribute.

    The grouping below is by attribute name only. Options not read in this
    notebook are presumably consumed by the project's training code.
    """

    # -- run selection --
    appr = 'barlow_PFR'
    dataset = 'cifar100'
    class_split = [25,25,25,25]   # classes per task; must sum to the dataset's class count
    epochs = [500,500,500,500]    # one entry per task
    cuda_device = 5               # index used to build the "cuda:<n>" device string
    num_workers = 8
    knn_report_freq = 10

    # -- encoder options --
    normalization = 'group'
    weight_standard = False
    proj_hidden = 2048
    proj_out = 2048 #infomax 64
    pred_hidden = 512
    pred_out = 2048

    # -- pretraining optimisation --
    same_lr = False
    pretrain_batch_size = 512
    pretrain_warmup_epochs = 10
    pretrain_warmup_lr = 3e-3
    pretrain_base_lr = 0.03
    pretrain_momentum = 0.9
    pretrain_weight_decay = 5e-4
    min_lr = 0.00

    # -- loss weights and method-specific knobs --
    lambdap = 1.0
    lambda_norm = 1.0
    lambda_param = 5e-3
    cov_loss_weight = 1.0
    sim_loss_weight = 250.0
    info_loss = 'invariance'
    contrastive_ratio = 0.001
    subspace_rate = 0.99
    bsize = 32
    msize = 150



In [4]:
# Instantiate the settings holder; later cells read their configuration from `args`.
args = Args()

In [5]:
# Pick the dataloader factory and the class count for the configured dataset.
if args.dataset == "cifar10":
    get_dataloaders = get_cifar10
    num_classes = 10
elif args.dataset == "cifar100":
    get_dataloaders = get_cifar100
    num_classes = 100
else:
    # Fail with a clear message instead of a NameError on `num_classes` below.
    raise ValueError(f"Unsupported dataset {args.dataset!r}; expected 'cifar10' or 'cifar100'")

# The per-task class counts must add up to the dataset's class count, and
# there must be exactly one epoch entry per task.
assert sum(args.class_split) == num_classes, \
    f"class_split sums to {sum(args.class_split)}, expected {num_classes}"
assert len(args.class_split) == len(args.epochs), \
    f"class_split has {len(args.class_split)} tasks but epochs has {len(args.epochs)} entries"

In [6]:
num_worker = args.num_workers
# device: use the configured GPU index when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:" + str(args.cuda_device))
else:
    device = torch.device("cpu")
print(device)

cuda:5


In [7]:
#wandb init
# Every setting on Args is a class attribute, so the instance's own __dict__
# (what vars(args) returns) is empty. Collect the public attributes into a
# plain dict so the run config records them.
# NOTE(review): assumes wandb reads a non-dict config through its instance
# attributes -- confirm against the wandb.init documentation.
wandb_config = {key: getattr(args, key) for key in dir(args) if not key.startswith("_")}
wandb.init(project="CSSL",  entity="yavuz-team",
            mode="disabled",
            config=wandb_config,
            name= str(args.dataset) + '-algo' + str(args.appr) + "-e" + str(args.epochs) + "-b" 
            + str(args.pretrain_batch_size) + "-lr" + str(args.pretrain_base_lr)+"-CS"+str(args.class_split))

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




In [8]:
def _build_view_transform():
    """Return a newly constructed augmentation pipeline for one view.

    The steps are: random resized crop to 32px, horizontal flip, colour jitter
    applied with probability 0.8, random grayscale, Gaussian blur applied with
    probability 0.5, and a final normalisation.
    """
    color_jitter = T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
    steps = [
        T.RandomResizedCrop(size=32, scale=(0.2, 1.0)),
        T.RandomHorizontalFlip(),
        T.RandomApply(torch.nn.ModuleList([color_jitter]), p=0.8),
        T.RandomGrayscale(p=0.2),
        T.RandomApply([GaussianBlur()], p=0.5),
        T.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261]),
    ]
    return T.Compose(steps)


# Both views use the same recipe but get their own pipeline objects.
if 'infomax' in args.appr or 'barlow' in args.appr:
    transform = _build_view_transform()
    transform_prime = _build_view_transform()

In [9]:
#Dataloaders
print("Creating Dataloaders..")

# Options shared by both calls. A dict literal is used on purpose: a later
# cell rebinds the name `dict`.
_loader_options = {
    "valid_rate": 0.00,
    "batch_size": args.pretrain_batch_size,
    "seed": 0,
    "num_worker": num_worker,
}

#Class Based: loaders built from the per-task split in args.class_split
(
    train_data_loaders,
    train_data_loaders_knn,
    test_data_loaders,
    _,
    train_data_loaders_linear,
    train_data_loaders_pure,
) = get_dataloaders(transform, transform_prime, classes=args.class_split, **_loader_options)

# Same call with a single split that holds all classes
(
    _,
    train_data_loaders_knn_all,
    test_data_loaders_all,
    _,
    train_data_loaders_linear_all,
    train_data_loaders_pure_all,
) = get_dataloaders(transform, transform_prime, classes=[num_classes], **_loader_options)


Creating Dataloaders..


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [10]:
device = torch.device(f"cuda:{args.cuda_device}" if torch.cuda.is_available() else "cpu")
print(device)
# Only the infomax / barlow approaches build this Siamese encoder.
if any(tag in args.appr for tag in ('infomax', 'barlow')):
    proj_hidden, proj_out = args.proj_hidden, args.proj_out
    encoder = Encoder(
        hidden_dim=proj_hidden,
        output_dim=proj_out,
        normalization=args.normalization,
        weight_standard=args.weight_standard,
        appr_name=args.appr,
    )
    model = Siamese(encoder)
    # move the model to the selected device
    model.to(device)

cuda:5


In [11]:
#load model here
file_name = 'checkpoints/checkpoint_cifar100-algocassle_barlow-e[500, 500, 500, 500]-b256-lr0.06-CS[25, 25, 25, 25]acc_62.57.pth.tar'
# map_location remaps the stored tensors onto the device chosen earlier, so the
# load also works when the checkpoint was saved from a different GPU or when
# the notebook has fallen back to the CPU.
# NOTE: `dict` shadows the builtin; the name is kept because a later cell
# reads dict['state_dict'].
dict = torch.load(file_name, map_location=device)

In [12]:
# Attach an extra MLP head (proj_out -> proj_hidden -> proj_out) to the model
# under the attribute name `temporal_projector`, on the same device.
_temporal_projector_layers = [
    nn.Linear(args.proj_out, args.proj_hidden, bias=False),
    nn.BatchNorm1d(args.proj_hidden),
    nn.ReLU(),
    nn.Linear(args.proj_hidden, args.proj_out),
]
model.temporal_projector = nn.Sequential(*_temporal_projector_layers).to(device)

In [13]:
# Copy the weights stored under 'state_dict' in the loaded checkpoint into the model.
# (`dict` here is the checkpoint object loaded in an earlier cell, not the builtin.)
model.load_state_dict(dict['state_dict'])

<All keys matched successfully>

In [14]:
def total_performance(model, loaders, test_loaders):
    """Fit one linear SVM on model outputs pooled over every loader.

    Args:
        model: Module called on each batch; its output is used as the feature.
        loaders: Iterable of loaders yielding ``(data, target)`` batches used
            to fit the classifier.
        test_loaders: Iterable of loaders yielding ``(data, target)`` batches
            used only for scoring.

    Returns:
        ``(train_score, test_score)`` as reported by ``LinearSVC.score``.

    Batches are moved to the module-level ``device`` before the forward pass.
    """

    def _collect(loader_group):
        # Run every batch of every loader through the model; keep numpy copies.
        features, labels = [], []
        for loader in loader_group:
            for batch in loader:
                inputs, targets = (tensor.to(device) for tensor in batch)
                features.append(model(inputs).cpu().numpy())
                labels.append(targets.cpu().numpy())
        return features, labels

    model.eval()
    with torch.no_grad():
        train_feats, train_labels = _collect(loaders)
        test_feats, test_labels = _collect(test_loaders)

    X, y = np.concatenate(train_feats), np.concatenate(train_labels)
    Xtest, ytest = np.concatenate(test_feats), np.concatenate(test_labels)

    svm = LinearSVC(random_state=0, tol=1e-5)
    svm.fit(X, y)
    train_score = svm.score(X, y)
    test_score = svm.score(Xtest, ytest)
    return train_score, test_score

In [16]:
# Not real performance, just shows the linear separability
total_acc_train, total_acc_test = total_performance(
    model,
    train_data_loaders_knn,
    test_data_loaders,
)
print(f'Total Performance of the model train: {total_acc_train}  test: {total_acc_test}')



Total Performance of the model train: 0.73062  test: 0.5405


In [17]:
def train_clf(model, loader, test_loader):
    """Fit a linear SVM on model outputs from one loader and score it.

    Args:
        model: Module called on each batch; its output is used as the feature.
        loader: Loader yielding ``(data, target)`` batches used to fit.
        test_loader: Loader yielding ``(data, target)`` batches used to score.

    Returns:
        ``(train_score, test_score)`` as reported by ``LinearSVC.score``.

    Batches are moved to the module-level ``device`` before the forward pass.
    """

    def _collect(batches):
        # Forward each batch and keep numpy copies of outputs and targets.
        features, labels = [], []
        for batch in batches:
            inputs, targets = (tensor.to(device) for tensor in batch)
            features.append(model(inputs).cpu().numpy())
            labels.append(targets.cpu().numpy())
        return features, labels

    model.eval()
    with torch.no_grad():
        train_feats, train_labels = _collect(loader)
        test_feats, test_labels = _collect(test_loader)

    X, y = np.concatenate(train_feats), np.concatenate(train_labels)
    Xtest, ytest = np.concatenate(test_feats), np.concatenate(test_labels)

    svm = LinearSVC(random_state=0, tol=1e-5)
    svm.fit(X, y)
    train_score = svm.score(X, y)
    test_score = svm.score(Xtest, ytest)
    return train_score, test_score

In [18]:
# Fit and score a separate linear classifier for each task's loader pair.
for task, loader in enumerate(train_data_loaders_knn):
    acc, acc_test = train_clf(model, loader, test_data_loaders[task])
    # ".2f" = two decimals; the previous "2f" only set a minimum width of 2
    # and therefore printed six decimals.
    print(f"Task {task}  acc train: {acc*100:.2f}   acc test: {acc_test*100:.2f}")
    print()

Task 0  acc train: 89.480000   acc test: 71.680000





Task 1  acc train: 86.912000   acc test: 67.440000





Task 2  acc train: 90.424000   acc test: 68.640000





Task 3  acc train: 95.248000   acc test: 74.280000





In [19]:
def train_task_seperation(model, loaders, test_loaders):
    """Fit a linear SVM that predicts which loader a sample came from.

    Every sample is labelled with the index of the loader that produced it
    (its position in ``loaders`` / ``test_loaders``); the batch targets are
    used only for their length.

    Args:
        model: Module called on each batch; its output is used as the feature.
        loaders: Sequence of loaders yielding ``(data, target)`` batches used
            to fit the classifier.
        test_loaders: Sequence of loaders yielding ``(data, target)`` batches
            used only for scoring.

    Returns:
        ``(train_score, test_score)`` as reported by ``LinearSVC.score``.

    Inputs are moved to the module-level ``device`` before the forward pass.
    """

    def _collect(loader_group):
        features, task_ids = [], []
        for task_index, loader in enumerate(loader_group):
            for inputs, targets in loader:
                features.append(model(inputs.to(device)).cpu().numpy())
                # one task label per sample in the batch
                task_ids.extend([task_index] * len(targets))
        return features, task_ids

    model.eval()
    with torch.no_grad():
        train_feats, train_ids = _collect(loaders)
        test_feats, test_ids = _collect(test_loaders)

    X, y = np.concatenate(train_feats), np.array(train_ids)
    Xtest, ytest = np.concatenate(test_feats), np.array(test_ids)

    svm = LinearSVC(random_state=0, tol=1e-5)
    svm.fit(X, y)
    train_score = svm.score(X, y)
    test_score = svm.score(Xtest, ytest)
    return train_score, test_score
            


In [20]:
# How well a linear classifier can tell from the features which task a sample belongs to.
task_seperability, task_seperability_test = train_task_seperation(
    model,
    train_data_loaders_knn,
    test_data_loaders,
)
print(f'Task linear seperable performance train: {task_seperability}  test: {task_seperability_test}')

Task linear seperable performance train: 0.50926  test: 0.4943




In [21]:
#load model here
file_name = 'checkpoints/checkpoint_cifar100-algoLRD_barlow-e[500, 500, 500, 500]-b256-lr0.06-CS[25, 25, 25, 25]_task_0_lambdap_10.0_lambda_norm_0.1_same_lr_False_norm_batch_ws_False.pth.tar'
device = torch.device("cuda:" + str(args.cuda_device) if torch.cuda.is_available() else "cpu")
print(device)
# Bind the checkpoint to its own name instead of rebinding the builtin `dict`,
# and remap its tensors onto the active device so the load does not depend on
# the GPU it was saved from.
checkpoint = torch.load(file_name, map_location=device)
if 'infomax' in args.appr or 'barlow' in args.appr:
    proj_hidden = args.proj_hidden
    proj_out = args.proj_out
    # 'batch' (not args.normalization) matches the "norm_batch" tag in the checkpoint file name.
    encoder = Encoder(hidden_dim=proj_hidden, output_dim=proj_out, normalization = 'batch', weight_standard = args.weight_standard,appr_name =args.appr)
    old_model = Siamese(encoder)
    old_model.to(device) # move the model to the selected device

old_model.load_state_dict(checkpoint['state_dict'])

cuda:5


<All keys matched successfully>

In [22]:
# Not real performance, just shows the linear separability
total_acc_train, total_acc_test = total_performance(
    old_model,
    train_data_loaders_knn,
    test_data_loaders,
)
print(f'Total Performance of the model train: {total_acc_train}  test: {total_acc_test}')



Total Performance of the model train: 0.69098  test: 0.4925


In [23]:
# Fit and score a separate linear classifier for each task's loader pair.
for task, loader in enumerate(train_data_loaders_knn):
    acc, acc_test = train_clf(old_model, loader, test_data_loaders[task])
    # ".2f" = two decimals; the previous "2f" only set a minimum width of 2
    # and therefore printed six decimals.
    print(f"Task {task}  acc train: {acc*100:.2f}   acc test: {acc_test*100:.2f}")
    print()

Task 0  acc train: 92.136000   acc test: 69.360000





Task 1  acc train: 84.672000   acc test: 60.920000





Task 2  acc train: 86.136000   acc test: 63.920000





Task 3  acc train: 90.000000   acc test: 68.080000





In [24]:
# How well a linear classifier can tell from the features which task a sample belongs to.
task_seperability, task_seperability_test = train_task_seperation(
    old_model,
    train_data_loaders_knn,
    test_data_loaders,
)
print(f'Task linear seperable performance train: {task_seperability}  test: {task_seperability_test}')

Task linear seperable performance train: 0.52462  test: 0.5052




In [25]:
#load model here
file_name = 'checkpoints/checkpoint_cifar100-algoLRD_barlow-e[500, 500, 500, 500]-b256-lr0.06-CS[25, 25, 25, 25]_task_1_lambdap_10.0_lambda_norm_0.1_same_lr_False_norm_batch_ws_False.pth.tar'
device = torch.device("cuda:" + str(args.cuda_device) if torch.cuda.is_available() else "cpu")
print(device)
# Bind the checkpoint to its own name instead of rebinding the builtin `dict`,
# and remap its tensors onto the active device so the load does not depend on
# the GPU it was saved from.
checkpoint = torch.load(file_name, map_location=device)
if 'infomax' in args.appr or 'barlow' in args.appr:
    proj_hidden = args.proj_hidden
    proj_out = args.proj_out
    # 'batch' (not args.normalization) matches the "norm_batch" tag in the checkpoint file name.
    encoder = Encoder(hidden_dim=proj_hidden, output_dim=proj_out, normalization = 'batch', weight_standard = args.weight_standard,appr_name =args.appr)
    old_model = Siamese(encoder)
    old_model.to(device) # move the model to the selected device

old_model.load_state_dict(checkpoint['state_dict'])

cuda:5


<All keys matched successfully>

In [26]:
# Not real performance, just shows the linear separability
total_acc_train,  total_acc_test = total_performance(old_model, train_data_loaders_knn, test_data_loaders)
print(f'Total Performance of the model train: {total_acc_train}  test: {total_acc_test}')
# Fit and score a separate linear classifier for each task's loader pair.
for task, loader in enumerate(train_data_loaders_knn):
    acc, acc_test = train_clf(old_model, loader, test_data_loaders[task])
    # ".2f" = two decimals; the previous "2f" only set a minimum width of 2
    # and therefore printed six decimals.
    print(f"Task {task}  acc train: {acc*100:.2f}   acc test: {acc_test*100:.2f}")
    print()

# How well a linear classifier can tell which task a sample belongs to.
task_seperability, task_seperability_test = train_task_seperation(old_model, train_data_loaders_knn, test_data_loaders)
print(f'Task linear seperable performance train: {task_seperability}  test: {task_seperability_test}')




Total Performance of the model train: 0.71624  test: 0.5069
Task 0  acc train: 92.152000   acc test: 68.960000





Task 1  acc train: 89.544000   acc test: 66.800000





Task 2  acc train: 87.032000   acc test: 63.240000





Task 3  acc train: 91.088000   acc test: 67.480000





Task linear seperable performance train: 0.52936  test: 0.5116




In [27]:
#load model here
file_name = 'checkpoints/checkpoint_cifar100-algoLRD_barlow-e[500, 500, 500, 500]-b256-lr0.06-CS[25, 25, 25, 25]_task_2_lambdap_10.0_lambda_norm_0.1_same_lr_False_norm_batch_ws_False.pth.tar'
device = torch.device("cuda:" + str(args.cuda_device) if torch.cuda.is_available() else "cpu")
print(device)
# Bind the checkpoint to its own name instead of rebinding the builtin `dict`,
# and remap its tensors onto the active device so the load does not depend on
# the GPU it was saved from.
checkpoint = torch.load(file_name, map_location=device)
if 'infomax' in args.appr or 'barlow' in args.appr:
    proj_hidden = args.proj_hidden
    proj_out = args.proj_out
    # 'batch' (not args.normalization) matches the "norm_batch" tag in the checkpoint file name.
    encoder = Encoder(hidden_dim=proj_hidden, output_dim=proj_out, normalization = 'batch', weight_standard = args.weight_standard,appr_name =args.appr)
    old_model = Siamese(encoder)
    old_model.to(device) # move the model to the selected device

old_model.load_state_dict(checkpoint['state_dict'])

cuda:5


<All keys matched successfully>

In [28]:
# Not real performance, just shows the linear separability
total_acc_train,  total_acc_test = total_performance(old_model, train_data_loaders_knn, test_data_loaders)
print(f'Total Performance of the model train: {total_acc_train}  test: {total_acc_test}')
# Fit and score a separate linear classifier for each task's loader pair.
for task, loader in enumerate(train_data_loaders_knn):
    acc, acc_test = train_clf(old_model, loader, test_data_loaders[task])
    # ".2f" = two decimals; the previous "2f" only set a minimum width of 2
    # and therefore printed six decimals.
    print(f"Task {task}  acc train: {acc*100:.2f}   acc test: {acc_test*100:.2f}")
    print()

# How well a linear classifier can tell which task a sample belongs to.
task_seperability, task_seperability_test = train_task_seperation(old_model, train_data_loaders_knn, test_data_loaders)
print(f'Task linear seperable performance train: {task_seperability}  test: {task_seperability_test}')




Total Performance of the model train: 0.72046  test: 0.5151
Task 0  acc train: 91.120000   acc test: 68.040000





Task 1  acc train: 88.352000   acc test: 66.000000





Task 2  acc train: 90.384000   acc test: 66.280000





Task 3  acc train: 91.080000   acc test: 70.040000





Task linear seperable performance train: 0.534  test: 0.5117




In [29]:
#load model here
file_name = 'checkpoints/checkpoint_cifar100-algoLRD_barlow-e[500, 500, 500, 500]-b256-lr0.06-CS[25, 25, 25, 25]_task_3_lambdap_10.0_lambda_norm_0.1_same_lr_False_norm_batch_ws_False.pth.tar'
device = torch.device("cuda:" + str(args.cuda_device) if torch.cuda.is_available() else "cpu")
print(device)
# Bind the checkpoint to its own name instead of rebinding the builtin `dict`,
# and remap its tensors onto the active device so the load does not depend on
# the GPU it was saved from.
checkpoint = torch.load(file_name, map_location=device)
if 'infomax' in args.appr or 'barlow' in args.appr:
    proj_hidden = args.proj_hidden
    proj_out = args.proj_out
    # 'batch' (not args.normalization) matches the "norm_batch" tag in the checkpoint file name.
    encoder = Encoder(hidden_dim=proj_hidden, output_dim=proj_out, normalization = 'batch', weight_standard = args.weight_standard,appr_name =args.appr)
    old_model = Siamese(encoder)
    old_model.to(device) # move the model to the selected device

old_model.load_state_dict(checkpoint['state_dict'])

cuda:5


<All keys matched successfully>

In [30]:
# Not real performance, just shows the linear separability
total_acc_train,  total_acc_test = total_performance(old_model, train_data_loaders_knn, test_data_loaders)
print(f'Total Performance of the model train: {total_acc_train}  test: {total_acc_test}')
# Fit and score a separate linear classifier for each task's loader pair.
for task, loader in enumerate(train_data_loaders_knn):
    acc, acc_test = train_clf(old_model, loader, test_data_loaders[task])
    # ".2f" = two decimals; the previous "2f" only set a minimum width of 2
    # and therefore printed six decimals.
    print(f"Task {task}  acc train: {acc*100:.2f}   acc test: {acc_test*100:.2f}")
    print()

# How well a linear classifier can tell which task a sample belongs to.
task_seperability, task_seperability_test = train_task_seperation(old_model, train_data_loaders_knn, test_data_loaders)
print(f'Task linear seperable performance train: {task_seperability}  test: {task_seperability_test}')




Total Performance of the model train: 0.73272  test: 0.5262
Task 0  acc train: 90.968000   acc test: 69.120000





Task 1  acc train: 87.576000   acc test: 65.640000





Task 2  acc train: 89.352000   acc test: 66.400000





Task 3  acc train: 96.168000   acc test: 74.680000





Task linear seperable performance train: 0.53856  test: 0.5251


