In [1]:
import os
import sys
import wandb
import argparse
import numpy as np


sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "../")))
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "")))
import torch
import torchvision.transforms as T
import torchvision

from dataloaders.dataloader_cifar10 import get_cifar10
from dataloaders.dataloader_cifar100 import get_cifar100
from utils.eval_metrics import linear_evaluation, get_t_SNE_plot
from models.linear_classifer import LinearClassifier
from models.ssl import  SimSiam, Siamese, Encoder, Predictor

from trainers.train_simsiam import train_simsiam
from trainers.train_infomax import train_infomax
from trainers.train_barlow import train_barlow

from trainers.train_PFR import train_PFR_simsiam
from trainers.train_PFR_contrastive import train_PFR_contrastive_simsiam
from trainers.train_contrastive import train_contrastive_simsiam
from trainers.train_ering import train_ering_simsiam

from torchsummary import summary
import random
from utils.lr_schedulers import LinearWarmupCosineAnnealingLR, SimSiamScheduler
from utils.eval_metrics import Knn_Validation_cont
from copy import deepcopy
from loss import invariance_loss,CovarianceLoss,ErrorCovarianceLoss
import torch.nn as nn
import time
import torch.nn.functional as F
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC


os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class GaussianBlur(object):
    """Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709"""

    def __init__(self, sigma=[0.1, 2.0]):
        self.sigma = sigma

    def __call__(self, x):
        sigma = random.uniform(self.sigma[0], self.sigma[1])
        x = torchvision.transforms.functional.gaussian_blur(x,kernel_size=[3,3],sigma=sigma)#kernel size and sigma are open problems but right now seems ok!
        return x


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [4]:
class Args():
    normalization = 'batch'
    weight_standard = False
    same_lr = False
    pretrain_batch_size = 512
    pretrain_warmup_epochs = 10
    pretrain_warmup_lr = 3e-3
    pretrain_base_lr = 0.03
    pretrain_momentum = 0.9
    pretrain_weight_decay = 5e-4
    min_lr = 0.00
    lambdap = 1.0
    appr = 'cassle_barlow'
    knn_report_freq = 10
    cuda_device = 6
    num_workers = 8
    contrastive_ratio = 0.001
    dataset = 'cifar100'
    class_split = [20,20,20,20,20]
    epochs = [500,500,500,500,500]
    cov_loss_weight = 1.0
    sim_loss_weight = 250.0
    info_loss = 'invariance'
    lambda_norm = 1.0
    subspace_rate = 0.99
    lambda_param = 5e-3
    bsize = 32
    msize = 150
    proj_hidden = 2048
    proj_out = 2048 #infomax 64
    pred_hidden = 512
    pred_out = 2048



In [5]:
args = Args()

In [6]:
if args.dataset == "cifar10":
    get_dataloaders = get_cifar10
    num_classes=10
elif args.dataset == "cifar100":
    get_dataloaders = get_cifar100
    num_classes=100
assert sum(args.class_split) == num_classes
assert len(args.class_split) == len(args.epochs)

In [7]:
num_worker = args.num_workers
#device
device = torch.device("cuda:" + str(args.cuda_device) if torch.cuda.is_available() else "cpu")
print(device)

cuda:6


In [8]:
#wandb init
wandb.init(project="CSSL",  entity="yavuz-team",
            mode="disabled",
            config=args,
            name= str(args.dataset) + '-algo' + str(args.appr) + "-e" + str(args.epochs) + "-b" 
            + str(args.pretrain_batch_size) + "-lr" + str(args.pretrain_base_lr)+"-CS"+str(args.class_split))

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




In [9]:
if 'infomax' in args.appr or 'barlow' in args.appr:
    transform = T.Compose([
            T.RandomResizedCrop(size=32, scale=(0.2, 1.0)),
            T.RandomHorizontalFlip(),
            T.RandomApply(torch.nn.ModuleList([T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)]), p=0.8),
            T.RandomGrayscale(p=0.2),
            T.RandomApply([GaussianBlur()], p=0.5), 
            T.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261])])

    transform_prime = T.Compose([
            T.RandomResizedCrop(size=32, scale=(0.2, 1.0)),
            T.RandomHorizontalFlip(),
            T.RandomApply(torch.nn.ModuleList([T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)]), p=0.8),
            T.RandomGrayscale(p=0.2),
            T.RandomApply([GaussianBlur()], p=0.5), 
            T.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261])])

In [10]:
#Dataloaders
print("Creating Dataloaders..")
#Class Based
train_data_loaders, train_data_loaders_knn, test_data_loaders, _, train_data_loaders_linear, train_data_loaders_pure  = get_dataloaders(transform, transform_prime, \
                                    classes=args.class_split, valid_rate = 0.00, batch_size=args.pretrain_batch_size, seed = 0, num_worker= num_worker)
_, train_data_loaders_knn_all, test_data_loaders_all, _, train_data_loaders_linear_all, train_data_loaders_pure_all = get_dataloaders(transform, transform_prime, \
                                        classes=[num_classes], valid_rate = 0.00, batch_size=args.pretrain_batch_size, seed = 0, num_worker= num_worker)


Creating Dataloaders..
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [29]:

from tqdm import tqdm
def correct_top_k(outputs, targets, top_k=(1,5)):
    with torch.no_grad():
        prediction = torch.argsort(outputs, dim=-1, descending=True)
        result= []
        for k in top_k:
            correct_k = torch.sum((prediction[:, 0:k] == targets.unsqueeze(dim=-1)).any(dim=-1).float()).item() 
            result.append(correct_k)
        return result

def linear_test(net, data_loader, classifier, epoch, device, task_sep=False, intra_task=False):
    # evaluate model:
    net.eval() # for not update batchnorm
    linear_loss = 0.0
    num = 0
    total_loss, total_correct_1, total_correct_5, total_num, test_bar = 0.0, 0.0, 0.0, 0, tqdm(data_loader)
    with torch.no_grad():
        for data_tuple in test_bar:
            data, target = [t.to(device) for t in data_tuple]
            if task_sep:
                target = target // 20
            if intra_task:
                target = target % 20
            output = net(data)
            if classifier is not None:  #else net is already a classifier
                output = classifier(output) 
            linear_loss = F.cross_entropy(output, target)
            
            # Batchsize for loss and accuracy
            num = data.size(0)
            total_num += num 
            total_loss += linear_loss.item() * num 
            # Accumulating number of correct predictions 
            correct_top_1, correct_top_5 = correct_top_k(output, target, top_k=(1,5))    
            total_correct_1 += correct_top_1
            total_correct_5 += correct_top_5

            test_bar.set_description('Lin.Test Epoch: [{}] Loss: {:.4f} ACC@1: {:.2f}% ACC@5: {:.2f}% '
                                     .format(epoch,  total_loss / total_num,
                                             total_correct_1 / total_num * 100, total_correct_5 / total_num * 100
                                             ))
        acc_1 = total_correct_1/total_num*100
        acc_5 = total_correct_5/total_num*100
        wandb.log({" Linear Layer Test Loss ": linear_loss / total_num, " Epoch ": epoch})
        wandb.log({" Linear Layer Test - Acc": acc_1, " Epoch ": epoch})
    return total_loss / total_num, acc_1 , acc_5 

def linear_train(net, data_loader, train_optimizer, classifier, scheduler, epoch, device, task_sep=False, intra_task=False):

    net.eval() # for not update batchnorm 
    total_num, train_bar = 0, tqdm(data_loader)
    linear_loss = 0.0
    total_correct_1, total_correct_5 = 0.0, 0.0
    for data_tuple in train_bar:
        # Forward prop of the model with single augmented batch
        pos_1, target = data_tuple
        pos_1 = pos_1.to(device)
        feature_1 = net(pos_1)
        # feature_1 = net.get_representation(pos_1) 

        # Batchsize
        batchsize_bc = feature_1.shape[0]
        features = feature_1
        targets = target.to(device)
        if task_sep:
            targets = targets // 20
        if intra_task:
            targets = targets % 20
        logits = classifier(features.detach()) 
        # Cross Entropy Loss 
        linear_loss_1 = F.cross_entropy(logits, targets)

        # Number of correct predictions
        linear_correct_1, linear_correct_5 = correct_top_k(logits, targets, top_k=(1, 5))
    

        # Backpropagation part
        train_optimizer.zero_grad()
        linear_loss_1.backward()
        train_optimizer.step()

        # Accumulating number of examples, losses and correct predictions
        total_num += batchsize_bc
        linear_loss += linear_loss_1.item() * batchsize_bc
        total_correct_1 += linear_correct_1 
        total_correct_5 += linear_correct_5


        acc_1 = total_correct_1/total_num*100
        # # This bar is used for live tracking on command line (batch_size -> batchsize_bc: to show current batchsize )
        train_bar.set_description('Lin.Train Epoch: [{}] Loss: {:.4f} ACC: {:.2f}'.format(\
                epoch, linear_loss / total_num, acc_1))
    scheduler.step()
    acc_1 = total_correct_1/total_num*100
    acc_5 = total_correct_5/total_num*100       
    wandb.log({" Linear Layer Train Loss ": linear_loss / total_num, " Epoch ": epoch})
    wandb.log({" Linear Layer Train - Acc": acc_1, " Epoch ": epoch})
    # print(f'Linear Layer Train - Acc: {acc_1}')
        
    return linear_loss/total_num, acc_1, acc_5

#train classifier on all classes, test task by task
def linear_evaluation(net, data_loader,test_data_loaders,train_optimizer,classifier, scheduler, epochs, device, task_sep=False, intra_task=False):
    for epoch in range(1, epochs+1):
        linear_loss, linear_acc1, linear_acc5 = linear_train(net,data_loader,train_optimizer,classifier,scheduler, epoch, device, task_sep, intra_task)
    with torch.no_grad():
        # Testing for linear evaluation
        for test_loader in test_data_loaders:
            test_loss, test_acc1, test_acc5 = linear_test(net, test_loader, classifier, epoch, device, task_sep, intra_task)

    return test_loss, test_acc1, test_acc5, classifier

def inter_intra_task_error(net, classifier, data_loader):
    net.eval() # for not update batchnorm
    test_bar = tqdm(data_loader)
    inter_task = 0.0
    intra_task = 0.0
    total_num = 0.0
    correct = 0.0
    with torch.no_grad():
        for data_tuple in test_bar:
            data, target = [t.to(device) for t in data_tuple]
            output = net(data)
            output = classifier(output) 
            prediction = torch.argsort(output, dim=-1, descending=True)[:, 0]
            # print(data.size(0))
            # print(torch.sum(torch.where(target == prediction, 1, 0)))
            # print(torch.sum(torch.where(target != prediction, 1, 0)))

            error_locs = torch.where(target != prediction, True, False) #locations of misspredictions
            # print(prediction[error_locs]//20, target[error_locs]//20)
            
            inter_temp = torch.sum(torch.where( prediction[error_locs]//20 != target[error_locs]//20 , 1, 0)) #number of inter-task errors
            intra_temp = torch.sum(error_locs) - inter_temp #number of intra-task errors (remaining errors)
            # print(inter_temp, intra_temp)
            inter_task += inter_temp
            intra_task += intra_temp

            # Batchsize for loss and accuracy
            num = data.size(0)
            correct += num - (inter_temp+intra_temp)
            total_num += num 
            test_bar.set_description('Lin.Test Inter Task Error: {:.2f}% Intra Task Error: {:.2f}% Acc: {:.2f}'
                                     .format(inter_task/total_num*100, intra_task/total_num*100, correct/total_num*100))
        inter = inter_task/total_num*100
        intra = intra_task/total_num*100
        acc = 100 - inter - intra
    return inter, intra , acc

### Cassle Model

In [34]:
device = torch.device("cuda:" + str(args.cuda_device) if torch.cuda.is_available() else "cpu")
print(device)
if 'infomax' in args.appr or 'barlow' in args.appr:
    proj_hidden = args.proj_hidden
    proj_out = args.proj_out
    encoder = Encoder(hidden_dim=proj_hidden, output_dim=proj_out, normalization = args.normalization, weight_standard = args.weight_standard, appr_name = args.appr)
    model = Siamese(encoder)
    model.to(device) #automatically detects from model
#load model here
file_name = "./checkpoints/checkpoint_cifar100-algocassle_barlow-e[500, 500, 500, 500, 500]-b256-lr0.25-CS[20, 20, 20, 20, 20]acc_59.38.pth.tar"
dict = torch.load(file_name)
model.temporal_projector = nn.Sequential(
            nn.Linear(args.proj_out, args.proj_hidden, bias=False),
            nn.BatchNorm1d(args.proj_hidden),
            nn.ReLU(),
            nn.Linear(args.proj_hidden, args.proj_out),
        ).to(device)
model.load_state_dict(dict['state_dict'])

cuda:6


<All keys matched successfully>

In [35]:
#train with all classes an test task by task
lin_epoch=100
classifier = LinearClassifier(num_classes = 100).to(device)
lin_optimizer = torch.optim.SGD(classifier.parameters(), 0.2, momentum=0.9, weight_decay=0) # Infomax: no weight decay, epoch 100, cosine scheduler
lin_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(lin_optimizer, lin_epoch, eta_min=0.002) #scheduler + values ref: infomax paper
test_loss, test_acc1, test_acc5, classifier = linear_evaluation(model, train_data_loaders_linear_all[0],
                                                            test_data_loaders,lin_optimizer, classifier, 
                                                            lin_scheduler, epochs=lin_epoch, device=device) 
#test on all classes but report inter and intra task errors
inter_error, intra_error, acc = inter_intra_task_error(model, classifier, test_data_loaders_all[0])
print(f'Total Accuracy: {test_acc1}')

Lin.Train Epoch: [1] Loss: 5.0308 ACC: 33.42: 100%|██████████| 196/196 [00:12<00:00, 16.13it/s]
Lin.Train Epoch: [2] Loss: 5.1476 ACC: 39.23: 100%|██████████| 196/196 [00:12<00:00, 16.08it/s]
Lin.Train Epoch: [3] Loss: 4.9306 ACC: 40.88: 100%|██████████| 196/196 [00:12<00:00, 16.17it/s]
Lin.Train Epoch: [4] Loss: 4.9995 ACC: 41.35: 100%|██████████| 196/196 [00:10<00:00, 18.32it/s]
Lin.Train Epoch: [5] Loss: 5.0299 ACC: 42.05: 100%|██████████| 196/196 [00:11<00:00, 16.40it/s]
Lin.Train Epoch: [6] Loss: 4.9658 ACC: 42.57: 100%|██████████| 196/196 [00:12<00:00, 16.00it/s]
Lin.Train Epoch: [7] Loss: 5.0888 ACC: 42.87: 100%|██████████| 196/196 [00:12<00:00, 16.10it/s]
Lin.Train Epoch: [8] Loss: 4.9396 ACC: 43.14: 100%|██████████| 196/196 [00:11<00:00, 16.73it/s]
Lin.Train Epoch: [9] Loss: 4.8898 ACC: 43.57: 100%|██████████| 196/196 [00:11<00:00, 17.28it/s]
Lin.Train Epoch: [10] Loss: 4.9250 ACC: 43.46: 100%|██████████| 196/196 [00:11<00:00, 16.45it/s]
Lin.Train Epoch: [11] Loss: 4.9856 ACC:

Total Accuracy: 60.75000000000001





In [37]:
#knn accuracy for all task seperately
for task, loader in enumerate(train_data_loaders_knn):
    knn_acc, task_acc_arr = Knn_Validation_cont(model, [loader], [test_data_loaders[task]], device=device, K=200, sigma=0.5) 
    print(f"Task {task}, knn acc: {knn_acc}")

Task 0, knn acc: 0.683
Task 1, knn acc: 0.7335
Task 2, knn acc: 0.7265
Task 3, knn acc: 0.6975
Task 4, knn acc: 0.7525


In [39]:
#linear accuracy for all tasks seperately
for task, loader in enumerate(train_data_loaders_linear):
    lin_epoch=50
    classifier = LinearClassifier(num_classes = 20).to(device)
    lin_optimizer = torch.optim.SGD(classifier.parameters(), 0.2, momentum=0.9, weight_decay=0) # Infomax: no weight decay, epoch 100, cosine scheduler
    lin_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(lin_optimizer, lin_epoch, eta_min=0.002) #scheduler + values ref: infomax paper
    test_loss, test_acc1, test_acc5, classifier = linear_evaluation(model, loader,[test_data_loaders[task]],lin_optimizer, classifier, 
                                                                lin_scheduler, epochs=lin_epoch, device=device, intra_task=True) 

Lin.Train Epoch: [1] Loss: 11.5543 ACC: 41.69: 100%|██████████| 40/40 [00:02<00:00, 13.61it/s]
Lin.Train Epoch: [2] Loss: 5.6058 ACC: 57.22: 100%|██████████| 40/40 [00:03<00:00, 12.90it/s]
Lin.Train Epoch: [3] Loss: 4.8122 ACC: 58.84: 100%|██████████| 40/40 [00:03<00:00, 12.43it/s]
Lin.Train Epoch: [4] Loss: 4.7837 ACC: 59.30: 100%|██████████| 40/40 [00:03<00:00, 12.17it/s]
Lin.Train Epoch: [5] Loss: 4.6921 ACC: 59.82: 100%|██████████| 40/40 [00:03<00:00, 12.42it/s]
Lin.Train Epoch: [6] Loss: 4.0252 ACC: 61.62: 100%|██████████| 40/40 [00:03<00:00, 12.34it/s]
Lin.Train Epoch: [7] Loss: 4.3301 ACC: 60.35: 100%|██████████| 40/40 [00:03<00:00, 12.25it/s]
Lin.Train Epoch: [8] Loss: 3.5595 ACC: 63.57: 100%|██████████| 40/40 [00:03<00:00, 12.52it/s]
Lin.Train Epoch: [9] Loss: 3.9889 ACC: 61.62: 100%|██████████| 40/40 [00:03<00:00, 12.06it/s]
Lin.Train Epoch: [10] Loss: 4.4718 ACC: 61.31: 100%|██████████| 40/40 [00:03<00:00, 12.12it/s]
Lin.Train Epoch: [11] Loss: 4.2628 ACC: 62.03: 100%|██████

### Offline Model

In [40]:
#load model here
file_name = 'checkpoints/checkpoint_cifar100-algobasic_barlow-e[1000]-b256-lr0.3-CS[100]acc_69.38.pth.tar'
dict = torch.load(file_name)
device = torch.device("cuda:" + str(args.cuda_device) if torch.cuda.is_available() else "cpu")
print(device)
if 'infomax' in args.appr or 'barlow' in args.appr:
    proj_hidden = args.proj_hidden
    proj_out = args.proj_out
    encoder = Encoder(hidden_dim=proj_hidden, output_dim=proj_out, normalization = 'batch', weight_standard = args.weight_standard,appr_name =args.appr)
    model = Siamese(encoder)
    model.to(device) #automatically detects from model

model.load_state_dict(dict['state_dict'])

cuda:6


<All keys matched successfully>

In [41]:
#train with all classes an test task by task
lin_epoch=100
classifier = LinearClassifier(num_classes = 100).to(device)
lin_optimizer = torch.optim.SGD(classifier.parameters(), 0.2, momentum=0.9, weight_decay=0) # Infomax: no weight decay, epoch 100, cosine scheduler
lin_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(lin_optimizer, lin_epoch, eta_min=0.002) #scheduler + values ref: infomax paper
test_loss, test_acc1, test_acc5, classifier = linear_evaluation(model, train_data_loaders_linear_all[0],
                                                            test_data_loaders,lin_optimizer, classifier, 
                                                            lin_scheduler, epochs=lin_epoch, device=device) 
#test on all classes but report inter and intra task errors
inter_error, intra_error, acc = inter_intra_task_error(model, classifier, test_data_loaders_all[0])
print(f'Total Accuracy: {test_acc1}')

Lin.Train Epoch: [1] Loss: 2.0428 ACC: 54.07: 100%|██████████| 196/196 [00:12<00:00, 16.27it/s]
Lin.Train Epoch: [2] Loss: 1.8205 ACC: 59.11: 100%|██████████| 196/196 [00:11<00:00, 16.36it/s]
Lin.Train Epoch: [3] Loss: 1.8224 ACC: 59.63: 100%|██████████| 196/196 [00:10<00:00, 18.27it/s]
Lin.Train Epoch: [4] Loss: 1.7956 ACC: 60.22: 100%|██████████| 196/196 [00:12<00:00, 16.20it/s]
Lin.Train Epoch: [5] Loss: 1.8039 ACC: 60.42: 100%|██████████| 196/196 [00:11<00:00, 16.35it/s]
Lin.Train Epoch: [6] Loss: 1.7664 ACC: 61.39: 100%|██████████| 196/196 [00:12<00:00, 16.07it/s]
Lin.Train Epoch: [7] Loss: 1.7825 ACC: 61.41: 100%|██████████| 196/196 [00:11<00:00, 16.48it/s]
Lin.Train Epoch: [8] Loss: 1.7691 ACC: 61.35: 100%|██████████| 196/196 [00:10<00:00, 17.97it/s]
Lin.Train Epoch: [9] Loss: 1.7824 ACC: 61.26: 100%|██████████| 196/196 [00:11<00:00, 16.69it/s]
Lin.Train Epoch: [10] Loss: 1.7970 ACC: 61.38: 100%|██████████| 196/196 [00:11<00:00, 16.44it/s]
Lin.Train Epoch: [11] Loss: 1.8012 ACC:

Total Accuracy: 70.39999999999999





In [43]:
#knn accuracy for all task seperately
for task, loader in enumerate(train_data_loaders_knn):
    knn_acc, task_acc_arr = Knn_Validation_cont(model, [loader], [test_data_loaders[task]], device=device, K=200, sigma=0.5) 
    print(f"Task {task}, knn acc: {knn_acc}")

Task 0, knn acc: 0.822
Task 1, knn acc: 0.843
Task 2, knn acc: 0.817
Task 3, knn acc: 0.8005
Task 4, knn acc: 0.8565


In [44]:
#linear accuracy for all tasks seperately
for task, loader in enumerate(train_data_loaders_linear):
    lin_epoch=50
    classifier = LinearClassifier(num_classes = 20).to(device)
    lin_optimizer = torch.optim.SGD(classifier.parameters(), 0.2, momentum=0.9, weight_decay=0) # Infomax: no weight decay, epoch 100, cosine scheduler
    lin_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(lin_optimizer, lin_epoch, eta_min=0.002) #scheduler + values ref: infomax paper
    test_loss, test_acc1, test_acc5, classifier = linear_evaluation(model, loader,[test_data_loaders[task]],lin_optimizer, classifier, 
                                                                lin_scheduler, epochs=lin_epoch, device=device, intra_task=True) 

Lin.Train Epoch: [1] Loss: 1.3597 ACC: 67.91: 100%|██████████| 40/40 [00:03<00:00, 11.34it/s]
Lin.Train Epoch: [2] Loss: 1.2619 ACC: 74.14: 100%|██████████| 40/40 [00:03<00:00, 11.80it/s]
Lin.Train Epoch: [3] Loss: 1.2223 ACC: 74.49: 100%|██████████| 40/40 [00:03<00:00, 13.04it/s]
Lin.Train Epoch: [4] Loss: 1.0691 ACC: 76.01: 100%|██████████| 40/40 [00:03<00:00, 12.49it/s]
Lin.Train Epoch: [5] Loss: 1.0968 ACC: 76.49: 100%|██████████| 40/40 [00:03<00:00, 11.80it/s]
Lin.Train Epoch: [6] Loss: 1.2020 ACC: 76.37: 100%|██████████| 40/40 [00:03<00:00, 11.12it/s]
Lin.Train Epoch: [7] Loss: 1.1251 ACC: 76.87: 100%|██████████| 40/40 [00:03<00:00, 11.60it/s]
Lin.Train Epoch: [8] Loss: 1.3021 ACC: 75.37: 100%|██████████| 40/40 [00:03<00:00, 11.59it/s]
Lin.Train Epoch: [9] Loss: 1.1315 ACC: 77.05: 100%|██████████| 40/40 [00:03<00:00, 11.36it/s]
Lin.Train Epoch: [10] Loss: 1.1742 ACC: 76.63: 100%|██████████| 40/40 [00:03<00:00, 11.79it/s]
Lin.Train Epoch: [11] Loss: 1.1222 ACC: 77.12: 100%|███████