In [1]:
from NN_Thesis.nn_classes import *
from NN_Thesis.trainer import *
from NN_Thesis.adapters import *
import torch
import torchvision

import torch.nn as nn
import torch.nn.functional as F
from torch import optim


from torchvision.transforms import transforms
import numpy as np
import os
from PIL import Image
import random

from matplotlib import pyplot as plt

import wandb


In [2]:
from NN_Thesis.models import *

In [3]:
BNN = ResNet_cifar10(10)
x = torch.rand((3,80,32,32))


x1 = BNN.layer2(x)
x2 = BNN.layer3(x1)

x1.shape,x2.shape,BNN.avgpool(x2).shape

(torch.Size([3, 160, 16, 16]),
 torch.Size([3, 320, 8, 8]),
 torch.Size([3, 320, 1, 1]))

In [11]:
BNN.layer2

Sequential(
  (0): BasicBlock(
    (conv1): BinarizeConv2d(80, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (tanh1): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (conv2): BinarizeConv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (tanh2): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (bn2): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (downsample): Sequential(
      (0): BinarizeConv2d(80, 160, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv1): BinarizeConv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (tanh1): Hardtanh(min_val=-1

In [5]:
torch.manual_seed(123)
random.seed(123)
batch_size = 16


from NN_Thesis.dataset import cifar_n_dataset
from torch.utils.data import DataLoader


train_path = 'data/cifar_5/cifar_59/train/data'
test_path ='data/cifar_5/cifar_59/test/data'

train59_data = cifar_n_dataset(train_path)
test59_data = cifar_n_dataset(train_path)
# print(train_data.data.shape)

def normalize_channels(data):
    #We have a nxCxWxH array
    d = data
    d = torch.flatten(data,2,-1).to(dtype = torch.float32)/255
    mean= torch.mean(d,dim = [0,2])
    std = torch.std(d,dim = [0,2])
    return mean,std

mean,std = normalize_channels(train59_data.data)

train59_transform = transforms.Compose([
    # transforms.ToTensor(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(mean, std),
])


test59_transform = transforms.Compose([
    # transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

train59_data.transform = train59_transform
test59_data.transform = test59_transform
train59_loader = DataLoader(train59_data,batch_size=64,shuffle=True)
test59_loader = DataLoader(test59_data,batch_size=64,shuffle=True)
classes = tuple(train59_data.label_names)
classes

('dog', 'frog', 'horse', 'ship', 'truck')

In [5]:
train_dataloader = DataLoader(train_data,batch_size =64,shuffle = True)

x = train_data[0]
img,label = next(iter(train_dataloader))

img.shape


torch.Size([64, 3, 32, 32])

# Code For Initial Cifar5 Training

In [5]:
train_dataloader = DataLoader(train_data,batch_size =64,shuffle = True)
test_dataloader = DataLoader(test_data,batch_size =64)

classes = tuple(train_data.label_names)

BNN_resnet18 = resnet_binary(num_classes = 5 , depth = 18, dataset = 'cifar10')
resnet18 = resnet(num_classes = 5 , depth = 18, dataset = 'cifar10')


# ImageNet_resnet18 = resnet_binary(num_classes = 5 , depth = 18, dataset = 'imagenet')

# for name,p in BNN_resnet18.named_parameters():
#     print(name,p.numel())

# print('\n\n','*'*100,'\n\n')

# for name,p in resnet18.named_parameters():
#     print(name,p.numel())

# print('\n\n','*'*100,'\n\n')

# for name,p in ImageNet_resnet18.named_parameters():
#     print(name,p.numel())


BNN_trainer = Trainer(BNN_resnet18,model_name = 'Baseline_BNN_Resnet18',project_name = 'Cifar5_2023',classes = classes,seed = 123,binarise = True)
resnet_trainer =Trainer(resnet18,model_name = 'Baseline_Resnet18',project_name='Cifar5_2023',classes = classes,seed = 123)

#Set Training Params
for trainer in [BNN_trainer]:
    trainer.lr = 1e-3
    trainer.set_optimizer(optimizer=torch.optim.Adam)
    trainer.set_scheduler(None)
    trainer.epochs =200
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 200
    trainer.train(train_dataloader,test_dataloader)

Optimizer Set to New: {'state': {}, 'param_groups': [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}]}
No lr scheduler


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: Cifar5_2023, Run Name Baseline_BNN_Resnet18 


Run Start : 2023-03-04 11-10-36
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 200
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.models.resnet_binary.ResNet_cifar10'>
binerised_training : True
Number of Elements : 4336415
Initial accuracy:


KeyboardInterrupt: 

# Code For Finetuning

In [6]:
train_path = 'data/cifar_5/cifar_59/train/data'
test_path ='data/cifar_5/cifar_59/test/data'

train59_data = cifar_n_dataset(train_path)
test59_data = cifar_n_dataset(train_path)
# print(train_data.data.shape)

def normalize_channels(data):
    #We have a nxCxWxH array
    d = data
    d = torch.flatten(data,2,-1).to(dtype = torch.float32)/255
    mean= torch.mean(d,dim = [0,2])
    std = torch.std(d,dim = [0,2])
    return mean,std

mean,std = normalize_channels(train59_data.data)

train59_transform = transforms.Compose([
    # transforms.ToTensor(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(mean, std),
])


test59_transform = transforms.Compose([
    # transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

train59_data.transform = train59_transform
test59_data.transform = test59_transform
train59_loader = DataLoader(train59_data,batch_size=64,shuffle=True)
test59_loader = DataLoader(test59_data,batch_size=64,shuffle=True)
classes = tuple(train59_data.label_names)
classes

('dog', 'frog', 'horse', 'ship', 'truck')

## Feature Extraction

In [7]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

PATH = 'SavedModels\Cifar5_2023\Baseline_BNN_Resnet18_2023-03-03 12-25-21\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)

feat_extr_model = resnet18_adapt(num_classes= 5)
feat_extr_model.load_state_dict(BinCifar5_state_dict)
feat_extr_model.freeze()
feat_extr_model.fc = BinarizeLinear(64*5,5)

feat_ex = Trainer(feat_extr_model,model_name = 'feat_extract_final_layer_only',project_name = 'Cifar5_2023',classes = classes,seed = 123,binarise = True)
feat_ex.lr = 0.001
trainer.set_optimizer(optimizer=torch.optim.Adam)
trainer.set_scheduler(None)
feat_ex.epochs = 20
feat_ex.tags =['finetune']
feat_ex.train(train59_loader,test59_loader)





Optimizer Set to New: {'state': {}, 'param_groups': [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}]}
No lr scheduler


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.131619…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023, Run Name feat_extract_final_layer_only 


Run Start : 2023-03-03 16-35-11
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 20
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4336415
Initial accuracy:
Test Accuracy : 14.6%, Test Loss: 1.8535007784128799
best_acc.pth saved!
Test Accuracy : 15.2%, Test Loss: 1.834990711773143
best_acc.pth saved!
epoch: 1 average loss: 1.074
Test Accuracy : 68.4%, Test Loss: 0.9141801732885259
best_acc.pth saved!
Epoch Time (Training + Test) = 20.37 seconds
epoch: 2 average loss: 0.922
Test Accuracy : 62.4%, Test Loss: 1.3213989371838777
Epoch Time (Training + Test) = 20.14 seconds
epoch:

In [9]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

PATH = 'SavedModels\Cifar5_2023\Baseline_BNN_Resnet18_2023-03-03 12-25-21\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)


finetune_model = resnet18_adapt(num_classes= 5)
finetune_model.load_state_dict(BinCifar5_state_dict)
finetune_model.freeze()
finetune_model.fc = BinarizeLinear(64*5,5)

finetune = Trainer(finetune_model,model_name = 'Regular_finetune',project_name='Cifar5_2023',classes = classes,seed = 123,binarise = True)

m = finetune.model

#FineTune
finetune.epochs = 75
finetune.lr = 1e-3
finetune.set_optimizer(optimizer=torch.optim.Adam)
finetune.set_scheduler(None)

m.unfreeze()
finetune.train(train59_loader,test59_loader)

Optimizer Set to New: {'state': {}, 'param_groups': [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}]}
No lr scheduler


0,1
Current Best Acc,▁██
epoch,▁▅█
epoch time (s),▁▇█
lr,▁▁▁
test_accuracy,▁██
test_loss,█▁▁
training_loss,█▃▁

0,1
Current Best Acc,0.76968
epoch,2.0
epoch time (s),36.42151
lr,0.001
test_accuracy,0.76968
test_loss,0.65365
training_loss,0.69222


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023, Run Name Regular_finetune 


Run Start : 2023-03-03 16-45-46
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4336415
Initial accuracy:
Test Accuracy : 26.3%, Test Loss: 1.6069094076790773
best_acc.pth saved!
Test Accuracy : 25.1%, Test Loss: 1.629313858573699
epoch: 1 average loss: 0.999
Test Accuracy : 75.7%, Test Loss: 0.6881066791694183
best_acc.pth saved!
Epoch Time (Training + Test) = 34.89 seconds
epoch: 2 average loss: 0.678
Test Accuracy : 78.2%, Test Loss: 0.6274511711981595
best_acc.pth saved!
Epoch Time (Trainin

In [17]:
a = finetune.get_param_info()

z = finetune.model
element_info = dict()
for layer in z.parameters():
    if str(layer.dtype) not in element_info.keys():
        element_info[str(layer.dtype)] = layer.numel()
    else:
        element_info[str(layer.dtype)] += layer.numel()

element_info[str(layer.dtype)]

4336415

# Adapter FineTune

## Single Autoencoder After bn2 

In [8]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('BinaryCifar5_best_acc.pth',map_location= device)

bottleneck = autoencoder_adapter(320,200)
adapt_fine = resnet18_adapt(num_classes=5)
adapt_fine.load_state_dict(Bin_state_dict)
adapt_fine.freeze()
adapt_fine.add_adapter(after = 'bn2',adapter = bottleneck)

adapt_fine_trainer = adapter_Trainer(model = adapt_fine,seed = 123,model_name = 'adapt_fine',project_name='Cifar5',classes = classes,binarise= True)
for trainer in [adapt_fine_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 128
    trainer.epochs =70
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 70
    # trainer.scheduler.T_max= trainer.T_max
    # trainer.optimizer = optim.Adam(trainer.model.parameters(), lr=trainer.lr)
    trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
    trainer.train(train59_loader,test59_loader)

start
start_epoch : 0
lr : 0.01
batch_size : 128
epochs : 70
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
epoch: 1 average loss: 1.388 Epoch Time 0.14 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_latest.pth saved!
Overall Accuracy : 44.1%
best_acc.pth saved!
epoch: 2 average loss: 1.144 Epoch Time 0.28 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_latest.pth saved!
Overall Accuracy : 52.3%
best_acc.pth saved!
epoch: 3 average loss: 1.068 Epoch Time 0.42 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_best_loss.pth saved!
c:\Users\John Su\Downloads\Sydn

## 2 Adapters


In [5]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = 'SavedModels\Cifar5_2023\Baseline_BNN_Resnet18_2023-03-03 12-25-21\Baseline_BNN_Resnet18_best_acc.pth'
Bin_state_dict = torch.load(PATH,map_location= device)

# bottleneck = autoencoder_adapter(320,200)
conv_bn = conv_channel_adapter(80,80,kernel = 1, padding= 0)

adapt_fine2 = resnet18_adapt(num_classes=5)
adapt_fine2.load_state_dict(Bin_state_dict)
adapt_fine2.freeze()

# adapt_fine2.add_adapter(after = 'bn2',adapter = bottleneck)
adapt_fine2.add_adapter(after = 'layer1',adapter = conv_bn)



In [11]:
for p in conv_bn.parameters():
    print(p.shape)

torch.Size([40, 80, 1, 1])
torch.Size([80, 40, 1, 1])
torch.Size([80])
torch.Size([80])
torch.Size([80])
torch.Size([80])


In [14]:
adapt_fine2_trainer = Trainer(model = adapt_fine2,seed = 123,model_name = 'adapter_layer_1_conv_channel_80',project_name = 'Cifar5_2023',classes = classes,binarise= True)
for trainer in [adapt_fine2_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 1e-3
    trainer.batch_size = 64
    trainer.epochs =75
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.set_scheduler(None)
    trainer.set_optimizer(torch.optim.Adam)
    trainer.train(train59_loader,test59_loader)

No lr scheduler
Optimizer Set to New: {'state': {}, 'param_groups': [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56]}]}


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Current Best Acc,▁▇▇▇▇███████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁█████▇▇▇▇▇▇▇▇▆▆▅▅▅▅▆▆▅▅▆▆▅▅▅▅▅▆▅▅▆▅▅▅▅▅
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▇▇▇▇███████████████████████████████████
test_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.87952
Current Best Acc,0.87952
Total Time (hours),1.02395
epoch,75.0
epoch time (s),23.67428
lr,0.001
test_accuracy,0.87952
test_loss,0.34309
training_loss,0.40633


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023, Run Name adapter_layer_1_conv_channel_80 


Run Start : 2023-03-04 17-35-41
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4349535
Initial accuracy:
Test Accuracy : 5.0%, Test Loss: 6.487302480146403
best_acc.pth saved!
Test Accuracy : 4.6%, Test Loss: 6.581035326204032
epoch: 1 average loss: 1.784
Test Accuracy : 73.5%, Test Loss: 0.9925111116808089
best_acc.pth saved!
Epoch Time (Training + Test) = 25.63 seconds
epoch: 2 average loss: 0.971
Test Accuracy : 77.6%, Test Loss: 0.7936586121006695
best_acc.pth saved!
Epoch T

In [9]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = 'SavedModels\Cifar5_2023\Baseline_BNN_Resnet18_2023-03-03 12-25-21\Baseline_BNN_Resnet18_best_acc.pth'
Bin_state_dict = torch.load(PATH,map_location= device)


In [8]:
input_channels = [80,160,320]
n_channels_p_group = [1,5,10]
layers = ['layer1','layer2','layer3']

for after_layer,input_channel in zip(layers,input_channels):
    for group in n_channels_p_group:
        conv_bn = conv_channel_adapter3(input_channel,groups = group,kernel = 1, padding= 0,nonlinearity='relu')
        adapt_fine2 = resnet18_adapt(num_classes=5)
        adapt_fine2.load_state_dict(Bin_state_dict)
        adapt_fine2.freeze()
        # adapt_fine2.add_adapter(after = 'bn2',adapter = bottleneck)
        adapt_fine2.add_adapter(after = after_layer,adapter = conv_bn)
        trainer = Trainer(model = adapt_fine2,seed = 123,model_name = f'adapter_{after_layer}_group_{group}',project_name = 'Cifar5_2023_Serial',classes = classes,binarise= True)
        m = trainer.model
        m.to(trainer.device)
        m.fc = BinarizeLinear(320,5) # Pretend we have to start with a new head
        for layer in [m.fc,m.bn3]:
            for p in layer.parameters():
                p.requires_grad = True
            layer.train()
        trainer.lr = 1e-3
        trainer.batch_size = 64
        trainer.epochs =75
        trainer.epoch_chkpts = []
        trainer.start_epoch = 0
        trainer.set_scheduler(None)
        trainer.set_optimizer(torch.optim.Adam)
        trainer.train(train59_loader,test59_loader)
    

No lr scheduler
Optimizer Set to New: {'state': {}, 'param_groups': [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55]}]}


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer1_group_1 


Run Start : 2023-05-01 16-57-03
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4343135
Initial accuracy:
Test Accuracy : 18.6%, Test Loss: 1.67098627218505
best_acc.pth saved!
Test Accuracy : 18.6%, Test Loss: 1.6710480488169834
best_acc.pth saved!
epoch: 1 average loss: 1.475
Test Accuracy : 61.9%, Test Loss: 1.0705235833707063
best_acc.pth saved!
Epoch Time (Training + Test) = 22.92 seconds
epoch: 2 average loss: 0.948
Test Accuracy : 67.0%, Test Loss: 0.8943389462083197
best_ac

0,1
Current Best Acc,▁▆▇▇▇▇▇█████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),█▅▄▄▄▄▄▄▂▂▁▂▂▂▂▂▂▂▂▂▂▁▂▂▂▃▁▂▂▂▁▁▁▂▁▁▁▁▁▁
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▆▇▇▇▇▇█▇███████████████████████████████
test_loss,█▄▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▇▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.82188
Current Best Acc,0.82188
Total Time (hours),0.89711
epoch,75.0
epoch time (s),20.97934
lr,0.001
test_accuracy,0.8206
test_loss,0.49775
training_loss,0.5564


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01691666666883975, max=1.0)…

start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer1_group_5 


Run Start : 2023-05-01 17-24-27
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4338015
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.8680778205242303
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.8684225820214546
epoch: 1 average loss: 1.933
Test Accuracy : 54.1%, Test Loss: 1.25204926210901
best_acc.pth saved!
Epoch Time (Training + Test) = 22.12 seconds
epoch: 2 average loss: 1.166
Test Accuracy : 60.8%, Test Loss: 1.014149255764759
best_acc.pth saved!
Epoch Ti

0,1
Current Best Acc,▁▅▇▇▇▇▇█████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▇▇▇▇▇█▇███████████████████████████████
test_loss,█▅▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,██▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.76912
Current Best Acc,0.76912
Total Time (hours),0.91492
epoch,75.0
epoch time (s),21.36461
lr,0.001
test_accuracy,0.7596
test_loss,0.64595
training_loss,0.67701


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer1_group_10 


Run Start : 2023-05-01 17-52-23
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4337375
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.6497376705984326
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.6513245245989632
epoch: 1 average loss: 1.849
Test Accuracy : 52.7%, Test Loss: 1.2779286078479895
best_acc.pth saved!
Epoch Time (Training + Test) = 21.75 seconds
epoch: 2 average loss: 1.204
Test Accuracy : 58.0%, Test Loss: 1.0762608412586514
best_acc.pth saved!
Epoc

0,1
Current Best Acc,▁▅▆▇▇▇▇█████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁███████████████████████████████████████
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▆▇▇▇▇█████████████████████████████████
test_loss,█▅▃▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,▇█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.73724
Current Best Acc,0.73724
Total Time (hours),0.90029
epoch,75.0
epoch time (s),21.576
lr,0.001
test_accuracy,0.729
test_loss,0.72052
training_loss,0.77224


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01691666666495924, max=1.0)…

start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer2_group_1 


Run Start : 2023-05-01 18-19-54
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4362655
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.8003032268465633
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.7986184279327198
epoch: 1 average loss: 1.510
Test Accuracy : 62.6%, Test Loss: 1.0597142034479419
best_acc.pth saved!
Epoch Time (Training + Test) = 18.85 seconds
epoch: 2 average loss: 0.968
Test Accuracy : 67.7%, Test Loss: 0.8915532444749037
best_acc.pth saved!
Epoch

0,1
Current Best Acc,▁▆▇▇▇▇▇▇▇███████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▇▆██████████████████████
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▆▇▇▇▇▇▇▇███████████████████████████████
test_loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▆▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.83404
Current Best Acc,0.83404
Total Time (hours),0.81414
epoch,75.0
epoch time (s),20.00437
lr,0.001
test_accuracy,0.82812
test_loss,0.46948
training_loss,0.5353


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333435779, max=1.0)…

start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer2_group_5 


Run Start : 2023-05-01 18-44-48
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4342175
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.1138985663118874
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.1147666014063997
epoch: 1 average loss: 1.680
Test Accuracy : 54.2%, Test Loss: 1.200319980264015
best_acc.pth saved!
Epoch Time (Training + Test) = 19.99 seconds
epoch: 2 average loss: 1.123
Test Accuracy : 60.2%, Test Loss: 1.020284419626836
best_acc.pth saved!
Epoch T

0,1
Current Best Acc,▁▅▆▇▇▇▇▇▇███████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▃▇█▇█▅▅▅▅▄▅▁▆▆▆▅▇▇▇▆▇▆▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▆▇▇▇▇▇▇▇██████████████████████████████
test_loss,█▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▆▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.78284
Current Best Acc,0.78284
Total Time (hours),0.81215
epoch,75.0
epoch time (s),19.70809
lr,0.001
test_accuracy,0.78284
test_loss,0.59019
training_loss,0.65961


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer2_group_10 


Run Start : 2023-05-01 19-09-38
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4339615
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.7790370313712702
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.7793169247219935
epoch: 1 average loss: 1.767
Test Accuracy : 55.7%, Test Loss: 1.2214859577700914
best_acc.pth saved!
Epoch Time (Training + Test) = 19.88 seconds
epoch: 2 average loss: 1.158
Test Accuracy : 60.4%, Test Loss: 1.0397982707108988
best_acc.pth saved!
Epoc

0,1
Current Best Acc,▁▅▇▇▇▇▇█████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁█▅▂▂▂▂▃▃▃▃▃▃▃▃
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▇▇▇▇▇█████████████████████████████████
test_loss,█▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,██▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.75932
Current Best Acc,0.75932
Total Time (hours),0.875
epoch,75.0
epoch time (s),22.34509
lr,0.001
test_accuracy,0.75628
test_loss,0.65048
training_loss,0.71706


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer3_group_1 


Run Start : 2023-05-01 19-36-22
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4440095
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.078699875365743
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.078714650915102
epoch: 1 average loss: 1.470
Test Accuracy : 56.9%, Test Loss: 1.1496806855091963
best_acc.pth saved!
Epoch Time (Training + Test) = 20.47 seconds
epoch: 2 average loss: 1.119
Test Accuracy : 59.6%, Test Loss: 1.061072276833722
best_acc.pth saved!
Epoch Ti

0,1
Current Best Acc,▁▅▆▆▇▇▇▇▇▇▇▇▇▇██████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▃█████▆▃▃▃▃▃▃▃▃▃▂▃▃▃▄▃▃▃▃▃▃▃▃▂▃▃▂▃▂▁▂▂▅▃
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▆▆▇▇▇▇▇▇▇▇▇▇█▇█▇▇█████████████████████
test_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▅▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.7834
Current Best Acc,0.7834
Total Time (hours),0.81163
epoch,75.0
epoch time (s),19.49213
lr,0.001
test_accuracy,0.77908
test_loss,0.60399
training_loss,0.64693


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer3_group_5 


Run Start : 2023-05-01 20-01-15
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4358175
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.8386541952562454
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.8378001381369198
epoch: 1 average loss: 1.589
Test Accuracy : 52.5%, Test Loss: 1.3026168410430479
best_acc.pth saved!
Epoch Time (Training + Test) = 19.06 seconds
epoch: 2 average loss: 1.203
Test Accuracy : 56.6%, Test Loss: 1.1543785016555006
best_acc.pth saved!
Epoch

0,1
Current Best Acc,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇█████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▅▆▅▅▇▇█▇▇▇▇▆▇▇▇▆▇▇▇▇▇▇▆▇▆▆█▇▇█▇▇█▇███▇▇
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇█▇▇▇█████████████████████
test_loss,█▅▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▆▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.74612
Current Best Acc,0.74612
Total Time (hours),0.80206
epoch,75.0
epoch time (s),19.28258
lr,0.001
test_accuracy,0.73084
test_loss,0.70491
training_loss,0.74103


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023_Serial, Run Name adapter_layer3_group_10 


Run Start : 2023-05-01 20-25-50
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4347935
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.0609252440655017
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.061098366137356
best_acc.pth saved!
epoch: 1 average loss: 1.755
Test Accuracy : 51.9%, Test Loss: 1.321567915284725
best_acc.pth saved!
Epoch Time (Training + Test) = 19.31 seconds
epoch: 2 average loss: 1.267
Test Accuracy : 56.0%, Test Loss: 1.154541275232954
best_ac

# add 3 adapters

In [10]:
input_channels = [80,160,320]
n_channels_p_group = [1,5,10,20]
layers = ['layer1','layer2','layer3']

# for after_layer,input_channel in zip(layers,input_channels):
for group in n_channels_p_group:
    conv_bn = conv_channel_adapter3(80,groups = group,kernel = 1, padding= 0,nonlinearity='relu')
    conv_bn2 = conv_channel_adapter3(160,groups = group,kernel = 1, padding= 0,nonlinearity='relu')
    conv_bn3 = conv_channel_adapter3(320,groups = group,kernel = 1, padding= 0,nonlinearity='relu')
    adapt_fine2 = resnet18_adapt(num_classes=5)
    adapt_fine2.load_state_dict(Bin_state_dict)
    adapt_fine2.freeze()
    # adapt_fine2.add_adapter(after = 'bn2',adapter = bottleneck)
    adapt_fine2.add_adapter(after = 'layer1',adapter = conv_bn)
    adapt_fine2.add_adapter(after = 'layer2',adapter = conv_bn2)
    adapt_fine2.add_adapter(after = 'layer3',adapter = conv_bn3)
    trainer = Trainer(model = adapt_fine2,seed = 123,model_name = f'3_adapters_group_{group}',project_name = 'Cifar5_2023_Serial',classes = classes,binarise= True)
    m = trainer.model
    m.to(trainer.device)
    m.fc = BinarizeLinear(320,5) # Pretend we have to start with a new head
    for layer in [m.fc,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 1e-3
    trainer.batch_size = 64
    trainer.epochs =75
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.set_scheduler(None)
    trainer.set_optimizer(torch.optim.Adam)
    trainer.train(train59_loader,test59_loader)
    

No lr scheduler
Optimizer Set to New: {'state': {}, 'param_groups': [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65]}]}


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Current Best Acc,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▄██▆▃▄▃▃▃▃▃▃▃▄▃▃▂▂▃▃▂▂▁▄▂▂▃▂▃▃▃▂▂▂▂▂▂▂▂▂
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████████████
test_loss,█▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▆▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.72288
Current Best Acc,0.72288
Total Time (hours),0.78011
epoch,75.0
epoch time (s),18.62416
lr,0.001
test_accuracy,0.7156
test_loss,0.74147
training_loss,0.8022


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01691666666883975, max=1.0)…

start
Project Name: Cifar5_2023_Serial, Run Name 3_adapters_group_1 


Run Start : 2023-05-02 10-48-43
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4473055
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.7927269886826616
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.7920419651529063
epoch: 1 average loss: 1.208
Test Accuracy : 71.3%, Test Loss: 0.784606251875153
best_acc.pth saved!
Epoch Time (Training + Test) = 25.35 seconds
epoch: 2 average loss: 0.760
Test Accuracy : 76.0%, Test Loss: 0.6580157061214642
best_acc.pth saved!
Epoch Time

0,1
Current Best Acc,▁▆▇▇▇▇▇▇████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁██████████████████▇▆▇▇▇▇▇▇▇▇▇▃▇▆▇▇▇▇▇▇▇
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▆▇▇▇▇▇█▇███████████████████████████████
test_loss,█▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▅▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.9104
Current Best Acc,0.9104
Total Time (hours),1.03652
epoch,75.0
epoch time (s),24.77467
lr,0.001
test_accuracy,0.90248
test_loss,0.27611
training_loss,0.30737


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333330477276, max=1.0…

start
Project Name: Cifar5_2023_Serial, Run Name 3_adapters_group_5 


Run Start : 2023-05-02 11-20-18
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4365535
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.6990833977604156
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.6990660846690693
epoch: 1 average loss: 1.482
Test Accuracy : 61.6%, Test Loss: 1.0389637355609318
best_acc.pth saved!
Epoch Time (Training + Test) = 24.26 seconds
epoch: 2 average loss: 0.963
Test Accuracy : 67.8%, Test Loss: 0.8571355118775916
best_acc.pth saved!
Epoch Tim

0,1
Current Best Acc,▁▅▆▇▇▇▇▇▇███████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁██▅▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▆▇▇▇▇▇▇█▇█████████████████████████████
test_loss,█▅▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▇▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.86632
Current Best Acc,0.86632
Total Time (hours),0.84468
epoch,75.0
epoch time (s),19.82344
lr,0.001
test_accuracy,0.86264
test_loss,0.37574
training_loss,0.42418


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5_2023_Serial, Run Name 3_adapters_group_10 


Run Start : 2023-05-02 11-46-10
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4352095
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.7093171030664078
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.7094946819193222
epoch: 1 average loss: 1.612
Test Accuracy : 59.0%, Test Loss: 1.0937354360395075
best_acc.pth saved!
Epoch Time (Training + Test) = 20.02 seconds
epoch: 2 average loss: 1.039
Test Accuracy : 66.1%, Test Loss: 0.8983862023524312
best_acc.pth saved!
Epoch Ti

0,1
Current Best Acc,▁▅▇▇▇▇▇▇▇▇██████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████▇▇██▇▇▇▇▇█▇
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▅▇▇▇▇▇▇▇▇██████████████████████████████
test_loss,█▅▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▇▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.8356
Current Best Acc,0.8356
Total Time (hours),0.83066
epoch,75.0
epoch time (s),20.05672
lr,0.001
test_accuracy,0.83256
test_loss,0.45363
training_loss,0.50632


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333330477276, max=1.0…

start
Project Name: Cifar5_2023_Serial, Run Name 3_adapters_group_20 


Run Start : 2023-05-02 12-11-43
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4345375
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 1.7103955169467975
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 1.7103753278932303
epoch: 1 average loss: 1.706
Test Accuracy : 54.0%, Test Loss: 1.2425737079147183
best_acc.pth saved!
Epoch Time (Training + Test) = 19.99 seconds
epoch: 2 average loss: 1.147
Test Accuracy : 58.7%, Test Loss: 1.0822616778981045
best_acc.pth saved!
Epoch Ti

# UniAdapt


In [6]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = 'SavedModels\Cifar5_2023\Baseline_BNN_Resnet18_2023-03-03 12-25-21\Baseline_BNN_Resnet18_best_acc.pth'
Bin_state_dict = torch.load(PATH,map_location= device)
BNN_uni = BNN_Resnet_UniAdapt(5)
BNN_uni.load_state_dict(Bin_state_dict,strict = False),device

(_IncompatibleKeys(missing_keys=['head_bn.weight', 'head_bn.bias', 'head_bn.running_mean', 'head_bn.running_var', 'head.weight', 'head.bias'], unexpected_keys=[]),
 'cuda:0')

In [7]:
BNN_uni.freeze()
BNN_uni.cuda()
BNN_uni.conv1.weight.device

device(type='cuda', index=0)

In [8]:
n1 = uniAdapt_Net([80,160,320],[80,160,320],block = thinBlock2)
n1.pre_process[-1] = nn.BatchNorm2d(320)
n1 = n1.cuda()
n1.train()
adapt_net = nn.Sequential(n1,nn.AdaptiveMaxPool2d(1),nn.Flatten(),nn.Linear(320,100))
adapt_net.to('cuda:0')

y = [torch.rand((4,80,32,32)).cuda(),torch.rand((4,160,16,16)).cuda(),torch.rand((4,320,8,8)).cuda()]
adapt_net(y).shape

torch.Size([4, 100])

In [9]:
BNN_uni.uniAdaptNet = adapt_net
BNN_uni.uniAdapt = True

x = torch.rand((3,3,32,32)).cuda()
BNN_uni(x)

tensor([[-1.4055,  0.4211, -0.4210,  0.5478, -0.1491],
        [-1.4055,  0.4211, -0.4210,  0.5478, -0.1491],
        [-1.4055,  0.4211, -0.4210,  0.5478, -0.1491]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [10]:
trainer = Trainer(model = BNN_uni,seed = 123,model_name = f'UniAdapter_Cifar5_thin_block2_skip_connection',project_name = 'UniAdapt_Training',classes = classes)
m = trainer.model
m.to(trainer.device)
for layer in [m.head_bn,m.head]:
    for p in layer.parameters():
        p.requires_grad = True
    layer.train()
params = nn.ModuleList([BNN_uni.uniAdaptNet,BNN_uni.head,BNN_uni.head_bn])
trainer.lr = 1e-3
trainer.batch_size = 64
trainer.epochs =75
trainer.epoch_chkpts = []
trainer.start_epoch = 0
trainer.set_scheduler(None)
trainer.set_optimizer(torch.optim.Adam,params.parameters())
trainer.train(train59_loader,test59_loader)
    

No lr scheduler
Optimizer Set to New: {'state': {}, 'param_groups': [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]}]}


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: UniAdapt_Training, Run Name UniAdapter_Cifar5_thin_block2_skip_connection 


Run Start : 2023-03-21 09-10-07
start_epoch : 0
initial_lr : 0.001
batch_size : 64
epochs : 75
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.BNN_Resnet_UniAdapt'>
binerised_training : False
Number of Elements : 4382020
Initial accuracy:
Test Accuracy : 17.5%, Test Loss: 2.407164066343966
best_acc.pth saved!
Test Accuracy : 18.8%, Test Loss: 2.4544760010126607
best_acc.pth saved!
epoch: 1 average loss: 0.552
Test Accuracy : 82.2%, Test Loss: 0.48295379813065004
best_acc.pth saved!
Epoch Time (Training + Test) = 24.66 seconds
epoch: 2 average loss: 0.465
Test Accuracy : 82.9%, Test Lo

In [20]:
import os

path = r'SavedModels\Cifar5_2023\adapter_layer1_conv_channel_80_2023-03-05 15-28-32\adapter_layer1_conv_channel_80_best_acc.pth'
newPath = path.replace(os.sep, '/')

newPath

'SavedModels/Cifar5_2023/adapter_layer1_conv_channel_80_2023-03-05 15-28-32/adapter_layer1_conv_channel_80_best_acc.pth'

In [26]:
best_net = torch.load(newPath)


adapt = conv_channel_adapter(80,80) 
for i in [80,160,320]:
    for g in [5,10]:
        split = i//g
        adapter = conv_channel_adapter3(i,split)
        total = sum([p.numel() for p in adapter.parameters()])
        print(f'group = {g}, layer = {i}, elements = {total}')
    


group = 5, layer = 80, elements = 720
group = 10, layer = 80, elements = 1120
group = 5, layer = 160, elements = 1440
group = 10, layer = 160, elements = 2240
group = 5, layer = 320, elements = 2880
group = 10, layer = 320, elements = 4480


In [28]:
n1 = uniAdapt_Net([80,160,320],[80,160,320],block = thinBlock2)
n1.pre_process[-1] = nn.BatchNorm2d(320)

total = sum([p.numel() for p in n1.parameters()])
total

10560

# Weight Comparison


In [55]:
def compare_weights(main_model:nn.Module,*models:nn.Module):

    main_weights = dict()
    for (name, param) in main_model.named_parameters():
        main_weights[name] = param

    n = len( list(main_model.named_parameters()))
    print(n)
    
    for model in models:
        i = 0
        for name, param in model.named_parameters():
            if name in main_weights.keys():
                main_weight = main_weights[name]
                if not (torch.all(main_weight == param)):
                    print(f'parameters {name} in main_model and other model do not match')
            else:
                print(f'name {name} does not exist in original model')

In [56]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('BinaryCifar5_best_acc.pth',map_location= device)
fineTune_dict = torch.load('adapt_fine2_best_acc.pth',map_location= device)

og_model =  resnet18_adapt(num_classes=5)
og_model.load_state_dict(Bin_state_dict)
og_model.to(device = 'cpu')




finetune = resnet18_adapt(num_classes=5)

finetune.to(device = 'cpu')
bottleneck = autoencoder_adapter(320,200)
conv_bn = conv_bottleneck_adapter(80,40,kernel = 3, padding= 1)
finetune.add_adapter(after = 'bn2',adapter = bottleneck)
finetune.add_adapter(after = 'layer1',adapter = conv_bn)

finetune.load_state_dict(fineTune_dict)

# h = resnet18_adapt(num_classes=5)
# x = resnet18_adapt(num_classes=5)
compare_weights(og_model,finetune)


# h = resnet18_adapt(num_classes=5)
# x = resnet18_adapt(num_classes=5)

# print(torch.all(h.weight == x.weight))

# compare_weights(h,x)
# print(h.weight)
# adapt_fine2.conv1.weight

51
name layer1.2.conv1.weight does not exist in original model
name layer1.2.bn1.weight does not exist in original model
name layer1.2.bn1.bias does not exist in original model
name layer1.2.deconv1.weight does not exist in original model
name layer1.2.bn2.weight does not exist in original model
name layer1.2.bn2.bias does not exist in original model
parameters bn3.weight in main_model and other model do not match
parameters bn3.bias in main_model and other model do not match
parameters fc.weight in main_model and other model do not match
name adapter_dict.bn2,autoencoder_adapter,0.l_in.weight does not exist in original model
name adapter_dict.bn2,autoencoder_adapter,0.l_in.bias does not exist in original model
name adapter_dict.bn2,autoencoder_adapter,0.bn1.weight does not exist in original model
name adapter_dict.bn2,autoencoder_adapter,0.bn1.bias does not exist in original model
name adapter_dict.bn2,autoencoder_adapter,0.l_out.weight does not exist in original model
name adapter_di

In [15]:
# from NN_Thesis.models.binarized_modules import BinarizeConv2d
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('test_best_acc.pth',map_location= device)

bt_model = resnet18_adapt()
bt_model.load_state_dict(Bin_state_dict)

conv_model = resnet18_adapt()
conv_model.load_state_dict(Bin_state_dict)

conv_bt_model = resnet18_adapt()
conv_bt_model.load_state_dict(Bin_state_dict)



adapters = [
    (bottleneck_adapter((80,32,32),5),bottleneck_adapter((160,16,16),5),bottleneck_adapter((320,8,8),5)),
    (conv_adapter(80,kernel= 3,padding = 1),conv_adapter(160,kernel= 3,padding = 1),conv_adapter(320,kernel=1)),
    (conv_bottleneck_adapter(80,80,kernel=3,padding=1),conv_bottleneck_adapter(160,80,kernel=3,padding=1),conv_bottleneck_adapter(320,160,kernel=3,padding=1))
]


#Set all layer requires grad to false and set to eval mode()
for m,adpts in zip([bt_model,conv_model,conv_bt_model],adapters):
    m.freeze()
    m.add_adapter(after = 'layer1',adapter =adpts[0])
    m.add_adapter(after = 'layer2',adapter =adpts[1])
    m.add_adapter(after = 'layer3',adapter =adpts[2])


# bottle_trainer = adapter_Trainer(model = bt_model,seed = 123,name = 'Bottleneck_BNN2',classes = classes,binarise= True)
conv_trainer = adapter_Trainer(model = conv_model,seed = 123,name = 'Conv_BNN2',classes = classes,binarise= True)
conv_bt_trainer = adapter_Trainer(model = conv_bt_model,seed = 123,name = 'ConvBN_BNN2',classes = classes,binarise= True)
# trainer.load('test_150.pth',map_location= device,load_model= True)
for trainer in [conv_trainer,conv_bt_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 128
    trainer.epochs =80
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 80
    # trainer.scheduler.T_max= trainer.T_max
    trainer.optimizer = optim.Adam(trainer.model.parameters(), lr=trainer.lr)
    trainer.scheduler = optim.lr_scheduler.ExponentialLR(trainer.optimizer, gamma=0.95)
    trainer.train(trainloader,testloader)

start
start_epoch : 0
lr : 0.01
batch_size : 128
epochs : 80
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 0.01
    lr: 0.01
    maximize: False
    weight_decay: 0
)
seed : 123
epoch: 1 average loss: 2.592 Epoch Time 0.89 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_latest.pth saved!
Overall Accuracy : 29.9%
best_acc.pth saved!
epoch: 2 average loss: 1.996 Epoch Time 1.77 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_latest.pth saved!
Overall Accuracy : 26.5%
epoch: 3 average loss: 1.882 Epoch Time 2.64 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_best_loss.pth saved!
c:\Users\John Su\Downloads\S

# DEBUGGING TIME


In [6]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('test_best_acc.pth',map_location= device)

debug = resnet18_adapt()
debug.load_state_dict(Bin_state_dict)
debug.freeze()
debug.add_adapter(after = 'layer3',adapter = identity_adapter())

debug_trainer = adapter_Trainer(model = debug,seed = 123,name = 'debug',classes = classes,binarise= True)
for trainer in [debug_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 128
    trainer.epochs =10
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 80
    # trainer.scheduler.T_max= trainer.T_max
    trainer.optimizer = optim.Adam(trainer.model.parameters(), lr=trainer.lr)
    trainer.scheduler = optim.lr_scheduler.ExponentialLR(trainer.optimizer, gamma=0.95)
    trainer.train(trainloader,testloader)

start
start_epoch : 0
lr : 0.01
batch_size : 128
epochs : 10
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 0.01
    lr: 0.01
    maximize: False
    weight_decay: 0
)
seed : 123
epoch: 1 average loss: 0.448 Epoch Time 0.49 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_latest.pth saved!
Overall Accuracy : 83.7%
best_acc.pth saved!
epoch: 2 average loss: 0.447 Epoch Time 0.98 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_latest.pth saved!
Overall Accuracy : 83.7%
best_acc.pth saved!
epoch: 3 average loss: 0.445 Epoch Time 1.47 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_best_loss.pth saved!
c:\Users\John Su\Downloads\S

In [5]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('test_best_acc.pth',map_location= device)

adapter = mini_bottleneck_adapter()

mini_adapt_model = resnet18_adapt()
mini_adapt_model.load_state_dict(Bin_state_dict)
mini_adapt_model.freeze()
mini_adapt_model.add_adapter(after = 'layer3',adapter = adapter)

mini_adapt_trainer = adapter_Trainer(model = mini_adapt_model,seed = 123,name = 'mini_adapt',classes = classes,binarise= True)
for trainer in [mini_adapt_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 128
    trainer.epochs =10
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 80
    # trainer.scheduler.T_max= trainer.T_max
    trainer.optimizer = optim.Adam(trainer.model.parameters(), lr=trainer.lr)
    trainer.scheduler = optim.lr_scheduler.ExponentialLR(trainer.optimizer, gamma=0.95)
    trainer.train(trainloader,testloader)

start
start_epoch : 0
lr : 0.01
batch_size : 128
epochs : 10
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 0.01
    lr: 0.01
    maximize: False
    weight_decay: 0
)
seed : 123
epoch: 1 average loss: 0.447 Epoch Time 0.56 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_latest.pth saved!
Overall Accuracy : 83.6%
best_acc.pth saved!
epoch: 2 average loss: 0.447 Epoch Time 1.12 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_latest.pth saved!
Overall Accuracy : 83.7%
best_acc.pth saved!
epoch: 3 average loss: 0.444 Epoch Time 1.69 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_best_loss.pth saved!
c:\

# Adapter Location

In [5]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = '.\SavedModels\Cifar5\Baseline_BNN_Resnet18_2022_10_21_10_47_20\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)


layers = ['layer1','layer2','layer3']
channels = [80,160,320]


for layer,channel in zip(layers,channels):
    for t in ['adp_only','full_ft']:
        print(t,layer,channel)
        torch.manual_seed(42)
        adapter_model = resnet18_adapt(num_classes=5)
        adapter_model.load_state_dict(BinCifar5_state_dict)
        
        if t == 'adp_only':
            adapter_model.freeze()
        conv_adp = conv_adapter(channel,kernel = 1, padding= 0)
        adapter_model.add_adapter(after = layer,adapter = conv_adp)
        trainer = adapter_Trainer(model = adapter_model,seed = 123,model_name = f'adp_{layer}_{t}',project_name = 'Cifar5',classes = classes,binarise= True)
        m = trainer.model
        m.to(trainer.device)
        for head in [m.fc,m.bn3,m.bn3]:
            for p in head.parameters():
                p.requires_grad = True
            head.train()
        trainer.lr = 0.01
        trainer.batch_size = 32
        trainer.epochs =100
        trainer.epoch_chkpts = []
        trainer.start_epoch = 0
        # trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
        trainer.tags = ['finetune',f'Training: {t}',f'Adpater Location {layer}']
        trainer.train(train59_loader,test59_loader)

adp_only layer1 80
Scheduler Set {'T_max': 10, 'eta_min': 0, 'base_lrs': [0.1], 'last_epoch': 0, '_step_count': 1, 'verbose': False, '_get_lr_called_within_step': False, '_last_lr': [0.1]}


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: Cifar5, Run Name adp_layer1_adp_only 


Run Start : 2022-10-21 22-03-33
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4343135
Initial accuracy:
Test Accuracy : 8.8%, Test Loss: 4.613198432776019
best_acc.pth saved!
Test Accuracy : 8.4%, Test Loss: 4.8334173107391125
epoch: 1 average loss: 1.031
Test Accuracy : 72.9%, Test Loss: 0.7262849130136583
best_acc.pth saved!
Epoch Time (Training + Test) = 21.24 seconds
epoch: 2 average loss: 0.720
Test Accuracy : 76.9%, Test Loss: 0.6304557144336993
best_acc.pth saved!
Epoch Time (Training + Test) = 21.17 seconds
epoch: 3 average loss: 0.676


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▆▅▆▄▅▅▅▆▆▆▆▆▅▆▅▆▆▆▆▆▅▆▆▆▆▇▅▆▅▅▅▆▆▆▆▆▇▆█
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▇██████████████████████████████████████
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.84712
Total Time (hours),1.17049
epoch,100.0
epoch time (s),22.58553
lr,0.1
test_accuracy,0.83644
test_loss,0.45782
training_loss,0.49542


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333435779, max=1.0)…

start
Project Name: Cifar5, Run Name adp_layer1_full_ft 


Run Start : 2022-10-21 22-39-02
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4343135
Initial accuracy:
Test Accuracy : 8.8%, Test Loss: 4.613198432776019
best_acc.pth saved!
Test Accuracy : 8.4%, Test Loss: 4.8334173107391125
epoch: 1 average loss: 0.961
Test Accuracy : 73.8%, Test Loss: 0.7023793851475582
best_acc.pth saved!
Epoch Time (Training + Test) = 30.91 seconds
epoch: 2 average loss: 0.694
Test Accuracy : 77.2%, Test Loss: 0.6077157112643542
best_acc.pth saved!
Epoch Time (Training + Test) = 30.88 seconds
epoch: 3 average loss: 0.642
T

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁██▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▇▆▆▆
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▇██████████████████████████████████████
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.86628
Total Time (hours),1.57341
epoch,100.0
epoch time (s),27.52569
lr,0.1
test_accuracy,0.85088
test_loss,0.40504
training_loss,0.44411


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666899498, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer2_adp_only 


Run Start : 2022-10-21 23-26-45
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4362655
Initial accuracy:
Test Accuracy : 7.1%, Test Loss: 4.672063447630314
best_acc.pth saved!
Test Accuracy : 6.3%, Test Loss: 4.930686494578486
epoch: 1 average loss: 0.767
Test Accuracy : 78.2%, Test Loss: 0.5941561580923818
best_acc.pth saved!
Epoch Time (Training + Test) = 16.50 seconds
epoch: 2 average loss: 0.594
Test Accuracy : 80.0%, Test Loss: 0.545416933496285
best_acc.pth saved!
Epoch Time (Training + Test) = 16.56 seconds
epoch: 3 average loss: 0.555
Te

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▆▆▆▅▅▅▅▆▆▆▆▇▅▅▆▆▆▆▇▆▆▅▇█▆▇▆▆▅▆▇█▆▅▆▅▅▇▆
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▇██████████████████████████████████████
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.87696
Total Time (hours),0.92809
epoch,100.0
epoch time (s),16.66766
lr,0.1
test_accuracy,0.87044
test_loss,0.35957
training_loss,0.40972


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer2_full_ft 


Run Start : 2022-10-21 23-55-03
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4362655
Initial accuracy:
Test Accuracy : 7.1%, Test Loss: 4.672063447630314
best_acc.pth saved!
Test Accuracy : 6.3%, Test Loss: 4.930686494578486
epoch: 1 average loss: 0.734
Test Accuracy : 80.0%, Test Loss: 0.5462298654686765
best_acc.pth saved!
Epoch Time (Training + Test) = 27.22 seconds
epoch: 2 average loss: 0.556
Test Accuracy : 80.0%, Test Loss: 0.5506100953768587
Epoch Time (Training + Test) = 27.24 seconds
epoch: 3 average loss: 0.517
Test Accuracy : 82.7%,

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▇▇▇▇▇▇▇▇███▇▇▇██▇▇█▇▇▇▇█▇▇▇▇▇▇▇███▇█▇█▇
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▇██████████████████████████████████████
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.89184
Total Time (hours),1.51186
epoch,100.0
epoch time (s),26.91144
lr,0.1
test_accuracy,0.88444
test_loss,0.32098
training_loss,0.37227


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer3_adp_only 


Run Start : 2022-10-22 00-40-53
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4440095
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.5574629739727204
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.562192482716592
epoch: 1 average loss: 1.608
Test Accuracy : 37.7%, Test Loss: 1.4049944045293667
best_acc.pth saved!
Epoch Time (Training + Test) = 14.85 seconds
epoch: 2 average loss: 1.413
Test Accuracy : 39.0%, Test Loss: 1.3817411118456164
best_acc.pth saved!
Epoch Time (Training + Test) = 14.84 seconds
epoch: 3 average loss: 1.39

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▃▅▅▃▆▅▂▃▂▂▃▂▄▄▇▆█▇▆█▆█▆▅▅▅▅▅▅▅▅▅▆▅▅▅▅▅▅
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▄▃▄▆▅▃▅▆▇▃▇▅▆▅▇▇▇▇▇▄▅▇▇▇▇▇▆▇██▆█▇▇▇▆███
test_loss,█▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.6544
Total Time (hours),0.8436
epoch,100.0
epoch time (s),15.25286
lr,0.1
test_accuracy,0.6544
test_loss,0.94395
training_loss,0.99839


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333332417533, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer3_full_ft 


Run Start : 2022-10-22 01-06-40
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4440095
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.5574629739727204
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.562192482716592
epoch: 1 average loss: 1.513
Test Accuracy : 42.5%, Test Loss: 1.2526966662663024
best_acc.pth saved!
Epoch Time (Training + Test) = 26.63 seconds
epoch: 2 average loss: 1.219
Test Accuracy : 51.4%, Test Loss: 1.1087592250245917
best_acc.pth saved!
Epoch Time (Training + Test) = 26.72 seconds
epoch: 3 average loss: 1.090

In [13]:
b = resnet18_adapt(num_classes=5)

b.eval()
x = torch.rand((1,3,32,32))

b(x)

tensor([[ -6.1048, -10.0902, -42.1078,  -0.1369,  -2.0739]],
       grad_fn=<LogSoftmaxBackward0>)

In [18]:
b.layer1

Sequential(
  (0): BasicBlock(
    (conv1): BinarizeConv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (tanh1): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (conv2): BinarizeConv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (tanh2): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (bn2): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): BinarizeConv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (tanh1): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (conv2): BinarizeConv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (tanh2): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (bn2): BatchNorm2d(80

# Adapters with init at zero

## Adp Only Training

In [7]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = '.\SavedModels\Cifar5\Baseline_BNN_Resnet18_2022_10_21_10_47_20\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)


layers = ['layer1','layer2','layer3']
channels = [80,160,320]


for layer,channel in zip(layers,channels):
    for t in ['adp_only']:
        print(t,layer,channel)
        torch.manual_seed(42)
        adapter_model = resnet18_adapt(num_classes=5)
        adapter_model.load_state_dict(BinCifar5_state_dict)
        
        if t == 'adp_only':
            adapter_model.freeze()
        conv_adp = conv_adapter(channel,kernel = 1, padding= 0)
        conv_adp.init_weight_zeros()

        adapter_model.add_adapter(after = layer,adapter = conv_adp)
        trainer = adapter_Trainer(model = adapter_model,seed = 123,model_name = f'adp_{layer}_{t}',project_name = 'Cifar5',classes = classes,binarise= True)
        m = trainer.model
        m.to(trainer.device)
        for head in [m.fc,m.bn3,m.bn3]:
            for p in head.parameters():
                p.requires_grad = True
            head.train()
        trainer.lr = 0.01
        trainer.batch_size = 32
        trainer.epochs =100
        trainer.epoch_chkpts = []
        trainer.start_epoch = 0
        # trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
        trainer.tags = ['finetune',f'Training: {t}',f'Adpater Location {layer}']
        trainer.train(train59_loader,test59_loader)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


adp_only layer1 80
Scheduler Set {'T_max': 10, 'eta_min': 0, 'base_lrs': [0.1], 'last_epoch': 0, '_step_count': 1, 'verbose': False, '_get_lr_called_within_step': False, '_last_lr': [0.1]}


[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: Cifar5, Run Name adp_layer1_adp_only 


Run Start : 2022-10-22 11-00-36
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4343135
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 3.4470722077752622
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 3.447869495967465
epoch: 1 average loss: 1.378
Test Accuracy : 39.9%, Test Loss: 1.3051697027957654
best_acc.pth saved!
Epoch Time (Training + Test) = 21.47 seconds
epoch: 2 average loss: 1.309
Test Accuracy : 39.3%, Test Loss: 1.3000686711362561
Epoch Time (Training + Test) = 31.06 seconds
epoch: 3 average loss: 1.268
Test Accuracy : 44

0,1
Current Best Acc,▁▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▅▇▇▇▇▇▇▆█▇▇▇▆▇▇▇▇▆▇▇▇▇▇█▇█▇▇▇▇█▇▇▇▇█▇▇▇
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇███████▇▇█▇██▇██▇▇████▇█
test_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.56948
Current Best Acc,0.56948
Total Time (hours),2.01464
epoch,100.0
epoch time (s),36.56103
lr,0.1
test_accuracy,0.56932
test_loss,1.02788
training_loss,1.06739


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666672124, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer2_adp_only 


Run Start : 2022-10-22 12-01-28
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4362655
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 3.993683311335571
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 3.9929843113550443
epoch: 1 average loss: 1.288
Test Accuracy : 43.4%, Test Loss: 1.1975495796984115
best_acc.pth saved!
Epoch Time (Training + Test) = 30.25 seconds
epoch: 2 average loss: 1.188
Test Accuracy : 46.7%, Test Loss: 1.1470134069242746
best_acc.pth saved!
Epoch Time (Training + Test) = 28.75 seconds
epoch: 3 average loss: 1.16

0,1
Current Best Acc,▁▅▆▆▇▇▇▇▇▇██████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▇▆▇▇▆▇▇▆▆▁▆▆▆▆▆▆▆▆▆▆▆▆▅▆▇▇▆▆▆▆▆▇█▇▆▇▇██▆
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▅▆▆▆▆▇▆▇▇█████▇█▇█████████████▇▇▇████▇▇
test_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.65848
Current Best Acc,0.65848
Total Time (hours),1.583
epoch,100.0
epoch time (s),28.80798
lr,0.1
test_accuracy,0.61504
test_loss,0.9459
training_loss,0.93563


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer3_adp_only 


Run Start : 2022-10-22 12-49-39
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4440095
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.556679776867332
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.5566383127666192
epoch: 1 average loss: 1.539
Test Accuracy : 29.9%, Test Loss: 1.4480573743810434
best_acc.pth saved!
Epoch Time (Training + Test) = 28.51 seconds
epoch: 2 average loss: 1.424
Test Accuracy : 35.6%, Test Loss: 1.4382181316995255
best_acc.pth saved!
Epoch Time (Training + Test) = 28.92 seconds
epoch: 3 average loss: 1.42

## Adp = FullFT

In [None]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = '.\SavedModels\Cifar5\Baseline_BNN_Resnet18_2022_10_21_10_47_20\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)


layers = ['layer1','layer2','layer3']
channels = [80,160,320]


for layer,channel in zip(layers,channels):
    for t in ['full_ft']:
        print(t,layer,channel)
        torch.manual_seed(42)
        adapter_model = resnet18_adapt(num_classes=5)
        adapter_model.load_state_dict(BinCifar5_state_dict)
        
        if t == 'adp_only':
            adapter_model.freeze()
        conv_adp = conv_adapter(channel,kernel = 1, padding= 0)
        conv_adp.init_weight_zeros()
        adapter_model.add_adapter(after = layer,adapter = conv_adp)
        trainer = adapter_Trainer(model = adapter_model,seed = 123,model_name = f'adp_{layer}_{t}',project_name = 'Cifar5',classes = classes,binarise= True)
        m = trainer.model
        m.to(trainer.device)
        for head in [m.fc,m.bn3,m.bn3]:
            for p in head.parameters():
                p.requires_grad = True
            head.train()
        trainer.lr = 0.01
        trainer.batch_size = 32
        trainer.epochs =100
        trainer.epoch_chkpts = []
        trainer.start_epoch = 0
        # trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
        trainer.tags = ['finetune',f'Training: {t}',f'Adpater Location {layer}','Init_weight: {} ']
        trainer.train(train59_loader,test59_loader)