In [1]:
from NN_Thesis.nn_classes import *
from NN_Thesis.trainer import *
from NN_Thesis.adapters import *
import torch
import torchvision

import torch.nn as nn
import torch.nn.functional as F
from torch import optim


from torchvision.transforms import transforms
import numpy as np
import os
from PIL import Image
import random

from matplotlib import pyplot as plt

import wandb


In [2]:
from NN_Thesis.models import *

In [4]:
torch.manual_seed(123)
random.seed(123)
batch_size = 16


from NN_Thesis.dataset import cifar_n_dataset
from torch.utils.data import DataLoader

train_path = 'data/cifar_5/cifar_04/train/data'
test_path ='data/cifar_5/cifar_04/test/data'

train_data = cifar_n_dataset(train_path)
test_data = cifar_n_dataset(train_path)
print(train_data.data.shape)

def normalize_channels(data):
    #We have a nxCxWxH array
    d = data
    d = torch.flatten(data,2,-1).to(dtype = torch.float32)/255
    mean= torch.mean(d,dim = [0,2])
    std = torch.std(d,dim = [0,2])
    return mean,std

mean,std = normalize_channels(train_data.data)

train_transform = transforms.Compose([
    # transforms.ToTensor(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(mean, std),
])


test_transform = transforms.Compose([
    # transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

train_data.transform = train_transform
test_data.transform = test_transform



torch.Size([25000, 3, 32, 32])


In [4]:
train_dataloader = DataLoader(train_data,batch_size =64,shuffle = True)

x = train_data[0]
img,label = next(iter(train_dataloader))

img.shape


torch.Size([64, 3, 32, 32])

# Code For Initial Cifar5 Training

In [6]:
# train_dataloader = DataLoader(train_data,batch_size =32,shuffle = True)
# test_dataloader = DataLoader(test_data,batch_size =32)

# classes = tuple(train_data.label_names)

BNN_resnet18 = resnet_binary(num_classes = 5 , depth = 18, dataset = 'cifar10')
resnet18 = resnet(num_classes = 5 , depth = 18, dataset = 'cifar10')


ImageNet_resnet18 = resnet_binary(num_classes = 5 , depth = 18, dataset = 'imagenet')

for name,p in BNN_resnet18.named_parameters():
    print(name,p.numel())

print('\n\n','*'*100,'\n\n')

for name,p in resnet18.named_parameters():
    print(name,p.numel())

print('\n\n','*'*100,'\n\n')

for name,p in ImageNet_resnet18.named_parameters():
    print(name,p.numel())


# BNN_trainer = Trainer(BNN_resnet18,model_name = 'Baseline_BNN_Resnet18',project_name = 'Cifar5',classes = classes,seed = 123,binarise = True)
# resnet_trainer =Trainer(resnet18,model_name = 'Baseline_Resnet18',project_name='Cifar5',classes = classes,seed = 123)

# #Set Training Params
# for trainer in [resnet_trainer,BNN_trainer]:
#     trainer.lr = 0.1
#     trainer.epochs = 200
#     trainer.train(train_dataloader,test_dataloader)

conv1.weight 2160
bn1.weight 80
bn1.bias 80
layer1.0.conv1.weight 57600
layer1.0.bn1.weight 80
layer1.0.bn1.bias 80
layer1.0.conv2.weight 57600
layer1.0.bn2.weight 80
layer1.0.bn2.bias 80
layer1.1.conv1.weight 57600
layer1.1.bn1.weight 80
layer1.1.bn1.bias 80
layer1.1.conv2.weight 57600
layer1.1.bn2.weight 80
layer1.1.bn2.bias 80
layer2.0.conv1.weight 115200
layer2.0.bn1.weight 160
layer2.0.bn1.bias 160
layer2.0.conv2.weight 230400
layer2.0.bn2.weight 160
layer2.0.bn2.bias 160
layer2.0.downsample.0.weight 12800
layer2.0.downsample.1.weight 160
layer2.0.downsample.1.bias 160
layer2.1.conv1.weight 230400
layer2.1.bn1.weight 160
layer2.1.bn1.bias 160
layer2.1.conv2.weight 230400
layer2.1.bn2.weight 160
layer2.1.bn2.bias 160
layer3.0.conv1.weight 460800
layer3.0.bn1.weight 320
layer3.0.bn1.bias 320
layer3.0.conv2.weight 921600
layer3.0.bn2.weight 320
layer3.0.bn2.bias 320
layer3.0.downsample.0.weight 51200
layer3.0.downsample.1.weight 320
layer3.0.downsample.1.bias 320
layer3.1.conv1.weigh

# Code For Finetuning

In [6]:
train_path = 'data/cifar_5/cifar_59/train/data'
test_path ='data/cifar_5/cifar_59/test/data'

train59_data = cifar_n_dataset(train_path)
test59_data = cifar_n_dataset(train_path)
# print(train_data.data.shape)

def normalize_channels(data):
    #We have a nxCxWxH array
    d = data
    d = torch.flatten(data,2,-1).to(dtype = torch.float32)/255
    mean= torch.mean(d,dim = [0,2])
    std = torch.std(d,dim = [0,2])
    return mean,std

mean,std = normalize_channels(train59_data.data)

train59_transform = transforms.Compose([
    # transforms.ToTensor(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(mean, std),
])


test59_transform = transforms.Compose([
    # transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

train59_data.transform = train_transform
test59_data.transform = test_transform
train59_loader = DataLoader(train59_data,batch_size=64,shuffle=True)
test59_loader = DataLoader(test59_data,batch_size=64,shuffle=True)
classes = tuple(train59_data.label_names)
classes

('dog', 'frog', 'horse', 'ship', 'truck')

## Feature Extraction

In [5]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

PATH = '.\SavedModels\Cifar5\Baseline_BNN_Resnet18_2022_10_21_10_47_20\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)

feat_extr_model = resnet18_adapt(num_classes= 5)
feat_extr_model.load_state_dict(BinCifar5_state_dict)
feat_extr_model.freeze()
feat_extr_model.fc = BinarizeLinear(64*5,5)

feat_ex = Trainer(feat_extr_model,model_name = 'feat_extract',project_name = 'Cifar5',classes = classes,seed = 123,binarise = True)
feat_ex.lr = 0.01
feat_ex.epochs = 20
feat_ex.tags =['finetune']
feat_ex.train(train59_loader,test59_loader)




Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Scheduler Set {'T_max': 2, 'eta_min': 0, 'base_lrs': [0.1], 'last_epoch': 0, '_step_count': 1, 'verbose': False, '_get_lr_called_within_step': False, '_last_lr': [0.1]}


[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: Cifar5, Run Name feat_extract_2022_10_21_15_16_40 


start_epoch : 0
initial_lr : 0.01
batch_size : 16
epochs : 20
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Initial accuracy:
Test Accuracy : 16.2%, Test Loss: 2.1748023987426173
best_acc.pth saved!
Test Accuracy : 16.3%, Test Loss: 2.1617062100973885
best_acc.pth saved!
epoch: 1 average loss: 1.018
Test Accuracy : 71.7%, Test Loss: 0.778345244086307
best_acc.pth saved!
Epoch Time (Training + Test) = 15.59 seconds
epoch: 2 average loss: 0.789
Test Accuracy : 72.5%, Test Loss: 0.7703028683314848
best_acc.pth saved!
Epoch Time (Training + Test) = 15.49 seconds
epoch: 3 average loss: 0.818
Test Accuracy : 72.7%, Test

In [5]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

PATH = '.\SavedModels\Cifar5\Baseline_BNN_Resnet18_2022_10_21_10_47_20\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)


finetune_model = resnet18_adapt(num_classes= 5)
finetune_model.load_state_dict(BinCifar5_state_dict)
finetune_model.freeze()
finetune_model.fc = BinarizeLinear(64*5,5)

finetune = Trainer(finetune_model,model_name = 'Regular_finetune',project_name='Cifar5',classes = classes,seed = 123,binarise = True)
finetune.lr = 0.01
finetune.epochs = 20
finetune.tags = ['finetune']
m = finetune.model
# m.to(finetune.device)
# for layer in [m.fc,m.bn3,m.bn3]:
#     for p in layer.parameters():
#         p.requires_grad = True
#     layer.train()
# finetune.train(train59_loader,test59_loader)


#FineTune
finetune.epochs = 100
finetune.lr = 0.1
finetune.optimizer = optim.SGD(finetune.model.parameters(), lr=finetune.lr, momentum=0.9)
finetune.scheduler = optim.lr_scheduler.CosineAnnealingLR(finetune.optimizer,finetune.epochs)
m.unfreeze()
finetune.train(train59_loader,test59_loader)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Scheduler Set {'T_max': 10, 'eta_min': 0, 'base_lrs': [0.1], 'last_epoch': 0, '_step_count': 1, 'verbose': False, '_get_lr_called_within_step': False, '_last_lr': [0.1]}


[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: Cifar5, Run Name Regular_finetune 


Run Start : 2022-10-21 21-04-04
start_epoch : 0
initial_lr : 0.1
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4336415
Initial accuracy:
Test Accuracy : 16.2%, Test Loss: 2.1748023987426173
best_acc.pth saved!
Test Accuracy : 16.3%, Test Loss: 2.1617062100973885
best_acc.pth saved!
epoch: 1 average loss: 0.809
Test Accuracy : 76.2%, Test Loss: 0.6314606831201812
best_acc.pth saved!
Epoch Time (Training + Test) = 27.36 seconds
epoch: 2 average loss: 0.646
Test Accuracy : 77.6%, Test Loss: 0.6018474381750502
best_acc.pth saved!
Epoch Time (Training + Test) = 26.99 seconds
epoch: 3 a

In [17]:
a = finetune.get_param_info()

z = finetune.model
element_info = dict()
for layer in z.parameters():
    if str(layer.dtype) not in element_info.keys():
        element_info[str(layer.dtype)] = layer.numel()
    else:
        element_info[str(layer.dtype)] += layer.numel()

element_info[str(layer.dtype)]

4336415

# Adapter FineTune

## Single Autoencoder After bn2 

In [8]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('BinaryCifar5_best_acc.pth',map_location= device)

bottleneck = autoencoder_adapter(320,200)
adapt_fine = resnet18_adapt(num_classes=5)
adapt_fine.load_state_dict(Bin_state_dict)
adapt_fine.freeze()
adapt_fine.add_adapter(after = 'bn2',adapter = bottleneck)

adapt_fine_trainer = adapter_Trainer(model = adapt_fine,seed = 123,model_name = 'adapt_fine',project_name='Cifar5',classes = classes,binarise= True)
for trainer in [adapt_fine_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 128
    trainer.epochs =70
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 70
    # trainer.scheduler.T_max= trainer.T_max
    # trainer.optimizer = optim.Adam(trainer.model.parameters(), lr=trainer.lr)
    trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
    trainer.train(train59_loader,test59_loader)

start
start_epoch : 0
lr : 0.01
batch_size : 128
epochs : 70
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
epoch: 1 average loss: 1.388 Epoch Time 0.14 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_latest.pth saved!
Overall Accuracy : 44.1%
best_acc.pth saved!
epoch: 2 average loss: 1.144 Epoch Time 0.28 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_latest.pth saved!
Overall Accuracy : 52.3%
best_acc.pth saved!
epoch: 3 average loss: 1.068 Epoch Time 0.42 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\adapt_fine_best_loss.pth saved!
c:\Users\John Su\Downloads\Sydn

## 2 Adapters


In [20]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('BinaryCifar5_best_acc.pth',map_location= device)

# bottleneck = autoencoder_adapter(320,200)
conv_bn = conv_adapter(80,kernel = 1, padding= 0)

adapt_fine2 = resnet18_adapt(num_classes=5)
adapt_fine2.load_state_dict(Bin_state_dict)
adapt_fine2.freeze()

# adapt_fine2.add_adapter(after = 'bn2',adapter = bottleneck)
adapt_fine2.add_adapter(after = 'layer1',adapter = conv_bn)

adapt_fine2_trainer = adapter_Trainer(model = adapt_fine2,seed = 123,name = 'adapt_fine2',classes = classes,binarise= True)
for trainer in [adapt_fine2_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 32
    trainer.epochs =50
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 50
    trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
    trainer.train(train59_loader,test59_loader)

start
start_epoch : 0
lr : 0.01
batch_size : 32
epochs : 50
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
torch.Size([64, 80, 32, 32])
torch.Size([64, 160, 16, 16])
torch.Size([64, 320, 8, 8])
torch.Size([64, 80, 32, 32])
torch.Size([64, 160, 16, 16])
torch.Size([64, 320, 8, 8])
torch.Size([64, 80, 32, 32])
torch.Size([64, 160, 16, 16])
torch.Size([64, 320, 8, 8])
torch.Size([64, 80, 32, 32])
torch.Size([64, 160, 16, 16])
torch.Size([64, 320, 8, 8])
torch.Size([64, 80, 32, 32])
torch.Size([64, 160, 16, 16])
torch.Size([64, 320, 8, 8])
torch.Size([64, 80, 32, 32])
torch.Size([64, 160, 16, 16])
torch.Size([64, 320, 8, 8])
torch.Size([64, 80, 32, 32])
torch.Size([64, 160, 16, 16])
torch.Size([64, 320, 8, 8])
torch.Size([64, 80, 32, 32])
torch.Size([64, 160, 16, 16])
torch.Size([6

KeyboardInterrupt: 

# Weight Comparison


In [55]:
def compare_weights(main_model:nn.Module,*models:nn.Module):

    main_weights = dict()
    for (name, param) in main_model.named_parameters():
        main_weights[name] = param

    n = len( list(main_model.named_parameters()))
    print(n)
    
    for model in models:
        i = 0
        for name, param in model.named_parameters():
            if name in main_weights.keys():
                main_weight = main_weights[name]
                if not (torch.all(main_weight == param)):
                    print(f'parameters {name} in main_model and other model do not match')
            else:
                print(f'name {name} does not exist in original model')

In [56]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('BinaryCifar5_best_acc.pth',map_location= device)
fineTune_dict = torch.load('adapt_fine2_best_acc.pth',map_location= device)

og_model =  resnet18_adapt(num_classes=5)
og_model.load_state_dict(Bin_state_dict)
og_model.to(device = 'cpu')




finetune = resnet18_adapt(num_classes=5)

finetune.to(device = 'cpu')
bottleneck = autoencoder_adapter(320,200)
conv_bn = conv_bottleneck_adapter(80,40,kernel = 3, padding= 1)
finetune.add_adapter(after = 'bn2',adapter = bottleneck)
finetune.add_adapter(after = 'layer1',adapter = conv_bn)

finetune.load_state_dict(fineTune_dict)

# h = resnet18_adapt(num_classes=5)
# x = resnet18_adapt(num_classes=5)
compare_weights(og_model,finetune)


# h = resnet18_adapt(num_classes=5)
# x = resnet18_adapt(num_classes=5)

# print(torch.all(h.weight == x.weight))

# compare_weights(h,x)
# print(h.weight)
# adapt_fine2.conv1.weight

51
name layer1.2.conv1.weight does not exist in original model
name layer1.2.bn1.weight does not exist in original model
name layer1.2.bn1.bias does not exist in original model
name layer1.2.deconv1.weight does not exist in original model
name layer1.2.bn2.weight does not exist in original model
name layer1.2.bn2.bias does not exist in original model
parameters bn3.weight in main_model and other model do not match
parameters bn3.bias in main_model and other model do not match
parameters fc.weight in main_model and other model do not match
name adapter_dict.bn2,autoencoder_adapter,0.l_in.weight does not exist in original model
name adapter_dict.bn2,autoencoder_adapter,0.l_in.bias does not exist in original model
name adapter_dict.bn2,autoencoder_adapter,0.bn1.weight does not exist in original model
name adapter_dict.bn2,autoencoder_adapter,0.bn1.bias does not exist in original model
name adapter_dict.bn2,autoencoder_adapter,0.l_out.weight does not exist in original model
name adapter_di

In [15]:
# from NN_Thesis.models.binarized_modules import BinarizeConv2d
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('test_best_acc.pth',map_location= device)

bt_model = resnet18_adapt()
bt_model.load_state_dict(Bin_state_dict)

conv_model = resnet18_adapt()
conv_model.load_state_dict(Bin_state_dict)

conv_bt_model = resnet18_adapt()
conv_bt_model.load_state_dict(Bin_state_dict)



adapters = [
    (bottleneck_adapter((80,32,32),5),bottleneck_adapter((160,16,16),5),bottleneck_adapter((320,8,8),5)),
    (conv_adapter(80,kernel= 3,padding = 1),conv_adapter(160,kernel= 3,padding = 1),conv_adapter(320,kernel=1)),
    (conv_bottleneck_adapter(80,80,kernel=3,padding=1),conv_bottleneck_adapter(160,80,kernel=3,padding=1),conv_bottleneck_adapter(320,160,kernel=3,padding=1))
]


#Set all layer requires grad to false and set to eval mode()
for m,adpts in zip([bt_model,conv_model,conv_bt_model],adapters):
    m.freeze()
    m.add_adapter(after = 'layer1',adapter =adpts[0])
    m.add_adapter(after = 'layer2',adapter =adpts[1])
    m.add_adapter(after = 'layer3',adapter =adpts[2])


# bottle_trainer = adapter_Trainer(model = bt_model,seed = 123,name = 'Bottleneck_BNN2',classes = classes,binarise= True)
conv_trainer = adapter_Trainer(model = conv_model,seed = 123,name = 'Conv_BNN2',classes = classes,binarise= True)
conv_bt_trainer = adapter_Trainer(model = conv_bt_model,seed = 123,name = 'ConvBN_BNN2',classes = classes,binarise= True)
# trainer.load('test_150.pth',map_location= device,load_model= True)
for trainer in [conv_trainer,conv_bt_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 128
    trainer.epochs =80
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 80
    # trainer.scheduler.T_max= trainer.T_max
    trainer.optimizer = optim.Adam(trainer.model.parameters(), lr=trainer.lr)
    trainer.scheduler = optim.lr_scheduler.ExponentialLR(trainer.optimizer, gamma=0.95)
    trainer.train(trainloader,testloader)

start
start_epoch : 0
lr : 0.01
batch_size : 128
epochs : 80
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 0.01
    lr: 0.01
    maximize: False
    weight_decay: 0
)
seed : 123
epoch: 1 average loss: 2.592 Epoch Time 0.89 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_latest.pth saved!
Overall Accuracy : 29.9%
best_acc.pth saved!
epoch: 2 average loss: 1.996 Epoch Time 1.77 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_latest.pth saved!
Overall Accuracy : 26.5%
epoch: 3 average loss: 1.882 Epoch Time 2.64 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\Conv_BNN2_best_loss.pth saved!
c:\Users\John Su\Downloads\S

# DEBUGGING TIME


In [6]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('test_best_acc.pth',map_location= device)

debug = resnet18_adapt()
debug.load_state_dict(Bin_state_dict)
debug.freeze()
debug.add_adapter(after = 'layer3',adapter = identity_adapter())

debug_trainer = adapter_Trainer(model = debug,seed = 123,name = 'debug',classes = classes,binarise= True)
for trainer in [debug_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 128
    trainer.epochs =10
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 80
    # trainer.scheduler.T_max= trainer.T_max
    trainer.optimizer = optim.Adam(trainer.model.parameters(), lr=trainer.lr)
    trainer.scheduler = optim.lr_scheduler.ExponentialLR(trainer.optimizer, gamma=0.95)
    trainer.train(trainloader,testloader)

start
start_epoch : 0
lr : 0.01
batch_size : 128
epochs : 10
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 0.01
    lr: 0.01
    maximize: False
    weight_decay: 0
)
seed : 123
epoch: 1 average loss: 0.448 Epoch Time 0.49 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_latest.pth saved!
Overall Accuracy : 83.7%
best_acc.pth saved!
epoch: 2 average loss: 0.447 Epoch Time 0.98 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_latest.pth saved!
Overall Accuracy : 83.7%
best_acc.pth saved!
epoch: 3 average loss: 0.445 Epoch Time 1.47 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\debug_best_loss.pth saved!
c:\Users\John Su\Downloads\S

In [5]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
Bin_state_dict = torch.load('test_best_acc.pth',map_location= device)

adapter = mini_bottleneck_adapter()

mini_adapt_model = resnet18_adapt()
mini_adapt_model.load_state_dict(Bin_state_dict)
mini_adapt_model.freeze()
mini_adapt_model.add_adapter(after = 'layer3',adapter = adapter)

mini_adapt_trainer = adapter_Trainer(model = mini_adapt_model,seed = 123,name = 'mini_adapt',classes = classes,binarise= True)
for trainer in [mini_adapt_trainer]:
    m = trainer.model
    m.to(trainer.device)
    for layer in [m.fc,m.bn3,m.bn3]:
        for p in layer.parameters():
            p.requires_grad = True
        layer.train()
    trainer.lr = 0.01
    trainer.batch_size = 128
    trainer.epochs =10
    trainer.epoch_chkpts = []
    trainer.start_epoch = 0
    trainer.T_max = 80
    # trainer.scheduler.T_max= trainer.T_max
    trainer.optimizer = optim.Adam(trainer.model.parameters(), lr=trainer.lr)
    trainer.scheduler = optim.lr_scheduler.ExponentialLR(trainer.optimizer, gamma=0.95)
    trainer.train(trainloader,testloader)

start
start_epoch : 0
lr : 0.01
batch_size : 128
epochs : 10
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 0.01
    lr: 0.01
    maximize: False
    weight_decay: 0
)
seed : 123
epoch: 1 average loss: 0.447 Epoch Time 0.56 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_latest.pth saved!
Overall Accuracy : 83.6%
best_acc.pth saved!
epoch: 2 average loss: 0.447 Epoch Time 1.12 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_best_loss.pth saved!
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_latest.pth saved!
Overall Accuracy : 83.7%
best_acc.pth saved!
epoch: 3 average loss: 0.444 Epoch Time 1.69 mins
c:\Users\John Su\Downloads\SydneyUni\thesis\Thesis\mini_adapt_best_loss.pth saved!
c:\

# Adapter Location

In [5]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = '.\SavedModels\Cifar5\Baseline_BNN_Resnet18_2022_10_21_10_47_20\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)


layers = ['layer1','layer2','layer3']
channels = [80,160,320]


for layer,channel in zip(layers,channels):
    for t in ['adp_only','full_ft']:
        print(t,layer,channel)
        torch.manual_seed(42)
        adapter_model = resnet18_adapt(num_classes=5)
        adapter_model.load_state_dict(BinCifar5_state_dict)
        
        if t == 'adp_only':
            adapter_model.freeze()
        conv_adp = conv_adapter(channel,kernel = 1, padding= 0)
        adapter_model.add_adapter(after = layer,adapter = conv_adp)
        trainer = adapter_Trainer(model = adapter_model,seed = 123,model_name = f'adp_{layer}_{t}',project_name = 'Cifar5',classes = classes,binarise= True)
        m = trainer.model
        m.to(trainer.device)
        for head in [m.fc,m.bn3,m.bn3]:
            for p in head.parameters():
                p.requires_grad = True
            head.train()
        trainer.lr = 0.01
        trainer.batch_size = 32
        trainer.epochs =100
        trainer.epoch_chkpts = []
        trainer.start_epoch = 0
        # trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
        trainer.tags = ['finetune',f'Training: {t}',f'Adpater Location {layer}']
        trainer.train(train59_loader,test59_loader)

adp_only layer1 80
Scheduler Set {'T_max': 10, 'eta_min': 0, 'base_lrs': [0.1], 'last_epoch': 0, '_step_count': 1, 'verbose': False, '_get_lr_called_within_step': False, '_last_lr': [0.1]}


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: Cifar5, Run Name adp_layer1_adp_only 


Run Start : 2022-10-21 22-03-33
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4343135
Initial accuracy:
Test Accuracy : 8.8%, Test Loss: 4.613198432776019
best_acc.pth saved!
Test Accuracy : 8.4%, Test Loss: 4.8334173107391125
epoch: 1 average loss: 1.031
Test Accuracy : 72.9%, Test Loss: 0.7262849130136583
best_acc.pth saved!
Epoch Time (Training + Test) = 21.24 seconds
epoch: 2 average loss: 0.720
Test Accuracy : 76.9%, Test Loss: 0.6304557144336993
best_acc.pth saved!
Epoch Time (Training + Test) = 21.17 seconds
epoch: 3 average loss: 0.676


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▆▅▆▄▅▅▅▆▆▆▆▆▅▆▅▆▆▆▆▆▅▆▆▆▆▇▅▆▅▅▅▆▆▆▆▆▇▆█
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▇██████████████████████████████████████
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.84712
Total Time (hours),1.17049
epoch,100.0
epoch time (s),22.58553
lr,0.1
test_accuracy,0.83644
test_loss,0.45782
training_loss,0.49542


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333435779, max=1.0)…

start
Project Name: Cifar5, Run Name adp_layer1_full_ft 


Run Start : 2022-10-21 22-39-02
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4343135
Initial accuracy:
Test Accuracy : 8.8%, Test Loss: 4.613198432776019
best_acc.pth saved!
Test Accuracy : 8.4%, Test Loss: 4.8334173107391125
epoch: 1 average loss: 0.961
Test Accuracy : 73.8%, Test Loss: 0.7023793851475582
best_acc.pth saved!
Epoch Time (Training + Test) = 30.91 seconds
epoch: 2 average loss: 0.694
Test Accuracy : 77.2%, Test Loss: 0.6077157112643542
best_acc.pth saved!
Epoch Time (Training + Test) = 30.88 seconds
epoch: 3 average loss: 0.642
T

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁██▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▇▆▆▆
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▇██████████████████████████████████████
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.86628
Total Time (hours),1.57341
epoch,100.0
epoch time (s),27.52569
lr,0.1
test_accuracy,0.85088
test_loss,0.40504
training_loss,0.44411


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666899498, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer2_adp_only 


Run Start : 2022-10-21 23-26-45
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4362655
Initial accuracy:
Test Accuracy : 7.1%, Test Loss: 4.672063447630314
best_acc.pth saved!
Test Accuracy : 6.3%, Test Loss: 4.930686494578486
epoch: 1 average loss: 0.767
Test Accuracy : 78.2%, Test Loss: 0.5941561580923818
best_acc.pth saved!
Epoch Time (Training + Test) = 16.50 seconds
epoch: 2 average loss: 0.594
Test Accuracy : 80.0%, Test Loss: 0.545416933496285
best_acc.pth saved!
Epoch Time (Training + Test) = 16.56 seconds
epoch: 3 average loss: 0.555
Te

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▆▆▆▅▅▅▅▆▆▆▆▇▅▅▆▆▆▆▇▆▆▅▇█▆▇▆▆▅▆▇█▆▅▆▅▅▇▆
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▇██████████████████████████████████████
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.87696
Total Time (hours),0.92809
epoch,100.0
epoch time (s),16.66766
lr,0.1
test_accuracy,0.87044
test_loss,0.35957
training_loss,0.40972


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer2_full_ft 


Run Start : 2022-10-21 23-55-03
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4362655
Initial accuracy:
Test Accuracy : 7.1%, Test Loss: 4.672063447630314
best_acc.pth saved!
Test Accuracy : 6.3%, Test Loss: 4.930686494578486
epoch: 1 average loss: 0.734
Test Accuracy : 80.0%, Test Loss: 0.5462298654686765
best_acc.pth saved!
Epoch Time (Training + Test) = 27.22 seconds
epoch: 2 average loss: 0.556
Test Accuracy : 80.0%, Test Loss: 0.5506100953768587
Epoch Time (Training + Test) = 27.24 seconds
epoch: 3 average loss: 0.517
Test Accuracy : 82.7%,

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▇▇▇▇▇▇▇▇███▇▇▇██▇▇█▇▇▇▇█▇▇▇▇▇▇▇███▇█▇█▇
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▇██████████████████████████████████████
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.89184
Total Time (hours),1.51186
epoch,100.0
epoch time (s),26.91144
lr,0.1
test_accuracy,0.88444
test_loss,0.32098
training_loss,0.37227


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer3_adp_only 


Run Start : 2022-10-22 00-40-53
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4440095
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.5574629739727204
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.562192482716592
epoch: 1 average loss: 1.608
Test Accuracy : 37.7%, Test Loss: 1.4049944045293667
best_acc.pth saved!
Epoch Time (Training + Test) = 14.85 seconds
epoch: 2 average loss: 1.413
Test Accuracy : 39.0%, Test Loss: 1.3817411118456164
best_acc.pth saved!
Epoch Time (Training + Test) = 14.84 seconds
epoch: 3 average loss: 1.39

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▃▅▅▃▆▅▂▃▂▂▃▂▄▄▇▆█▇▆█▆█▆▅▅▅▅▅▅▅▅▅▆▅▅▅▅▅▅
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▄▃▄▆▅▃▅▆▇▃▇▅▆▅▇▇▇▇▇▄▅▇▇▇▇▇▆▇██▆█▇▇▇▆███
test_loss,█▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.6544
Total Time (hours),0.8436
epoch,100.0
epoch time (s),15.25286
lr,0.1
test_accuracy,0.6544
test_loss,0.94395
training_loss,0.99839


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333332417533, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer3_full_ft 


Run Start : 2022-10-22 01-06-40
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4440095
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.5574629739727204
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.562192482716592
epoch: 1 average loss: 1.513
Test Accuracy : 42.5%, Test Loss: 1.2526966662663024
best_acc.pth saved!
Epoch Time (Training + Test) = 26.63 seconds
epoch: 2 average loss: 1.219
Test Accuracy : 51.4%, Test Loss: 1.1087592250245917
best_acc.pth saved!
Epoch Time (Training + Test) = 26.72 seconds
epoch: 3 average loss: 1.090

In [13]:
b = resnet18_adapt(num_classes=5)

b.eval()
x = torch.rand((1,3,32,32))

b(x)

tensor([[ -6.1048, -10.0902, -42.1078,  -0.1369,  -2.0739]],
       grad_fn=<LogSoftmaxBackward0>)

In [18]:
b.layer1

Sequential(
  (0): BasicBlock(
    (conv1): BinarizeConv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (tanh1): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (conv2): BinarizeConv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (tanh2): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (bn2): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): BinarizeConv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (tanh1): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (conv2): BinarizeConv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (tanh2): Hardtanh(min_val=-1.0, max_val=1.0, inplace=True)
    (bn2): BatchNorm2d(80

# Adapters with init at zero

## Adp Only Training

In [7]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = '.\SavedModels\Cifar5\Baseline_BNN_Resnet18_2022_10_21_10_47_20\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)


layers = ['layer1','layer2','layer3']
channels = [80,160,320]


for layer,channel in zip(layers,channels):
    for t in ['adp_only']:
        print(t,layer,channel)
        torch.manual_seed(42)
        adapter_model = resnet18_adapt(num_classes=5)
        adapter_model.load_state_dict(BinCifar5_state_dict)
        
        if t == 'adp_only':
            adapter_model.freeze()
        conv_adp = conv_adapter(channel,kernel = 1, padding= 0)
        conv_adp.init_weight_zeros()

        adapter_model.add_adapter(after = layer,adapter = conv_adp)
        trainer = adapter_Trainer(model = adapter_model,seed = 123,model_name = f'adp_{layer}_{t}',project_name = 'Cifar5',classes = classes,binarise= True)
        m = trainer.model
        m.to(trainer.device)
        for head in [m.fc,m.bn3,m.bn3]:
            for p in head.parameters():
                p.requires_grad = True
            head.train()
        trainer.lr = 0.01
        trainer.batch_size = 32
        trainer.epochs =100
        trainer.epoch_chkpts = []
        trainer.start_epoch = 0
        # trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
        trainer.tags = ['finetune',f'Training: {t}',f'Adpater Location {layer}']
        trainer.train(train59_loader,test59_loader)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


adp_only layer1 80
Scheduler Set {'T_max': 10, 'eta_min': 0, 'base_lrs': [0.1], 'last_epoch': 0, '_step_count': 1, 'verbose': False, '_get_lr_called_within_step': False, '_last_lr': [0.1]}


[34m[1mwandb[0m: Currently logged in as: [33mjohnny_suu[0m. Use [1m`wandb login --relogin`[0m to force relogin


start
Project Name: Cifar5, Run Name adp_layer1_adp_only 


Run Start : 2022-10-22 11-00-36
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4343135
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 3.4470722077752622
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 3.447869495967465
epoch: 1 average loss: 1.378
Test Accuracy : 39.9%, Test Loss: 1.3051697027957654
best_acc.pth saved!
Epoch Time (Training + Test) = 21.47 seconds
epoch: 2 average loss: 1.309
Test Accuracy : 39.3%, Test Loss: 1.3000686711362561
Epoch Time (Training + Test) = 31.06 seconds
epoch: 3 average loss: 1.268
Test Accuracy : 44

0,1
Current Best Acc,▁▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▁▅▇▇▇▇▇▇▆█▇▇▇▆▇▇▇▇▆▇▇▇▇▇█▇█▇▇▇▇█▇▇▇▇█▇▇▇
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇███████▇▇█▇██▇██▇▇████▇█
test_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.56948
Current Best Acc,0.56948
Total Time (hours),2.01464
epoch,100.0
epoch time (s),36.56103
lr,0.1
test_accuracy,0.56932
test_loss,1.02788
training_loss,1.06739


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666672124, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer2_adp_only 


Run Start : 2022-10-22 12-01-28
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4362655
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 3.993683311335571
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 3.9929843113550443
epoch: 1 average loss: 1.288
Test Accuracy : 43.4%, Test Loss: 1.1975495796984115
best_acc.pth saved!
Epoch Time (Training + Test) = 30.25 seconds
epoch: 2 average loss: 1.188
Test Accuracy : 46.7%, Test Loss: 1.1470134069242746
best_acc.pth saved!
Epoch Time (Training + Test) = 28.75 seconds
epoch: 3 average loss: 1.16

0,1
Current Best Acc,▁▅▆▆▇▇▇▇▇▇██████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch time (s),▇▆▇▇▆▇▇▆▆▁▆▆▆▆▆▆▆▆▆▆▆▆▅▆▇▇▆▆▆▆▆▇█▇▆▇▇██▆
lr,█▇▅▂▁▂▅▇█▇▅▂▁▂▅▇█▇▃▂▁▂▆▇█▆▃▁▁▃▆█▇▆▂▁▂▃▇█
test_accuracy,▁▅▆▆▆▆▇▆▇▇█████▇█▇█████████████▇▇▇████▇▇
test_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Best Accuracy,0.65848
Current Best Acc,0.65848
Total Time (hours),1.583
epoch,100.0
epoch time (s),28.80798
lr,0.1
test_accuracy,0.61504
test_loss,0.9459
training_loss,0.93563


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

start
Project Name: Cifar5, Run Name adp_layer3_adp_only 


Run Start : 2022-10-22 12-49-39
start_epoch : 0
initial_lr : 0.01
batch_size : 32
epochs : 100
epoch_chkpts : []
device : cuda:0
criterion : CrossEntropyLoss()
optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.1
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
seed : 123
model_architecture : <class 'NN_Thesis.nn_classes.resnet18_adapt'>
binerised_training : True
Number of Elements : 4440095
Initial accuracy:
Test Accuracy : 20.0%, Test Loss: 2.556679776867332
best_acc.pth saved!
Test Accuracy : 20.0%, Test Loss: 2.5566383127666192
epoch: 1 average loss: 1.539
Test Accuracy : 29.9%, Test Loss: 1.4480573743810434
best_acc.pth saved!
Epoch Time (Training + Test) = 28.51 seconds
epoch: 2 average loss: 1.424
Test Accuracy : 35.6%, Test Loss: 1.4382181316995255
best_acc.pth saved!
Epoch Time (Training + Test) = 28.92 seconds
epoch: 3 average loss: 1.42

## Adp = FullFT

In [None]:

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
PATH = '.\SavedModels\Cifar5\Baseline_BNN_Resnet18_2022_10_21_10_47_20\Baseline_BNN_Resnet18_best_acc.pth'
BinCifar5_state_dict = torch.load(PATH,map_location= device)


layers = ['layer1','layer2','layer3']
channels = [80,160,320]


for layer,channel in zip(layers,channels):
    for t in ['full_ft']:
        print(t,layer,channel)
        torch.manual_seed(42)
        adapter_model = resnet18_adapt(num_classes=5)
        adapter_model.load_state_dict(BinCifar5_state_dict)
        
        if t == 'adp_only':
            adapter_model.freeze()
        conv_adp = conv_adapter(channel,kernel = 1, padding= 0)
        conv_adp.init_weight_zeros()
        adapter_model.add_adapter(after = layer,adapter = conv_adp)
        trainer = adapter_Trainer(model = adapter_model,seed = 123,model_name = f'adp_{layer}_{t}',project_name = 'Cifar5',classes = classes,binarise= True)
        m = trainer.model
        m.to(trainer.device)
        for head in [m.fc,m.bn3,m.bn3]:
            for p in head.parameters():
                p.requires_grad = True
            head.train()
        trainer.lr = 0.01
        trainer.batch_size = 32
        trainer.epochs =100
        trainer.epoch_chkpts = []
        trainer.start_epoch = 0
        # trainer.scheduler = optim.lr_scheduler.CosineAnnealingLR(trainer.optimizer, T_max= trainer.epochs)
        trainer.tags = ['finetune',f'Training: {t}',f'Adpater Location {layer}','Init_weight: {} ']
        trainer.train(train59_loader,test59_loader)