# Extra bonus experiment
zhao xinyi
2022.5.2

In [1]:
import os
from slbi_toolbox_adam import SLBI_ToolBox
from utils import *
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import numpy as np
import random
from lenet5 import LeNet5
from get_small_model import *
import matplotlib.pyplot as plt

from plot import CNNLayerVisualization


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Data / schedule settings -------------------------------------------
batchsize = 128   # mini-batch size shared by every DataLoader in this notebook
max_epoch = 20    # number of passes over the training set
interval = 10     # forwarded to descent_lr together with lr and the epoch index

# --- Optimizer hyper-parameters -----------------------------------------
lr, kappa, mu = 3e-4, 1, 20
weight_decay = 0
# betas / eps are only referenced by a commented-out optimizer construction
# in the visible cells.
betas, eps = (0.9, 0.999), 1e-8

# Enable cuDNN benchmark mode here; note that set_seeds() switches it off again.
torch.backends.cudnn.benchmark = True


In [3]:
def set_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the default generator and all CUDA devices), and switches cuDNN to
    deterministic mode with benchmark mode turned off.

    Args:
        seed: Integer handed unchanged to each generator.
    """
    # Trade cuDNN auto-tuning for run-to-run repeatability.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

    # The individual seeding calls are independent of each other.
    for seed_fn in (random.seed,
                    np.random.seed,
                    torch.manual_seed,
                    torch.cuda.manual_seed_all):
        seed_fn(seed)


set_seeds(1234)

In [4]:
# Use the first GPU when one is available, otherwise fall back to the CPU so
# the notebook can still be run on a machine without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Every split only needs the PIL image -> float tensor conversion.
to_tensor = transforms.ToTensor()

# Training split (train=True), reshuffled every epoch.
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=to_tensor)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=0)

# NOTE(review): validset and testset are both built from the MNIST test split
# (train=False), so the "validation" numbers are computed on the test data.
# Carve a hold-out set from the training split if validation is ever used for
# model selection.
# NOTE(review): shuffling is not needed for evaluation; it is kept unchanged
# here so previously recorded results are not perturbed.
validset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=to_tensor)
validloader = torch.utils.data.DataLoader(validset, batch_size=batchsize, shuffle=True, num_workers=0)

# Test split, iterated in a fixed order.
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=to_tensor)
testloader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=0)


In [5]:
def evaluate(model, device, valid_dataloader):
    """Report the mean NLL loss and the accuracy of ``model`` on a data loader.

    The model is switched to eval mode (and left in it) and gradients are
    disabled while iterating. Losses are summed per sample and divided by
    ``len(valid_dataloader.dataset)``.

    Args:
        model: Callable module mapping a batch to per-class scores; since
            ``F.nll_loss`` is applied directly, it presumably returns
            log-probabilities.
        device: Device each batch is moved to before the forward pass.
        valid_dataloader: Iterable of ``(data, target)`` batches exposing a
            ``dataset`` attribute with a length.

    Returns:
        Tuple ``(total_loss, acc)``: per-sample mean loss and the fraction of
        correctly classified samples.
    """
    model.eval()
    n_samples = len(valid_dataloader.dataset)
    loss_sum = 0.
    n_correct = 0.

    with torch.no_grad():
        for inputs, labels in valid_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            scores = model(inputs)  # one row of class scores per sample
            # Sum (not mean) so the final division by n_samples is exact even
            # when the last batch is smaller.
            loss_sum += F.nll_loss(scores, labels, reduction="sum").item()
            predicted = scores.argmax(dim=1)
            n_correct += (predicted == labels.view_as(predicted)).sum().item()

    total_loss = loss_sum / n_samples
    acc = n_correct / n_samples
    print("valid loss:{}, Accuracy:{}".format(total_loss, acc))
    return total_loss, acc

In [6]:
# training loop
# Start from the weights stored under the 'model' key of 'lenet.pth'.
# map_location lets the checkpoint load on whatever `device` resolves to.
model = LeNet5().to(device)
load_pth = torch.load('lenet.pth', map_location=device)
model.load_state_dict(load_pth['model'])

# name_list: every parameter name; layer_list: names of the 4-D and 2-D
# parameters only (the conv*/fc* weights in the printed list).
name_list = []
layer_list = []
for name, p in model.named_parameters():
    name_list.append(name)
    print(name)
    if p.dim() in (2, 4):
        layer_list.append(name)

# descent_lr() is called with `lr` before every epoch and the recorded output
# prints "learning rate: 0.0003", so the lr=1 passed here is presumably
# overridden -- confirm in utils.descent_lr.
optimizer = SLBI_ToolBox(model.parameters(), lr=1, kappa=kappa, mu=mu, weight_decay=weight_decay)
# Alternative optimizers tried by the author:
# optimizer = SLBI_ToolBox(model.parameters(), lr=lr, kappa=kappa, mu=mu, betas=betas, eps=eps, weight_decay=weight_decay)
# optimizer =  torch.optim.SGD(model.parameters(), lr = 0.04, momentum=0.5,weight_decay=weight_decay)
# NOTE(review): assign_name / initialize_slbi are called in the pruning cell
# but are disabled here; confirm update_prune_order() is meant to run without
# them.
# optimizer.assign_name(name_list)
# optimizer.initialize_slbi(layer_list)

all_num = max_epoch * len(trainloader)

# Per-epoch history consumed by the plotting cell.
total_loss = []
acc = []
valid_loss = []
valid_acc = []

log_every = 1000  # steps between progress printouts within an epoch

print('num of all step:', all_num)
# print('num of step per epoch:', len(trainloader))
for ep in range(max_epoch):
    model.train()
    descent_lr(lr, ep, optimizer, interval)

    # Running window for the periodic log; reset after each printout.
    loss_val = 0.
    correct = num = 0
    # Whole-epoch statistics, kept separate so a log reset cannot corrupt
    # them (or leave a zero denominator at the end of the epoch).
    epoch_loss = 0.
    epoch_correct = epoch_num = 0

    # `step` instead of `iter`, which would shadow the builtin for the rest
    # of the notebook session.
    for step, pack in enumerate(trainloader):
        data, target = pack[0].to(device), pack[1].to(device)
        logits = model(data)
        loss = F.nll_loss(logits, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, pred = logits.max(1)
        batch_n = data.shape[0]
        batch_loss = loss.item()
        batch_correct = pred.eq(target).sum().item()

        loss_val += batch_loss
        correct += batch_correct
        num += batch_n

        # loss is a batch mean, so weight it by the batch size to get a
        # per-sample mean that is comparable with evaluate()'s valid loss.
        epoch_loss += batch_loss * batch_n
        epoch_correct += batch_correct
        epoch_num += batch_n

        if (step + 1) % log_every == 0:
            # print('*******************************')
            print('epoch : ', ep + 1)
            print('iteration : ', step + 1, 'loss : ', loss_val)
            print('Train accuracy : ', correct / num)
            loss_val = 0
            correct = num = 0

    total_loss.append(epoch_loss / epoch_num)
    acc.append(epoch_correct / epoch_num)

    total_loss_0, acc_0 = evaluate(model, device, validloader)
    valid_loss.append(total_loss_0)
    valid_acc.append(acc_0)

    optimizer.update_prune_order(ep)

evaluate_batch(model, testloader, device)

# NOTE(review): the checkpoint is named 'lenet_sgd.pth' although the active
# optimizer is SLBI_ToolBox, and the pruning cell loads 'lenet_dessilbi.pth'
# -- confirm the intended file name.
save_model_and_optimizer(model, optimizer, 'lenet_sgd.pth')



conv1.weight
conv1.bias
conv2.weight
conv2.bias
conv3.weight
conv3.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
*******************************************
lr  :  1
kappa  :  1
mu  :  20
betas  :  (0.9, 0.999)
eps  :  1e-08
weight_decay  :  0
dampening  :  0
*******************************************
num of all step: 9380
***********************************
learning rate: 0.0003
***********************************


	addcdiv_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcdiv_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1055.)
  p.data.addcdiv_(-step_size, exp_avg, denom)


valid loss:0.08741218917965889, Accuracy:0.9735
***********************************
learning rate: 0.0003
***********************************
valid loss:0.06728858625888824, Accuracy:0.9792
***********************************
learning rate: 0.0003
***********************************
valid loss:0.06017968087270856, Accuracy:0.9798
***********************************
learning rate: 0.0003
***********************************
valid loss:0.05451583837270737, Accuracy:0.9819
***********************************
learning rate: 0.0003
***********************************
valid loss:0.0587643620878458, Accuracy:0.9808
***********************************
learning rate: 0.0003
***********************************
valid loss:0.042018158844858405, Accuracy:0.987
***********************************
learning rate: 0.0003
***********************************
valid loss:0.04255236992910504, Accuracy:0.987
***********************************
learning rate: 0.0003
***********************************
valid lo

In [None]:
# plot accuracy curves
evaluate_batch(model, testloader, device)

# Use the number of recorded epochs rather than max_epoch so the plot still
# works when training was interrupted before the last epoch.
epochs = range(len(acc))

# Plot the results: train vs. validation accuracy on a single set of axes.
# (The per-epoch losses are available in total_loss / valid_loss.)
fig, ax = plt.subplots()
# ax.set_ylim(0.4, 1)
ax.plot(epochs, acc, '.-', label='train')
ax.plot(epochs, valid_acc, '.-', label='valid')

ax.set_title('accuracy vs. epochs')
ax.set_xlabel('epochs')
ax.set_ylabel('accuracy')
ax.legend(loc='lower right')

fig.savefig("dessibli+adam_accuracy.jpg")

In [None]:
from plot import CNNLayerVisualization

# For each of the first 20 filter positions: fetch a model via get_lenet(),
# print diagnostics about its conv3 weights, then run the hook-based layer
# visualisation for that filter.
for filter_pos in range(20):
    cnn_layer = "c5"  # marked "useless" by the author; still passed to the visualiser
    pretrained_model = get_lenet()

    print(type(pretrained_model))
    print(pretrained_model)

    conv3_weight = pretrained_model.conv3.weight.data
    print(conv3_weight.shape)

    # One row per conv3 output filter (120 rows); the value printed as
    # "filter norm" is the sum of squared weights of that row.
    per_filter = conv3_weight.reshape(120, -1)
    sq_norms = (per_filter * per_filter).sum(dim=1)
    print(sq_norms)
    for i, sq_norm in enumerate(sq_norms):
        print("filter index", i, "\t", "filter norm{:.4f}".format(sq_norm))
    print(filter_pos)

    # Layer visualization with pytorch hooks
    layer_vis = CNNLayerVisualization(pretrained_model, cnn_layer, filter_pos)
    layer_vis.visualise_layer_with_hooks()

In [12]:
# test prune one layer

torch.backends.cudnn.benchmark = True
# load_pth = torch.load('lenet_sgd.pth')
# map_location lets the checkpoint load on whatever `device` resolves to.
load_pth = torch.load('lenet_dessilbi.pth', map_location=device)
torch.cuda.empty_cache()
model = LeNet5().to(device)
model.load_state_dict(load_pth['model'])

# name_list: every parameter name; layer_list: names of the 4-D and 2-D
# parameters only.
name_list = []
layer_list = []
for name, p in model.named_parameters():
    name_list.append(name)
    if p.dim() in (2, 4):
        layer_list.append(name)

# Rebuild the optimizer and restore its saved state so the pruning helpers
# below have the state stored in the checkpoint.
optimizer = SLBI_ToolBox(model.parameters(), lr=1e-2, kappa=1, mu=20, weight_decay=0)
optimizer.load_state_dict(load_pth['optimizer'])
optimizer.assign_name(name_list)
optimizer.initialize_slbi(layer_list)

# First argument of the prune_layer_by_order_* calls; presumably the
# percentage of each layer to prune -- TODO confirm against SLBI_ToolBox.
prune_amount = 60

print('prune conv3')
print('acc before pruning')
evaluate_batch(model, testloader, device)
print('acc after pruning')
optimizer.prune_layer_by_order_by_name(prune_amount, 'conv3.weight', True)
evaluate_batch(model, testloader, device)

# Save the model as it is after pruning conv3 only (bare state_dict, unlike
# the {'model': ...} checkpoints loaded above).
torch.save(model.state_dict(),'lenet_prune.pth')

print('acc after pruning conv3+fc1')
optimizer.prune_layer_by_order_by_list(prune_amount, ['conv3.weight', 'fc1.weight'], True)
evaluate_batch(model, testloader, device)


*******************************************
lr  :  0.01
kappa  :  1
mu  :  20
betas  :  (0.9, 0.999)
eps  :  1e-08
weight_decay  :  0
dampening  :  0
*******************************************
prune conv3
acc before pruning
Correct :  9706
Num :  10000
Test ACC :  0.9706
Top 5 ACC :  0.9998
acc after pruning
conv3.weight
conv3.bias
Correct :  8925
Num :  10000
Test ACC :  0.8925
Top 5 ACC :  0.9983
acc after pruning conv3+fc1
conv3.weight
conv3.bias
fc1.weight
Correct :  8837
Num :  10000
Test ACC :  0.8837
Top 5 ACC :  0.998


0.8837