### Model preparation

In [43]:
from resnet20 import ResNetCIFAR
from train_util import train, finetune, test
import torch
import numpy as np

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [44]:
# Build the 20-layer network with every Nbits argument left as None and place
# it on the selected device in one step.
net = ResNetCIFAR(
    num_layers=20,
    Nbits1=None,
    Nbits2=None,
    Nbits3=None,
).to(device)

In [45]:
# Load the best weight parameters.
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU run still loads when only the CPU is available.
net.load_state_dict(torch.load("pretrained_model.pt", map_location=device))
# Evaluate the restored model (prints test loss and accuracy).
test(net)

Files already downloaded and verified
Test Loss=0.3231, Test accuracy=0.9151


### (b) and (c): Fixed-point quantization

In [None]:
# Define quantized model and load weight
Nbits = 2 #Change this value to finish (b) and (c)

# The same bit width is passed to all three Nbits arguments.
net = ResNetCIFAR(num_layers=20, Nbits1=Nbits, Nbits2=Nbits, Nbits3=Nbits)
net = net.to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU run still loads when only the CPU is available.
net.load_state_dict(torch.load("pretrained_model.pt", map_location=device))
# Evaluate before any finetuning, as the baseline for this bit width.
test(net)

In [None]:
# Quantized model finetuning
finetune(net, epochs=20, batch_size=128, lr=0.002, reg=1e-4)   # Change epochs to 20 for NBits = 2

# Load the model with best accuracy
# NOTE(review): "net_after_finetune.pt" is presumably written by finetune();
# confirm against train_util.
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU run still loads when only the CPU is available.
net.load_state_dict(torch.load("net_after_finetune.pt", map_location=device))
test(net)

### (d) and (e): Mixed-precision quantization

In [76]:
# Wildcard import kept as-is; FP_Linear and FP_Conv used below presumably come
# from here (no other import in view provides them).
from FP_layers import *

Nbits1, Nbits2, Nbits3 = 2,4,4

# Define quantized model and load weight
net = ResNetCIFAR(num_layers=20, Nbits1=Nbits1, Nbits2=Nbits2, Nbits3=Nbits3)  #Change the value of Nbits1, Nbits2, Nbits3 to finish (d) and (e)

net = net.to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU run still loads when only the CPU is available.
net.load_state_dict(torch.load("net_after_finetune.pt", map_location=device))
test(net)

# Nonzero-weight count of every FP_Linear / FP_Conv layer, in the order
# net.named_modules() yields them.
num_nz = []

# Get weights from modules
for n, m in net.named_modules():
    if isinstance(m, FP_Linear):
        layer = m.linear
    elif isinstance(m, FP_Conv):
        layer = m.conv
    else:
        continue
    weight = layer.weight.data.cpu().numpy()
    num_nz.append((weight != 0).sum())

# The index arithmetic below is written for exactly 22 layers
# (1 first + 6 + 7 + 7 + 1 last); fail loudly instead of silently mis-slicing.
assert len(num_nz) == 22, \
    "expected 22 FP_Conv/FP_Linear layers, found %d" % len(num_nz)

# Calculate average bits
# The first and the last layer are counted at 32 bits per weight.
FULL_PRECISION_BITS = 32
first_and_last = (num_nz[0] + num_nz[-1]) * FULL_PRECISION_BITS

# Layers 1-6 are weighted by Nbits1, 7-13 by Nbits2 and 14-20 by Nbits3
# (6, 7 and 7 layers respectively).
# NOTE(review): assumes this grouping matches how ResNetCIFAR applies
# Nbits1/Nbits2/Nbits3 to its layers - confirm against resnet20.
first_block = np.array(num_nz[1:7]).sum() * Nbits1
second_block = np.array(num_nz[7:14]).sum() * Nbits2
third_block = np.array(num_nz[14:21]).sum() * Nbits3

# Get total
total = np.array(num_nz).sum()

# Bits per nonzero weight, averaged over all counted layers.
average_bits = (first_and_last + first_block + second_block + third_block)/total
average_bits

Files already downloaded and verified
Test Loss=6.8381, Test accuracy=0.2044


4.008741361998701

In [None]:
# Quantized model finetuning
finetune(net, epochs=10, batch_size=128, lr=0.002, reg=1e-4)

# Load the model with best accuracy
# NOTE(review): "net_after_finetune.pt" is presumably written by finetune();
# confirm against train_util.
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU run still loads when only the CPU is available.
net.load_state_dict(torch.load("net_after_finetune.pt", map_location=device))
test(net)