In [5]:
from imports import *
from utils import *
from model import *

# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Data loading

# Preprocessing applied to every image: convert to a tensor, then normalize
# with the constants (0.1307,) and (0.3081,) -- presumably the MNIST
# mean / std; confirm against the training recipe.
to_tensor_step = transforms.ToTensor()
normalize_step = transforms.Normalize((0.1307,), (0.3081,))
transform = transforms.Compose([to_tensor_step, normalize_step])

dataset = 'MNIST'
data_path = 'raw/'

# Both splits share the same root directory, download flag and preprocessing;
# only the `train` flag differs.
mnist_common_kwargs = dict(root=data_path, download=True, transform=transform)
train_dataset = torchvision.datasets.MNIST(train=True, **mnist_common_kwargs)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_common_kwargs)

# Same batch size for both loaders; only the training loader shuffles.
loader_batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=loader_batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=loader_batch_size, shuffle=False)

In [6]:
# Model Definition

num_classes = 10

# Build the network for `num_classes` outputs, then place it on the selected device.
model = NET_4k_MNIST(num_classes)
model = model.to(device)

# Report how many weights the project helper counts for this network.
total_weights = count_weights(model)
print(f"The network has {total_weights} weights that need to be deployed.")

# Quantization pipeline -- the order matters:
#   1. attach the default QAT qconfig for the 'fbgemm' backend,
#   2. prepare the model for QAT in place,
#   3. convert the prepared model to its quantized form in place.
# No training or calibration happens between steps 2 and 3 in this cell.
qat_qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model.qconfig = qat_qconfig
qat_model = torch.quantization.prepare_qat(model, inplace=True)
model = torch.quantization.convert(qat_model, inplace=True)

The network has 499 weights that need to be deployed.


In [7]:
# Load Pretrained Model

# Path of the saved checkpoint. This is a plain string literal: it has no
# placeholders, so the f-string prefix was unnecessary.
model_filename = "checkpoint/MNIST-499-96.24_model.t7"

# Read the checkpoint, remapping its tensors onto the selected device.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source.
checkpoint = torch.load(model_filename, map_location=device)

# The checkpoint is indexed like a dict; its 'model' entry holds the state
# dict, which is loaded into the already-converted (quantized) model.
model.load_state_dict(checkpoint['model'])

print(model) # The model has been quantized to int8

NET_4k_MNIST(
  (quant): Quantize(scale=tensor([0.0256]), zero_point=tensor([17]), dtype=torch.quint8)
  (dequant): DeQuantize()
  (conv1_depthwise): QuantizedConv2d(1, 1, kernel_size=(3, 3), stride=(1, 1), scale=0.49285900592803955, zero_point=88, padding=(1, 1), dilation=(2, 2))
  (conv1_pointwise): QuantizedConv2d(1, 8, kernel_size=(1, 1), stride=(1, 1), scale=4.715211868286133, zero_point=61)
  (bn1): QuantizedBatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation1): QuantizedHardswish()
  (conv2_depthwise): QuantizedConv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), scale=1.147164225578308, zero_point=90, padding=(1, 1), dilation=(2, 2), groups=8)
  (conv2_pointwise): QuantizedConv2d(8, 10, kernel_size=(1, 1), stride=(1, 1), scale=8.301703453063965, zero_point=80)
  (bn2): QuantizedBatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation2): QuantizedHardswish()
  (conv3_depthwise): QuantizedConv2d(10, 10, kern

In [8]:
# Put the model in evaluation mode and measure its accuracy on the test loader.
# test_model is a project helper; its return value is formatted below as a percentage.
model.eval()
test_accuracy = test_model(model, test_loader)

# print_quantized_weights is a project helper: it reports the int8 weights of
# the quantized layers, and its return value is kept as a list for later use.
quantized_weights_list = print_quantized_weights(model)

accuracy_message = f"The test accuracy after quantization is: {test_accuracy:.2f}%"
print(accuracy_message)

Extracting weights from layer: conv1_depthwise
Weight Shape: (9,), Int8 Weights:
[   4   26    4  -49    8   73  -45 -126 -123]

Extracting weights from layer: conv1_pointwise
Weight Shape: (8,), Int8 Weights:
[ 127 -126  127 -128  127 -128 -128  127]

Extracting weights from layer: conv2_depthwise
Weight Shape: (72,), Int8 Weights:
[  -3   36  -24   36   51 -100  127   64  -64   43  -47   23  -21    8
 -128   21   -1   -9  -77  -42  -20  -18   34 -100  -35 -128  -18  -83
 -128  -26  -23   41   51    9    5  -62   59   52   78   82  127   95
    3  -42  -21   13    7   -3   26    7  -35   20   -8 -128   23   48
   49  -92    9   23  -48 -128 -100   25   23  127   47   52  124   17
   21   -2]

Extracting weights from layer: conv2_pointwise
Weight Shape: (80,), Int8 Weights:
[  -6   57  -10   18   84  -48   16  127   13  -78   72   21   39    5
   -3  127   18    8   22   -2   18   30    0 -127  -67  -21   23   35
  -67 -127   17  -86   -1    6  -70  127   55  -38  -22    8   37  -34
  