In [6]:
from imports import *
from utils import *
from model import *

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Dataset label and the directory the raw files are stored in.
dataset = 'MNIST'
data_path = 'raw/'

# Preprocessing: convert each image to a tensor, then normalise it with
# mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# Training and test splits share the same root directory and preprocessing.
train_dataset = torchvision.datasets.MNIST(
    root=data_path,
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.MNIST(
    root=data_path,
    train=False,
    download=True,
    transform=transform,
)

# Batches of 32 samples; only the training loader shuffles.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)

In [8]:
# Model Definition

# Define the number of classes for the classification task
num_classes = 10

# Quantized operators for the 'fbgemm' backend are CPU-only, so the model is
# kept on the CPU even when `device` points at a GPU. Converting a
# CUDA-resident model is expected to fail while packing the quantized weights.
model = NET_1k_MNIST(num_classes).to(torch.device("cpu"))

total_weights = count_weights(model)
print(f"The network has {total_weights} weights that need to be deployed.")

# Build the quantized module structure: attach the default QAT qconfig for
# fbgemm, insert the QAT modules, then convert them to quantized modules.
# No training or calibration happens in this cell.
# NOTE(review): the real weights and quantization parameters presumably come
# from the checkpoint loaded afterwards - confirm.
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model = torch.quantization.prepare_qat(model, inplace=True)
model = torch.quantization.convert(model, inplace=True)

The network has 123 weights that need to be deployed.


In [9]:
# Load Pretrained Model

# Specify the filename of the saved model (plain string: there is nothing to interpolate)
model_filename = "checkpoint/MNIST-123-86.67_model.t7"

# Load the checkpoint onto the CPU. The model is quantized for the fbgemm
# backend, whose operators are CPU-only, so the stored tensors must not be
# mapped onto a GPU even when one is available.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source.
checkpoint = torch.load(model_filename, map_location=torch.device("cpu"))

# The state dict is stored under the 'model' key of the checkpoint.
model.load_state_dict(checkpoint['model'])

print(model) # The model has been quantized to int8

NET_1k_MNIST(
  (quant): Quantize(scale=tensor([0.0256]), zero_point=tensor([17]), dtype=torch.quint8)
  (dequant): DeQuantize()
  (conv1_depthwise): QuantizedConv2d(1, 1, kernel_size=(3, 3), stride=(1, 1), scale=0.30794036388397217, zero_point=95, padding=(1, 1), dilation=(2, 2))
  (conv1_pointwise): QuantizedConv2d(1, 2, kernel_size=(1, 1), stride=(1, 1), scale=2.5187861919403076, zero_point=79)
  (bn1): QuantizedBatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation1): QuantizedHardswish()
  (conv2_depthwise): QuantizedConv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), scale=1.9942729473114014, zero_point=84, padding=(1, 1), dilation=(2, 2), groups=2)
  (conv2_pointwise): QuantizedConv2d(2, 4, kernel_size=(1, 1), stride=(1, 1), scale=8.642030715942383, zero_point=55)
  (bn2): QuantizedBatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation2): QuantizedHardswish()
  (conv3_depthwise): QuantizedConv2d(4, 4, kernel

In [10]:
# Put the model in evaluation mode and measure its accuracy on the test loader.
test_accuracy = test_model(model.eval(), test_loader)

# Collect the int8 weights of the quantized layers; the helper also prints
# each layer's name, weight shape and values as it goes.
quantized_weights_list = print_quantized_weights(model)

# Report the accuracy last so it appears after the per-layer weight dump.
print(f"The test accuracy after quantization is: {test_accuracy:.2f}%")

Extracting weights from layer: conv1_depthwise
Weight Shape: (9,), Int8 Weights:
[ -78 -128  -71    6   12  -21   41   12    5]

Extracting weights from layer: conv1_pointwise
Weight Shape: (2,), Int8 Weights:
[-128  127]

Extracting weights from layer: conv2_depthwise
Weight Shape: (18,), Int8 Weights:
[ -20   37   86  -63  -82 -103  -73 -128  -21   30   -6  -39  111  127
   96   53   13   21]

Extracting weights from layer: conv2_pointwise
Weight Shape: (8,), Int8 Weights:
[  59 -128   20  127   65  127 -108  127]

Extracting weights from layer: conv3_depthwise
Weight Shape: (16,), Int8 Weights:
[ 127  -21  -43  -98  127   25  -98  -53 -128   29  -85   65   31  127
  -17  -89]

Extracting weights from layer: conv3_pointwise
Weight Shape: (20,), Int8 Weights:
[  24    5   39  127   50   40   29 -128  -87   70 -128   -7  -10 -112
 -128   26   68  -60 -128   16]

Extracting weights from layer: fc
Weight Shape: (50,), Int8 Weights:
[ -18   57   95  -47 -128  -75  -61   35 -128   52  -47 