In [5]:
from imports import *
from utils import *
from model import *

# Prefer the GPU when CUDA is usable; otherwise everything stays on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data loading

# Dataset name and the directory the MNIST files are downloaded to / read from.
dataset = 'MNIST'
data_path = 'raw/'

# Per-image preprocessing: convert the image to a tensor, then normalize its
# single channel with mean 0.1307 and std 0.3081
# (presumably the MNIST training-set statistics — TODO confirm).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training and test splits share the same root folder and preprocessing;
# download=True fetches the files when they are not already under `data_path`.
train_dataset = torchvision.datasets.MNIST(
    root=data_path,
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.MNIST(
    root=data_path,
    train=False,
    download=True,
    transform=transform,
)

# Batches of 32 samples; only the training batches are shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)

In [6]:
# Model Definition

num_classes = 10

# Instantiate the network for `num_classes` output classes, then place it on `device`.
model = NET_32k_MNIST(num_classes)
model = model.to(device)

# Count the network's weights (before any quantization step) and report the total.
total_weights = count_weights(model)
print(f"The network has {total_weights} weights that need to be deployed.")

# Turn the float network into its quantized form in three in-place steps:
#   1. attach the default QAT qconfig for the 'fbgemm' backend,
#   2. prepare the model for quantization-aware training,
#   3. convert the prepared model to quantized modules.
# No training or calibration runs between steps 2 and 3 in this cell.
# NOTE(review): 'fbgemm' quantized kernels are presumably CPU-only — confirm this
# still works when `device` resolves to "cuda".
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model = torch.quantization.prepare_qat(model, inplace=True)
model = torch.quantization.convert(model, inplace=True)

The network has 3942 weights that need to be deployed.


In [7]:
# Load Pretrained Model

# Path of the saved checkpoint. This is a plain string literal: the name contains
# no placeholders, so no f-string formatting is needed.
model_filename = "checkpoint/MNIST-3942-99.13_model.t7"

# Read the checkpoint from disk, remapping its stored tensors onto `device`.
# NOTE: torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints that come from a trusted source.
checkpoint = torch.load(model_filename, map_location=device)

# Restore the parameters stored under the checkpoint's 'model' key into the
# already-converted (quantized) model.
model.load_state_dict(checkpoint['model'])

print(model) # The model has been quantized to int8

NET_32k_MNIST(
  (conv1): QuantizedConv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), scale=0.2065466344356537, zero_point=74, padding=(1, 1))
  (bn1): QuantizedBatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation1): QuantizedHardswish()
  (conv2): QuantizedConv2d(10, 12, kernel_size=(3, 3), stride=(1, 1), scale=0.7286619544029236, zero_point=76, padding=(1, 1))
  (bn2): QuantizedBatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation2): QuantizedHardswish()
  (conv3): QuantizedConv2d(12, 14, kernel_size=(3, 3), stride=(1, 1), scale=0.6075138449668884, zero_point=68, padding=(1, 1))
  (bn3): QuantizedBatchNorm2d(14, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation3): QuantizedHardswish()
  (fc): QuantizedLinear(in_features=126, out_features=10, scale=0.8871256709098816, zero_point=80, qscheme=torch.per_channel_affine)
  (quant): Quantize(scale=tensor([0.0256]), zero_point=tensor([1

In [8]:
# Test the model on the test dataset and store the accuracy.
# eval() switches the model to evaluation mode before the test pass.
model.eval()
# test_model is a project helper; its result is formatted below with two decimals
# and a trailing '%', so it is presumably a percentage — TODO confirm.
test_accuracy = test_model(model, test_loader)

# Extract the int8 weights from all quantized layers in the model and save them to a list.
# The helper also prints each layer's name, weight shape and values as it goes,
# so this call must stay ahead of the accuracy print to keep the output order.
quantized_weights_list = print_quantized_weights(model)

# Final summary line, shown after the per-layer weight dump.
print(f"The test accuracy after quantization is: {test_accuracy:.2f}%")

Extracting weights from layer: conv1
Weight Shape: (90,), Int8 Weights:
[ -85   14   99  -68  127    0  -35   15   59  101   11   24  -18  -29
   26   -6  127  -92   54  -26  -82    6  127   66  -54  -40   29  -11
 -107  -59  -45   -5  127   72   57  -16    6   37  -26   10   82  -76
 -128   93   22  127  -68   -7   48  -79  -23   -8  -44  -29   13   27
 -128   62  -14  -64   75   57    8   -1   76    5  127   15  -30  -41
 -111  -25   50   39   26  -36   30    2 -128  -61  -38  -44  -39  -11
  -31  127   32   -6  -45  -59]

Extracting weights from layer: conv2
Weight Shape: (1080,), Int8 Weights:
[ 36 -33 -28 ...  -5  27 -44]

Extracting weights from layer: conv3
Weight Shape: (1512,), Int8 Weights:
[ 43 -11  29 ... -63 -44 -44]

Extracting weights from layer: fc
Weight Shape: (1260,), Int8 Weights:
[  -5  -14   27 ...  -32  -19 -105]

The test accuracy after quantization is: 99.13%
