In [5]:
from imports import *
from utils import *
from model import *

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Data loading

# Per-image preprocessing: convert to a tensor, then normalise the single
# channel with mean 0.1307 and std 0.3081.
# NOTE(review): these look like the usual MNIST statistics rather than
# FashionMNIST ones -- presumably they match how the checkpoint was trained;
# confirm before changing them.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

dataset = 'FashionMNIST'
data_path = 'raw/'

# Build the train split first, then the test split. Both share the same root
# folder and preprocessing, and request a download of the data.
train_dataset, test_dataset = (
    torchvision.datasets.FashionMNIST(root=data_path, train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Batches of 32 samples; only the training split is shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

In [6]:
# Model Definition

# Number of output classes passed to the network constructor
num_classes = 10

# Build the network for `num_classes` outputs and move it to the selected device
model = NET_32k_MNIST(num_classes).to(device)

# Count the weights while the model is still in its float form, i.e. before
# the layers are replaced by their quantized counterparts below.
total_weights = count_weights(model)
print(f"The network has {total_weights} weights that need to be deployed.")

# Give the model the quantized layer structure: attach the default QAT qconfig
# for the 'fbgemm' backend, prepare the modules for QAT, then convert them.
# No training or calibration happens in between -- presumably this sequence
# exists only so that the quantized checkpoint loaded in the next cell finds
# matching quantized modules; the checkpoint supplies the real parameters.
# NOTE(review): the model was moved to `device` above; PyTorch's eager-mode
# quantized operators are documented as CPU-only -- confirm this still works
# when CUDA is selected.
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model = torch.quantization.prepare_qat(model, inplace=True)
model = torch.quantization.convert(model, inplace=True)

The network has 3942 weights that need to be deployed.


In [7]:
# Load Pretrained Model

# Path of the saved checkpoint. A plain string literal is used because
# nothing is interpolated into it.
model_filename = "checkpoint/FMNIST-3942-90.28_model.t7"

# Read the checkpoint from disk, remapping stored tensors onto `device`.
# SECURITY: depending on the PyTorch version and the `weights_only` setting,
# torch.load may unpickle arbitrary objects, which can execute code. Only load
# checkpoint files that come from a trusted source.
checkpoint = torch.load(model_filename, map_location=device)

# Copy the parameters stored under the 'model' key into the already-converted
# network; its module structure must match the one the checkpoint was saved from.
model.load_state_dict(checkpoint['model'])

print(model) # The model has been quantized to int8

NET_32k_MNIST(
  (conv1): QuantizedConv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), scale=0.13093020021915436, zero_point=76, padding=(1, 1))
  (bn1): QuantizedBatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation1): QuantizedHardswish()
  (conv2): QuantizedConv2d(10, 12, kernel_size=(3, 3), stride=(1, 1), scale=0.42956021428108215, zero_point=94, padding=(1, 1))
  (bn2): QuantizedBatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation2): QuantizedHardswish()
  (conv3): QuantizedConv2d(12, 14, kernel_size=(3, 3), stride=(1, 1), scale=0.18915767967700958, zero_point=55, padding=(1, 1))
  (bn3): QuantizedBatchNorm2d(14, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation3): QuantizedHardswish()
  (fc): QuantizedLinear(in_features=126, out_features=10, scale=0.424058735370636, zero_point=79, qscheme=torch.per_channel_affine)
  (quant): Quantize(scale=tensor([0.0256]), zero_point=tensor(

In [8]:
# Switch to evaluation mode, run the model over the test loader, and keep the
# resulting accuracy (reported below as a percentage).
model.eval()
test_accuracy = test_model(model, test_loader)

# Print, layer by layer, the flattened int8 weights of the quantized layers and
# keep whatever the helper returns (a list of the weights, per its description).
quantized_weights_list = print_quantized_weights(model)

# Report the accuracy last, so it appears after the per-layer weight dump.
print(f"The test accuracy after quantization is: {test_accuracy:.2f}%")

Extracting weights from layer: conv1
Weight Shape: (90,), Int8 Weights:
[ -47  -11   -9 -128  -35    4  -56  -11   39   37   -2  -36   62   61
 -128   21    1  -23 -128   58   56   33    1  -90   26  -31  -32   22
  -28   12   -7 -128  -30  -14 -112  -39   -7   71 -128  -54   83   -1
  -46   64  -17   39  -40   16   32  -37   30  -62  -64  127 -107  -16
   70  127   95    2   -9  -13   28 -128   16  -44   56   -9  -38   69
  -19   43   47 -127  -69   -9  -71  -81   46    8   46  -30  -25  127
   48   12  -49  -41  -63  -42]

Extracting weights from layer: conv2
Weight Shape: (1080,), Int8 Weights:
[ 39  12 -17 ... -22 -35 -21]

Extracting weights from layer: conv3
Weight Shape: (1512,), Int8 Weights:
[  8 -31  10 ...  11 -14 -15]

Extracting weights from layer: fc
Weight Shape: (1260,), Int8 Weights:
[ 12  31 -29 ...  10 -15  35]

The test accuracy after quantization is: 90.28%
