In [12]:
import os
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tqdm import tqdm
from pathlib import Path

# Load MNIST dataset

In [22]:
# Seed PyTorch's global RNG so repeated runs behave the same way.
_ = torch.manual_seed(0)

# Preprocessing pipeline: convert each image to a tensor, then normalize it
# with mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split (downloaded on first use), served in shuffled batches of 10.
mnist_trainset = datasets.MNIST(
    root='/data',
    train=True,
    download=True,
    transform=transform,
)
train_loader = torch.utils.data.DataLoader(
    mnist_trainset,
    batch_size=10,
    shuffle=True,
)

# Test split, loaded with the same preprocessing and batch size.
mnist_testset = datasets.MNIST(
    root='/data',
    train=False,
    download=True,
    transform=transform,
)
test_loader = torch.utils.data.DataLoader(
    mnist_testset,
    batch_size=10,
    shuffle=True,
)

# All tensors and models in this notebook are placed on the CPU.
device = 'cpu'

In [23]:
class VerySimpleNet(nn.Module):
    """Fully connected classifier: 784 -> hidden_size_1 -> hidden_size_2 -> 10.

    Args:
        hidden_size_1: number of units in the first hidden layer.
        hidden_size_2: number of units in the second hidden layer.
    """

    def __init__(self, hidden_size_1=100, hidden_size_2=100):
        super().__init__()
        # The attribute names below become the keys of the state_dict.
        self.linear1 = nn.Linear(28*28, hidden_size_1)
        self.linear2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.linear3 = nn.Linear(hidden_size_2, 10)
        self.relu = nn.ReLU()

    def forward(self, img):
        """Flatten ``img`` to rows of 784 values and return 10 raw scores per row."""
        flat = img.view(-1, 28*28)
        hidden = self.relu(self.linear1(flat))
        hidden = self.relu(self.linear2(hidden))
        # No activation on the last layer: the raw outputs are returned as-is.
        return self.linear3(hidden)


In [24]:
  # Instantiate the float model with its default hidden sizes and move it to `device`.
  net=VerySimpleNet().to(device)

# Train the model

In [25]:
def train(train_loader, net, epochs=5, total_iterations_limit=None):
    """Train ``net`` with Adam (lr=0.001) and cross-entropy loss.

    Args:
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        net: model to optimize; its parameters are updated in place.
        epochs: number of passes over ``train_loader``.
        total_iterations_limit: optional cap on the number of batches processed
            across all epochs. Training returns as soon as the cap is reached.

    Returns:
        None.
    """
    criterion = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=0.001)
    has_limit = total_iterations_limit is not None

    batches_seen = 0

    for epoch in range(epochs):
        net.train()

        running_loss = 0

        progress = tqdm(train_loader, desc=f'Epoch {epoch+1}')
        if has_limit:
            # Make the progress bar reflect the capped number of batches.
            progress.total = total_iterations_limit

        for step, (inputs, labels) in enumerate(progress, start=1):
            batches_seen += 1
            inputs = inputs.to(device)
            labels = labels.to(device)

            adam.zero_grad()
            logits = net(inputs.view(-1, 28*28))
            batch_loss = criterion(logits, labels)

            # Show the running mean of the loss over this epoch in the progress bar.
            running_loss += batch_loss.item()
            progress.set_postfix(loss=running_loss / step)

            batch_loss.backward()
            adam.step()

            if has_limit and batches_seen >= total_iterations_limit:
                return

def print_size_of_model(model):
    """Print the size, in kilobytes, of ``model``'s serialized state_dict.

    The state_dict is saved to the scratch file ``temp_delme.p`` in the current
    working directory, its size is printed, and the file is deleted again.

    Args:
        model: any object exposing ``state_dict()`` that ``torch.save`` can serialize.

    Returns:
        None. The size is only printed.
    """
    scratch_path = "temp_delme.p"
    try:
        torch.save(model.state_dict(), scratch_path)
        print('Size (KB):', os.path.getsize(scratch_path)/1e3)
    finally:
        # Clean up even when saving or measuring fails, so a partially written
        # scratch file is never left behind in the working directory.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)

MODEL_FILENAME = 'simplenet_ptq.pt'

# Train once and cache the weights; later runs reuse the checkpoint on disk
# instead of retraining.
if not Path(MODEL_FILENAME).exists():
    train(train_loader, net, epochs=1)
    # Persist the freshly trained weights for subsequent runs.
    torch.save(net.state_dict(), MODEL_FILENAME)
else:
    net.load_state_dict(torch.load(MODEL_FILENAME))
    print('Loaded model from disk')

Epoch 1: 100%|██████████| 6000/6000 [00:54<00:00, 109.42it/s, loss=0.223]


# Define the testing loop

In [27]:
# Define the testing loop
def test(model: nn.Module, total_iterations: int = None, data_loader=None):
    """Evaluate ``model`` on a data loader and print its classification accuracy.

    Args:
        model: network to evaluate; it is switched to eval mode.
        total_iterations: optional cap on the number of batches to evaluate.
            ``None`` evaluates the whole loader.
        data_loader: iterable yielding ``(inputs, labels)`` batches. When
            ``None`` (the default) the module-level ``test_loader`` is used.

    Returns:
        None. The accuracy, rounded to three decimals, is printed.
    """
    if data_loader is None:
        data_loader = test_loader

    correct = 0
    total = 0

    model.eval()

    with torch.no_grad():
        batches = tqdm(data_loader, desc='Testing')
        for iterations, (x, y) in enumerate(batches, start=1):
            x = x.to(device)
            y = y.to(device)
            output = model(x.view(-1, 784))
            # Compare the predicted class of every row against its label in one
            # vectorized step instead of looping over samples in Python.
            correct += (output.argmax(dim=1) == y).sum().item()
            total += output.size(0)
            if total_iterations is not None and iterations >= total_iterations:
                break

    if total == 0:
        # An empty loader would otherwise trigger a ZeroDivisionError below.
        print('Accuracy: n/a (no samples were evaluated)')
        return
    print(f'Accuracy: {round(correct/total, 3)}')

In [28]:
# Show the first layer's weight matrix, followed by its dtype, before quantization.
print('Weights before quantization')
print(net.linear1.weight, net.linear1.weight.dtype, sep='\n')

Weights before quantization
Parameter containing:
tensor([[ 0.0046,  0.0240, -0.0246,  ...,  0.0268,  0.0086,  0.0069],
        [-0.0242, -0.0194, -0.0149,  ..., -0.0247, -0.0104, -0.0344],
        [ 0.0268,  0.0618,  0.0137,  ...,  0.0266,  0.0481,  0.0550],
        ...,
        [ 0.0458,  0.0495,  0.0149,  ...,  0.0095,  0.0288, -0.0082],
        [-0.0129, -0.0048,  0.0273,  ...,  0.0076,  0.0076,  0.0071],
        [ 0.0335,  0.0282,  0.0140,  ...,  0.0504,  0.0011,  0.0213]],
       requires_grad=True)
torch.float32


In [29]:
# Baseline: serialized size of the model before it is quantized.
print("Size of the model before quantization")
print_size_of_model(net)

Size of the model before quantization
Size (KB): 360.998


In [30]:
# Baseline: accuracy of the unquantized model as reported by test().
print('Accuracy of the model before quantization: ')
test(net)

Accuracy of the model before quantization: 


Testing: 100%|██████████| 1000/1000 [00:03<00:00, 291.99it/s]

Accuracy: 0.962





# Insert min-max observers into the model

In [31]:
class QuantizedVerySimpleNet(nn.Module):
    """Fully connected 784 -> hidden_size_1 -> hidden_size_2 -> 10 classifier
    wrapped in quantization stubs.

    The ``quant`` stub sits in front of the first linear layer and the
    ``dequant`` stub after the last one. The linear layers use the attribute
    names ``linear1``, ``linear2`` and ``linear3``, which determine the
    state_dict keys.

    Args:
        hidden_size_1: number of units in the first hidden layer.
        hidden_size_2: number of units in the second hidden layer.
    """

    def __init__(self, hidden_size_1=100, hidden_size_2=100):
        super().__init__()
        # Use the torch.ao.quantization namespace, matching the prepare/convert
        # calls elsewhere in this notebook.
        self.quant = torch.ao.quantization.QuantStub()
        self.linear1 = nn.Linear(28*28, hidden_size_1)
        self.linear2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.linear3 = nn.Linear(hidden_size_2, 10)
        self.relu = nn.ReLU()
        self.dequant = torch.ao.quantization.DeQuantStub()

    def forward(self, img):
        """Flatten ``img`` to rows of 784 values and return 10 scores per row.

        The flattened input passes through ``quant`` before the linear layers
        and the final scores pass through ``dequant`` before being returned.
        """
        x = img.view(-1, 28*28)
        x = self.quant(x)
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.linear3(x)
        x = self.dequant(x)
        return x

In [32]:
# Build the stub-wrapped network and copy the trained weights of `net` into it.
net_quantized = QuantizedVerySimpleNet()
net_quantized = net_quantized.to(device)
net_quantized.load_state_dict(net.state_dict())

# Switch to eval mode, attach the default qconfig, then let prepare() insert
# the observers.
net_quantized.eval()
net_quantized.qconfig = torch.ao.quantization.default_qconfig
net_quantized = torch.ao.quantization.prepare(net_quantized)
net_quantized

QuantizedVerySimpleNet(
  (quant): QuantStub(
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (linear1): Linear(
    in_features=784, out_features=100, bias=True
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (linear2): Linear(
    in_features=100, out_features=100, bias=True
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (linear3): Linear(
    in_features=100, out_features=10, bias=True
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (relu): ReLU()
  (dequant): DeQuantStub()
)

# Calibrate the model using the test set

In [33]:
# Calibration pass: running the prepared model through test() feeds batches from
# test_loader through the inserted observers.
# NOTE(review): this calibrates on the same test set that is later used to
# report accuracy; consider calibrating on training data instead. Confirm intent.
test(net_quantized)

Testing: 100%|██████████| 1000/1000 [00:03<00:00, 299.88it/s]

Accuracy: 0.962





In [34]:
# Display the model again so the observers' recorded min/max values can be inspected.
print('Check statistics of the various layers')
net_quantized

Check statistics of the various layers


QuantizedVerySimpleNet(
  (quant): QuantStub(
    (activation_post_process): MinMaxObserver(min_val=-0.4242129623889923, max_val=2.821486711502075)
  )
  (linear1): Linear(
    in_features=784, out_features=100, bias=True
    (activation_post_process): MinMaxObserver(min_val=-54.20774841308594, max_val=38.45577621459961)
  )
  (linear2): Linear(
    in_features=100, out_features=100, bias=True
    (activation_post_process): MinMaxObserver(min_val=-26.76471710205078, max_val=23.914104461669922)
  )
  (linear3): Linear(
    in_features=100, out_features=10, bias=True
    (activation_post_process): MinMaxObserver(min_val=-28.624492645263672, max_val=20.91961669921875)
  )
  (relu): ReLU()
  (dequant): DeQuantStub()
)

# Quantize the model

In [35]:
# Convert the calibrated model, then display the resulting module tree.
net_quantized = torch.ao.quantization.convert(net_quantized)
print('Check statistics of the various layers')
net_quantized

Check statistics of the various layers


QuantizedVerySimpleNet(
  (quant): Quantize(scale=tensor([0.0256]), zero_point=tensor([17]), dtype=torch.quint8)
  (linear1): QuantizedLinear(in_features=784, out_features=100, scale=0.7296341061592102, zero_point=74, qscheme=torch.per_tensor_affine)
  (linear2): QuantizedLinear(in_features=100, out_features=100, scale=0.39904582500457764, zero_point=67, qscheme=torch.per_tensor_affine)
  (linear3): QuantizedLinear(in_features=100, out_features=10, scale=0.3901110887527466, zero_point=73, qscheme=torch.per_tensor_affine)
  (relu): ReLU()
  (dequant): DeQuantize()
)

In [36]:
# Show the integer representation of the first layer's weights after quantization.
print("Weights after quantization")
print(
    torch.int_repr(
        net_quantized.linear1.weight()
    )
)

Weights after quantization
tensor([[ 1,  5, -6,  ...,  6,  2,  2],
        [-5, -4, -3,  ..., -6, -2, -8],
        [ 6, 14,  3,  ...,  6, 11, 12],
        ...,
        [10, 11,  3,  ...,  2,  6, -2],
        [-3, -1,  6,  ...,  2,  2,  2],
        [ 8,  6,  3,  ..., 11,  0,  5]], dtype=torch.int8)


In [37]:
# Print the original first-layer weights and the dequantized first-layer weights
# one after the other, each followed by a blank line, for comparison.
print('Original weights: ')
print(net.linear1.weight)
print()
print('Dequantized weights: ')
print(torch.dequantize(net_quantized.linear1.weight()))
print()

Original weights: 
Parameter containing:
tensor([[ 0.0046,  0.0240, -0.0246,  ...,  0.0268,  0.0086,  0.0069],
        [-0.0242, -0.0194, -0.0149,  ..., -0.0247, -0.0104, -0.0344],
        [ 0.0268,  0.0618,  0.0137,  ...,  0.0266,  0.0481,  0.0550],
        ...,
        [ 0.0458,  0.0495,  0.0149,  ...,  0.0095,  0.0288, -0.0082],
        [-0.0129, -0.0048,  0.0273,  ...,  0.0076,  0.0076,  0.0071],
        [ 0.0335,  0.0282,  0.0140,  ...,  0.0504,  0.0011,  0.0213]],
       requires_grad=True)

Dequantized weights: 
tensor([[ 0.0044,  0.0222, -0.0266,  ...,  0.0266,  0.0089,  0.0089],
        [-0.0222, -0.0177, -0.0133,  ..., -0.0266, -0.0089, -0.0355],
        [ 0.0266,  0.0621,  0.0133,  ...,  0.0266,  0.0488,  0.0532],
        ...,
        [ 0.0443,  0.0488,  0.0133,  ...,  0.0089,  0.0266, -0.0089],
        [-0.0133, -0.0044,  0.0266,  ...,  0.0089,  0.0089,  0.0089],
        [ 0.0355,  0.0266,  0.0133,  ...,  0.0488,  0.0000,  0.0222]])



In [38]:
# Serialized size of the converted model, for comparison with the earlier figure.
print("Size of the model after quantization")
print_size_of_model(net_quantized)


Size of the model after quantization
Size (KB): 95.394


In [40]:
# Accuracy of the converted model as reported by test().
print("Testing the model after quantization")
test(net_quantized)

Testing the model after quantization


Testing: 100%|██████████| 1000/1000 [00:03<00:00, 301.33it/s]

Accuracy: 0.962



