In [4]:
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
from tqdm import tqdm

_ = torch.manual_seed(0)

transform = transforms.Compose([transforms.ToTensor() ,

transforms.Normalize((0.1307,),(0.3081,))

])




mnist_trainset = datasets.MNIST(root='./data', train=True, download=False, transform=transform)
mnist_testset = datasets.MNIST(root='./data', train=False, download=False, transform=transform)

train_loader = torch.utils.data.DataLoader(mnist_trainset, batch_size=10, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_testset, batch_size=10, shuffle=False)

# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# for quantization we have to put device = "cpu"

device = "cpu"

class SimplifiedVGG16(nn.Module):
    def __init__(self, num_classes=10):
        super(SimplifiedVGG16, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 3 * 3, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

model = SimplifiedVGG16(num_classes=10).to(device)


num_classes = 10
num_epochs = 1
batch_size = 10
learning_rate = 0.005

model =SimplifiedVGG16(num_classes).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)  


# Train the model
total_step = len(train_loader)


for epoch in tqdm(range(num_epochs), desc="Epochs"):
    model.train()
    train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
    for i, (images, labels) in enumerate(train_loop):  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update progress bar
        train_loop.set_postfix(loss=loss.item())

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
            
    # Validation
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        val_loop = tqdm(test_loader, desc="Validation", leave=False)
        for images, labels in val_loop:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

            # Update progress bar
            val_loop.set_postfix(accuracy=f"{100 * correct / total:.2f}%")
    
        print(f'Accuracy of the network on the {total} validation images: {100 * correct / total:.2f}%')

Epochs:   0%|          | 0/1 [00:00<?, ?it/s]



Epoch [1/1], Loss: 0.1304


Epochs: 100%|██████████| 1/1 [02:23<00:00, 143.03s/it]

Accuracy of the network on the 10000 validation images: 97.81%





In [5]:
def test():

    correct = 0
    total = 0

    wrong_counts = [0 for i in range(10)]

    with torch.no_grad():

        for data in tqdm(test_loader , desc='Testing'):

                x,y = data

                x = x.to(device)
                y = y.to(device)
                output = model(x)

                for idx , i in enumerate(output):

                    if torch.argmax(i) == y[idx]:
                        correct += 1
                    else:
                        wrong_counts[y[idx]] +=1
                    total+=1
    print(f'Accuracy: {round(correct/total, 3)}')
    for i in range(len(wrong_counts)):
        print(f'wrong counts for the digit {i}: {wrong_counts[i]}')

test()

Testing: 100%|██████████| 1000/1000 [00:06<00:00, 153.74it/s]

Accuracy: 0.978
wrong counts for the digit 0: 2
wrong counts for the digit 1: 6
wrong counts for the digit 2: 26
wrong counts for the digit 3: 8
wrong counts for the digit 4: 42
wrong counts for the digit 5: 18
wrong counts for the digit 6: 28
wrong counts for the digit 7: 13
wrong counts for the digit 8: 52
wrong counts for the digit 9: 24





In [6]:
# Now lets print the size of model before quantization

def print_size_of_model(model):

    torch.save(model.state_dict() , "temp_vgg.p")
    print("Size (KB) :" , os.path.getsize("temp_vgg.p")/1e3)
    os.remove("temp_vgg.p")

MODEL_FILENAME = "SimplifiedVGG16.pt"

torch.save(model.state_dict() , MODEL_FILENAME)
print("Saved model successfully")
model.load_state_dict(torch.load(MODEL_FILENAME))
print("Successfully loaded model from disk")




Saved model successfully
Successfully loaded model from disk


  model.load_state_dict(torch.load(MODEL_FILENAME))


# Now lets see the weights before quantization

In [15]:
print("Weight before quantization")

# weights of the 1st layer

print(model.features[0].weight)

Weight before quantization
Parameter containing:
tensor([[[[-0.1353, -0.1419, -0.0434],
          [ 0.0388,  0.0791, -0.0228],
          [ 0.1248,  0.0267,  0.0808]]],


        [[[-0.0350, -0.0234,  0.0229],
          [ 0.0282, -0.0279, -0.0343],
          [-0.0321,  0.0044,  0.0125]]],


        [[[ 0.1268,  0.1575,  0.1537],
          [-0.0658, -0.0330,  0.0969],
          [-0.1846, -0.1450, -0.0596]]],


        [[[-0.1512, -0.1831, -0.1003],
          [ 0.0767,  0.0528, -0.0415],
          [ 0.0990,  0.0571,  0.1365]]],


        [[[ 0.1077,  0.0938, -0.0885],
          [ 0.1360, -0.0003, -0.1176],
          [ 0.1051, -0.0671, -0.0822]]],


        [[[ 0.0459, -0.0513,  0.0994],
          [-0.0632, -0.0711,  0.1102],
          [-0.0419,  0.0420,  0.1134]]],


        [[[ 0.1847,  0.2070,  0.2334],
          [-0.0818, -0.0393, -0.1071],
          [-0.1200, -0.2150, -0.0980]]],


        [[[ 0.2195,  0.0905, -0.1054],
          [ 0.1940, -0.0363, -0.1808],
          [ 0.2136,  0.086

# Size of the model before quantization

In [17]:
import os
print("Size of the model before quantization in KB")

print_size_of_model(model)

Size of the model before quantization in KB
Size (KB) : 13325.89


In [18]:
class Quantizedvgg16(nn.Module):

    def __init__(self , num_classes = 10):
        super(Quantizedvgg16,self).__init__()
        self.quant = torch.quantization.QuantStub()

        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 3 * 3, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes),
        )
        self.dequant = torch.quantization.DeQuantStub()


    def forward(self, x):
        x = self.quant(x)
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        x = self.dequant(x)
        return x


model_quantized = Quantizedvgg16(num_classes=10).to(device)

# Copy the weight of unquantized model

model_quantized.load_state_dict(model.state_dict())

model_quantized.eval()

model_quantized.qconfig = torch.ao.quantization.default_qconfig

model_quantized = torch.ao.quantization.prepare(model_quantized)


model_quantized

    








Quantizedvgg16(
  (quant): QuantStub(
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (features): Sequential(
    (0): Conv2d(
      1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
      (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
    )
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(
      64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
      (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
    )
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(
      128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
      (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
    )
    (7): ReLU(inplace=True)
    (8): Conv2d(
      256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
      (activation_post_process): Min

In [19]:
def test():

    correct = 0
    total = 0

    wrong_counts = [0 for i in range(10)]

    with torch.no_grad():

        for data in tqdm(test_loader , desc='Testing'):

                x,y = data

                x = x.to(device)
                y = y.to(device)
                output = model_quantized(x)

                for idx , i in enumerate(output):

                    if torch.argmax(i) == y[idx]:
                        correct += 1
                    else:
                        wrong_counts[y[idx]] +=1
                    total+=1
    print(f'Accuracy: {round(correct/total, 3)}')
    for i in range(len(wrong_counts)):
        print(f'wrong counts for the digit {i}: {wrong_counts[i]}')

test()

Testing: 100%|██████████| 1000/1000 [00:06<00:00, 146.71it/s]

Accuracy: 0.978
wrong counts for the digit 0: 2
wrong counts for the digit 1: 6
wrong counts for the digit 2: 26
wrong counts for the digit 3: 8
wrong counts for the digit 4: 42
wrong counts for the digit 5: 18
wrong counts for the digit 6: 28
wrong counts for the digit 7: 13
wrong counts for the digit 8: 52
wrong counts for the digit 9: 24





In [20]:
# Checking the stats of various layers

model_quantized

Quantizedvgg16(
  (quant): QuantStub(
    (activation_post_process): MinMaxObserver(min_val=-0.4242129623889923, max_val=2.821486711502075)
  )
  (features): Sequential(
    (0): Conv2d(
      1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
      (activation_post_process): MinMaxObserver(min_val=-2.094406843185425, max_val=3.05881404876709)
    )
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(
      64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
      (activation_post_process): MinMaxObserver(min_val=-5.384039878845215, max_val=6.796621322631836)
    )
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(
      128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
      (activation_post_process): MinMaxObserver(min_val=-5.604202747344971, max_val=6.492345333099365)
    )
    (7): ReLU(inplace=True)
    (8)

In [21]:
model_quantized = torch.ao.quantization.convert(model_quantized)

print("Checking the stats of quantized layers")

model_quantized

Checking the stats of quantized layers


Quantizedvgg16(
  (quant): Quantize(scale=tensor([0.0256]), zero_point=tensor([17]), dtype=torch.quint8)
  (features): Sequential(
    (0): QuantizedConv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.040576543658971786, zero_point=52, padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): QuantizedConv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), scale=0.09591072052717209, zero_point=56, padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): QuantizedConv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), scale=0.09524840861558914, zero_point=59, padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): QuantizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), scale=0.12246931344270706, zero_point=68, padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=Fa

# Now weights after quantization

In [24]:
print("Weights after quantization")

print(torch.int_repr(model_quantized.features[0].weight()))

Weights after quantization
tensor([[[[ -69,  -73,  -22],
          [  20,   41,  -12],
          [  64,   14,   41]]],


        [[[ -18,  -12,   12],
          [  14,  -14,  -18],
          [ -16,    2,    6]]],


        [[[  65,   81,   79],
          [ -34,  -17,   50],
          [ -95,  -74,  -31]]],


        [[[ -78,  -94,  -51],
          [  39,   27,  -21],
          [  51,   29,   70]]],


        [[[  55,   48,  -45],
          [  70,    0,  -60],
          [  54,  -34,  -42]]],


        [[[  24,  -26,   51],
          [ -32,  -36,   57],
          [ -21,   22,   58]]],


        [[[  95,  106,  120],
          [ -42,  -20,  -55],
          [ -62, -110,  -50]]],


        [[[ 113,   46,  -54],
          [ 100,  -19,  -93],
          [ 110,   44,  -71]]],


        [[[  97,  111,   63],
          [ -41,  -16,   10],
          [-109,  -69,   -4]]],


        [[[ -21,  -28,  -14],
          [   1,   -9,    8],
          [ -15,  -25,   -2]]],


        [[[ -41,  -54,    3],
   

# Now lets compute the weights of original model vs quantized model

In [27]:
print("original weights : ")

print(model.features[0].weight)

print(' ')



original weights : 
Parameter containing:
tensor([[[[-0.1353, -0.1419, -0.0434],
          [ 0.0388,  0.0791, -0.0228],
          [ 0.1248,  0.0267,  0.0808]]],


        [[[-0.0350, -0.0234,  0.0229],
          [ 0.0282, -0.0279, -0.0343],
          [-0.0321,  0.0044,  0.0125]]],


        [[[ 0.1268,  0.1575,  0.1537],
          [-0.0658, -0.0330,  0.0969],
          [-0.1846, -0.1450, -0.0596]]],


        [[[-0.1512, -0.1831, -0.1003],
          [ 0.0767,  0.0528, -0.0415],
          [ 0.0990,  0.0571,  0.1365]]],


        [[[ 0.1077,  0.0938, -0.0885],
          [ 0.1360, -0.0003, -0.1176],
          [ 0.1051, -0.0671, -0.0822]]],


        [[[ 0.0459, -0.0513,  0.0994],
          [-0.0632, -0.0711,  0.1102],
          [-0.0419,  0.0420,  0.1134]]],


        [[[ 0.1847,  0.2070,  0.2334],
          [-0.0818, -0.0393, -0.1071],
          [-0.1200, -0.2150, -0.0980]]],


        [[[ 0.2195,  0.0905, -0.1054],
          [ 0.1940, -0.0363, -0.1808],
          [ 0.2136,  0.0860, -0.1

In [28]:
print(f"Dequantized weights : ")

print(torch.dequantize(model_quantized.features[0].weight()))

print("")

Dequantized weights : 
tensor([[[[-0.1345, -0.1423, -0.0429],
          [ 0.0390,  0.0799, -0.0234],
          [ 0.1247,  0.0273,  0.0799]]],


        [[[-0.0351, -0.0234,  0.0234],
          [ 0.0273, -0.0273, -0.0351],
          [-0.0312,  0.0039,  0.0117]]],


        [[[ 0.1267,  0.1579,  0.1540],
          [-0.0663, -0.0331,  0.0975],
          [-0.1852, -0.1442, -0.0604]]],


        [[[-0.1520, -0.1832, -0.0994],
          [ 0.0760,  0.0526, -0.0409],
          [ 0.0994,  0.0565,  0.1364]]],


        [[[ 0.1072,  0.0936, -0.0877],
          [ 0.1364,  0.0000, -0.1169],
          [ 0.1052, -0.0663, -0.0819]]],


        [[[ 0.0468, -0.0507,  0.0994],
          [-0.0624, -0.0702,  0.1111],
          [-0.0409,  0.0429,  0.1130]]],


        [[[ 0.1852,  0.2066,  0.2339],
          [-0.0819, -0.0390, -0.1072],
          [-0.1208, -0.2144, -0.0975]]],


        [[[ 0.2202,  0.0897, -0.1052],
          [ 0.1949, -0.0370, -0.1813],
          [ 0.2144,  0.0858, -0.1384]]],


        [

# Now lets compare the size of original model vs quantized model|

In [29]:
print('Size of the model before quantization')
print_size_of_model(model)

Size of the model before quantization
Size (KB) : 13325.89


In [30]:
print('Size of the model after quantization')
print_size_of_model(model_quantized)

Size of the model after quantization
Size (KB) : 3344.358
