In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [4]:
# Hyperparameter: number of samples per mini-batch, passed to both the train and test DataLoaders.
BATCH_SIZE = 32

In [5]:
# Preprocessing applied to every image: convert to a tensor, then normalise the
# single grayscale channel with mean 0.5 and std 0.5.
transformer = transforms.Compose(transforms=[
    transforms.ToTensor(),
    # One-element tuples: `(0.5)` is just the float 0.5, whereas Normalize is
    # documented to take a sequence holding one value per channel.
    transforms.Normalize(mean=(0.5,), std=(0.5,))
])

# EMNIST "mnist" split, downloaded into ./data when it is not already there.
# Training batches are reshuffled every epoch.
train_dataset = torchvision.datasets.EMNIST(root="./data", split='mnist', train=True, download=True, transform=transformer)
trainloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

# The test split keeps a fixed order so evaluation is repeatable.
test_dataset = torchvision.datasets.EMNIST(root="./data", split='mnist', train=False, download=True, transform=transformer)
testloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

Downloading https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip to ./data/EMNIST/raw/gzip.zip


  0%|          | 0/561753746 [00:00<?, ?it/s]

Extracting ./data/EMNIST/raw/gzip.zip to ./data/EMNIST/raw


# Helpers

In [6]:
def train_model(model, criterion, optimizer, trainloader, number_of_epochs=10, device='cpu'):
    """Train ``model`` in place and print the mean batch loss after every epoch.

    Args:
        model: ``nn.Module`` to optimise; it is moved to ``device`` and put
            into training mode.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer already constructed over ``model``'s parameters.
        trainloader: Iterable yielding ``(inputs, labels)`` batches. It does
            not need to support ``len()``; batches are counted while iterating.
        number_of_epochs: Number of full passes over ``trainloader``.
        device: Device (string or ``torch.device``) on which training runs.

    Raises:
        ValueError: If an epoch yields no batches, in which case a mean loss
            cannot be computed.
    """
    # The model only has to be moved once, not once per batch.
    model.to(device)
    # Other helpers in this notebook switch the model to eval mode, so make
    # sure training always happens in training mode.
    model.train()

    for epoch in range(number_of_epochs):
        running_loss = 0.0
        n_batches = 0

        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # forward pass and loss
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1

        if n_batches == 0:
            raise ValueError("trainloader yielded no batches; nothing to train on")

        print(f'[{epoch + 1}] loss: {running_loss / n_batches:.3f}')

    print('Finished training')


def test_model(model, dataloader):
    n_correct = 0
    n_total = 0

    model.to('cpu')
    model.eval()

    with torch.no_grad():
        for (images, labels) in dataloader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)

            n_total += labels.size(0)
            n_correct += (predicted == labels).sum().item()

            result = n_correct / n_total * 100

    print(f"Accuracy on test set: {result:.1f}%")


def model_size(model):
    """Return a label with the combined size of a model's parameters and buffers.

    The size is the number of bytes occupied by every tensor yielded by
    ``model.parameters()`` and ``model.buffers()``, expressed in KB
    (1 KB = 1024 bytes). Tensors that a module keeps outside those two
    iterators are not counted.

    Returns:
        str: ``'model size: <value>KB'`` with three decimal places.
    """
    def _total_bytes(tensors):
        # bytes per tensor = element count * bytes per element
        return sum(t.nelement() * t.element_size() for t in tensors)

    total_bytes = _total_bytes(model.parameters()) + _total_bytes(model.buffers())
    return 'model size: {:.3f}KB'.format(total_bytes / 1024)

def make_prediction(model, x):
    """Predict the class index of a single sample on the CPU.

    Args:
        model: ``nn.Module`` returning per-class scores of shape
            ``(1, num_classes)``. It is moved to the CPU and switched to eval
            mode, and it stays that way after the call.
        x: Input tensor for one sample. A tensor with at most two dimensions
            (e.g. ``(H, W)``) gets two leading axes, as before; a 3-D tensor
            (e.g. ``(C, H, W)``) gets one leading batch axis; a tensor with
            four or more dimensions is passed through unchanged.

    Returns:
        int: Index of the highest-scoring class.
    """
    if x.dim() <= 2:
        # original behaviour: add the channel axis and the batch axis
        x = torch.unsqueeze(torch.unsqueeze(x, 0), 0)
    elif x.dim() == 3:
        # channel axis already present: only the batch axis is missing
        x = torch.unsqueeze(x, 0)

    model.to('cpu')
    model.eval()

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        scores = model(x)

    return int(scores.argmax(dim=1))

# Basic model

In [7]:
class ConvNet(nn.Module):
    """Two conv/pool stages followed by a three-layer fully connected head.

    Shape walk-through for a 1x28x28 input, the size the ``64 * 5 * 5`` head
    is built for:
    conv1 (3x3) -> 256x26x26 -> pool -> 256x13x13
    conv2 (3x3) -> 64x11x11  -> pool -> 64x5x5 -> flatten -> 1600
    """

    def __init__(self):
        super().__init__()

        # 2x2 max-pooling with stride 2, applied after each convolution.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Grayscale images => one input channel; out_channels is the number of filters.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=256, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=256, out_channels=64, kernel_size=3)

        self.relu = nn.ReLU()
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()

        # Head input size = conv2.out_channels * 5 * 5 = 1600; the final
        # layer has one output per class.
        self.linear1 = nn.Linear(in_features=64 * 5 * 5, out_features=500)
        self.linear2 = nn.Linear(in_features=500, out_features=250)
        self.linear3 = nn.Linear(in_features=250, out_features=10)

    def forward(self, x):
        """Return the raw class scores for a batch of images."""
        features = self.pool(self.relu1(self.conv1(x)))
        features = self.pool(self.relu2(self.conv2(features)))

        # Keep the batch axis, flatten everything else.
        flat = torch.flatten(features, 1)

        hidden = self.relu(self.linear1(flat))
        hidden = self.relu(self.linear2(hidden))
        return self.linear3(hidden)
    
# Instantiate the baseline network and place it on the device selected earlier in the notebook.
model = ConvNet().to(device)

In [8]:
# Train the float baseline with cross-entropy loss and Adam.
# The `device` chosen earlier is used instead of a hard-coded 'cuda', so this
# cell also runs on machines without a GPU.
train_model(
    model=model,
    criterion=nn.CrossEntropyLoss(),
    optimizer=optim.Adam(params=model.parameters(), lr=0.001),
    trainloader=trainloader,
    number_of_epochs=10,
    device=device
)

[1] loss: 0.102
[2] loss: 0.042
[3] loss: 0.031
[4] loss: 0.025
[5] loss: 0.020
[6] loss: 0.015
[7] loss: 0.013
[8] loss: 0.011
[9] loss: 0.009
[10] loss: 0.010
Finished training


In [9]:
# Measure the trained baseline's accuracy on the test split.
test_model(model=model, dataloader=testloader)

Accuracy on test set: 99.1%


# Quantized model

In [10]:
class QAT_ConvNet(nn.Module):
    """ConvNet variant prepared for eager-mode quantization.

    Same layers as the baseline network, with a ``QuantStub`` in front of the
    first layer and a ``DeQuantStub`` after the last one to mark where tensors
    enter and leave the quantized region.

    Every activation site has its own ``nn.ReLU`` instance (``relu1`` to
    ``relu4``). Eager-mode quantization works per module instance (fusion
    replaces modules, observers are attached to modules), so one instance must
    not be shared between two places in ``forward``.
    """

    def __init__(self):
        super().__init__()

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=256, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=256, out_channels=64, kernel_size=3)

        # relu1/relu2 follow conv1/conv2; relu3/relu4 follow linear1/linear2.
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.relu4 = nn.ReLU()

        # in_features = 64 * 5 * 5 = 1600
        self.linear1 = nn.Linear(in_features=64*5*5, out_features=500)
        self.linear2 = nn.Linear(in_features=500, out_features=250)
        # out_features = number of classes
        self.linear3 = nn.Linear(in_features=250, out_features=10)

        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Return the class scores for a batch of images as a float tensor."""
        x = self.quant(x)

        x = self.relu1(self.conv1(x))
        x = self.pool(x)

        x = self.relu2(self.conv2(x))
        x = self.pool(x)

        x = torch.flatten(x, 1)
        x = self.relu3(self.linear1(x))
        x = self.relu4(self.linear2(x))
        x = self.linear3(x)

        x = self.dequant(x)
        return x


# Build the float model that will be trained with quantization-aware training (QAT).
model_fp32 = QAT_ConvNet().to(device)

# The model must be in eval mode for fusion to work.
model_fp32.eval()

# Attach a global qconfig, which describes what kind of observers to attach.
# Use 'fbgemm' for server inference and 'qnnpack' for mobile inference. Other
# quantization settings, such as symmetric vs. asymmetric quantization and
# MinMax or L2Norm calibration, can also be specified here.
model_fp32.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')

# Fuse each convolution with the activation that follows it. The module names
# have to be listed by hand because they depend on the model architecture.
model_fp32_fused = torch.quantization.fuse_modules(
    model_fp32, [['conv1', 'relu1'], ['conv2', 'relu2']])

# Prepare the model for QAT. This inserts the observers and fake-quantize
# modules that observe weight and activation tensors during training.
# The model needs to be in train mode for the QAT logic to work.
model_fp32_prepared = torch.quantization.prepare_qat(model_fp32_fused.train())

  reduce_range will be deprecated in a future release of PyTorch."


In [11]:
# Fine-tune the QAT-prepared model with the same loss and optimizer settings as the baseline.
# The `device` chosen earlier is used instead of a hard-coded 'cuda', so this
# cell also runs on machines without a GPU.
# NOTE(review): the recorded run below stays at loss 2.303 (about ln 10, i.e.
# chance level for 10 classes) from epoch 2 on -- this run did not learn and
# should be investigated before the int8 model is relied upon.
train_model(
    model=model_fp32_prepared,
    criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optim.Adam(params=model_fp32_prepared.parameters(), lr=0.001),
    trainloader=trainloader,
    number_of_epochs=10,
    device=device
)

  self.is_symmetric_quant,
  self.is_symmetric_quant,


[1] loss: 2.427
[2] loss: 2.303
[3] loss: 2.303
[4] loss: 2.303
[5] loss: 2.303
[6] loss: 2.303
[7] loss: 2.303
[8] loss: 2.303
[9] loss: 2.303
[10] loss: 2.303
Finished training


In [12]:
# Convert the observed model to a quantized model. This does several things:
# it quantizes the weights, computes and stores the scale and bias value to be
# used with each activation tensor, fuses modules where appropriate,
# and replaces key operators with quantized implementations.

# The model is moved to the CPU and put into eval mode before conversion.
model_fp32_prepared = model_fp32_prepared.to('cpu').eval()
model_int8 = torch.quantization.convert(model_fp32_prepared)

# run the model, relevant calculations will happen in int8
# res = model_int8(input_fp32)

In [13]:
# Evaluate the converted int8 model. test_model() runs the evaluation on the CPU.
# Author's note: running this calculation on CUDA inside the testing function crashed Google Colab.
test_model(model=model_int8, dataloader=testloader)

Accuracy on test set: 10.0%


# Size of model

In [14]:
# Report the combined size of the baseline model's parameters and buffers, as computed by model_size().
print(f"Size of basic model: {model_size(model)}")

Size of basic model: model size: 4212.266KB


In [15]:
# Same measurement for the converted int8 model. model_size() only counts the
# tensors returned by parameters() and buffers().
# NOTE(review): the very small value recorded below suggests the converted
# layers keep their weights outside those iterators -- confirm before quoting
# this number as the size of the quantized model.
print(f"Size of quantized model: {model_size(model_int8)}")

Size of quantized model: model size: 0.012KB


# Predict one random sample

In [16]:
# Draw one random test sample and compare the int8 model's prediction with its label.
# The upper bound is taken from the dataset itself rather than a hard-coded 10000.
index = int(torch.randint(low=0, high=len(test_dataset), size=(1,)))
x, y_true = test_dataset[index]
# Keep only the two trailing (height, width) axes instead of assuming 28x28;
# make_prediction() adds the leading axes itself.
x = x.reshape(x.shape[-2:])

print(f"Correct answer: {y_true}, Predicted: {make_prediction(model_int8, x)}")

Correct answer: 9, Predicted: 3


# Save model

In [17]:
# Save the state_dict of both models into the current working directory.
for checkpoint_path, network in (
    ("model_params.pt", model),
    ("model_int8_params.pt", model_int8),
):
    torch.save(network.state_dict(), checkpoint_path)

# Load model
# model = ConvNet()
# model.load_state_dict(torch.load("model_params.pt"))
# model.eval()