# Part 0 naive quantization

## 0.1 Training MLP on MNIST

In [57]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size = 28 * 28
hidden_size = 128
output_size = 10
num_epochs = 10
batch_size = 64
learning_rate = 1e-3

In [3]:
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [4]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        x = x.view(-1, input_size)  # Flatten the input
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [5]:
model = MLP(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [6]:
def train_model(model, train_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        for batch_idx, (data, targets) in enumerate(train_loader):
            data, targets = data.to(device), targets.to(device)

            outputs = model(data)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
train_model(model, train_loader, num_epochs)

Epoch [1/10], Loss: 0.1648
Epoch [2/10], Loss: 0.1220
Epoch [3/10], Loss: 0.0485
Epoch [4/10], Loss: 0.2655
Epoch [5/10], Loss: 0.0417
Epoch [6/10], Loss: 0.1671
Epoch [7/10], Loss: 0.0230
Epoch [8/10], Loss: 0.0076
Epoch [9/10], Loss: 0.0225
Epoch [10/10], Loss: 0.0906


In [7]:
def evaluate_model(model, test_loader):
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

        accuracy = 100 * correct / total
        print(f'Accuracy of the model on the test images: {accuracy:.2f}%')

In [8]:
%%time
evaluate_model(model, test_loader)

Accuracy of the model on the test images: 97.39%
CPU times: user 3.58 s, sys: 71.2 ms, total: 3.65 s
Wall time: 935 ms


## 0.2 Quantize the weights to int

In [9]:
import matplotlib.pyplot as plt

In [10]:
def quantize(model):
    """
    Return a copy of the model with quantized weights
    """
    quantized_model = MLP(input_size, hidden_size, output_size).to(device)
    quantized_model.load_state_dict(model.state_dict())
    for param in quantized_model.parameters():
        param.data = param.data.round()
    return quantized_model

In [11]:
quantized_model = quantize(model)

In [12]:
%%time
evaluate_model(quantized_model, test_loader)

Accuracy of the model on the test images: 9.86%
CPU times: user 4.07 s, sys: 147 ms, total: 4.22 s
Wall time: 1.54 s


In [13]:
quantized_model.fc1.weight

Parameter containing:
tensor([[0., -0., -0.,  ..., -0., 0., 0.],
        [0., 0., 0.,  ..., 0., -0., -0.],
        [-0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [-0., 0., -0.,  ..., -0., 0., -0.],
        [-0., -0., -0.,  ..., 0., 0., 0.],
        [-0., -0., 0.,  ..., 0., 0., 0.]], requires_grad=True)

In [14]:
model.fc1.weight

Parameter containing:
tensor([[ 0.0137, -0.0107, -0.0048,  ..., -0.0173,  0.0162,  0.0036],
        [ 0.0349,  0.0012,  0.0139,  ...,  0.0238, -0.0149, -0.0024],
        [-0.0243,  0.0102,  0.0047,  ...,  0.0316,  0.0190,  0.0173],
        ...,
        [-0.0046,  0.0220, -0.0069,  ..., -0.0303,  0.0197, -0.0017],
        [-0.0349, -0.0166, -0.0277,  ...,  0.0002,  0.0283,  0.0077],
        [-0.0008, -0.0306,  0.0314,  ...,  0.0357,  0.0314,  0.0005]],
       requires_grad=True)

## 0.3 Testing the quantized model vs the original model

0.975 accuracy on the test set for the original model vs 0.159 for the quantized model

Des valeurs entre 0 et 1, on peut perdre entièrement des informations qui tombent à 0 et donner trop d'imporance à des valeurs qui sont proches de 1.

# Part 1 - quantization statique plus réaliste

## 1. Une unique plage pour toutes les couches

In [15]:
def unique_layer_quantize(model):
    """
    Return a copy of the model with quantized weights
    """
    quantized_model = MLP(input_size, hidden_size, output_size).to(device)
    quantized_model.load_state_dict(model.state_dict())

    min_val = float('inf')
    max_val = - float('inf')
    
    for param in quantized_model.parameters():
        min_val = min(min_val, param.data.min())
        max_val = max(max_val, param.data.max())

    s = (max_val - min_val) / (2**8 - 1)
    for param in quantized_model.parameters():
        param.data = (param.data / s).round()
    
    return quantized_model

In [16]:
unique_layer_quantized_model = unique_layer_quantize(model)

In [17]:
%%time
evaluate_model(unique_layer_quantized_model, test_loader)

Accuracy of the model on the test images: 97.39%
CPU times: user 3.92 s, sys: 113 ms, total: 4.03 s
Wall time: 1.07 s


## 2. Une plage par couche

In [18]:
def per_layer_quantize(model):
    """
    Return a copy of the model with quantized weights
    """
    quantized_model = MLP(input_size, hidden_size, output_size).to(device)
    quantized_model.load_state_dict(model.state_dict())

    for param in quantized_model.parameters():
        min_val = param.data.min()
        max_val = param.data.max()
        s = (max_val - min_val) / (2**8 - 1)
        param.data = (param.data / s).round()
    
    return quantized_model

In [19]:
per_layer_quantized_model = per_layer_quantize(model)

In [20]:
%%time
evaluate_model(per_layer_quantized_model, test_loader)

Accuracy of the model on the test images: 97.58%
CPU times: user 4.08 s, sys: 142 ms, total: 4.22 s
Wall time: 1.29 s


# Part 2 - quantization statique CNN

In [51]:
import torch.nn.functional as F

In [52]:
class MnistCNN(nn.Module):
    def __init__(self):
        super(MnistCNN, self).__init__()
        self.conv1 = nn.Conv2d(1,10,kernel_size=5,stride=1)
        self.conv2 = nn.Conv2d(10,10,kernel_size=5,stride=1)
        self.pool = nn.MaxPool2d(kernel_size=2,stride=2) #2x2 maxpool
        self.fc1 = nn.Linear(4*4*10,100)
        self.fc2 = nn.Linear(100,10)
    
    def forward(self,x):
        x = F.relu(self.conv1(x)) #24x24x10
        x = self.pool(x) #12x12x10
        x = F.relu(self.conv2(x)) #8x8x10
        x = self.pool(x) #4x4x10    
        x = x.view(-1, 4*4*10) #flattening
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [53]:
def train_cnn_model(model, train_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        for batch_idx, (data, targets) in enumerate(train_loader):
            data, targets = data.to(device), targets.to(device)

            outputs = model(data)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

In [54]:
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
cnn_model = MnistCNN().to(device)
train_cnn_model(cnn_model, train_loader, num_epochs)

Epoch [1/10], Loss: 2.3200
Epoch [2/10], Loss: 2.2987
Epoch [3/10], Loss: 2.3033
Epoch [4/10], Loss: 2.2910
Epoch [5/10], Loss: 2.2919
Epoch [6/10], Loss: 2.3005
Epoch [7/10], Loss: 2.2925
Epoch [8/10], Loss: 2.3027
Epoch [9/10], Loss: 2.2934
Epoch [10/10], Loss: 2.3088


In [56]:
%%time
evaluate_model(cnn_model, test_loader)

Accuracy of the model on the test images: 10.32%
CPU times: user 5.01 s, sys: 110 ms, total: 5.12 s
Wall time: 1.32 s
