In [16]:
import sys
import copy

import torch
from torch import nn, optim
from torch.utils import data
from torchvision import datasets, transforms

In [17]:
# sys.path.append("/home/matthias/Documents/EmbeddedAI/deep-microcompression/")
sys.path.append("../../")

from development import (
    Sequential,
    Conv2d,
    Linear,
    ReLU,
    MaxPool2d,
    Flatten
)

In [18]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
lenet5_file = f"lenet5_state_dict_{DEVICE}.pth"

LUCKY_NUMBER = 25
torch.manual_seed(LUCKY_NUMBER)
torch.random.manual_seed(LUCKY_NUMBER)
torch.cuda.manual_seed(LUCKY_NUMBER)


In [19]:
DEVICE

'cuda'

In [20]:
data_transform = transforms.Compose([
    transforms.RandomCrop((24, 24)),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

mnist_train_dataset = datasets.MNIST("./datasets", train=True, download=True, transform=data_transform)
mnist_test_dataset = datasets.MNIST("./datasets", train=False, download=True, transform=data_transform)

mnist_train_loader = data.DataLoader(mnist_train_dataset, batch_size=32, shuffle=True)
mnist_test_loader = data.DataLoader(mnist_test_dataset, batch_size=32)


In [21]:
lenet5_model = Sequential(
    Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0, bias=True),
    ReLU(),
    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0, bias=True),
    ReLU(),
    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Flatten(),

    Linear(in_features=16*5*5, out_features=84, bias=True),
    ReLU(),
    Linear(in_features=84, out_features=10, bias=True)
).to(DEVICE)

accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()




In [22]:
try:
    # raise RuntimeError
    lenet5_model.load_state_dict(torch.load(lenet5_file, weights_only=True))
    
except (RuntimeError, FileNotFoundError):

    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(lenet5_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    lenet5_model.fit(
        mnist_train_loader, 15, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=mnist_test_loader, 
        device=DEVICE
    )
    torch.save(lenet5_model.state_dict(), lenet5_file)
    

In [23]:
original_acc = lenet5_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)
original_size = lenet5_model.get_size_in_bits()//8
print(f"The original model accuracy is {original_acc*100:.2f}% with size {original_size} bytes.")



  0%|          | 0/313 [00:00<?, ?it/s]

The original model accuracy is 100.00% with size 148424 bytes.





## Original Model


In [24]:
lenet5_model.cpu()

lenet5_mcu_model = copy.deepcopy(lenet5_model)

original_acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
original_size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The original model accuracy is {original_acc*100:.2f}% with size {original_size} bytes.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]


The original model accuracy is 100.00% with size 148424 bytes.
tensor([[ -7.0936,   7.9878,  -6.0555,  -9.2409,  -2.5021,  -3.0447,  -8.0069,
          -6.5767,  -2.5293, -10.4348]])


## Pruned Model 

### sparsity_per_layer = 0.1

In [28]:
sparsity_per_layer = 0.1
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]




The pruned model with sparsity 0.1 accuracy is 96.88%.
The accurancy drop is 3.12% and size drop is 21.90%.
tensor([[-6.8226,  6.4594, -5.5754, -9.7343,  0.9674, -4.1610, -6.0104, -7.8067,
         -2.1789, -7.4688]])


### sparsity_per_layer = 0.2

In [31]:
sparsity_per_layer = 0.2
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]


The pruned model with sparsity 0.2 accuracy is 96.88%.
The accurancy drop is 3.12% and size drop is 34.54%.
tensor([[ -6.3855,   6.5384,  -5.1989, -10.1023,   0.5934,  -4.6097,  -5.1582,
          -7.9441,  -2.1744,  -8.1987]])


### sparsity_per_layer = 0.3

In [30]:
sparsity_per_layer = 0.3
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]


The pruned model with sparsity 0.3 accuracy is 87.50%.
The accurancy drop is 12.50% and size drop is 51.22%.
tensor([[ -2.8619,   0.3677,  -7.5608, -13.8715,  -0.3558,   0.9099,   0.9218,
          -7.6484,  -2.0629,  -6.2257]])


### sparsity_per_layer = 0.4

In [32]:
sparsity_per_layer = 0.4
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]




The pruned model with sparsity 0.4 accuracy is 84.38%.
The accurancy drop is 15.62% and size drop is 61.80%.
tensor([[  0.1158,  -0.4338,  -9.8758, -11.9314,  -2.4157,   0.5015,   0.3331,
          -6.9573,  -1.9771,  -5.5160]])


### sparsity_per_layer = 0.5

In [33]:
sparsity_per_layer = 0.5
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]


The pruned model with sparsity 0.5 accuracy is 53.12%.
The accurancy drop is 46.88% and size drop is 74.24%.
tensor([[-0.4509, -5.8119, -5.9063, -4.8622, -2.8870,  0.5381,  0.7379, -5.9212,
         -1.1492, -1.9270]])


### sparsity_per_layer = 0.6

In [34]:
sparsity_per_layer = 0.6
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

The pruned model with sparsity 0.6 accuracy is 56.25%.
The accurancy drop is 43.75% and size drop is 81.29%.
tensor([[-1.2993, -4.6259, -5.7292, -2.7742, -2.3246,  0.2928,  1.6027, -6.0664,
         -1.4219, -1.5903]])





### sparsity_per_layer = 0.7

In [35]:
sparsity_per_layer = 0.7
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

The pruned model with sparsity 0.7 accuracy is 9.38%.
The accurancy drop is 90.62% and size drop is 89.98%.
tensor([[ 0.2819, -1.0298, -2.0192, -0.5093, -3.0721, -0.1845, -0.5431, -1.7243,
         -0.5745, -0.7265]])





### sparsity_per_layer = 0.8

In [37]:
sparsity_per_layer = 0.8
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

The pruned model with sparsity 0.8 accuracy is 15.62%.
The accurancy drop is 84.38% and size drop is 94.20%.
tensor([[-0.2637, -1.9515, -2.3809, -0.2891, -2.7938,  0.1997, -0.4580, -1.9208,
          1.2074,  0.3487]])





### sparsity_per_layer = 0.9

In [39]:
sparsity_per_layer = 0.9
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]




The pruned model with sparsity 0.9 accuracy is 0.00%.
The accurancy drop is 100.00% and size drop is 98.28%.
tensor([[-0.0228, -0.0563, -0.0336, -0.0563,  0.0521, -0.1073, -0.2085,  0.1049,
          0.3267, -0.0480]])


## Dynamic Quantized Per Tensor

### 8 bit quantization

In [40]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]


The dynamic quantized per tensor model with bitwidth 8 accuracy is 100.00%.
The accurancy drop is 0.00% and size drop is 74.75%.
tensor([[ -7.1223,   8.0149,  -5.9762,  -9.1589,  -2.5781,  -3.0766,  -7.8516,
          -6.5813,  -2.4182, -10.5235]])


### 4 bit quantization

In [43]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]




The dynamic quantized per tensor model with bitwidth 4 accuracy is 100.00%.
The accurancy drop is 0.00% and size drop is 87.37%.
tensor([[-6.4828,  8.1723, -5.1209, -6.9938, -1.5367, -3.8607, -8.3492, -5.2086,
         -1.9306, -7.5333]])


## Dynamic Quantized Per Channel

### 8 bit quantization

In [16]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]


The dynamic quantized per channel model with bitwidth 8 accuracy is 100.00%.
The accurancy drop is -3.12% and size drop is 74.45%.
tensor([[ -6.6999, -11.5344,  -7.1422,   4.7903, -12.2799,   3.7702, -11.3912,
          -6.2070,  -0.5821,  -2.5629]])


UnboundLocalError: cannot access local variable 'layer_def' where it is not associated with a value

## Static Quantized Per Tensor

### 8 bit quantization

In [44]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



  0%|          | 0/313 [00:00<?, ?it/s]


The static quantized per tensor model with bitwidth 8 accuracy is 100.00%.
The accurancy drop is 0.00% and size drop is 74.74%.
tensor([[-34,  63, -27, -46,  -5,  -8, -39, -31,  -3, -55]], dtype=torch.int8)


### 4 bit quantization

In [45]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]


The static quantized per tensor model with bitwidth 4 accuracy is 100.00%.
The accurancy drop is 0.00% and size drop is 87.35%.
tensor([[-2,  6, -2, -4,  1, -1, -2, -1, -1, -3]], dtype=torch.int8)


## Static Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



In [None]:
# lenet5_model.cpu()

# # PRUNED MODEL
# pruned_sparsity = [i/10 for i in range(10)]
# for sparsity in pruned_sparsity:
#     pruned_model = lenet5_model.prune_channel(sparsity)
#     acc = pruned_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = pruned_model.get_size_in_bits()//8
#     print(f"The pruned model with sparsity {sparsity} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

# quantization_bitwidth = [i for i in range(8, 0, -1)]

# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_tensor_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
#     acc = dynamic_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_channel_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
#     acc = dynamic_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_tensor_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_channel_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")



  0%|          | 0/313 [00:00<?, ?it/s]

100%|██████████| 313/313 [00:07<00:00, 39.82it/s]


The pruned model with sparsity 0.0 accuracy is 99.08%.
The accurancy drop is 0.09% and size drop is 0.00%.


100%|██████████| 313/313 [00:07<00:00, 44.52it/s]


The pruned model with sparsity 0.1 accuracy is 95.39%.
The accurancy drop is 3.78% and size drop is 21.90%.


100%|██████████| 313/313 [00:06<00:00, 50.05it/s]


The pruned model with sparsity 0.2 accuracy is 93.77%.
The accurancy drop is 5.40% and size drop is 34.54%.


100%|██████████| 313/313 [00:06<00:00, 48.69it/s]


The pruned model with sparsity 0.3 accuracy is 77.89%.
The accurancy drop is 21.28% and size drop is 51.22%.


100%|██████████| 313/313 [00:06<00:00, 46.19it/s]


The pruned model with sparsity 0.4 accuracy is 70.16%.
The accurancy drop is 29.01% and size drop is 61.80%.


100%|██████████| 313/313 [00:06<00:00, 49.59it/s]


The pruned model with sparsity 0.5 accuracy is 49.64%.
The accurancy drop is 49.53% and size drop is 74.24%.


100%|██████████| 313/313 [00:06<00:00, 48.34it/s]


The pruned model with sparsity 0.6 accuracy is 38.89%.
The accurancy drop is 60.28% and size drop is 81.29%.


100%|██████████| 313/313 [00:06<00:00, 49.55it/s]


The pruned model with sparsity 0.7 accuracy is 6.58%.
The accurancy drop is 92.59% and size drop is 89.98%.


100%|██████████| 313/313 [00:07<00:00, 42.26it/s]


The pruned model with sparsity 0.8 accuracy is 13.32%.
The accurancy drop is 85.85% and size drop is 94.20%.


100%|██████████| 313/313 [00:06<00:00, 51.71it/s]


The pruned model with sparsity 0.9 accuracy is 9.38%.
The accurancy drop is 89.79% and size drop is 98.28%.


100%|██████████| 313/313 [00:07<00:00, 44.03it/s]


The dynamic quantized per tensor model with bitwidth 8 accuracy is 99.11%.
The accurancy drop is 0.06% and size drop is 74.75%.


100%|██████████| 313/313 [00:06<00:00, 46.97it/s]


The dynamic quantized per tensor model with bitwidth 7 accuracy is 99.13%.
The accurancy drop is 0.04% and size drop is 74.75%.


100%|██████████| 313/313 [00:07<00:00, 41.61it/s]


The dynamic quantized per tensor model with bitwidth 6 accuracy is 99.18%.
The accurancy drop is -0.01% and size drop is 74.75%.


100%|██████████| 313/313 [00:06<00:00, 48.17it/s]


The dynamic quantized per tensor model with bitwidth 5 accuracy is 99.04%.
The accurancy drop is 0.13% and size drop is 74.75%.


100%|██████████| 313/313 [00:07<00:00, 43.35it/s]


The dynamic quantized per tensor model with bitwidth 4 accuracy is 98.80%.
The accurancy drop is 0.37% and size drop is 87.37%.


100%|██████████| 313/313 [00:06<00:00, 45.04it/s]


The dynamic quantized per tensor model with bitwidth 3 accuracy is 92.91%.
The accurancy drop is 6.26% and size drop is 87.37%.


100%|██████████| 313/313 [02:41<00:00,  1.94it/s] 


The dynamic quantized per tensor model with bitwidth 2 accuracy is 9.74%.
The accurancy drop is 89.43% and size drop is 93.68%.


100%|██████████| 313/313 [00:06<00:00, 49.06it/s]


The dynamic quantized per tensor model with bitwidth 1 accuracy is 9.80%.
The accurancy drop is 89.37% and size drop is 96.83%.


100%|██████████| 313/313 [00:06<00:00, 45.90it/s]


The dynamic quantized per channel model with bitwidth 8 accuracy is 99.08%.
The accurancy drop is 0.09% and size drop is 74.45%.


100%|██████████| 313/313 [00:07<00:00, 44.11it/s]


The dynamic quantized per channel model with bitwidth 7 accuracy is 99.08%.
The accurancy drop is 0.09% and size drop is 74.45%.


100%|██████████| 313/313 [00:06<00:00, 46.25it/s]


The dynamic quantized per channel model with bitwidth 6 accuracy is 99.14%.
The accurancy drop is 0.03% and size drop is 74.45%.


100%|██████████| 313/313 [00:06<00:00, 47.17it/s]


The dynamic quantized per channel model with bitwidth 5 accuracy is 98.96%.
The accurancy drop is 0.21% and size drop is 74.45%.


100%|██████████| 313/313 [00:06<00:00, 49.57it/s]


The dynamic quantized per channel model with bitwidth 4 accuracy is 98.95%.
The accurancy drop is 0.22% and size drop is 87.07%.


100%|██████████| 313/313 [00:06<00:00, 44.84it/s]


The dynamic quantized per channel model with bitwidth 3 accuracy is 94.07%.
The accurancy drop is 5.10% and size drop is 87.07%.


100%|██████████| 313/313 [00:06<00:00, 47.70it/s]


The dynamic quantized per channel model with bitwidth 2 accuracy is 16.01%.
The accurancy drop is 83.16% and size drop is 93.38%.


100%|██████████| 313/313 [00:06<00:00, 50.70it/s]


The dynamic quantized per channel model with bitwidth 1 accuracy is 9.80%.
The accurancy drop is 89.37% and size drop is 96.53%.


100%|██████████| 313/313 [00:06<00:00, 45.96it/s]


The static quantized per tensor model with bitwidth 8 accuracy is 99.17%.
The accurancy drop is 0.00% and size drop is 74.74%.


100%|██████████| 313/313 [00:07<00:00, 44.04it/s]


The static quantized per tensor model with bitwidth 7 accuracy is 99.05%.
The accurancy drop is 0.12% and size drop is 74.74%.


100%|██████████| 313/313 [00:07<00:00, 43.60it/s]


The static quantized per tensor model with bitwidth 6 accuracy is 99.06%.
The accurancy drop is 0.11% and size drop is 74.74%.


100%|██████████| 313/313 [00:07<00:00, 43.26it/s]


The static quantized per tensor model with bitwidth 5 accuracy is 98.78%.
The accurancy drop is 0.39% and size drop is 74.74%.


100%|██████████| 313/313 [00:07<00:00, 42.30it/s]


The static quantized per tensor model with bitwidth 4 accuracy is 98.00%.
The accurancy drop is 1.17% and size drop is 87.35%.


100%|██████████| 313/313 [00:07<00:00, 43.68it/s]


The static quantized per tensor model with bitwidth 3 accuracy is 59.10%.
The accurancy drop is 40.07% and size drop is 87.35%.


100%|██████████| 313/313 [00:07<00:00, 41.86it/s]


The static quantized per tensor model with bitwidth 2 accuracy is 9.80%.
The accurancy drop is 89.37% and size drop is 93.66%.


100%|██████████| 313/313 [00:07<00:00, 42.43it/s]


The static quantized per tensor model with bitwidth 1 accuracy is 9.80%.
The accurancy drop is 89.37% and size drop is 96.81%.


100%|██████████| 313/313 [00:07<00:00, 40.96it/s]


The static quantized per channel model with bitwidth 8 accuracy is 99.08%.
The accurancy drop is 0.09% and size drop is 74.43%.


100%|██████████| 313/313 [00:07<00:00, 41.59it/s]


The static quantized per channel model with bitwidth 7 accuracy is 99.08%.
The accurancy drop is 0.09% and size drop is 74.43%.


100%|██████████| 313/313 [00:07<00:00, 41.57it/s]


The static quantized per channel model with bitwidth 6 accuracy is 99.01%.
The accurancy drop is 0.16% and size drop is 74.43%.


100%|██████████| 313/313 [00:07<00:00, 41.92it/s]


The static quantized per channel model with bitwidth 5 accuracy is 98.83%.
The accurancy drop is 0.34% and size drop is 74.43%.


100%|██████████| 313/313 [00:07<00:00, 42.20it/s]


The static quantized per channel model with bitwidth 4 accuracy is 98.28%.
The accurancy drop is 0.89% and size drop is 87.05%.


100%|██████████| 313/313 [00:07<00:00, 42.39it/s]


The static quantized per channel model with bitwidth 3 accuracy is 85.02%.
The accurancy drop is 14.15% and size drop is 87.05%.


100%|██████████| 313/313 [00:07<00:00, 43.38it/s]


The static quantized per channel model with bitwidth 2 accuracy is 9.79%.
The accurancy drop is 89.38% and size drop is 93.36%.


100%|██████████| 313/313 [00:06<00:00, 44.89it/s]

The static quantized per channel model with bitwidth 1 accuracy is 9.80%.
The accurancy drop is 89.37% and size drop is 96.51%.



