In [1]:
import sys
import copy

import torch
from torch import nn, optim
from torch.utils import data
from torchvision import datasets, transforms

In [2]:
# sys.path.append("/home/matthias/Documents/EmbeddedAI/deep-microcompression/")
sys.path.append("../../")

from development import (
    Sequential,
    Conv2d,
    Linear,
    ReLU,
    MaxPool2d,
    Flatten
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
lenet5_file = f"lenet5_state_dict_{DEVICE}.pth"

LUCKY_NUMBER = 25
torch.manual_seed(LUCKY_NUMBER)
torch.random.manual_seed(LUCKY_NUMBER)
torch.cuda.manual_seed(LUCKY_NUMBER)


In [4]:
DEVICE

'cuda'

In [5]:
data_transform = transforms.Compose([
    transforms.RandomCrop((24, 24)),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

mnist_train_dataset = datasets.MNIST("./datasets", train=True, download=True, transform=data_transform)
mnist_test_dataset = datasets.MNIST("./datasets", train=False, download=True, transform=data_transform)

mnist_train_loader = data.DataLoader(mnist_train_dataset, batch_size=32, shuffle=True)
mnist_test_loader = data.DataLoader(mnist_test_dataset, batch_size=32)


In [6]:
lenet5_model = Sequential(
    Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0, bias=True),
    ReLU(),
    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0, bias=True),
    ReLU(),
    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Flatten(),

    Linear(in_features=16*5*5, out_features=84, bias=True),
    ReLU(),
    Linear(in_features=84, out_features=10, bias=True)
).to(DEVICE)

accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()




In [7]:
try:
    # raise RuntimeError
    lenet5_model.load_state_dict(torch.load(lenet5_file, weights_only=True))
    
except (RuntimeError, FileNotFoundError) as e:
    
    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(lenet5_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    lenet5_model.fit(
        mnist_train_loader, 15, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=mnist_test_loader, 
        device=DEVICE
    )
    torch.save(lenet5_model.state_dict(), lenet5_file)
    

In [8]:
original_acc = lenet5_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)
# original_size = lenet5_model.get_size_in_bits()//8
# print(f"The original model accuracy is {original_acc*100:.2f}% with size {original_size} bytes.")

# 

100%|██████████| 313/313 [00:05<00:00, 59.75it/s]


## Original Model


In [9]:
lenet5_model.cpu()

lenet5_mcu_model = copy.deepcopy(lenet5_model)

original_acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
original_size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The original model accuracy is {original_acc*100:.2f}% with size {original_size} bytes.")
# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


100%|██████████| 313/313 [00:06<00:00, 47.86it/s]

The original model accuracy is 99.11% with size 148424 bytes.





## Pruned Model 

### sparsity_per_layer = 0.1

In [None]:
sparsity_per_layer = 0.1
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


100%|██████████| 313/313 [00:06<00:00, 49.05it/s]


The pruned model with sparsity 0.1 accuracy is 95.12%.


  7%|▋         | 1/15 [01:05<15:17, 65.55s/it]

epoch    0 | train loss 0.0049 | validation loss 0.0035 | train acc 0.9543 | validation acc 0.9692


 13%|█▎        | 2/15 [02:12<14:21, 66.25s/it]

epoch    1 | train loss 0.0044 | validation loss 0.0035 | train acc 0.9607 | validation acc 0.9685


### sparsity_per_layer = 0.2

In [None]:
sparsity_per_layer = 0.2
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


100%|██████████| 313/313 [00:07<00:00, 40.20it/s]


The pruned model with sparsity 0.2 accuracy is 94.65%.


  7%|▋         | 1/15 [01:13<17:10, 73.57s/it]

epoch    0 | train loss 0.0048 | validation loss 0.0034 | train acc 0.9542 | validation acc 0.9669


 13%|█▎        | 2/15 [02:28<16:08, 74.53s/it]

epoch    1 | train loss 0.0043 | validation loss 0.0035 | train acc 0.9609 | validation acc 0.9668


 20%|██        | 3/15 [03:43<14:56, 74.72s/it]

epoch    2 | train loss 0.0042 | validation loss 0.0033 | train acc 0.9625 | validation acc 0.9683


 27%|██▋       | 4/15 [04:57<13:40, 74.55s/it]

epoch    3 | train loss 0.0042 | validation loss 0.0035 | train acc 0.9629 | validation acc 0.9690


 33%|███▎      | 5/15 [06:13<12:27, 74.73s/it]

epoch    4 | train loss 0.0040 | validation loss 0.0034 | train acc 0.9650 | validation acc 0.9692


 40%|████      | 6/15 [07:25<11:05, 73.93s/it]

epoch    5 | train loss 0.0039 | validation loss 0.0032 | train acc 0.9666 | validation acc 0.9704


 47%|████▋     | 7/15 [08:36<09:43, 72.91s/it]

epoch    6 | train loss 0.0037 | validation loss 0.0046 | train acc 0.9684 | validation acc 0.9567


 53%|█████▎    | 8/15 [09:47<08:27, 72.47s/it]

epoch    7 | train loss 0.0038 | validation loss 0.0040 | train acc 0.9681 | validation acc 0.9660


 60%|██████    | 9/15 [11:02<07:19, 73.21s/it]

epoch    8 | train loss 0.0037 | validation loss 0.0043 | train acc 0.9688 | validation acc 0.9638


 67%|██████▋   | 10/15 [12:15<06:05, 73.19s/it]

epoch    9 | train loss 0.0022 | validation loss 0.0018 | train acc 0.9809 | validation acc 0.9824


 73%|███████▎  | 11/15 [13:26<04:49, 72.38s/it]

epoch   10 | train loss 0.0019 | validation loss 0.0019 | train acc 0.9833 | validation acc 0.9838


 80%|████████  | 12/15 [14:38<03:36, 72.30s/it]

epoch   11 | train loss 0.0017 | validation loss 0.0018 | train acc 0.9848 | validation acc 0.9831


 87%|████████▋ | 13/15 [15:54<02:26, 73.34s/it]

epoch   12 | train loss 0.0016 | validation loss 0.0016 | train acc 0.9851 | validation acc 0.9854


 93%|█████████▎| 14/15 [17:08<01:13, 73.74s/it]

epoch   13 | train loss 0.0015 | validation loss 0.0016 | train acc 0.9857 | validation acc 0.9841


100%|██████████| 15/15 [18:22<00:00, 73.53s/it]


epoch   14 | train loss 0.0015 | validation loss 0.0016 | train acc 0.9859 | validation acc 0.9860


100%|██████████| 313/313 [00:07<00:00, 40.27it/s]

The pruned model with sparsity 0.2 accuracy is 98.57%.





### sparsity_per_layer = 0.3

In [None]:
sparsity_per_layer = 0.3
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

100%|██████████| 313/313 [00:07<00:00, 44.24it/s]


The pruned model with sparsity 0.3 accuracy is 83.71%.


  7%|▋         | 1/15 [01:13<17:10, 73.59s/it]

epoch    0 | train loss 0.0050 | validation loss 0.0041 | train acc 0.9537 | validation acc 0.9589


 13%|█▎        | 2/15 [02:24<15:36, 72.06s/it]

epoch    1 | train loss 0.0044 | validation loss 0.0039 | train acc 0.9590 | validation acc 0.9622


 20%|██        | 3/15 [03:37<14:27, 72.27s/it]

epoch    2 | train loss 0.0044 | validation loss 0.0034 | train acc 0.9602 | validation acc 0.9672


 27%|██▋       | 4/15 [04:50<13:19, 72.69s/it]

epoch    3 | train loss 0.0042 | validation loss 0.0041 | train acc 0.9629 | validation acc 0.9667


 33%|███▎      | 5/15 [06:03<12:08, 72.82s/it]

epoch    4 | train loss 0.0042 | validation loss 0.0035 | train acc 0.9634 | validation acc 0.9667


 40%|████      | 6/15 [07:16<10:56, 72.89s/it]

epoch    5 | train loss 0.0041 | validation loss 0.0037 | train acc 0.9645 | validation acc 0.9678


 47%|████▋     | 7/15 [08:28<09:39, 72.46s/it]

epoch    6 | train loss 0.0025 | validation loss 0.0021 | train acc 0.9777 | validation acc 0.9817


 53%|█████▎    | 8/15 [09:38<08:23, 71.96s/it]

epoch    7 | train loss 0.0019 | validation loss 0.0018 | train acc 0.9823 | validation acc 0.9829


 60%|██████    | 9/15 [10:50<07:10, 71.77s/it]

epoch    8 | train loss 0.0018 | validation loss 0.0017 | train acc 0.9825 | validation acc 0.9831


 67%|██████▋   | 10/15 [12:03<06:01, 72.28s/it]

epoch    9 | train loss 0.0017 | validation loss 0.0017 | train acc 0.9839 | validation acc 0.9835


 73%|███████▎  | 11/15 [13:15<04:48, 72.11s/it]

epoch   10 | train loss 0.0016 | validation loss 0.0016 | train acc 0.9849 | validation acc 0.9859


 80%|████████  | 12/15 [14:27<03:36, 72.08s/it]

epoch   11 | train loss 0.0015 | validation loss 0.0015 | train acc 0.9858 | validation acc 0.9850


 87%|████████▋ | 13/15 [15:39<02:24, 72.01s/it]

epoch   12 | train loss 0.0015 | validation loss 0.0016 | train acc 0.9856 | validation acc 0.9842


 93%|█████████▎| 14/15 [16:52<01:12, 72.34s/it]

epoch   13 | train loss 0.0014 | validation loss 0.0016 | train acc 0.9870 | validation acc 0.9853


100%|██████████| 15/15 [18:05<00:00, 72.35s/it]


epoch   14 | train loss 0.0014 | validation loss 0.0015 | train acc 0.9861 | validation acc 0.9866


100%|██████████| 313/313 [00:07<00:00, 42.11it/s]

The pruned model with sparsity 0.3 accuracy is 98.42%.





### sparsity_per_layer = 0.4

In [None]:
sparsity_per_layer = 0.4
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

100%|██████████| 313/313 [00:08<00:00, 38.99it/s]


The pruned model with sparsity 0.4 accuracy is 74.37%.


  7%|▋         | 1/15 [01:13<17:09, 73.52s/it]

epoch    0 | train loss 0.0049 | validation loss 0.0030 | train acc 0.9531 | validation acc 0.9703


 13%|█▎        | 2/15 [02:25<15:46, 72.82s/it]

epoch    1 | train loss 0.0046 | validation loss 0.0036 | train acc 0.9591 | validation acc 0.9669


 20%|██        | 3/15 [03:38<14:32, 72.67s/it]

epoch    2 | train loss 0.0044 | validation loss 0.0040 | train acc 0.9599 | validation acc 0.9636


 27%|██▋       | 4/15 [04:48<13:10, 71.87s/it]

epoch    3 | train loss 0.0043 | validation loss 0.0031 | train acc 0.9616 | validation acc 0.9704


 33%|███▎      | 5/15 [06:03<12:07, 72.73s/it]

epoch    4 | train loss 0.0023 | validation loss 0.0020 | train acc 0.9777 | validation acc 0.9800


 40%|████      | 6/15 [07:17<10:59, 73.23s/it]

epoch    5 | train loss 0.0019 | validation loss 0.0018 | train acc 0.9820 | validation acc 0.9822


 47%|████▋     | 7/15 [08:30<09:46, 73.33s/it]

epoch    6 | train loss 0.0018 | validation loss 0.0017 | train acc 0.9834 | validation acc 0.9832


 53%|█████▎    | 8/15 [09:45<08:34, 73.56s/it]

epoch    7 | train loss 0.0016 | validation loss 0.0016 | train acc 0.9844 | validation acc 0.9857


 60%|██████    | 9/15 [10:59<07:23, 73.92s/it]

epoch    8 | train loss 0.0016 | validation loss 0.0015 | train acc 0.9851 | validation acc 0.9852


 67%|██████▋   | 10/15 [12:14<06:10, 74.04s/it]

epoch    9 | train loss 0.0015 | validation loss 0.0015 | train acc 0.9855 | validation acc 0.9851


 73%|███████▎  | 11/15 [13:27<04:55, 73.90s/it]

epoch   10 | train loss 0.0015 | validation loss 0.0015 | train acc 0.9858 | validation acc 0.9863


 80%|████████  | 12/15 [14:39<03:39, 73.21s/it]

epoch   11 | train loss 0.0014 | validation loss 0.0016 | train acc 0.9855 | validation acc 0.9850


 87%|████████▋ | 13/15 [15:53<02:27, 73.67s/it]

epoch   12 | train loss 0.0013 | validation loss 0.0013 | train acc 0.9871 | validation acc 0.9867


 93%|█████████▎| 14/15 [17:06<01:13, 73.25s/it]

epoch   13 | train loss 0.0013 | validation loss 0.0015 | train acc 0.9874 | validation acc 0.9833


100%|██████████| 15/15 [18:18<00:00, 73.25s/it]


epoch   14 | train loss 0.0013 | validation loss 0.0016 | train acc 0.9868 | validation acc 0.9853


100%|██████████| 313/313 [00:07<00:00, 39.29it/s]

The pruned model with sparsity 0.4 accuracy is 98.47%.





### sparsity_per_layer = 0.5

In [None]:
sparsity_per_layer = 0.5
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


100%|██████████| 313/313 [00:07<00:00, 40.29it/s]


The pruned model with sparsity 0.5 accuracy is 48.18%.


  7%|▋         | 1/15 [01:12<16:59, 72.79s/it]

epoch    0 | train loss 0.0062 | validation loss 0.0051 | train acc 0.9398 | validation acc 0.9497


 13%|█▎        | 2/15 [02:27<15:57, 73.69s/it]

epoch    1 | train loss 0.0051 | validation loss 0.0044 | train acc 0.9513 | validation acc 0.9584


 20%|██        | 3/15 [03:42<14:55, 74.62s/it]

epoch    2 | train loss 0.0046 | validation loss 0.0046 | train acc 0.9561 | validation acc 0.9591


 27%|██▋       | 4/15 [04:55<13:34, 74.01s/it]

epoch    3 | train loss 0.0047 | validation loss 0.0035 | train acc 0.9571 | validation acc 0.9668


 33%|███▎      | 5/15 [06:07<12:12, 73.20s/it]

epoch    4 | train loss 0.0044 | validation loss 0.0041 | train acc 0.9593 | validation acc 0.9598


 40%|████      | 6/15 [07:18<10:50, 72.25s/it]

epoch    5 | train loss 0.0044 | validation loss 0.0049 | train acc 0.9593 | validation acc 0.9546


 47%|████▋     | 7/15 [08:27<09:29, 71.21s/it]

epoch    6 | train loss 0.0044 | validation loss 0.0037 | train acc 0.9601 | validation acc 0.9652


 53%|█████▎    | 8/15 [09:35<08:11, 70.18s/it]

epoch    7 | train loss 0.0027 | validation loss 0.0025 | train acc 0.9746 | validation acc 0.9763


 60%|██████    | 9/15 [10:43<06:58, 69.71s/it]

epoch    8 | train loss 0.0023 | validation loss 0.0023 | train acc 0.9786 | validation acc 0.9782


 67%|██████▋   | 10/15 [11:54<05:50, 70.06s/it]

epoch    9 | train loss 0.0021 | validation loss 0.0021 | train acc 0.9797 | validation acc 0.9804


 73%|███████▎  | 11/15 [13:05<04:40, 70.24s/it]

epoch   10 | train loss 0.0020 | validation loss 0.0020 | train acc 0.9806 | validation acc 0.9825


 80%|████████  | 12/15 [14:16<03:31, 70.40s/it]

epoch   11 | train loss 0.0019 | validation loss 0.0019 | train acc 0.9817 | validation acc 0.9825


 87%|████████▋ | 13/15 [15:25<02:19, 69.99s/it]

epoch   12 | train loss 0.0019 | validation loss 0.0018 | train acc 0.9818 | validation acc 0.9828


 93%|█████████▎| 14/15 [16:32<01:09, 69.11s/it]

epoch   13 | train loss 0.0017 | validation loss 0.0020 | train acc 0.9832 | validation acc 0.9818


100%|██████████| 15/15 [17:40<00:00, 70.70s/it]


epoch   14 | train loss 0.0018 | validation loss 0.0018 | train acc 0.9831 | validation acc 0.9812


100%|██████████| 313/313 [00:07<00:00, 44.71it/s]

The pruned model with sparsity 0.5 accuracy is 98.21%.





### sparsity_per_layer = 0.6

In [None]:
sparsity_per_layer = 0.6
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

100%|██████████| 313/313 [00:07<00:00, 41.80it/s]


The pruned model with sparsity 0.6 accuracy is 33.56%.


  7%|▋         | 1/15 [01:10<16:32, 70.86s/it]

epoch    0 | train loss 0.0068 | validation loss 0.0056 | train acc 0.9326 | validation acc 0.9477


 13%|█▎        | 2/15 [02:19<15:05, 69.68s/it]

epoch    1 | train loss 0.0052 | validation loss 0.0061 | train acc 0.9502 | validation acc 0.9404


 20%|██        | 3/15 [03:27<13:45, 68.77s/it]

epoch    2 | train loss 0.0049 | validation loss 0.0047 | train acc 0.9525 | validation acc 0.9564


 27%|██▋       | 4/15 [04:35<12:35, 68.64s/it]

epoch    3 | train loss 0.0047 | validation loss 0.0044 | train acc 0.9560 | validation acc 0.9595


 33%|███▎      | 5/15 [05:45<11:30, 69.02s/it]

epoch    4 | train loss 0.0046 | validation loss 0.0063 | train acc 0.9565 | validation acc 0.9412


 40%|████      | 6/15 [06:53<10:18, 68.73s/it]

epoch    5 | train loss 0.0048 | validation loss 0.0041 | train acc 0.9561 | validation acc 0.9621


 47%|████▋     | 7/15 [08:03<09:11, 69.00s/it]

epoch    6 | train loss 0.0045 | validation loss 0.0066 | train acc 0.9580 | validation acc 0.9449


 53%|█████▎    | 8/15 [09:13<08:04, 69.28s/it]

epoch    7 | train loss 0.0046 | validation loss 0.0039 | train acc 0.9583 | validation acc 0.9640


 60%|██████    | 9/15 [10:22<06:56, 69.43s/it]

epoch    8 | train loss 0.0045 | validation loss 0.0049 | train acc 0.9577 | validation acc 0.9543


 67%|██████▋   | 10/15 [11:29<05:43, 68.65s/it]

epoch    9 | train loss 0.0045 | validation loss 0.0041 | train acc 0.9587 | validation acc 0.9638


 73%|███████▎  | 11/15 [12:40<04:36, 69.17s/it]

epoch   10 | train loss 0.0043 | validation loss 0.0043 | train acc 0.9608 | validation acc 0.9632


 80%|████████  | 12/15 [13:48<03:26, 68.90s/it]

epoch   11 | train loss 0.0030 | validation loss 0.0024 | train acc 0.9720 | validation acc 0.9761


 87%|████████▋ | 13/15 [14:57<02:18, 69.04s/it]

epoch   12 | train loss 0.0026 | validation loss 0.0024 | train acc 0.9750 | validation acc 0.9768


 93%|█████████▎| 14/15 [16:04<01:08, 68.48s/it]

epoch   13 | train loss 0.0024 | validation loss 0.0024 | train acc 0.9772 | validation acc 0.9765


100%|██████████| 15/15 [17:12<00:00, 68.84s/it]


epoch   14 | train loss 0.0023 | validation loss 0.0022 | train acc 0.9788 | validation acc 0.9790


100%|██████████| 313/313 [00:07<00:00, 44.43it/s]

The pruned model with sparsity 0.6 accuracy is 97.82%.





### sparsity_per_layer = 0.7

In [None]:
sparsity_per_layer = 0.7
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

100%|██████████| 313/313 [00:07<00:00, 43.63it/s]


The pruned model with sparsity 0.7 accuracy is 11.02%.


  7%|▋         | 1/15 [01:10<16:30, 70.73s/it]

epoch    0 | train loss 0.0094 | validation loss 0.0059 | train acc 0.9058 | validation acc 0.9392


 13%|█▎        | 2/15 [02:21<15:21, 70.91s/it]

epoch    1 | train loss 0.0063 | validation loss 0.0051 | train acc 0.9372 | validation acc 0.9464


 20%|██        | 3/15 [03:34<14:21, 71.78s/it]

epoch    2 | train loss 0.0058 | validation loss 0.0053 | train acc 0.9436 | validation acc 0.9517


 27%|██▋       | 4/15 [04:45<13:03, 71.26s/it]

epoch    3 | train loss 0.0056 | validation loss 0.0052 | train acc 0.9462 | validation acc 0.9506


 33%|███▎      | 5/15 [05:55<11:47, 70.79s/it]

epoch    4 | train loss 0.0054 | validation loss 0.0049 | train acc 0.9485 | validation acc 0.9540


 40%|████      | 6/15 [07:08<10:45, 71.71s/it]

epoch    5 | train loss 0.0053 | validation loss 0.0055 | train acc 0.9487 | validation acc 0.9461


 47%|████▋     | 7/15 [08:18<09:28, 71.04s/it]

epoch    6 | train loss 0.0053 | validation loss 0.0046 | train acc 0.9486 | validation acc 0.9557


 53%|█████▎    | 8/15 [09:29<08:17, 71.01s/it]

epoch    7 | train loss 0.0050 | validation loss 0.0040 | train acc 0.9510 | validation acc 0.9613


 60%|██████    | 9/15 [10:38<07:02, 70.45s/it]

epoch    8 | train loss 0.0050 | validation loss 0.0049 | train acc 0.9515 | validation acc 0.9540


 67%|██████▋   | 10/15 [11:45<05:47, 69.55s/it]

epoch    9 | train loss 0.0048 | validation loss 0.0044 | train acc 0.9537 | validation acc 0.9563


 73%|███████▎  | 11/15 [12:54<04:37, 69.30s/it]

epoch   10 | train loss 0.0049 | validation loss 0.0043 | train acc 0.9535 | validation acc 0.9572


 80%|████████  | 12/15 [14:03<03:27, 69.17s/it]

epoch   11 | train loss 0.0036 | validation loss 0.0033 | train acc 0.9644 | validation acc 0.9677


 87%|████████▋ | 13/15 [15:14<02:19, 69.84s/it]

epoch   12 | train loss 0.0032 | validation loss 0.0031 | train acc 0.9686 | validation acc 0.9714


 93%|█████████▎| 14/15 [16:23<01:09, 69.56s/it]

epoch   13 | train loss 0.0031 | validation loss 0.0028 | train acc 0.9704 | validation acc 0.9725


100%|██████████| 15/15 [17:34<00:00, 70.27s/it]


epoch   14 | train loss 0.0030 | validation loss 0.0026 | train acc 0.9712 | validation acc 0.9782


100%|██████████| 313/313 [00:06<00:00, 46.74it/s]

The pruned model with sparsity 0.7 accuracy is 97.26%.





### sparsity_per_layer = 0.8

In [None]:
sparsity_per_layer = 0.8
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


  0%|          | 0/313 [00:00<?, ?it/s]

100%|██████████| 313/313 [00:07<00:00, 43.86it/s]


The pruned model with sparsity 0.8 accuracy is 13.45%.


  7%|▋         | 1/15 [01:14<17:22, 74.49s/it]

epoch    0 | train loss 0.0113 | validation loss 0.0076 | train acc 0.8859 | validation acc 0.9241


 13%|█▎        | 2/15 [02:26<15:50, 73.10s/it]

epoch    1 | train loss 0.0080 | validation loss 0.0068 | train acc 0.9196 | validation acc 0.9269


 20%|██        | 3/15 [03:37<14:26, 72.18s/it]

epoch    2 | train loss 0.0074 | validation loss 0.0062 | train acc 0.9272 | validation acc 0.9376


 27%|██▋       | 4/15 [04:47<13:04, 71.34s/it]

epoch    3 | train loss 0.0068 | validation loss 0.0057 | train acc 0.9332 | validation acc 0.9440


 33%|███▎      | 5/15 [05:57<11:46, 70.66s/it]

epoch    4 | train loss 0.0068 | validation loss 0.0066 | train acc 0.9339 | validation acc 0.9350


 40%|████      | 6/15 [07:07<10:36, 70.68s/it]

epoch    5 | train loss 0.0067 | validation loss 0.0065 | train acc 0.9348 | validation acc 0.9371


 47%|████▋     | 7/15 [08:17<09:23, 70.44s/it]

epoch    6 | train loss 0.0064 | validation loss 0.0053 | train acc 0.9370 | validation acc 0.9470


 53%|█████▎    | 8/15 [09:29<08:15, 70.76s/it]

epoch    7 | train loss 0.0065 | validation loss 0.0056 | train acc 0.9367 | validation acc 0.9457


 53%|█████▎    | 8/15 [09:54<08:40, 74.31s/it]


KeyboardInterrupt: 

### sparsity_per_layer = 0.9

In [None]:
sparsity_per_layer = 0.9
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


100%|██████████| 313/313 [00:07<00:00, 43.58it/s]


The pruned model with sparsity 0.9 accuracy is 9.22%.
The accurancy drop is 89.89% and size drop is 0.00%.
tensor([[-0.0105, -0.0856, -0.0056, -0.1240,  0.0680, -0.1196, -0.1996,  0.1272,
          0.3142, -0.0742]])


## Dynamic Quantized Per Tensor

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


100%|██████████| 313/313 [00:06<00:00, 46.58it/s]


The dynamic quantized per tensor model with bitwidth 8 accuracy is 99.18%.
The accurancy drop is -0.07% and size drop is 74.75%.
tensor([[ 11.1611,  -7.6662,  -1.7230,  -9.1078, -10.8982,  -5.0437,  -9.2423,
          -5.6716,  -1.8833,  -7.8008]])


### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


100%|██████████| 313/313 [00:06<00:00, 44.75it/s]


The dynamic quantized per tensor model with bitwidth 4 accuracy is 99.16%.
The accurancy drop is -0.05% and size drop is 87.37%.
tensor([[ 11.1611,  -7.6662,  -1.7230,  -9.1078, -10.8982,  -5.0437,  -9.2423,
          -5.6716,  -1.8833,  -7.8008]])


## Dynamic Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


100%|██████████| 313/313 [00:07<00:00, 44.19it/s]


The dynamic quantized per channel model with bitwidth 8 accuracy is 99.04%.
The accurancy drop is 0.07% and size drop is 74.45%.
tensor([[ 11.1611,  -7.6662,  -1.7230,  -9.1078, -10.8982,  -5.0437,  -9.2423,
          -5.6716,  -1.8833,  -7.8008]])


UnboundLocalError: cannot access local variable 'layer_def' where it is not associated with a value

## Static Quantized Per Tensor

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



## Static Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



In [None]:
# lenet5_model.cpu()

# # PRUNED MODEL
# pruned_sparsity = [i/10 for i in range(10)]
# for sparsity in pruned_sparsity:
#     pruned_model = lenet5_model.prune_channel(sparsity)
#     acc = pruned_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = pruned_model.get_size_in_bits()//8
#     print(f"The pruned model with sparsity {sparsity} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

# quantization_bitwidth = [i for i in range(8, 0, -1)]

# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_tensor_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
#     acc = dynamic_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_channel_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
#     acc = dynamic_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_tensor_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_channel_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

