In [1]:
import sys
import os
import copy
import random

from tqdm.auto import tqdm

import torch
from torch import nn, optim
from torch.utils import data
from torchvision import datasets, transforms

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# sys.path.append("/home/matthias/Documents/EmbeddedAI/deep-microcompression/")
sys.path.append("../../")

from development import (
    Sequential,
    BatchNorm2d,
    Conv2d,
    Linear,
    ReLU,
    ReLU6,
    MaxPool2d,
    Flatten, 

    EarlyStopper,

    QUANTIZATION_NONE, 
    DYNAMIC_QUANTIZATION_PER_TENSOR
)

In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
lenet5_file = "lenet5_state_dict.pth"
log_compression_details_file = "lenet5_compression_log.csv"

LUCKY_NUMBER = 25
torch.manual_seed(LUCKY_NUMBER)
torch.random.manual_seed(LUCKY_NUMBER)
torch.cuda.manual_seed(LUCKY_NUMBER)

DEVICE


'cuda'

In [4]:
data_transform = transforms.Compose([
    transforms.RandomCrop((24, 24)),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

mnist_train_dataset = datasets.MNIST("../../../Datasets/", train=True, download=True, transform=data_transform)
mnist_test_dataset = datasets.MNIST("../../../Datasets/", train=False, download=True, transform=data_transform)

mnist_train_loader = data.DataLoader(mnist_train_dataset, batch_size=32, shuffle=True, num_workers=os.cpu_count())
mnist_test_loader = data.DataLoader(mnist_test_dataset, batch_size=32, shuffle=False, num_workers=os.cpu_count())


In [5]:
lenet5_model = Sequential(
    Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, pad=tuple([0]*4), bias=True),
    # BatchNorm2d(num_features=6),
    ReLU(),

    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0, bias=True),
    # BatchNorm2d(num_features=16),
    ReLU(),

    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Flatten(),

    Linear(in_features=16*5*5, out_features=84, bias=False),
    ReLU(),
    Linear(in_features=84, out_features=10, bias=True)
).to(DEVICE)

accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()




In [6]:
# lenet5_model = Sequential(
#     Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=5, pad=[2]*4, bias=False),
#     BatchNorm2d(num_features=3),
#     ReLU6(),
#     Flatten(),
#     Linear(in_features=3*6*6, out_features=10, bias=True)
# ).to(DEVICE)

# accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()




In [7]:
try:
    # raise RuntimeError
    lenet5_model.load_state_dict(torch.load(lenet5_file, weights_only=True), strict=False)
    
except (RuntimeError, FileNotFoundError) as e:
    early_stopper = EarlyStopper(
        metric_name="train_loss",
        min_valid_diff=1e-7,
        mode="min",
        patience=2,
        restore_best_state_dict=True,
    )

    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(lenet5_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    lenet5_model.fit(
        mnist_train_loader, 100, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=mnist_test_loader, 
        metrics={"acc" : accuracy_fun},
        callbacks=[early_stopper],
        device=DEVICE
    )
    torch.save(lenet5_model.state_dict(), lenet5_file)
    

## Original Model


In [8]:
lenet5_mcu_model = copy.deepcopy(lenet5_model)

original_acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100
original_size = lenet5_mcu_model.get_size_in_bits()//8
# print(f"The original model accuracy is {original_acc*100:.2f}% with size {original_size} bytes.")

# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")

# print(lenet5_mcu_model.test(device=DEVICE))
original_acc, original_size

100%|██████████| 313/313 [00:02<00:00, 130.81it/s]


(99.21, 148088)

## Pruned Model 

### sparsity_per_layer = 0.1

In [16]:
sparsity_per_layer = 0.25
RANGE = 10




if not os.path.exists(log_compression_details_file):
    with open(log_compression_details_file, "w") as file:
        file.write(f"sparsity, quantization_type, bitwidth, size, size_ratio, before acc, after acc, before acc_drop, after acc_drop\n")
        # file.write(f"sparsity, quantizaion_type, bitwidth, before acc, after acc\n")

for i in tqdm(range(100)):

    s = random.choice([i/RANGE for i in range(0, RANGE+1, 1)])
    q = random.choice([QUANTIZATION_NONE, DYNAMIC_QUANTIZATION_PER_TENSOR, DYNAMIC_QUANTIZATION_PER_TENSOR, DYNAMIC_QUANTIZATION_PER_TENSOR])
    b = random.choice([4, 8])
    print(f"sample number {i} ->  sparsity = {s}, q_type = {q}, bitwidth = {b}")

    # s = .8
    # q = 0
    # b = 8

    compression_config = {
        "prune_channel" :{
            "sparsity" : s
        },
        "quantization" : {
            "type" : q,
            "bitwidth" : b
        }

    }

    lenet5_mcu_model.cpu()
    compressed_lenet5_mcu_model = lenet5_mcu_model.compress(compression_config, input_shape=(1,1,32,32))
    compressed_lenet5_mcu_model.to(DEVICE)

    before_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100

    compressed_lenet5_mcu_model.cpu()
    size = compressed_lenet5_mcu_model.get_size_in_bits()//8
    compressed_lenet5_mcu_model.to(DEVICE)

    print(f"Before training, sparsity = {s}, q_type = {q}, bitwidth = {b} acc = {before_acc:.4f}")

    early_stopper = EarlyStopper(
        metric_name="train_loss",
        min_valid_diff=1e-5,
        mode="min",
        patience=4,
        restore_best_state_dict=True,
    )

    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(compressed_lenet5_mcu_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    compressed_lenet5_mcu_model.fit(
        mnist_train_loader, 
        15, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=mnist_test_loader, 
        metrics={"acc": accuracy_fun},
        verbose = False,
        device=DEVICE,
        compression_config=compression_config,
        callbacks = [early_stopper]
    )
    after_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100

    print(f"After training, sparsity = {i/RANGE:.2f}, bitwidth = {b} acc = {after_acc:.4f}")

    with open(log_compression_details_file, "a") as file:
        file.write(f"{s}, {q}, {b}, {size}, {size/original_size*100:9.4f}, {before_acc:9.4f}, {after_acc:9.4f}, {original_acc-before_acc:9.4f}, {original_acc-after_acc:9.4f}\n")
    # break

  0%|          | 0/100 [00:00<?, ?it/s]

sample number 0 ->  sparsity = 0.5, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 115.38it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 4 acc = 16.4000


 67%|██████▋   | 10/15 [03:54<01:57, 23.47s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.040816055391232176


100%|██████████| 313/313 [00:02<00:00, 141.48it/s]
  1%|          | 1/100 [03:59<6:35:49, 239.90s/it]

After training, sparsity = 0.00, bitwidth = 4 acc = 54.9100
sample number 1 ->  sparsity = 0.8, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 137.54it/s]


Before training, sparsity = 0.8, q_type = 1, bitwidth = 8 acc = 14.6200


100%|██████████| 15/15 [05:05<00:00, 20.37s/it]
100%|██████████| 313/313 [00:02<00:00, 147.83it/s]
  2%|▏         | 2/100 [09:10<7:39:23, 281.26s/it]

After training, sparsity = 0.10, bitwidth = 8 acc = 33.9500
sample number 2 ->  sparsity = 0.5, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 147.35it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 8 acc = 24.3500


 67%|██████▋   | 10/15 [03:28<01:44, 20.88s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.037477165168523785


100%|██████████| 313/313 [00:01<00:00, 157.56it/s]
  3%|▎         | 3/100 [12:43<6:44:22, 250.13s/it]

After training, sparsity = 0.20, bitwidth = 8 acc = 58.4700
sample number 3 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 145.14it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 86.5600


 60%|██████    | 9/15 [03:10<02:06, 21.14s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.004752312171971426


100%|██████████| 313/313 [00:01<00:00, 159.46it/s]
  4%|▍         | 4/100 [15:57<6:05:05, 228.18s/it]

After training, sparsity = 0.30, bitwidth = 4 acc = 94.8000
sample number 4 ->  sparsity = 0.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 143.03it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 8 acc = 99.1000


 33%|███▎      | 5/15 [01:53<03:46, 22.68s/it]


Stopping Training of Sequential with at 1 epoch with best train_loss = 0.0004372085090315295


100%|██████████| 313/313 [00:02<00:00, 145.16it/s]
  5%|▌         | 5/100 [17:55<4:58:19, 188.41s/it]

After training, sparsity = 0.40, bitwidth = 8 acc = 99.1500
sample number 5 ->  sparsity = 0.3, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 148.76it/s]


Before training, sparsity = 0.3, q_type = 0, bitwidth = 4 acc = 60.9100


100%|██████████| 15/15 [04:27<00:00, 17.82s/it]
100%|██████████| 313/313 [00:01<00:00, 158.25it/s]
  6%|▌         | 6/100 [22:27<5:39:30, 216.70s/it]

After training, sparsity = 0.50, bitwidth = 4 acc = 99.1100
sample number 6 ->  sparsity = 1.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 147.60it/s]


Before training, sparsity = 1.0, q_type = 1, bitwidth = 8 acc = 8.8100


 47%|████▋     | 7/15 [02:44<03:08, 23.54s/it]


Stopping Training of Sequential with at 3 epoch with best train_loss = 0.07187908789714177


100%|██████████| 313/313 [00:02<00:00, 146.19it/s]
  7%|▋         | 7/100 [25:16<5:11:51, 201.19s/it]

After training, sparsity = 0.60, bitwidth = 8 acc = 12.1300
sample number 7 ->  sparsity = 0.0, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 149.11it/s]


Before training, sparsity = 0.0, q_type = 0, bitwidth = 4 acc = 99.1400


 47%|████▋     | 7/15 [02:31<02:52, 21.60s/it]


Stopping Training of Sequential with at 3 epoch with best train_loss = 0.0008016392489818827


100%|██████████| 313/313 [00:01<00:00, 160.68it/s]
  8%|▊         | 8/100 [27:51<4:46:09, 186.62s/it]

After training, sparsity = 0.70, bitwidth = 4 acc = 98.8800
sample number 8 ->  sparsity = 0.3, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 146.83it/s]


Before training, sparsity = 0.3, q_type = 0, bitwidth = 4 acc = 60.6500


100%|██████████| 15/15 [04:21<00:00, 17.42s/it]
100%|██████████| 313/313 [00:01<00:00, 162.64it/s]
  9%|▉         | 9/100 [32:17<5:20:27, 211.29s/it]

After training, sparsity = 0.80, bitwidth = 4 acc = 99.0300
sample number 9 ->  sparsity = 0.2, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 147.81it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 8 acc = 93.3400


 53%|█████▎    | 8/15 [02:47<02:26, 20.99s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.0026671191372811638


100%|██████████| 313/313 [00:01<00:00, 159.32it/s]
 10%|█         | 10/100 [35:09<4:58:50, 199.23s/it]

After training, sparsity = 0.90, bitwidth = 8 acc = 96.6100
sample number 10 ->  sparsity = 1.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 146.81it/s]


Before training, sparsity = 1.0, q_type = 1, bitwidth = 8 acc = 8.5100


 60%|██████    | 9/15 [03:06<02:04, 20.70s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.07187913657824198


100%|██████████| 313/313 [00:01<00:00, 162.68it/s]
 11%|█         | 11/100 [38:20<4:51:35, 196.58s/it]

After training, sparsity = 1.00, bitwidth = 8 acc = 12.1400
sample number 11 ->  sparsity = 0.7, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 148.80it/s]


Before training, sparsity = 0.7, q_type = 0, bitwidth = 4 acc = 17.1400


100%|██████████| 15/15 [04:21<00:00, 17.42s/it]
100%|██████████| 313/313 [00:01<00:00, 160.81it/s]
 12%|█▏        | 12/100 [42:45<5:19:03, 217.54s/it]

After training, sparsity = 1.10, bitwidth = 4 acc = 97.3500
sample number 12 ->  sparsity = 0.6, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 146.80it/s]


Before training, sparsity = 0.6, q_type = 1, bitwidth = 8 acc = 22.4900


 60%|██████    | 9/15 [03:05<02:03, 20.58s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.03806790921588739


100%|██████████| 313/313 [00:02<00:00, 156.14it/s]
 13%|█▎        | 13/100 [45:55<5:03:07, 209.05s/it]

After training, sparsity = 1.20, bitwidth = 8 acc = 58.4600
sample number 13 ->  sparsity = 0.4, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 149.02it/s]


Before training, sparsity = 0.4, q_type = 1, bitwidth = 4 acc = 47.4900


 73%|███████▎  | 11/15 [03:44<01:21, 20.38s/it]


Stopping Training of Sequential with at 7 epoch with best train_loss = 0.02700058546513319


100%|██████████| 313/313 [00:01<00:00, 162.04it/s]
 14%|█▍        | 14/100 [49:43<5:07:59, 214.88s/it]

After training, sparsity = 1.30, bitwidth = 4 acc = 70.3800
sample number 14 ->  sparsity = 0.4, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 132.73it/s]


Before training, sparsity = 0.4, q_type = 1, bitwidth = 4 acc = 47.5200


 80%|████████  | 12/15 [04:21<01:05, 21.77s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.02689203179329634


100%|██████████| 313/313 [00:01<00:00, 159.43it/s]
 15%|█▌        | 15/100 [54:09<5:26:07, 230.21s/it]

After training, sparsity = 1.40, bitwidth = 4 acc = 70.6700
sample number 15 ->  sparsity = 0.8, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 144.54it/s]


Before training, sparsity = 0.8, q_type = 0, bitwidth = 8 acc = 14.3800


 73%|███████▎  | 11/15 [03:22<01:13, 18.40s/it]
 15%|█▌        | 15/100 [57:33<5:26:12, 230.27s/it]


KeyboardInterrupt: 

In [None]:
 sparsity_per_layer = 0.1
lenet5_model.to("cpu")
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
# acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
# print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.to(DEVICE)
lenet5_mcu_model.fit(
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device=DEVICE,
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.2

In [None]:
sparsity_per_layer = 0.2
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.3

In [None]:
sparsity_per_layer = 0.3
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.4

In [None]:
sparsity_per_layer = 0.4
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.5

In [None]:
sparsity_per_layer = 0.5
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.6

In [None]:
sparsity_per_layer = 0.6
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.7

In [None]:
sparsity_per_layer = 0.7
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.8

In [None]:
sparsity_per_layer = 0.8
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.9

In [None]:
sparsity_per_layer = 0.9
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Dynamic Quantized Per Tensor

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Dynamic Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Static Quantized Per Tensor

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



## Static Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



In [None]:
# lenet5_model.cpu()

# # PRUNED MODEL
# pruned_sparsity = [i/10 for i in range(10)]
# for sparsity in pruned_sparsity:
#     pruned_model = lenet5_model.prune_channel(sparsity)
#     acc = pruned_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = pruned_model.get_size_in_bits()//8
#     print(f"The pruned model with sparsity {sparsity} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

# quantization_bitwidth = [i for i in range(8, 0, -1)]

# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_tensor_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
#     acc = dynamic_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_channel_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
#     acc = dynamic_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_tensor_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_channel_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

