In [1]:
import sys
import os
import copy
import random

from tqdm.auto import tqdm

import torch
from torch import nn, optim
from torch.utils import data
from torchvision import datasets, transforms

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# sys.path.append("/home/matthias/Documents/EmbeddedAI/deep-microcompression/")
sys.path.append("../../")

from development import (
    Sequential,
    BatchNorm2d,
    Conv2d,
    Linear,
    ReLU,
    ReLU6,
    MaxPool2d,
    Flatten, 

    EarlyStopper,

    QUANTIZATION_NONE, 
    DYNAMIC_QUANTIZATION_PER_TENSOR
)

In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
lenet5_file = "lenet5_state_dict.pth"
log_compression_details_file = "lenet5_compression_log.csv"

LUCKY_NUMBER = 25
torch.manual_seed(LUCKY_NUMBER)
torch.random.manual_seed(LUCKY_NUMBER)
torch.cuda.manual_seed(LUCKY_NUMBER)

DEVICE


'cpu'

In [4]:
%ls ../../../Datasets/MNIST/MNIST/


ls: cannot access '../../../Datasets/MNIST/MNIST/': No such file or directory


In [5]:
data_transform = transforms.Compose([
    transforms.RandomCrop((24, 24)),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

mnist_train_dataset = datasets.MNIST("../../../Datasets/", train=True, download=True, transform=data_transform)
mnist_test_dataset = datasets.MNIST("../../../Datasets/", train=False, download=True, transform=data_transform)

mnist_train_loader = data.DataLoader(mnist_train_dataset, batch_size=32, shuffle=True, num_workers=os.cpu_count())
mnist_test_loader = data.DataLoader(mnist_test_dataset, batch_size=32, shuffle=False, num_workers=os.cpu_count())


In [6]:
lenet5_model = Sequential(
    Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, pad=tuple([0]*4), bias=True),
    # BatchNorm2d(num_features=6),
    ReLU(),

    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0, bias=True),
    # BatchNorm2d(num_features=16),
    ReLU(),

    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Flatten(),

    Linear(in_features=16*5*5, out_features=84, bias=False),
    ReLU(),
    Linear(in_features=84, out_features=10, bias=True)
).to(DEVICE)

accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()




In [7]:
# lenet5_model = Sequential(
#     Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=5, pad=[2]*4, bias=False),
#     BatchNorm2d(num_features=3),
#     ReLU6(),
#     Flatten(),
#     Linear(in_features=3*6*6, out_features=10, bias=True)
# ).to(DEVICE)

# accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()




In [8]:
try:
    # raise RuntimeError
    lenet5_model.load_state_dict(torch.load(lenet5_file, weights_only=True), strict=False)
    
except (RuntimeError, FileNotFoundError) as e:
    print(e)
    early_stopper = EarlyStopper(
        metric_name="train_loss",
        min_valid_diff=1e-7,
        mode="min",
        patience=2,
        restore_best_state_dict=True,
    )

    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(lenet5_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    lenet5_model.fit(
        mnist_train_loader, 100, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=mnist_test_loader, 
        metrics={"acc" : accuracy_fun},
        callbacks=[early_stopper],
        device=DEVICE
    )
    torch.save(lenet5_model.state_dict(), lenet5_file)
    

Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.


  1%|          | 1/100 [00:32<52:56, 32.09s/it]

epoch    0 | train loss 0.0104 | validation loss 0.0037 | train acc 0.8948 | validation acc 0.9609


  2%|▏         | 2/100 [00:59<47:36, 29.15s/it]

epoch    1 | train loss 0.0035 | validation loss 0.0027 | train acc 0.9648 | validation acc 0.9710


  3%|▎         | 3/100 [01:28<47:33, 29.41s/it]

epoch    2 | train loss 0.0026 | validation loss 0.0026 | train acc 0.9735 | validation acc 0.9739


  4%|▍         | 4/100 [01:58<47:12, 29.51s/it]

epoch    3 | train loss 0.0022 | validation loss 0.0016 | train acc 0.9781 | validation acc 0.9829


  5%|▌         | 5/100 [02:26<46:05, 29.11s/it]

epoch    4 | train loss 0.0019 | validation loss 0.0016 | train acc 0.9806 | validation acc 0.9838


  6%|▌         | 6/100 [02:53<44:07, 28.16s/it]

epoch    5 | train loss 0.0017 | validation loss 0.0015 | train acc 0.9826 | validation acc 0.9841


  7%|▋         | 7/100 [03:20<43:06, 27.81s/it]

epoch    6 | train loss 0.0015 | validation loss 0.0013 | train acc 0.9849 | validation acc 0.9863


  8%|▊         | 8/100 [03:46<42:00, 27.40s/it]

epoch    7 | train loss 0.0015 | validation loss 0.0015 | train acc 0.9847 | validation acc 0.9850


  9%|▉         | 9/100 [04:12<40:41, 26.83s/it]

epoch    8 | train loss 0.0014 | validation loss 0.0012 | train acc 0.9859 | validation acc 0.9895


 10%|█         | 10/100 [04:37<39:31, 26.35s/it]

epoch    9 | train loss 0.0013 | validation loss 0.0012 | train acc 0.9872 | validation acc 0.9885


 11%|█         | 11/100 [05:03<38:51, 26.20s/it]

epoch   10 | train loss 0.0012 | validation loss 0.0013 | train acc 0.9877 | validation acc 0.9866


 12%|█▏        | 12/100 [05:28<37:59, 25.90s/it]

epoch   11 | train loss 0.0012 | validation loss 0.0013 | train acc 0.9879 | validation acc 0.9868


 13%|█▎        | 13/100 [05:53<37:06, 25.60s/it]

epoch   12 | train loss 0.0008 | validation loss 0.0009 | train acc 0.9919 | validation acc 0.9905


 14%|█▍        | 14/100 [06:18<36:24, 25.40s/it]

epoch   13 | train loss 0.0007 | validation loss 0.0008 | train acc 0.9934 | validation acc 0.9914


 15%|█▌        | 15/100 [06:43<35:51, 25.32s/it]

epoch   14 | train loss 0.0007 | validation loss 0.0009 | train acc 0.9931 | validation acc 0.9921


 16%|█▌        | 16/100 [07:10<36:01, 25.73s/it]

epoch   15 | train loss 0.0006 | validation loss 0.0008 | train acc 0.9932 | validation acc 0.9923


 17%|█▋        | 17/100 [07:34<34:45, 25.13s/it]

epoch   16 | train loss 0.0007 | validation loss 0.0009 | train acc 0.9933 | validation acc 0.9909


 18%|█▊        | 18/100 [07:57<33:47, 24.73s/it]

epoch   17 | train loss 0.0006 | validation loss 0.0008 | train acc 0.9938 | validation acc 0.9925


 19%|█▉        | 19/100 [08:22<33:11, 24.58s/it]

epoch   18 | train loss 0.0006 | validation loss 0.0009 | train acc 0.9939 | validation acc 0.9913


 20%|██        | 20/100 [08:45<32:16, 24.21s/it]

epoch   19 | train loss 0.0006 | validation loss 0.0007 | train acc 0.9944 | validation acc 0.9920


 21%|██        | 21/100 [09:09<31:35, 24.00s/it]

epoch   20 | train loss 0.0006 | validation loss 0.0008 | train acc 0.9946 | validation acc 0.9912


 22%|██▏       | 22/100 [09:32<31:09, 23.97s/it]

epoch   21 | train loss 0.0006 | validation loss 0.0008 | train acc 0.9941 | validation acc 0.9918


 22%|██▏       | 22/100 [09:56<35:14, 27.11s/it]

epoch   22 | train loss 0.0006 | validation loss 0.0008 | train acc 0.9938 | validation acc 0.9928
Stopping Training of Sequential with at 20 epoch with best train_loss = 0.0005869271132407144





## Original Model


In [9]:
lenet5_mcu_model = copy.deepcopy(lenet5_model)

original_acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100
original_size = lenet5_mcu_model.get_size_in_bits()//8
# print(f"The original model accuracy is {original_acc*100:.2f}% with size {original_size} bytes.")

# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")

# print(lenet5_mcu_model.test(device=DEVICE))
original_acc, original_size

100%|██████████| 313/313 [00:02<00:00, 105.30it/s]


(99.22, 148088)

## Pruned Model 

### sparsity_per_layer = 0.1

In [10]:
sparsity_per_layer = 0.25
RANGE = 10

        


if not os.path.exists(log_compression_details_file):
    with open(log_compression_details_file, "w") as file:
        file.write(f"sparsity, quantization_type, bitwidth, size, size_ratio, before acc, after acc, before acc_drop, after acc_drop\n")
        # file.write(f"sparsity, quantizaion_type, bitwidth, before acc, after acc\n")

for i in tqdm(range(100)):

    s = random.choice([i/RANGE for i in range(0, RANGE+1, 1)])
    q = random.choice([QUANTIZATION_NONE, DYNAMIC_QUANTIZATION_PER_TENSOR, DYNAMIC_QUANTIZATION_PER_TENSOR, DYNAMIC_QUANTIZATION_PER_TENSOR])
    b = random.choice([4, 8])
    print(f"sample number {i} ->  sparsity = {s}, q_type = {q}, bitwidth = {b}")

    # s = .8
    # q = 0
    # b = 8

    compression_config = {
        "prune_channel" :{
            "sparsity" : s
        },
        "quantization" : {
            "type" : q,
            "bitwidth" : b
        }

    }

    lenet5_mcu_model.cpu()
    compressed_lenet5_mcu_model = lenet5_mcu_model.compress(compression_config, input_shape=(1,1,32,32))
    compressed_lenet5_mcu_model.to(DEVICE)

    before_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100

    compressed_lenet5_mcu_model.cpu()
    size = compressed_lenet5_mcu_model.get_size_in_bits()//8
    compressed_lenet5_mcu_model.to(DEVICE)

    print(f"Before training, sparsity = {s}, q_type = {q}, bitwidth = {b} acc = {before_acc:.4f}")

    early_stopper = EarlyStopper(
        metric_name="train_loss",
        min_valid_diff=1e-5,
        mode="min",
        patience=4,
        restore_best_state_dict=True,
    )

    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(compressed_lenet5_mcu_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    compressed_lenet5_mcu_model.fit(
        mnist_train_loader, 
        15, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=mnist_test_loader, 
        metrics={"acc": accuracy_fun},
        verbose = False,
        device=DEVICE,
        compression_config=compression_config,
        callbacks = [early_stopper]
    )
    after_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100

    print(f"After training, sparsity = {i/RANGE:.2f}, bitwidth = {b} acc = {after_acc:.4f}")

    with open(log_compression_details_file, "a") as file:
        file.write(f"{s}, {q}, {b}, {size}, {size/original_size*100:9.4f}, {before_acc:9.4f}, {after_acc:9.4f}, {original_acc-before_acc:9.4f}, {original_acc-after_acc:9.4f}\n")
    # break

  0%|          | 0/100 [00:00<?, ?it/s]

sample number 0 ->  sparsity = 0.1, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 134.29it/s]


Before training, sparsity = 0.1, q_type = 0, bitwidth = 8 acc = 97.8200


100%|██████████| 15/15 [05:51<00:00, 23.40s/it]
100%|██████████| 313/313 [00:02<00:00, 133.46it/s]
  1%|          | 1/100 [05:55<9:47:03, 355.79s/it]

After training, sparsity = 0.00, bitwidth = 8 acc = 99.1800
sample number 1 ->  sparsity = 0.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 128.78it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 8 acc = 99.1500


 27%|██▋       | 4/15 [01:59<05:28, 29.86s/it]


Stopping Training of Sequential with at 0 epoch with best train_loss = 0.0005885589403833971


100%|██████████| 313/313 [00:02<00:00, 137.40it/s]
  2%|▏         | 2/100 [08:00<5:58:39, 219.59s/it]

After training, sparsity = 0.10, bitwidth = 8 acc = 99.1700
sample number 2 ->  sparsity = 0.7, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.17it/s]


Before training, sparsity = 0.7, q_type = 1, bitwidth = 4 acc = 14.7000


 73%|███████▎  | 11/15 [04:45<01:43, 25.94s/it]


Stopping Training of Sequential with at 7 epoch with best train_loss = 0.06484621526400249


100%|██████████| 313/313 [00:02<00:00, 139.05it/s]
  3%|▎         | 3/100 [12:49<6:46:56, 251.72s/it]

After training, sparsity = 0.20, bitwidth = 4 acc = 24.4900
sample number 3 ->  sparsity = 0.2, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 133.46it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 8 acc = 96.8500


 53%|█████▎    | 8/15 [03:35<03:08, 26.98s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.0017739665254640083


100%|██████████| 313/313 [00:02<00:00, 136.70it/s]
  4%|▍         | 4/100 [16:30<6:23:04, 239.42s/it]

After training, sparsity = 0.30, bitwidth = 8 acc = 98.0000
sample number 4 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 132.41it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.3900


 53%|█████▎    | 8/15 [03:35<03:08, 26.96s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.0024001432311643537


100%|██████████| 313/313 [00:02<00:00, 138.37it/s]
  5%|▌         | 5/100 [20:10<6:08:11, 232.55s/it]

After training, sparsity = 0.40, bitwidth = 4 acc = 97.6000
sample number 5 ->  sparsity = 0.4, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 132.22it/s]


Before training, sparsity = 0.4, q_type = 0, bitwidth = 8 acc = 57.9400


100%|██████████| 15/15 [05:50<00:00, 23.35s/it]
100%|██████████| 313/313 [00:02<00:00, 138.53it/s]
  6%|▌         | 6/100 [26:05<7:09:31, 274.16s/it]

After training, sparsity = 0.50, bitwidth = 8 acc = 98.7700
sample number 6 ->  sparsity = 0.6, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.22it/s]


Before training, sparsity = 0.6, q_type = 0, bitwidth = 4 acc = 26.6400


100%|██████████| 15/15 [05:49<00:00, 23.28s/it]
100%|██████████| 313/313 [00:02<00:00, 136.24it/s]
  7%|▋         | 7/100 [31:59<7:45:21, 300.24s/it]

After training, sparsity = 0.60, bitwidth = 4 acc = 98.2300
sample number 7 ->  sparsity = 0.0, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.17it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 4 acc = 98.6900


 40%|████      | 6/15 [02:48<04:12, 28.05s/it]


Stopping Training of Sequential with at 2 epoch with best train_loss = 0.000789507336853785


100%|██████████| 313/313 [00:02<00:00, 137.68it/s]
  8%|▊         | 8/100 [34:52<6:38:14, 259.72s/it]

After training, sparsity = 0.70, bitwidth = 4 acc = 98.9700
sample number 8 ->  sparsity = 0.5, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 133.90it/s]


Before training, sparsity = 0.5, q_type = 0, bitwidth = 8 acc = 23.4400


100%|██████████| 15/15 [05:50<00:00, 23.39s/it]
100%|██████████| 313/313 [00:02<00:00, 139.16it/s]
  9%|▉         | 9/100 [40:48<7:19:18, 289.65s/it]

After training, sparsity = 0.80, bitwidth = 8 acc = 98.3900
sample number 9 ->  sparsity = 1.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 135.95it/s]


Before training, sparsity = 1.0, q_type = 1, bitwidth = 8 acc = 9.7400


 53%|█████▎    | 8/15 [03:33<03:06, 26.71s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.07141990222533544


100%|██████████| 313/313 [00:02<00:00, 137.77it/s]
 10%|█         | 10/100 [44:26<6:41:25, 267.62s/it]

After training, sparsity = 0.90, bitwidth = 8 acc = 14.1400
sample number 10 ->  sparsity = 0.3, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 130.48it/s]


Before training, sparsity = 0.3, q_type = 1, bitwidth = 4 acc = 56.1900


100%|██████████| 15/15 [05:58<00:00, 23.92s/it]
100%|██████████| 313/313 [00:02<00:00, 138.33it/s]
 11%|█         | 11/100 [50:30<7:20:31, 296.98s/it]

After training, sparsity = 1.00, bitwidth = 4 acc = 83.1600
sample number 11 ->  sparsity = 0.4, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.65it/s]


Before training, sparsity = 0.4, q_type = 1, bitwidth = 4 acc = 55.4500


100%|██████████| 15/15 [05:57<00:00, 23.86s/it]
100%|██████████| 313/313 [00:02<00:00, 135.22it/s]
 12%|█▏        | 12/100 [56:32<7:44:52, 316.96s/it]

After training, sparsity = 1.10, bitwidth = 4 acc = 78.8700
sample number 12 ->  sparsity = 0.9, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 132.78it/s]


Before training, sparsity = 0.9, q_type = 1, bitwidth = 4 acc = 19.1900


 53%|█████▎    | 8/15 [03:34<03:07, 26.83s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.06906819607814153


100%|██████████| 313/313 [00:02<00:00, 135.52it/s]
 13%|█▎        | 13/100 [1:00:12<6:56:44, 287.41s/it]

After training, sparsity = 1.20, bitwidth = 4 acc = 18.9400
sample number 13 ->  sparsity = 0.2, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.06it/s]


Before training, sparsity = 0.2, q_type = 0, bitwidth = 4 acc = 97.2100


 80%|████████  | 12/15 [05:05<01:16, 25.45s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.0006114612519836555


100%|██████████| 313/313 [00:02<00:00, 121.00it/s]
 14%|█▍        | 14/100 [1:05:22<7:01:54, 294.36s/it]

After training, sparsity = 1.30, bitwidth = 4 acc = 99.0500
sample number 14 ->  sparsity = 0.3, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.13it/s]


Before training, sparsity = 0.3, q_type = 1, bitwidth = 4 acc = 55.8700


100%|██████████| 15/15 [05:59<00:00, 23.97s/it]
100%|██████████| 313/313 [00:02<00:00, 134.23it/s]
 15%|█▌        | 15/100 [1:11:26<7:26:50, 315.42s/it]

After training, sparsity = 1.40, bitwidth = 4 acc = 82.5700
sample number 15 ->  sparsity = 0.1, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 132.20it/s]


Before training, sparsity = 0.1, q_type = 1, bitwidth = 8 acc = 97.6700


 73%|███████▎  | 11/15 [04:47<01:44, 26.09s/it]


Stopping Training of Sequential with at 7 epoch with best train_loss = 0.0014350885208812543


100%|██████████| 313/313 [00:02<00:00, 137.54it/s]
 16%|█▌        | 16/100 [1:16:18<7:11:36, 308.29s/it]

After training, sparsity = 1.50, bitwidth = 8 acc = 98.4900
sample number 16 ->  sparsity = 0.5, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 131.73it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 4 acc = 20.9400


 87%|████████▋ | 13/15 [05:35<00:51, 25.77s/it]


Stopping Training of Sequential with at 9 epoch with best train_loss = 0.03597152451872826


100%|██████████| 313/313 [00:02<00:00, 136.56it/s]
 17%|█▋        | 17/100 [1:21:58<7:19:33, 317.75s/it]

After training, sparsity = 1.60, bitwidth = 4 acc = 60.7700
sample number 17 ->  sparsity = 0.3, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.65it/s]


Before training, sparsity = 0.3, q_type = 1, bitwidth = 4 acc = 56.2700


 87%|████████▋ | 13/15 [05:34<00:51, 25.77s/it]


Stopping Training of Sequential with at 9 epoch with best train_loss = 0.016771014294773342


100%|██████████| 313/313 [00:02<00:00, 137.05it/s]
 18%|█▊        | 18/100 [1:27:37<7:23:14, 324.32s/it]

After training, sparsity = 1.70, bitwidth = 4 acc = 82.6700
sample number 18 ->  sparsity = 0.1, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.66it/s]


Before training, sparsity = 0.1, q_type = 1, bitwidth = 4 acc = 96.3900


 60%|██████    | 9/15 [04:00<02:40, 26.67s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.001968830264398518


100%|██████████| 313/313 [00:02<00:00, 132.29it/s]
 19%|█▉        | 19/100 [1:31:42<6:45:35, 300.43s/it]

After training, sparsity = 1.80, bitwidth = 4 acc = 97.9800
sample number 19 ->  sparsity = 1.0, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 136.31it/s]


Before training, sparsity = 1.0, q_type = 0, bitwidth = 8 acc = 9.7400


100%|██████████| 15/15 [05:48<00:00, 23.22s/it]
100%|██████████| 313/313 [00:02<00:00, 139.71it/s]
 20%|██        | 20/100 [1:37:35<7:01:34, 316.18s/it]

After training, sparsity = 1.90, bitwidth = 8 acc = 27.9800
sample number 20 ->  sparsity = 0.7, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 135.09it/s]


Before training, sparsity = 0.7, q_type = 1, bitwidth = 8 acc = 15.1800


 73%|███████▎  | 11/15 [04:46<01:44, 26.07s/it]


Stopping Training of Sequential with at 7 epoch with best train_loss = 0.06572988899548848


100%|██████████| 313/313 [00:02<00:00, 132.48it/s]
 21%|██        | 21/100 [1:42:27<6:46:34, 308.79s/it]

After training, sparsity = 2.00, bitwidth = 8 acc = 24.5900
sample number 21 ->  sparsity = 0.2, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 134.13it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 8 acc = 96.9200


 53%|█████▎    | 8/15 [03:35<03:08, 26.96s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.001786159169941675


100%|██████████| 313/313 [00:02<00:00, 137.25it/s]
 22%|██▏       | 22/100 [1:46:07<6:06:56, 282.27s/it]

After training, sparsity = 2.10, bitwidth = 8 acc = 97.9500
sample number 22 ->  sparsity = 0.4, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 126.64it/s]


Before training, sparsity = 0.4, q_type = 1, bitwidth = 4 acc = 55.3300


 87%|████████▋ | 13/15 [05:35<00:51, 25.78s/it]


Stopping Training of Sequential with at 9 epoch with best train_loss = 0.02043263270954291


100%|██████████| 313/313 [00:02<00:00, 137.42it/s]
 23%|██▎       | 23/100 [1:51:47<6:24:27, 299.58s/it]

After training, sparsity = 2.20, bitwidth = 4 acc = 78.4000
sample number 23 ->  sparsity = 0.3, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 134.39it/s]


Before training, sparsity = 0.3, q_type = 1, bitwidth = 8 acc = 60.2000


 93%|█████████▎| 14/15 [05:58<00:25, 25.62s/it]


Stopping Training of Sequential with at 10 epoch with best train_loss = 0.01481762630417943


100%|██████████| 313/313 [00:02<00:00, 135.57it/s]
 24%|██▍       | 24/100 [1:57:50<6:43:43, 318.73s/it]

After training, sparsity = 2.30, bitwidth = 8 acc = 84.0000
sample number 24 ->  sparsity = 0.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 134.08it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 8 acc = 99.2100


 80%|████████  | 12/15 [05:12<01:18, 26.03s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.000566816997187713


100%|██████████| 313/313 [00:02<00:00, 135.43it/s]
 25%|██▌       | 25/100 [2:03:07<6:37:47, 318.24s/it]

After training, sparsity = 2.40, bitwidth = 8 acc = 99.2600
sample number 25 ->  sparsity = 0.5, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 127.92it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 8 acc = 23.4400


100%|██████████| 15/15 [05:58<00:00, 23.93s/it]
100%|██████████| 313/313 [00:02<00:00, 137.32it/s]
 26%|██▌       | 26/100 [2:09:11<6:49:19, 331.88s/it]

After training, sparsity = 2.50, bitwidth = 8 acc = 60.4000
sample number 26 ->  sparsity = 0.5, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 132.05it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 8 acc = 23.7200


 80%|████████  | 12/15 [05:10<01:17, 25.90s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.03669930265446504


100%|██████████| 313/313 [00:02<00:00, 133.43it/s]
 27%|██▋       | 27/100 [2:14:27<6:37:51, 327.01s/it]

After training, sparsity = 2.60, bitwidth = 8 acc = 59.1700
sample number 27 ->  sparsity = 1.0, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.43it/s]


Before training, sparsity = 1.0, q_type = 1, bitwidth = 4 acc = 9.7400


 53%|█████▎    | 8/15 [03:33<03:06, 26.69s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.07151231505870818


100%|██████████| 313/313 [00:02<00:00, 137.98it/s]
 28%|██▊       | 28/100 [2:18:05<5:53:14, 294.37s/it]

After training, sparsity = 2.70, bitwidth = 4 acc = 14.8900
sample number 28 ->  sparsity = 0.1, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.44it/s]


Before training, sparsity = 0.1, q_type = 1, bitwidth = 4 acc = 96.2100


 47%|████▋     | 7/15 [03:12<03:40, 27.51s/it]


Stopping Training of Sequential with at 3 epoch with best train_loss = 0.001981406274092539


100%|██████████| 313/313 [00:02<00:00, 120.55it/s]
 29%|██▉       | 29/100 [2:21:23<5:13:58, 265.33s/it]

After training, sparsity = 2.80, bitwidth = 4 acc = 98.0400
sample number 29 ->  sparsity = 0.5, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.33it/s]


Before training, sparsity = 0.5, q_type = 0, bitwidth = 4 acc = 23.7600


100%|██████████| 15/15 [05:51<00:00, 23.42s/it]
100%|██████████| 313/313 [00:02<00:00, 136.99it/s]
 30%|███       | 30/100 [2:27:19<5:41:17, 292.53s/it]

After training, sparsity = 2.90, bitwidth = 4 acc = 98.6100
sample number 30 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 122.09it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.6100


 53%|█████▎    | 8/15 [03:36<03:09, 27.01s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.00238723942017726


100%|██████████| 313/313 [00:02<00:00, 137.41it/s]
 31%|███       | 31/100 [2:31:00<5:11:43, 271.07s/it]

After training, sparsity = 3.00, bitwidth = 4 acc = 97.4100
sample number 31 ->  sparsity = 0.3, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 119.97it/s]


Before training, sparsity = 0.3, q_type = 1, bitwidth = 4 acc = 56.2900


 93%|█████████▎| 14/15 [05:58<00:25, 25.63s/it]


Stopping Training of Sequential with at 10 epoch with best train_loss = 0.016753305385510127


100%|██████████| 313/313 [00:02<00:00, 135.27it/s]
 32%|███▏      | 32/100 [2:37:03<5:38:44, 298.90s/it]

After training, sparsity = 3.10, bitwidth = 4 acc = 83.1600
sample number 32 ->  sparsity = 0.1, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.46it/s]


Before training, sparsity = 0.1, q_type = 1, bitwidth = 4 acc = 96.1000


 67%|██████▋   | 10/15 [04:23<02:11, 26.32s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.0019463665115258967


100%|██████████| 313/313 [00:02<00:00, 136.39it/s]
 33%|███▎      | 33/100 [2:41:31<5:23:23, 289.61s/it]

After training, sparsity = 3.20, bitwidth = 4 acc = 97.9900
sample number 33 ->  sparsity = 0.5, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 134.62it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 8 acc = 23.0800


 87%|████████▋ | 13/15 [05:33<00:51, 25.68s/it]


Stopping Training of Sequential with at 9 epoch with best train_loss = 0.03679333200057348


100%|██████████| 313/313 [00:02<00:00, 138.53it/s]
 34%|███▍      | 34/100 [2:47:10<5:34:40, 304.26s/it]

After training, sparsity = 3.30, bitwidth = 8 acc = 58.9500
sample number 34 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 129.13it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.2600


 67%|██████▋   | 10/15 [04:23<02:11, 26.35s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.0023962142030824907


100%|██████████| 313/313 [00:02<00:00, 138.74it/s]
 35%|███▌      | 35/100 [2:51:38<5:17:54, 293.45s/it]

After training, sparsity = 3.40, bitwidth = 4 acc = 97.3500
sample number 35 ->  sparsity = 0.0, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.76it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 4 acc = 98.4700


 67%|██████▋   | 10/15 [04:23<02:11, 26.39s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.0007945449390168505


100%|██████████| 313/313 [00:02<00:00, 137.85it/s]
 36%|███▌      | 36/100 [2:56:07<5:05:03, 285.99s/it]

After training, sparsity = 3.50, bitwidth = 4 acc = 99.0800
sample number 36 ->  sparsity = 0.7, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 133.49it/s]


Before training, sparsity = 0.7, q_type = 1, bitwidth = 8 acc = 15.2500


 60%|██████    | 9/15 [03:58<02:39, 26.51s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.065889085962375


100%|██████████| 313/313 [00:02<00:00, 134.62it/s]
 37%|███▋      | 37/100 [3:00:10<4:46:50, 273.19s/it]

After training, sparsity = 3.60, bitwidth = 8 acc = 24.4400
sample number 37 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 129.76it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.5800


 60%|██████    | 9/15 [03:59<02:39, 26.59s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.0024244915310603875


100%|██████████| 313/313 [00:02<00:00, 139.33it/s]
 38%|███▊      | 38/100 [3:04:14<4:33:14, 264.43s/it]

After training, sparsity = 3.70, bitwidth = 4 acc = 97.4400
sample number 38 ->  sparsity = 0.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 133.12it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 8 acc = 99.2400


 33%|███▎      | 5/15 [02:24<04:48, 28.86s/it]


Stopping Training of Sequential with at 1 epoch with best train_loss = 0.0005621588824065233


100%|██████████| 313/313 [00:02<00:00, 134.74it/s]
 39%|███▉      | 39/100 [3:06:43<3:53:39, 229.82s/it]

After training, sparsity = 3.80, bitwidth = 8 acc = 99.2900
sample number 39 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 126.91it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.4500


 80%|████████  | 12/15 [05:11<01:17, 25.93s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.0024129673541833955


100%|██████████| 313/313 [00:02<00:00, 137.82it/s]
 40%|████      | 40/100 [3:11:59<4:15:40, 255.67s/it]

After training, sparsity = 3.90, bitwidth = 4 acc = 97.4900
sample number 40 ->  sparsity = 0.7, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 126.15it/s]


Before training, sparsity = 0.7, q_type = 1, bitwidth = 8 acc = 15.2400


100%|██████████| 15/15 [05:58<00:00, 23.88s/it]
100%|██████████| 313/313 [00:02<00:00, 135.63it/s]
 41%|████      | 41/100 [3:18:02<4:43:05, 287.89s/it]

After training, sparsity = 4.00, bitwidth = 8 acc = 25.0100
sample number 41 ->  sparsity = 0.4, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 133.35it/s]


Before training, sparsity = 0.4, q_type = 1, bitwidth = 8 acc = 58.1900


 80%|████████  | 12/15 [05:10<01:17, 25.85s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.017886665488779544


100%|██████████| 313/313 [00:02<00:00, 133.10it/s]
 42%|████▏     | 42/100 [3:23:17<4:46:10, 296.03s/it]

After training, sparsity = 4.10, bitwidth = 8 acc = 80.8800
sample number 42 ->  sparsity = 1.0, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 128.53it/s]


Before training, sparsity = 1.0, q_type = 1, bitwidth = 4 acc = 9.7400


 53%|█████▎    | 8/15 [03:33<03:07, 26.75s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.0715126963376999


100%|██████████| 313/313 [00:02<00:00, 136.22it/s]
 43%|████▎     | 43/100 [3:26:56<4:19:12, 272.86s/it]

After training, sparsity = 4.20, bitwidth = 4 acc = 14.6800
sample number 43 ->  sparsity = 0.9, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 129.40it/s]


Before training, sparsity = 0.9, q_type = 0, bitwidth = 4 acc = 18.0300


100%|██████████| 15/15 [05:48<00:00, 23.20s/it]
100%|██████████| 313/313 [00:02<00:00, 138.12it/s]
 44%|████▍     | 44/100 [3:32:49<4:37:02, 296.84s/it]

After training, sparsity = 4.30, bitwidth = 4 acc = 86.7500
sample number 44 ->  sparsity = 0.8, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.39it/s]


Before training, sparsity = 0.8, q_type = 1, bitwidth = 4 acc = 15.0200


 67%|██████▋   | 10/15 [04:21<02:10, 26.17s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.06604219512939453


100%|██████████| 313/313 [00:02<00:00, 137.23it/s]
 45%|████▌     | 45/100 [3:37:15<4:23:43, 287.69s/it]

After training, sparsity = 4.40, bitwidth = 4 acc = 24.2900
sample number 45 ->  sparsity = 0.3, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.11it/s]


Before training, sparsity = 0.3, q_type = 0, bitwidth = 4 acc = 60.4600


100%|██████████| 15/15 [05:52<00:00, 23.47s/it]
100%|██████████| 313/313 [00:02<00:00, 134.84it/s]
 46%|████▌     | 46/100 [3:43:12<4:37:33, 308.41s/it]

After training, sparsity = 4.50, bitwidth = 4 acc = 98.8400
sample number 46 ->  sparsity = 0.3, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 135.52it/s]


Before training, sparsity = 0.3, q_type = 1, bitwidth = 8 acc = 60.2500


100%|██████████| 15/15 [05:58<00:00, 23.90s/it]
100%|██████████| 313/313 [00:02<00:00, 136.75it/s]
 47%|████▋     | 47/100 [3:49:15<4:46:55, 324.81s/it]

After training, sparsity = 4.60, bitwidth = 8 acc = 84.8200
sample number 47 ->  sparsity = 0.3, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 127.01it/s]


Before training, sparsity = 0.3, q_type = 1, bitwidth = 8 acc = 60.2100


 80%|████████  | 12/15 [05:10<01:17, 25.88s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.01489437128379941


100%|██████████| 313/313 [00:02<00:00, 130.85it/s]
 48%|████▊     | 48/100 [3:54:30<4:39:05, 322.03s/it]

After training, sparsity = 4.70, bitwidth = 8 acc = 84.5500
sample number 48 ->  sparsity = 0.0, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.54it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 4 acc = 98.6500


 47%|████▋     | 7/15 [03:11<03:39, 27.39s/it]


Stopping Training of Sequential with at 3 epoch with best train_loss = 0.0007982280006428482


100%|██████████| 313/313 [00:02<00:00, 136.05it/s]
 49%|████▉     | 49/100 [3:57:47<4:01:42, 284.36s/it]

After training, sparsity = 4.80, bitwidth = 4 acc = 98.9900
sample number 49 ->  sparsity = 0.9, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.23it/s]


Before training, sparsity = 0.9, q_type = 1, bitwidth = 4 acc = 19.3700


 67%|██████▋   | 10/15 [04:21<02:10, 26.10s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.06905851039489111


100%|██████████| 313/313 [00:02<00:00, 131.07it/s]
 50%|█████     | 50/100 [4:02:13<3:52:19, 278.80s/it]

After training, sparsity = 4.90, bitwidth = 4 acc = 18.7200
sample number 50 ->  sparsity = 0.9, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.06it/s]


Before training, sparsity = 0.9, q_type = 1, bitwidth = 4 acc = 19.0100


 67%|██████▋   | 10/15 [04:22<02:11, 26.22s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.06902083293398222


100%|██████████| 313/313 [00:02<00:00, 138.06it/s]
 51%|█████     | 51/100 [4:06:40<3:44:45, 275.21s/it]

After training, sparsity = 5.00, bitwidth = 4 acc = 18.2000
sample number 51 ->  sparsity = 0.9, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 130.20it/s]


Before training, sparsity = 0.9, q_type = 1, bitwidth = 8 acc = 17.9500


 53%|█████▎    | 8/15 [03:33<03:07, 26.72s/it]


Stopping Training of Sequential with at 4 epoch with best train_loss = 0.06804542516271274


100%|██████████| 313/313 [00:02<00:00, 137.33it/s]
 52%|█████▏    | 52/100 [4:10:18<3:26:33, 258.21s/it]

After training, sparsity = 5.10, bitwidth = 8 acc = 20.9900
sample number 52 ->  sparsity = 1.0, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 126.78it/s]


Before training, sparsity = 1.0, q_type = 0, bitwidth = 8 acc = 9.7400


100%|██████████| 15/15 [05:49<00:00, 23.27s/it]
100%|██████████| 313/313 [00:02<00:00, 132.62it/s]
 53%|█████▎    | 53/100 [4:16:12<3:44:45, 286.93s/it]

After training, sparsity = 5.20, bitwidth = 8 acc = 28.6500
sample number 53 ->  sparsity = 0.6, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.31it/s]


Before training, sparsity = 0.6, q_type = 0, bitwidth = 4 acc = 26.7600


100%|██████████| 15/15 [05:51<00:00, 23.45s/it]
100%|██████████| 313/313 [00:02<00:00, 137.92it/s]
 54%|█████▍    | 54/100 [4:22:08<3:55:57, 307.77s/it]

After training, sparsity = 5.30, bitwidth = 4 acc = 98.4100
sample number 54 ->  sparsity = 0.5, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 133.28it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 8 acc = 23.2600


 67%|██████▋   | 10/15 [04:23<02:11, 26.37s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.03674712311923504


100%|██████████| 313/313 [00:02<00:00, 137.29it/s]
 55%|█████▌    | 55/100 [4:26:37<3:41:57, 295.95s/it]

After training, sparsity = 5.40, bitwidth = 8 acc = 60.0100
sample number 55 ->  sparsity = 0.1, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.83it/s]


Before training, sparsity = 0.1, q_type = 1, bitwidth = 4 acc = 95.9600


 67%|██████▋   | 10/15 [04:23<02:11, 26.36s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.00192719975936537


100%|██████████| 313/313 [00:02<00:00, 138.65it/s]
 56%|█████▌    | 56/100 [4:31:05<3:30:56, 287.65s/it]

After training, sparsity = 5.50, bitwidth = 4 acc = 97.9800
sample number 56 ->  sparsity = 0.8, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.91it/s]


Before training, sparsity = 0.8, q_type = 1, bitwidth = 4 acc = 14.8700


 67%|██████▋   | 10/15 [04:22<02:11, 26.28s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.06601963401436806


100%|██████████| 313/313 [00:02<00:00, 137.17it/s]
 57%|█████▋    | 57/100 [4:35:33<3:21:49, 281.60s/it]

After training, sparsity = 5.60, bitwidth = 4 acc = 24.0200
sample number 57 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.48it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.2900


 87%|████████▋ | 13/15 [05:34<00:51, 25.74s/it]


Stopping Training of Sequential with at 9 epoch with best train_loss = 0.0024051418361642086


100%|██████████| 313/313 [00:02<00:00, 132.00it/s]
 58%|█████▊    | 58/100 [4:41:12<3:29:15, 298.95s/it]

After training, sparsity = 5.70, bitwidth = 4 acc = 97.2600
sample number 58 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 127.01it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.3500


 67%|██████▋   | 10/15 [04:23<02:11, 26.32s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.002377177127386676


100%|██████████| 313/313 [00:02<00:00, 138.37it/s]
 59%|█████▉    | 59/100 [4:45:40<3:17:56, 289.67s/it]

After training, sparsity = 5.80, bitwidth = 4 acc = 97.3900
sample number 59 ->  sparsity = 0.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 130.51it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 8 acc = 99.2500


 27%|██▋       | 4/15 [02:00<05:30, 30.03s/it]


Stopping Training of Sequential with at 0 epoch with best train_loss = 0.0006072483831372059


100%|██████████| 313/313 [00:02<00:00, 137.14it/s]
 60%|██████    | 60/100 [4:47:45<2:40:09, 240.23s/it]

After training, sparsity = 5.90, bitwidth = 8 acc = 99.2000
sample number 60 ->  sparsity = 0.4, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 134.88it/s]


Before training, sparsity = 0.4, q_type = 1, bitwidth = 8 acc = 58.0900


100%|██████████| 15/15 [05:59<00:00, 23.99s/it]
100%|██████████| 313/313 [00:02<00:00, 134.37it/s]
 61%|██████    | 61/100 [4:53:49<3:00:24, 277.55s/it]

After training, sparsity = 6.00, bitwidth = 8 acc = 82.1600
sample number 61 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 132.36it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.1100


 67%|██████▋   | 10/15 [04:23<02:11, 26.36s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.00242004320222574


100%|██████████| 313/313 [00:02<00:00, 128.70it/s]
 62%|██████▏   | 62/100 [4:58:18<2:54:03, 274.83s/it]

After training, sparsity = 6.10, bitwidth = 4 acc = 97.4200
sample number 62 ->  sparsity = 0.3, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 129.31it/s]


Before training, sparsity = 0.3, q_type = 0, bitwidth = 8 acc = 60.3200


100%|██████████| 15/15 [05:50<00:00, 23.39s/it]
100%|██████████| 313/313 [00:02<00:00, 133.40it/s]
 63%|██████▎   | 63/100 [5:04:14<3:04:26, 299.11s/it]

After training, sparsity = 6.20, bitwidth = 8 acc = 98.7700
sample number 63 ->  sparsity = 0.8, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 133.61it/s]


Before training, sparsity = 0.8, q_type = 1, bitwidth = 8 acc = 14.4100


100%|██████████| 15/15 [05:57<00:00, 23.84s/it]
100%|██████████| 313/313 [00:02<00:00, 137.08it/s]
 64%|██████▍   | 64/100 [5:10:16<3:10:50, 318.08s/it]

After training, sparsity = 6.30, bitwidth = 8 acc = 25.5500
sample number 64 ->  sparsity = 0.2, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 127.54it/s]


Before training, sparsity = 0.2, q_type = 0, bitwidth = 8 acc = 97.1300


100%|██████████| 15/15 [05:51<00:00, 23.42s/it]
100%|██████████| 313/313 [00:02<00:00, 132.89it/s]
 65%|██████▌   | 65/100 [5:16:12<3:12:13, 329.53s/it]

After training, sparsity = 6.40, bitwidth = 8 acc = 99.1000
sample number 65 ->  sparsity = 0.9, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.60it/s]


Before training, sparsity = 0.9, q_type = 1, bitwidth = 4 acc = 19.3700


 73%|███████▎  | 11/15 [04:45<01:43, 25.93s/it]


Stopping Training of Sequential with at 7 epoch with best train_loss = 0.06901559114257495


100%|██████████| 313/313 [00:02<00:00, 136.84it/s]
 66%|██████▌   | 66/100 [5:21:02<3:00:00, 317.65s/it]

After training, sparsity = 6.50, bitwidth = 4 acc = 19.1800
sample number 66 ->  sparsity = 0.4, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 131.72it/s]


Before training, sparsity = 0.4, q_type = 1, bitwidth = 8 acc = 58.4400


 93%|█████████▎| 14/15 [05:59<00:25, 25.65s/it]


Stopping Training of Sequential with at 10 epoch with best train_loss = 0.0178375964636604


100%|██████████| 313/313 [00:02<00:00, 126.40it/s]
 67%|██████▋   | 67/100 [5:27:06<3:02:20, 331.54s/it]

After training, sparsity = 6.60, bitwidth = 8 acc = 80.5800
sample number 67 ->  sparsity = 0.8, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 135.56it/s]


Before training, sparsity = 0.8, q_type = 0, bitwidth = 8 acc = 13.7800


100%|██████████| 15/15 [05:50<00:00, 23.37s/it]
100%|██████████| 313/313 [00:02<00:00, 134.75it/s]
 68%|██████▊   | 68/100 [5:33:02<3:00:37, 338.67s/it]

After training, sparsity = 6.70, bitwidth = 8 acc = 96.1900
sample number 68 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.18it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.6700


 67%|██████▋   | 10/15 [04:24<02:12, 26.45s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.0024002515448257327


100%|██████████| 313/313 [00:02<00:00, 135.51it/s]
 69%|██████▉   | 69/100 [5:37:31<2:44:13, 317.84s/it]

After training, sparsity = 6.80, bitwidth = 4 acc = 97.4400
sample number 69 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 130.94it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.5400


 73%|███████▎  | 11/15 [04:47<01:44, 26.16s/it]


Stopping Training of Sequential with at 7 epoch with best train_loss = 0.0023708078098871434


100%|██████████| 313/313 [00:02<00:00, 133.58it/s]
 70%|███████   | 70/100 [5:42:23<2:35:08, 310.27s/it]

After training, sparsity = 6.90, bitwidth = 4 acc = 97.5800
sample number 70 ->  sparsity = 0.3, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 130.98it/s]


Before training, sparsity = 0.3, q_type = 0, bitwidth = 4 acc = 60.0600


100%|██████████| 15/15 [05:51<00:00, 23.47s/it]
100%|██████████| 313/313 [00:02<00:00, 137.77it/s]
 71%|███████   | 71/100 [5:48:20<2:36:41, 324.21s/it]

After training, sparsity = 7.00, bitwidth = 4 acc = 98.8200
sample number 71 ->  sparsity = 0.1, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 118.26it/s]


Before training, sparsity = 0.1, q_type = 1, bitwidth = 8 acc = 97.8400


 60%|██████    | 9/15 [04:00<02:40, 26.77s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.0014670277809045121


100%|██████████| 313/313 [00:02<00:00, 135.96it/s]
 72%|███████▏  | 72/100 [5:52:26<2:20:20, 300.73s/it]

After training, sparsity = 7.10, bitwidth = 8 acc = 98.3500
sample number 72 ->  sparsity = 0.0, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 129.87it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 4 acc = 98.7800


 40%|████      | 6/15 [02:48<04:13, 28.14s/it]


Stopping Training of Sequential with at 2 epoch with best train_loss = 0.0007787325321391108


100%|██████████| 313/313 [00:02<00:00, 135.27it/s]
 73%|███████▎  | 73/100 [5:55:20<1:58:10, 262.61s/it]

After training, sparsity = 7.20, bitwidth = 4 acc = 99.0500
sample number 73 ->  sparsity = 0.6, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 115.47it/s]


Before training, sparsity = 0.6, q_type = 1, bitwidth = 4 acc = 24.8500


100%|██████████| 15/15 [05:58<00:00, 23.93s/it]
100%|██████████| 313/313 [00:02<00:00, 134.45it/s]
 74%|███████▍  | 74/100 [6:01:24<2:06:59, 293.05s/it]

After training, sparsity = 7.30, bitwidth = 4 acc = 53.9400
sample number 74 ->  sparsity = 0.6, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 129.62it/s]


Before training, sparsity = 0.6, q_type = 0, bitwidth = 4 acc = 26.8100


100%|██████████| 15/15 [05:50<00:00, 23.38s/it]
100%|██████████| 313/313 [00:02<00:00, 136.17it/s]
 75%|███████▌  | 75/100 [6:07:19<2:09:54, 311.77s/it]

After training, sparsity = 7.40, bitwidth = 4 acc = 98.3400
sample number 75 ->  sparsity = 0.8, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 116.78it/s]


Before training, sparsity = 0.8, q_type = 0, bitwidth = 8 acc = 13.7200


100%|██████████| 15/15 [05:50<00:00, 23.34s/it]
100%|██████████| 313/313 [00:02<00:00, 136.58it/s]
 76%|███████▌  | 76/100 [6:13:14<2:09:55, 324.81s/it]

After training, sparsity = 7.50, bitwidth = 8 acc = 96.0900
sample number 76 ->  sparsity = 0.0, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 129.12it/s]


Before training, sparsity = 0.0, q_type = 0, bitwidth = 8 acc = 99.3100


100%|██████████| 15/15 [05:52<00:00, 23.52s/it]
100%|██████████| 313/313 [00:02<00:00, 132.08it/s]
 77%|███████▋  | 77/100 [6:19:12<2:08:17, 334.69s/it]

After training, sparsity = 7.60, bitwidth = 8 acc = 99.2400
sample number 77 ->  sparsity = 0.5, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.61it/s]


Before training, sparsity = 0.5, q_type = 0, bitwidth = 4 acc = 23.0600


100%|██████████| 15/15 [05:51<00:00, 23.45s/it]
100%|██████████| 313/313 [00:02<00:00, 135.21it/s]
 78%|███████▊  | 78/100 [6:25:09<2:05:06, 341.22s/it]

After training, sparsity = 7.70, bitwidth = 4 acc = 98.6400
sample number 78 ->  sparsity = 0.2, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 132.15it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 8 acc = 97.1300


 67%|██████▋   | 10/15 [04:24<02:12, 26.43s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.0017811485328226506


100%|██████████| 313/313 [00:02<00:00, 136.21it/s]
 79%|███████▉  | 79/100 [6:29:38<1:51:51, 319.58s/it]

After training, sparsity = 7.80, bitwidth = 8 acc = 98.0400
sample number 79 ->  sparsity = 1.0, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 135.23it/s]


Before training, sparsity = 1.0, q_type = 1, bitwidth = 4 acc = 9.7400


 40%|████      | 6/15 [02:46<04:09, 27.74s/it]


Stopping Training of Sequential with at 2 epoch with best train_loss = 0.07153036953210831


100%|██████████| 313/313 [00:02<00:00, 130.66it/s]
 80%|████████  | 80/100 [6:32:29<1:31:41, 275.07s/it]

After training, sparsity = 7.90, bitwidth = 4 acc = 14.5900
sample number 80 ->  sparsity = 1.0, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.38it/s]


Before training, sparsity = 1.0, q_type = 0, bitwidth = 4 acc = 9.7400


100%|██████████| 15/15 [05:50<00:00, 23.37s/it]
100%|██████████| 313/313 [00:02<00:00, 136.05it/s]
 81%|████████  | 81/100 [6:38:24<1:34:43, 299.11s/it]

After training, sparsity = 8.00, bitwidth = 4 acc = 28.8000
sample number 81 ->  sparsity = 0.6, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 129.89it/s]


Before training, sparsity = 0.6, q_type = 1, bitwidth = 4 acc = 24.9300


 80%|████████  | 12/15 [05:12<01:18, 26.03s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.04310357688764731


100%|██████████| 313/313 [00:02<00:00, 132.23it/s]
 82%|████████▏ | 82/100 [6:43:41<1:31:21, 304.54s/it]

After training, sparsity = 8.10, bitwidth = 4 acc = 51.8800
sample number 82 ->  sparsity = 1.0, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 132.00it/s]


Before training, sparsity = 1.0, q_type = 0, bitwidth = 8 acc = 9.7400


100%|██████████| 15/15 [05:51<00:00, 23.41s/it]
100%|██████████| 313/313 [00:02<00:00, 136.45it/s]
 83%|████████▎ | 83/100 [6:49:37<1:30:38, 319.93s/it]

After training, sparsity = 8.20, bitwidth = 8 acc = 27.9700
sample number 83 ->  sparsity = 0.2, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 132.73it/s]


Before training, sparsity = 0.2, q_type = 1, bitwidth = 4 acc = 95.1900


 47%|████▋     | 7/15 [03:12<03:39, 27.50s/it]


Stopping Training of Sequential with at 3 epoch with best train_loss = 0.0024367216873603563


100%|██████████| 313/313 [00:02<00:00, 132.11it/s]
 84%|████████▍ | 84/100 [6:52:55<1:15:30, 283.14s/it]

After training, sparsity = 8.30, bitwidth = 4 acc = 97.5300
sample number 84 ->  sparsity = 0.1, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 128.80it/s]


Before training, sparsity = 0.1, q_type = 1, bitwidth = 8 acc = 97.8000


 60%|██████    | 9/15 [04:00<02:40, 26.76s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.0014361177437773828


100%|██████████| 313/313 [00:02<00:00, 135.99it/s]
 85%|████████▌ | 85/100 [6:57:00<1:07:58, 271.89s/it]

After training, sparsity = 8.40, bitwidth = 8 acc = 98.4400
sample number 85 ->  sparsity = 0.3, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 132.28it/s]


Before training, sparsity = 0.3, q_type = 1, bitwidth = 8 acc = 60.9000


 73%|███████▎  | 11/15 [04:47<01:44, 26.16s/it]


Stopping Training of Sequential with at 7 epoch with best train_loss = 0.014857118554910023


100%|██████████| 313/313 [00:02<00:00, 132.07it/s]
 86%|████████▌ | 86/100 [7:01:53<1:04:53, 278.09s/it]

After training, sparsity = 8.50, bitwidth = 8 acc = 84.5400
sample number 86 ->  sparsity = 0.5, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 134.43it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 8 acc = 23.5800


100%|██████████| 15/15 [05:58<00:00, 23.91s/it]
100%|██████████| 313/313 [00:02<00:00, 137.75it/s]
 87%|████████▋ | 87/100 [7:07:56<1:05:47, 303.68s/it]

After training, sparsity = 8.60, bitwidth = 8 acc = 60.5300
sample number 87 ->  sparsity = 0.9, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 112.33it/s]


Before training, sparsity = 0.9, q_type = 1, bitwidth = 4 acc = 19.3000


 60%|██████    | 9/15 [03:57<02:38, 26.44s/it]


Stopping Training of Sequential with at 5 epoch with best train_loss = 0.06902700161337852


100%|██████████| 313/313 [00:02<00:00, 135.41it/s]
 88%|████████▊ | 88/100 [7:11:59<57:06, 285.51s/it]  

After training, sparsity = 8.70, bitwidth = 4 acc = 18.8800
sample number 88 ->  sparsity = 0.4, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 127.41it/s]


Before training, sparsity = 0.4, q_type = 1, bitwidth = 4 acc = 55.1800


 67%|██████▋   | 10/15 [04:24<02:12, 26.46s/it]


Stopping Training of Sequential with at 6 epoch with best train_loss = 0.020557275403290987


100%|██████████| 313/313 [00:02<00:00, 134.96it/s]
 89%|████████▉ | 89/100 [7:16:29<51:27, 280.68s/it]

After training, sparsity = 8.80, bitwidth = 4 acc = 77.9800
sample number 89 ->  sparsity = 0.0, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 133.89it/s]


Before training, sparsity = 0.0, q_type = 1, bitwidth = 8 acc = 99.2000


 33%|███▎      | 5/15 [02:24<04:48, 28.88s/it]


Stopping Training of Sequential with at 1 epoch with best train_loss = 0.000557124045934385


100%|██████████| 313/313 [00:02<00:00, 131.64it/s]
 90%|█████████ | 90/100 [7:18:58<40:12, 241.24s/it]

After training, sparsity = 8.90, bitwidth = 8 acc = 99.1000
sample number 90 ->  sparsity = 0.0, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 133.66it/s]


Before training, sparsity = 0.0, q_type = 0, bitwidth = 4 acc = 99.1800


100%|██████████| 15/15 [05:59<00:00, 23.98s/it]
100%|██████████| 313/313 [00:02<00:00, 137.06it/s]
 91%|█████████ | 91/100 [7:25:02<41:43, 278.18s/it]

After training, sparsity = 9.00, bitwidth = 4 acc = 99.1200
sample number 91 ->  sparsity = 0.0, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.74it/s]


Before training, sparsity = 0.0, q_type = 0, bitwidth = 4 acc = 99.3200


100%|██████████| 15/15 [05:53<00:00, 23.58s/it]
100%|██████████| 313/313 [00:02<00:00, 137.22it/s]
 92%|█████████▏| 92/100 [7:31:01<40:18, 302.26s/it]

After training, sparsity = 9.10, bitwidth = 4 acc = 99.2800
sample number 92 ->  sparsity = 0.9, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 128.62it/s]


Before training, sparsity = 0.9, q_type = 1, bitwidth = 8 acc = 18.0100


 80%|████████  | 12/15 [05:09<01:17, 25.82s/it]


Stopping Training of Sequential with at 8 epoch with best train_loss = 0.06800669422547022


100%|██████████| 313/313 [00:02<00:00, 137.66it/s]
 93%|█████████▎| 93/100 [7:36:15<35:41, 305.96s/it]

After training, sparsity = 9.20, bitwidth = 8 acc = 21.4400
sample number 93 ->  sparsity = 0.5, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 134.64it/s]


Before training, sparsity = 0.5, q_type = 0, bitwidth = 4 acc = 23.5000


100%|██████████| 15/15 [05:53<00:00, 23.57s/it]
100%|██████████| 313/313 [00:02<00:00, 134.34it/s]
 94%|█████████▍| 94/100 [7:42:14<32:09, 321.66s/it]

After training, sparsity = 9.30, bitwidth = 4 acc = 98.6400
sample number 94 ->  sparsity = 1.0, q_type = 1, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 132.49it/s]


Before training, sparsity = 1.0, q_type = 1, bitwidth = 4 acc = 9.7400


 47%|████▋     | 7/15 [03:11<03:39, 27.38s/it]


Stopping Training of Sequential with at 3 epoch with best train_loss = 0.07151553703943889


100%|██████████| 313/313 [00:02<00:00, 136.76it/s]
 95%|█████████▌| 95/100 [7:45:30<23:40, 284.09s/it]

After training, sparsity = 9.40, bitwidth = 4 acc = 14.9900
sample number 95 ->  sparsity = 0.5, q_type = 0, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 131.46it/s]


Before training, sparsity = 0.5, q_type = 0, bitwidth = 8 acc = 23.7800


100%|██████████| 15/15 [05:52<00:00, 23.47s/it]
100%|██████████| 313/313 [00:02<00:00, 133.23it/s]
 96%|█████████▌| 96/100 [7:51:27<20:23, 305.94s/it]

After training, sparsity = 9.50, bitwidth = 8 acc = 98.7300
sample number 96 ->  sparsity = 0.6, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 125.21it/s]


Before training, sparsity = 0.6, q_type = 1, bitwidth = 8 acc = 26.9600


100%|██████████| 15/15 [05:59<00:00, 23.94s/it]
100%|██████████| 313/313 [00:02<00:00, 138.01it/s]
 97%|█████████▋| 97/100 [7:57:31<16:10, 323.36s/it]

After training, sparsity = 9.60, bitwidth = 8 acc = 53.8200
sample number 97 ->  sparsity = 0.6, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 135.87it/s]


Before training, sparsity = 0.6, q_type = 1, bitwidth = 8 acc = 26.6000


100%|██████████| 15/15 [06:08<00:00, 24.54s/it]
100%|██████████| 313/313 [00:02<00:00, 132.62it/s]
 98%|█████████▊| 98/100 [8:03:44<11:16, 338.20s/it]

After training, sparsity = 9.70, bitwidth = 8 acc = 51.6300
sample number 98 ->  sparsity = 0.5, q_type = 1, bitwidth = 8


100%|██████████| 313/313 [00:02<00:00, 134.14it/s]


Before training, sparsity = 0.5, q_type = 1, bitwidth = 8 acc = 23.3900


 93%|█████████▎| 14/15 [05:58<00:25, 25.64s/it]


Stopping Training of Sequential with at 10 epoch with best train_loss = 0.03673108915388584


100%|██████████| 313/313 [00:02<00:00, 138.39it/s]
 99%|█████████▉| 99/100 [8:09:47<05:45, 345.83s/it]

After training, sparsity = 9.80, bitwidth = 8 acc = 58.3100
sample number 99 ->  sparsity = 0.4, q_type = 0, bitwidth = 4


100%|██████████| 313/313 [00:02<00:00, 128.78it/s]


Before training, sparsity = 0.4, q_type = 0, bitwidth = 4 acc = 58.0200


100%|██████████| 15/15 [06:07<00:00, 24.51s/it]
100%|██████████| 313/313 [00:02<00:00, 134.91it/s]
100%|██████████| 100/100 [8:16:00<00:00, 297.60s/it]


After training, sparsity = 9.90, bitwidth = 4 acc = 98.8300


In [11]:
 sparsity_per_layer = 0.1
lenet5_model.to("cpu")
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
# acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
# print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.to(DEVICE)
lenet5_mcu_model.fit(
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device=DEVICE,
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


IndentationError: unexpected indent (970928641.py, line 1)

### sparsity_per_layer = 0.2

In [None]:
sparsity_per_layer = 0.2
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.3

In [None]:
sparsity_per_layer = 0.3
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.4

In [None]:
sparsity_per_layer = 0.4
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.5

In [None]:
sparsity_per_layer = 0.5
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.6

In [None]:
sparsity_per_layer = 0.6
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.7

In [None]:
sparsity_per_layer = 0.7
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.8

In [None]:
sparsity_per_layer = 0.8
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.9

In [None]:
sparsity_per_layer = 0.9
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Dynamic Quantized Per Tensor

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Dynamic Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Static Quantized Per Tensor

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



## Static Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



In [None]:
# lenet5_model.cpu()

# # PRUNED MODEL
# pruned_sparsity = [i/10 for i in range(10)]
# for sparsity in pruned_sparsity:
#     pruned_model = lenet5_model.prune_channel(sparsity)
#     acc = pruned_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = pruned_model.get_size_in_bits()//8
#     print(f"The pruned model with sparsity {sparsity} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

# quantization_bitwidth = [i for i in range(8, 0, -1)]

# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_tensor_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
#     acc = dynamic_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_channel_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
#     acc = dynamic_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_tensor_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_channel_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

