In [1]:
import sys
# sys.path.append("/home/matthias/Documents/EmbeddedAI/deep-microcompression/")
sys.path.append("../../")

from development import (
    Sequential,
    AvgPool2d,
    BatchNorm2d,
    Conv2d,
    Linear,
    ReLU,
    ReLU6,
    MaxPool2d,
    Flatten, 

    EarlyStopper,
    quantize_per_tensor_assy,
    quantize_per_tensor_sy,
    dequantize_per_tensor_assy,
    dequantize_per_tensor_sy,
    
    QuantizationGranularity,
    QuantizationScaleType,
    QuantizationScheme
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sys
import os
import copy
import random

from tqdm.auto import tqdm

import torch
from torch import nn, optim
from torch.utils import data
from torchvision import datasets, transforms

In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
lenet5_file = f"lenet5_state_dict_{DEVICE}.pth"
log_compression_details_file = "lenet5_compression_log.csv"

LUCKY_NUMBER = 25
torch.manual_seed(LUCKY_NUMBER)
torch.random.manual_seed(LUCKY_NUMBER)
torch.cuda.manual_seed(LUCKY_NUMBER)

DEVICE


'cpu'

In [4]:
input_shape = (1, 28, 28)

data_transform = transforms.Compose([
    transforms.RandomCrop((24, 24)),
    transforms.Resize(input_shape[1:]),
    transforms.ToTensor(),
])

mnist_train_dataset = datasets.MNIST("../../../Datasets/", train=True, download=True, transform=data_transform)
mnist_test_dataset = datasets.MNIST("../../../Datasets/", train=False, download=True, transform=data_transform)

mnist_train_loader = data.DataLoader(mnist_train_dataset, batch_size=32, shuffle=True, num_workers=os.cpu_count())
mnist_test_loader = data.DataLoader(mnist_test_dataset, batch_size=32, shuffle=False, num_workers=os.cpu_count())


In [5]:
lenet5_model = Sequential(
    Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, pad=tuple([2]*4), bias=False),
    # BatchNorm2d(num_features=6),
    ReLU6(),

    MaxPool2d(kernel_size=2, stride=2, padding=0),

    Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0, bias=True),
    # BatchNorm2d(num_features=16),
    ReLU(),
    # ReLU6(),

    MaxPool2d(kernel_size=2, stride=2, padding=0),
    # AvgPool2d(kernel_size=2, stride=2, padding=0),

    Flatten(),
    
    Linear(in_features=16*5*5, out_features=84, bias=False),
    ReLU6(),
    Linear(in_features=84, out_features=10, bias=True)
).to(DEVICE)

accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()




In [6]:
# lenet5_model = Sequential(
#     Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=5, pad=[2]*4, bias=True),
#     # BatchNorm2d(num_features=3),
#     ReLU(),
#     Conv2d(in_channels=6, out_channels=3, kernel_size=1, stride=1, pad=[0]*4, bias=False),
#     MaxPool2d(kernel_size=2, stride=2, padding=0),
#     Flatten(),
#     Linear(in_features=3*3*3, out_features=84, bias=False),
#     ReLU(),
#     Linear(in_features=84, out_features=10, bias=True)
# ).to(DEVICE)

# accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()

In [7]:
# lenet5_model = Sequential(
#     Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=5, pad=[2]*4, bias=False),
#     # BatchNorm2d(num_features=3),
#     ReLU(),
#     Flatten(),
#     Linear(in_features=3*6*6, out_features=10, bias=True)
# ).to(DEVICE)

# accuracy_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()

In [8]:
try:
    # raise RuntimeError
    lenet5_model.load_state_dict(torch.load(lenet5_file, weights_only=True), strict=True)
    
except (RuntimeError, FileNotFoundError) as e:
    early_stopper = EarlyStopper(
        metric_name="train_loss",
        min_valid_diff=1e-7,
        mode="min",
        patience=2,
        restore_best_state_dict=True,
    )

    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(lenet5_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    lenet5_model.fit(
        mnist_train_loader, 2, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=mnist_test_loader, 
        metrics={"acc" : accuracy_fun},
        callbacks=[early_stopper],
        device=DEVICE
    )
    torch.save(lenet5_model.state_dict(), lenet5_file)
    

## Original Model


In [9]:
lenet5_mcu_model = copy.deepcopy(lenet5_model)

original_acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)
original_size = lenet5_mcu_model.get_size_in_bits()//8
print(original_acc*100)
print(f"The original model accuracy is {original_acc*100:.2f}% with size {original_size} bytes.")

# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(input_shape=input_shape, var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")

print(lenet5_mcu_model.test(device=DEVICE))
original_acc, original_size

  0%|          | 0/313 [00:00<?, ?it/s]

100%|██████████| 313/313 [00:03<00:00, 82.60it/s] 


99.33
The original model accuracy is 99.33% with size 148064 bytes.
tensor([[-10.5499,  -3.5355,   9.8397,  -3.2319,  -9.1715, -12.8788,  -4.4893,
           0.5832,  -4.2970,  -7.2553]])


(0.9933, 148064)

In [10]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7ad091945820>

In [None]:
sp = .1
s = QuantizationScheme.DYNAMIC
g = QuantizationGranularity.PER_TENSOR
b = 4

# for i in range(0, 11):
#     sp = i/10

# for i in [8, 4, 2]:

#     b = i

RANGE = 10
for i in range(1000):
    sp = random.choice([i/RANGE for i in range(0, RANGE+1, 1)])
    s = random.choice([QuantizationScheme.NONE, QuantizationScheme.DYNAMIC, QuantizationScheme.DYNAMIC, QuantizationScheme.STATIC, QuantizationScheme.STATIC])
    g = random.choice([None, QuantizationGranularity.PER_CHANNEL, QuantizationGranularity.PER_TENSOR, QuantizationGranularity.PER_TENSOR, QuantizationGranularity.PER_TENSOR])
    b = random.choice([None, 2, 4, 4, 8, 8])

    print(sp, s, g, b)
    compression_config = {
        
        "prune_channel" :{
            "sparsity" : sp,
            "metric" : "l2"
        },

        "quantize" : {
            "scheme" : s,
            "granularity": g,
            "bitwidth" : b
        }

    }


        # compressed_lenet5_mcu_model = lenet5_mcu_model.init_compress(compression_config, input_shape=input_shape, calibration_data=next(iter(mnist_test_loader))[0].to(DEVICE))
    # lenet5_mcu_model.cpu()
    try:
        compressed_lenet5_mcu_model = lenet5_mcu_model.init_compress(compression_config, input_shape=input_shape, calibration_data=next(iter(mnist_test_loader))[0].to(DEVICE))
    except ValueError:
        continue
    compressed_lenet5_mcu_model.to(DEVICE)

    before_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100
    size = compressed_lenet5_mcu_model.get_size_in_bits()//8
    compressed_lenet5_mcu_model.to(DEVICE)

    print(f"Before training, sparsity = {sp}, scheme = {s}, granularity = {g}, bitwidth = {b} acc = {before_acc:.4f} size = {size/original_size*100:9.4f} {100 - size/original_size*100:9.4f}")
    # # compressed_lenet5_mcu_model.convert_to_c(input_shape=input_shape, var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")
    compressed_lenet5_mcu_model.to(DEVICE)
    # print(compressed_lenet5_mcu_model.test(device=DEVICE))

    # print("#"*40, "Training", "#"*40)
    # early_stopper = EarlyStopper(
    #     metric_name="validation_acc",
    #     min_valid_diff=.001,
    #     mode="min",
    #     patience=3,
    #     restore_best_state_dict=True,
    # )

    # criterion_fun = nn.CrossEntropyLoss()
    # # optimizion_fun = optim.SGD(compressed_lenet5_mcu_model.parameters(), lr=1.e-3)
    # optimizion_fun = optim.Adam(compressed_lenet5_mcu_model.parameters(), lr=1.e-3)
    # lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    # compressed_lenet5_mcu_model.fit(
    #     mnist_train_loader, 
    #     15, 
    #     criterion_fun, optimizion_fun, lr_scheduler,
    #     validation_dataloader=mnist_test_loader, 
    #     metrics={"acc": accuracy_fun},
    #     verbose = True,
    #     device=DEVICE,
    #     callbacks = [early_stopper]
    # )
    # after_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100
    # print(f"After training, sparsity = {sp}, scheme = {s}, granularity = {g}, bitwidth = {b} acc = {after_acc:.4f}")

0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 2


0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 60.65it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 13.7500 size =    0.4235   99.5765
0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:08<00:00, 34.95it/s]

Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 35.2700 size =    6.4384   93.5616
0.5 QuantizationScheme.STATIC None None





0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:08<00:00, 35.53it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 98.8400 size =   25.0135   74.9865
0.8 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 61.84it/s]


Before training, sparsity = 0.8, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 20.5200 size =    1.4413   98.5587
0.1 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:08<00:00, 35.15it/s]

Before training, sparsity = 0.1, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 98.7800 size =   21.4130   78.5870
0.6 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:08<00:00, 36.50it/s]

Before training, sparsity = 0.6, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 35.0400 size =    2.3470   97.6530
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8



100%|██████████| 313/313 [00:06<00:00, 48.57it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 97.6300 size =   16.5827   83.4173
0.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:07<00:00, 44.58it/s]

Before training, sparsity = 0.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 26.6000 size =   25.0000   75.0000
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 2





1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:04<00:00, 62.95it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 13.1500 size =    0.0648   99.9352
0.6 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 34.30it/s]

Before training, sparsity = 0.6, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 27.2700 size =    4.6784   95.3216
0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None





0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 56.25it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 97.5200 size =    8.2924   91.7076
0.3 QuantizationScheme.STATIC None 4
0.7 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 2
0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 56.70it/s]


Before training, sparsity = 0.4, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 65.8900 size =    9.7120   90.2880
0.5 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None
0.1 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
1.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 54.22it/s]

Before training, sparsity = 0.4, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 66.7400 size =    9.7120   90.2880
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:05<00:00, 52.26it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 13.3000 size =    0.2121   99.7879
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4
0.3 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 56.09it/s]

Before training, sparsity = 0.3, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 95.0000 size =   13.4651   86.5349
0.7 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8





0.3 QuantizationScheme.DYNAMIC None None
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 4


100%|██████████| 313/313 [00:06<00:00, 45.99it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 4 acc = 12.5800 size =    0.2121   99.7879
1.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:08<00:00, 35.26it/s]

Before training, sparsity = 1.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 13.7200 size =    0.0473   99.9527
0.4 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:08<00:00, 35.50it/s]


Before training, sparsity = 0.4, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 64.6200 size =    4.8695   95.1305
0.7 QuantizationScheme.STATIC None 4
0.0 QuantizationScheme.STATIC None 8
1.0 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 8
0.8 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 52.43it/s]

Before training, sparsity = 0.8, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 21.6300 size =    0.7206   99.2794
0.1 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:05<00:00, 57.44it/s]


Before training, sparsity = 0.1, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 98.6100 size =   10.7001   89.2999
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 34.68it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    6.2642   93.7358
0.2 QuantizationScheme.DYNAMIC None 8
0.5 QuantizationScheme.STATIC None 8
0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 33.30it/s]


Before training, sparsity = 0.8, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 18.4100 size =    0.7341   99.2659
1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:06<00:00, 49.25it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 10.6000 size =    0.0338   99.9662
0.3 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 2


100%|██████████| 313/313 [00:07<00:00, 40.85it/s]

Before training, sparsity = 0.3, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 2 acc = 18.0200 size =    3.3675   96.6325
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:09<00:00, 33.82it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 37.8700 size =    3.2263   96.7737
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL None
0.9 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 54.02it/s]

Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 25.2700 size =    1.2920   98.7080
0.1 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8



100%|██████████| 313/313 [00:09<00:00, 32.18it/s]

Before training, sparsity = 0.1, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 98.7200 size =   21.4130   78.5870
0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4





0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 54.42it/s]

Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 97.3700 size =   16.5827   83.4173
1.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None





0.8 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 2
0.0 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.4 QuantizationScheme.DYNAMIC None 8
0.9 QuantizationScheme.STATIC None None
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:06<00:00, 49.02it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.1081   99.8919
0.2 QuantizationScheme.NONE None None


100%|██████████| 313/313 [00:03<00:00, 98.98it/s] 


Before training, sparsity = 0.2, scheme = QuantizationScheme.NONE, granularity = None, bitwidth = None acc = 97.5700 size =   66.3308   33.6692
0.3 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 60.27it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    3.3675   96.6325
0.1 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 61.11it/s]


Before training, sparsity = 0.1, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 98.6500 size =   10.7001   89.2999
0.5 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 55.07it/s]

Before training, sparsity = 0.5, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 41.9500 size =    3.2128   96.7872
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4





0.3 QuantizationScheme.DYNAMIC None None
0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 4


100%|██████████| 313/313 [00:06<00:00, 45.67it/s]

Before training, sparsity = 0.6, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 4 acc = 17.2900 size =    2.3335   97.6665
0.8 QuantizationScheme.DYNAMIC None 4





0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.4 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 33.99it/s]

Before training, sparsity = 0.4, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 66.5700 size =    4.8695   95.1305
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None





0.0 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 60.44it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.6477   99.3523
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:06<00:00, 46.83it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 13.4300 size =    2.5833   97.4167
0.9 QuantizationScheme.STATIC None 8
0.0 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 32.53it/s]

Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    1.6202   98.3798
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8



100%|██████████| 313/313 [00:05<00:00, 57.32it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 13.9700 size =    0.4235   99.5765
0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:11<00:00, 28.44it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 22.2400 size =    1.3055   98.6945
0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:08<00:00, 38.14it/s]


Before training, sparsity = 0.4, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 70.7500 size =    4.8560   95.1440
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 2
0.9 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 4
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 55.23it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 13.4000 size =    0.2121   99.7879
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:05<00:00, 53.82it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 25.0800 size =    1.2920   98.7080
0.6 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:10<00:00, 31.00it/s]

Before training, sparsity = 0.6, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 35.9400 size =    2.3470   97.6530
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8





0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 33.51it/s]

Before training, sparsity = 0.8, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 21.2600 size =    1.4548   98.5452
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:09<00:00, 34.48it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 93.7300 size =    8.3059   91.6941
0.8 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 8
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 31.98it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 96.9400 size =   16.5962   83.4038
0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4
0.1 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:07<00:00, 39.77it/s]

Before training, sparsity = 0.1, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 98.9100 size =   21.3995   78.6005
0.6 QuantizationScheme.DYNAMIC None 2





0.6 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4
0.1 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:11<00:00, 27.00it/s]


Before training, sparsity = 0.1, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 98.8500 size =   21.4130   78.5870
0.2 QuantizationScheme.STATIC None 2
0.8 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
1.0 QuantizationScheme.STATIC None 2
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
0.4 QuantizationScheme.DYNAMIC None 8
0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 62.33it/s]


Before training, sparsity = 0.4, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 70.8600 size =    4.8560   95.1440
0.3 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 58.93it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 94.7200 size =   13.4651   86.5349
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 33.80it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 36.8600 size =    3.2263   96.7737
0.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 54.91it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    6.2507   93.7493
0.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None
0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 34.67it/s]


Before training, sparsity = 0.8, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 19.7900 size =    0.7341   99.2659
0.4 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:08<00:00, 34.79it/s]

Before training, sparsity = 0.4, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 65.8500 size =    4.8695   95.1305
0.7 QuantizationScheme.DYNAMIC None 8





0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:07<00:00, 43.73it/s]


Before training, sparsity = 0.6, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 8.0600 size =    4.6649   95.3351
0.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 59.69it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 99.3000 size =   25.0000   75.0000
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:04<00:00, 63.58it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 13.2000 size =    0.2121   99.7879
0.8 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None





0.4 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:08<00:00, 35.98it/s]

Before training, sparsity = 0.4, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    2.4429   97.5571
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8





0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:08<00:00, 35.58it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 99.0700 size =   25.0135   74.9865
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 33.45it/s]

Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 35.6500 size =    6.4384   93.5616
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None





0.6 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 33.24it/s]


Before training, sparsity = 0.6, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    1.1812   98.8188
0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:08<00:00, 34.95it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 87.5500 size =    6.7464   93.2536
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 4


100%|██████████| 313/313 [00:06<00:00, 44.92it/s]

Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 4 acc = 51.7600 size =    8.2924   91.7076
0.9 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4





0.5 QuantizationScheme.DYNAMIC None 8
0.1 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.0 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.7 QuantizationScheme.DYNAMIC None 8
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None
0.6 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:06<00:00, 45.74it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 13.1700 size =    2.5833   97.4167
0.3 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL None
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 57.51it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 97.4600 size =   16.5827   83.4173
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 59.67it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 97.4600 size =   16.5827   83.4173
0.0 QuantizationScheme.NONE None 4
0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 32.36it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 20.5000 size =    0.2256   99.7744
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 2





1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 54.41it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 10.6500 size =    0.0338   99.9662
0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
1.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 33.08it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.0324   99.9676
0.7 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 8
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 58.88it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 24.6900 size =    1.2920   98.7080
0.5 QuantizationScheme.DYNAMIC None 2
0.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:06<00:00, 48.50it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 98.3500 size =   12.5000   87.5000
1.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.1 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 59.01it/s]

Before training, sparsity = 0.1, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    5.3511   94.6489
0.8 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4





0.3 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 4
0.4 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 2
0.6 QuantizationScheme.DYNAMIC None 4
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 56.59it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.1081   99.8919
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:10<00:00, 30.84it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    4.1610   95.8390
0.5 QuantizationScheme.DYNAMIC None None
0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 32.52it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 21.1800 size =    0.2256   99.7744
0.3 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 54.40it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 93.7700 size =    6.7329   93.2671
0.4 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 33.62it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 96.8200 size =   16.5962   83.4038
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:07<00:00, 42.47it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 22.5300 size =   16.5827   83.4173
0.1 QuantizationScheme.STATIC None 8
0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 32.74it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.6612   99.3388
0.5 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:06<00:00, 50.46it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 42.1700 size =    3.2128   96.7872
0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 33.45it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 13.8400 size =    0.4370   99.5630
0.7 QuantizationScheme.NONE None 4
0.5 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 4
1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL None
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 33.59it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 96.3700 size =   12.5135   87.4865
0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 33.90it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 92.5200 size =   13.4786   86.5214
0.1 QuantizationScheme.DYNAMIC None 8
0.1 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 53.91it/s]


Before training, sparsity = 0.1, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    5.3511   94.6489
1.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4
0.4 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 32.00it/s]


Before training, sparsity = 0.4, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    2.4429   97.5571
0.6 QuantizationScheme.NONE None 4
0.4 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 2
0.8 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 2


100%|██████████| 313/313 [00:08<00:00, 38.71it/s]


Before training, sparsity = 0.8, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 2 acc = 9.8000 size =    0.3613   99.6387
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:07<00:00, 40.07it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 9.4400 size =    0.4235   99.5765
0.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:04<00:00, 63.86it/s]

Before training, sparsity = 0.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 99.2900 size =   25.0000   75.0000
0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:05<00:00, 60.60it/s]

Before training, sparsity = 0.4, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 70.4200 size =    4.8560   95.1440
0.7 QuantizationScheme.STATIC None 4





0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 32.72it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    3.3810   96.6190
0.1 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.3 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
0.5 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 58.00it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 41.0400 size =    3.2128   96.7872
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 60.12it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 13.4900 size =    0.2121   99.7879
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8



100%|██████████| 313/313 [00:05<00:00, 60.06it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 97.3800 size =   16.5827   83.4173
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:07<00:00, 44.33it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 13.2500 size =    2.5833   97.4167
0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:06<00:00, 51.17it/s]


Before training, sparsity = 0.6, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 30.6900 size =    4.6649   95.3351
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:10<00:00, 29.41it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 39.0800 size =    3.2263   96.7737
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL None
1.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:10<00:00, 31.23it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 13.1700 size =    0.0783   99.9217
0.0 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 4


100%|██████████| 313/313 [00:07<00:00, 42.27it/s]


Before training, sparsity = 0.4, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 4 acc = 25.2300 size =    4.8560   95.1440
0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 52.97it/s]

Before training, sparsity = 0.6, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    1.1677   98.8323
0.8 QuantizationScheme.STATIC None 8





0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 53.47it/s]

Before training, sparsity = 0.4, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 71.1000 size =    4.8560   95.1440
0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None





0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 33.69it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.1216   99.8784
0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8



100%|██████████| 313/313 [00:09<00:00, 33.19it/s]


Before training, sparsity = 0.8, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 21.4900 size =    1.4548   98.5452
1.0 QuantizationScheme.DYNAMIC None 8
0.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:06<00:00, 51.12it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 98.5200 size =   12.5000   87.5000
0.0 QuantizationScheme.DYNAMIC None None
1.0 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 2
0.3 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.7 QuantizationScheme.DYNAMIC None 8
0.5 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 4
1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:06<00:00, 45.76it/s]

Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 5.9600 size =    0.0648   99.9352
0.7 QuantizationScheme.DYNAMIC None None





1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 60.99it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.0189   99.9811
0.2 QuantizationScheme.DYNAMIC None 8
0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 61.45it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 13.8300 size =    0.4235   99.5765
0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None





0.2 QuantizationScheme.DYNAMIC None None
0.3 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 56.53it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 95.1300 size =   13.4651   86.5349
0.4 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.1 QuantizationScheme.STATIC None 2
0.5 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None
1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:06<00:00, 49.19it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 12.9700 size =    0.0648   99.9352
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 57.18it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 97.6300 size =   16.5827   83.4173
1.0 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.5 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:08<00:00, 36.11it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 11.8300 size =    6.4249   93.5751
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None
0.6 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.4 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 32.72it/s]

Before training, sparsity = 0.4, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 62.4800 size =    9.7255   90.2745
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL None





0.3 QuantizationScheme.STATIC None 4
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 32.76it/s]

Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 36.0000 size =    6.4384   93.5616
1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None





0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:10<00:00, 31.15it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 21.5700 size =    0.2256   99.7744
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8



100%|██████████| 313/313 [00:06<00:00, 44.92it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 9.1400 size =    0.4235   99.5765
0.1 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 33.19it/s]


Before training, sparsity = 0.1, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 98.7700 size =   21.4130   78.5870
0.8 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR None
1.0 QuantizationScheme.DYNAMIC None 8
0.5 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 57.44it/s]

Before training, sparsity = 0.5, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    1.6067   98.3933
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:05<00:00, 58.03it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 13.0800 size =    0.2121   99.7879
1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:07<00:00, 44.71it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 12.9700 size =    0.0648   99.9352
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:11<00:00, 26.92it/s]

Before training, sparsity = 0.2, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    4.1610   95.8390
0.5 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8



100%|██████████| 313/313 [00:05<00:00, 53.87it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 39.2000 size =    6.4249   93.5751
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:08<00:00, 35.01it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 38.8200 size =    3.2263   96.7737
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 2
0.4 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 32.23it/s]


Before training, sparsity = 0.4, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 66.1400 size =    4.8695   95.1305
0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.9 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.2 QuantizationScheme.NONE None 8
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 55.27it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.1081   99.8919
0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:10<00:00, 30.61it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 14.9000 size =    0.4370   99.5630
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:05<00:00, 55.60it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 97.1800 size =    8.2924   91.7076
0.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:06<00:00, 50.83it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 99.2200 size =   25.0000   75.0000
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 33.11it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 35.5800 size =    3.2263   96.7737
1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 2


100%|██████████| 313/313 [00:06<00:00, 46.35it/s]

Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 2 acc = 9.8000 size =    0.0189   99.9811
0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:09<00:00, 33.62it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 20.3000 size =    0.2256   99.7744
0.2 QuantizationScheme.STATIC None 8
0.9 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 32.11it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 38.2400 size =    3.2263   96.7737
0.1 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 54.86it/s]


Before training, sparsity = 0.1, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 99.0100 size =   21.3995   78.6005
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 34.55it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 99.0400 size =   25.0135   74.9865
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 4
0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 33.94it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 88.9800 size =    6.7464   93.2536
0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
1.0 QuantizationScheme.STATIC None 4
0.6 QuantizationScheme.STATIC None 4
1.0 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 4
0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:06<00:00, 50.11it/s]


Before training, sparsity = 0.6, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 30.9800 size =    4.6649   95.3351
1.0 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:08<00:00, 35.41it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 37.1200 size =    3.2263   96.7737
1.0 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 58.67it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    4.1475   95.8525
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 8
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 8
0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:08<00:00, 35.34it/s]

Before training, sparsity = 0.3, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 87.8900 size =    6.7464   93.2536
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8





0.7 QuantizationScheme.DYNAMIC None 8
0.3 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 34.57it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    3.3810   96.6190
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 4


100%|██████████| 313/313 [00:06<00:00, 46.38it/s]


Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 4 acc = 13.4300 size =    0.2121   99.7879
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 33.90it/s]


Before training, sparsity = 0.0, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 96.5400 size =   12.5135   87.4865
0.6 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 2
0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 31.65it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 22.1500 size =    1.3055   98.6945
0.1 QuantizationScheme.STATIC None 4
0.5 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
0.9 QuantizationScheme.STATIC None 8
0.6 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:08<00:00, 35.35it/s]

Before training, sparsity = 0.6, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 34.5200 size =    2.3470   97.6530
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2



100%|██████████| 313/313 [00:09<00:00, 34.55it/s]

Before training, sparsity = 0.2, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    4.1610   95.8390
1.0 QuantizationScheme.STATIC None 8





0.8 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None
0.8 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 33.40it/s]


Before training, sparsity = 0.8, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 18.7900 size =    0.7341   99.2659
0.8 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:04<00:00, 65.04it/s]

Before training, sparsity = 0.8, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 21.3000 size =    0.7206   99.2794
0.7 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8





0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:09<00:00, 34.16it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.6612   99.3388
0.4 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.3 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 4
0.9 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:09<00:00, 34.67it/s]


Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 37.9600 size =    3.2263   96.7737
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 56.38it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    4.1475   95.8525
0.4 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.8 QuantizationScheme.STATIC None 8
0.8 QuantizationScheme.NONE None 2
0.0 QuantizationScheme.DYNAMIC None None
0.4 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 4
0.1 QuantizationScheme.DYNAMIC None 2
0.8 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 61.91it/s]

Before training, sparsity = 0.8, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 20.9600 size =    1.4413   98.5587
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8



100%|██████████| 313/313 [00:05<00:00, 60.10it/s]

Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 97.7300 size =   16.5827   83.4173
0.1 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR None





0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 53.86it/s]


Before training, sparsity = 0.6, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 30.8900 size =    4.6649   95.3351
0.4 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 4


100%|██████████| 313/313 [00:07<00:00, 39.20it/s]


Before training, sparsity = 0.4, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 4 acc = 24.8200 size =    4.8560   95.1440
1.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4
0.8 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 2


100%|██████████| 313/313 [00:05<00:00, 59.39it/s]


Before training, sparsity = 0.8, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 2 acc = 9.8000 size =    0.3613   99.6387
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 31.37it/s]

Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 35.2700 size =    6.4384   93.5616
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8





0.6 QuantizationScheme.DYNAMIC None 4
0.2 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 8


100%|██████████| 313/313 [00:07<00:00, 42.86it/s]


Before training, sparsity = 0.2, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 8 acc = 22.5300 size =   16.5827   83.4173
0.5 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 32.29it/s]

Before training, sparsity = 0.5, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 35.9700 size =    6.4384   93.5616
0.8 QuantizationScheme.STATIC None 2





0.1 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 4
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4


100%|██████████| 313/313 [00:05<00:00, 57.90it/s]

Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 24.6400 size =    1.2920   98.7080
1.0 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 4



100%|██████████| 313/313 [00:06<00:00, 48.65it/s]


Before training, sparsity = 1.0, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 4 acc = 10.6300 size =    0.0338   99.9662
0.0 QuantizationScheme.NONE None 8
0.5 QuantizationScheme.STATIC None None
0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 59.01it/s]


Before training, sparsity = 0.6, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 31.1000 size =    4.6649   95.3351
1.0 QuantizationScheme.DYNAMIC None 8
0.3 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 4


100%|██████████| 313/313 [00:07<00:00, 41.10it/s]


Before training, sparsity = 0.3, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 4 acc = 49.5600 size =    6.7329   93.2671
0.6 QuantizationScheme.DYNAMIC None 8
0.2 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
0.7 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL 2


100%|██████████| 313/313 [00:07<00:00, 44.49it/s]


Before training, sparsity = 0.7, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_CHANNEL, bitwidth = 2 acc = 9.7500 size =    0.6477   99.3523
0.0 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR None
0.9 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:05<00:00, 57.88it/s]

Before training, sparsity = 0.9, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 13.6300 size =    0.4235   99.5765
0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_TENSOR 8



100%|██████████| 313/313 [00:05<00:00, 56.25it/s]


Before training, sparsity = 0.6, scheme = QuantizationScheme.DYNAMIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 31.0600 size =    4.6649   95.3351
0.3 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 8
1.0 QuantizationScheme.NONE None 8
0.8 QuantizationScheme.NONE QuantizationGranularity.PER_CHANNEL 8
0.6 QuantizationScheme.DYNAMIC QuantizationGranularity.PER_CHANNEL None
0.2 QuantizationScheme.NONE QuantizationGranularity.PER_TENSOR 4
0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_CHANNEL 8
0.0 QuantizationScheme.NONE None 4
0.6 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 8


100%|██████████| 313/313 [00:09<00:00, 33.25it/s]

Before training, sparsity = 0.6, scheme = QuantizationScheme.STATIC, granularity = QuantizationGranularity.PER_TENSOR, bitwidth = 8 acc = 27.0300 size =    4.6784   95.3216
0.7 QuantizationScheme.STATIC QuantizationGranularity.PER_TENSOR 2



 20%|█▉        | 62/313 [00:02<00:06, 38.08it/s]

In [None]:
(input_scale * weight_scale).view(1, -1, 1, 1)
input_scale * weight_scale

100%|██████████| 313/313 [00:03<00:00, 92.59it/s] 


97.49

In [None]:
s = 0.5
q = 3
b = 8

compression_config = {
    "prune_channel" :{
        "sparsity" : s
    },
    # "quantization" : {
    #     "type" : q,
    #     "bitwidth" : b
    # }

}

lenet5_mcu_model.cpu()
compressed_lenet5_mcu_model = lenet5_mcu_model.compress(compression_config, input_shape=input_shape, input_batch_real=next(iter(mnist_test_loader))[0])
compressed_lenet5_mcu_model.to(DEVICE)

before_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100

compressed_lenet5_mcu_model.cpu()
size = compressed_lenet5_mcu_model.get_size_in_bits()//8
compressed_lenet5_mcu_model.to(DEVICE)

print(f"Before training, sparsity = {s}, q_type = {q}, bitwidth = {b} acc = {before_acc:.4f} size = {size/original_size*100:9.4f}")
compressed_lenet5_mcu_model.cpu()
compressed_lenet5_mcu_model.convert_to_c(input_shape=input_shape, var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")
compressed_lenet5_mcu_model.to(DEVICE)
# print(compressed_lenet5_mcu_model.test(device=DEVICE))

early_stopper = EarlyStopper(
    metric_name="train_loss",
    min_valid_diff=1e-5,
    mode="min",
    patience=4,
    restore_best_state_dict=True,
)

criterion_fun = nn.CrossEntropyLoss()
# optimizion_fun = optim.SGD(compressed_lenet5_mcu_model.parameters(), lr=1.e-3)
optimizion_fun = optim.Adam(compressed_lenet5_mcu_model.parameters(), lr=10.e-3)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

compressed_lenet5_mcu_model.fit(
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    verbose = True,
    device=DEVICE,
    compression_config=compression_config,
    input_shape=input_shape, input_batch_real=next(iter(mnist_test_loader))[0],
    callbacks = [early_stopper]
)
after_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100
print(f"After training, sparsity = {s}, q_type = {q}, bitwidth = {b} acc = {after_acc:.4f}")



s = 0.5
q = 3
b = 4

compression_config = {
    "prune_channel" :{
        "sparsity" : s
    },
    "quantization" : {
        "type" : q,
        "bitwidth" : b
    }
}

compressed_lenet5_mcu_model.cpu()
compressed_lenet5_mcu_model = compressed_lenet5_mcu_model.compress(compression_config, input_shape=input_shape, input_batch_real=next(iter(mnist_test_loader))[0])
compressed_lenet5_mcu_model.to(DEVICE)

before_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100

compressed_lenet5_mcu_model.cpu()
size = compressed_lenet5_mcu_model.get_size_in_bits()//8
compressed_lenet5_mcu_model.to(DEVICE)

print(f"Before training, sparsity = {s}, q_type = {q}, bitwidth = {b} acc = {before_acc:.4f} size = {size/original_size*100:9.4f}")
compressed_lenet5_mcu_model.cpu()
compressed_lenet5_mcu_model.convert_to_c(input_shape=input_shape, var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")
compressed_lenet5_mcu_model.to(DEVICE)
# print(compressed_lenet5_mcu_model.test(device=DEVICE))

early_stopper = EarlyStopper(
    metric_name="train_loss",
    min_valid_diff=1e-5,
    mode="min",
    patience=4,
    restore_best_state_dict=True,
)

criterion_fun = nn.CrossEntropyLoss()
# optimizion_fun = optim.SGD(compressed_lenet5_mcu_model.parameters(), lr=1.e-3)
optimizion_fun = optim.Adam(compressed_lenet5_mcu_model.parameters(), lr=10.e-3)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

compressed_lenet5_mcu_model.fit(
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    verbose = True,
    device=DEVICE,
    compression_config=compression_config,
    input_shape=input_shape, input_batch_real=next(iter(mnist_test_loader))[0],
    callbacks = [early_stopper]
)
after_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100
print(f"After training, sparsity = {s}, q_type = {q}, bitwidth = {b} acc = {after_acc:.4f}")

AttributeError: 'Sequential' object has no attribute 'compress'

In [None]:
compressed_lenet5_mcu_model.test(device=DEVICE), \
quantize_per_tensor_assy(
    compressed_lenet5_mcu_model.test(device=DEVICE),
    compressed_lenet5_mcu_model[-1].__dict__["_dmc"]["quantization"]["output_scale"],
    compressed_lenet5_mcu_model[-1].__dict__["_dmc"]["quantization"]["output_zero_point"]
)

(tensor([[ -4.2995,  -2.2290,  -1.3500,   7.7069,  -1.1915,  -1.9620, -12.2104,
           -2.9040,  -1.8079,  -1.4910]], device='cuda:0'),
 tensor([[-1,  0,  0,  4,  0,  0, -5,  0,  0,  0]], device='cuda:0',
        dtype=torch.int8))

In [None]:
compressed_lenet5_mcu_model

Sequential(
  (conv2d_0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (relu6_0): ReLU6()
  (maxpool2d_0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2d_1): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (relu_0): ReLU()
  (maxpool2d_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten_0): Flatten(start_dim=1, end_dim=-1)
  (linear_0): Linear(in_features=400, out_features=84, bias=False)
  (relu6_1): ReLU6()
  (linear_1): Linear(in_features=84, out_features=10, bias=True)
)

In [None]:
compressed_lenet5_mcu_model.cpu()
test_input = compressed_lenet5_mcu_model.test_input.clone()

test_input_quant = quantize_per_tensor_assy(
    test_input,
    compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["input_scale"],
    compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["input_zero_point"],
    compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["bitwidth"],
)

test_input_real = dequantize_per_tensor_assy(
    test_input_quant, 
    compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["input_scale"],
    compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["input_zero_point"],
)

i = 0
# print("original real", test_input[0,0,i])
# print("quant real", test_input_real[0,0,i])
# print("quant", test_input_quant[0,0,i])
# line = torch.clamp(test_input[0,0,i]/compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["input_scale"] + \
#                    compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["input_zero_point"], -128, 127)
# print(line)
# print((line- compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["input_zero_point"]) * compressed_lenet5_mcu_model.__dict__["_dmc"]["quantization"]["input_scale"])


conv0 = compressed_lenet5_mcu_model[0]
test_input_real = conv0(test_input_real)

relu0 = compressed_lenet5_mcu_model[1]
test_input_real = relu0(test_input_real)
i = 5*2
print(test_input_real.size())
print(test_input_real[0,0,i:i+2])
print(
    quantize_per_tensor_assy(
        test_input_real,
        conv0.__dict__["_dmc"]["quantization"]["output_scale"],
        conv0.__dict__["_dmc"]["quantization"]["output_zero_point"]
    )[0,0,i:i+2]
)
avgpool_0 = compressed_lenet5_mcu_model[2]
test_input_real = avgpool_0(test_input_real)

# flatten0 = compressed_lenet5_mcu_model[2]
# test_input_real = flatten0(test_input_real)

# linear0 = compressed_lenet5_mcu_model[3]
# test_input_real = linear0(test_input_real)

next_layer = compressed_lenet5_mcu_model[0]

i = 5
print(test_input_real.size())
print(test_input_real[0,0,i])
print(
    quantize_per_tensor_assy(
        test_input_real,
        next_layer.__dict__["_dmc"]["quantization"]["output_scale"],
        next_layer.__dict__["_dmc"]["quantization"]["output_zero_point"]
    )[0,0,i]
)

# pad_input = nn.functional.pad(
#         test_input_quant.to(torch.int32) - conv0.__dict__["_dmc"]["quantization"]["input_zero_point"], 
#         conv0.pad, 
#         "constant", 
#         conv0.__dict__["_dmc"]["quantization"]["input_zero_point"] - conv0.__dict__["_dmc"]["quantization"]["input_zero_point"]
#     )
# pad_weight = quant_weight.to(torch.int32)

# quant_weight = quantize_per_tensor_sy(
#         conv0.weight, 
#         conv0.__dict__["_dmc"]["quantization"]["weight_scale"],
#         conv0.__dict__["_dmc"]["quantization"]["bitwidth"]
#     )
# test_input_quant = nn.functional.conv2d(
#     nn.functional.pad(
#         test_input_quant.to(torch.int32) - conv0.__dict__["_dmc"]["quantization"]["input_zero_point"], 
#         conv0.pad, 
#         "constant", 
#         conv0.__dict__["_dmc"]["quantization"]["input_zero_point"] - conv0.__dict__["_dmc"]["quantization"]["input_zero_point"]
#     ),
#     quant_weight.to(torch.int32),
#     stride=5,
# )

# print(test_input_quant)
# print(test_input_quant*next_layer.__dict__["_dmc"]["quantization"]["bias_scale"])

# print(
#     quantize_per_tensor_assy(
#         test_input_quant*next_layer.__dict__["_dmc"]["quantization"]["bias_scale"],
#         next_layer.__dict__["_dmc"]["quantization"]["output_scale"],
#         next_layer.__dict__["_dmc"]["quantization"]["output_zero_point"]
#     )
# )

# print(test_input_real)
# print(
#     quantize_per_tensor_assy(
#         test_input_real,
#         next_layer.__dict__["_dmc"]["quantization"]["output_scale"],
#         next_layer.__dict__["_dmc"]["quantization"]["output_zero_point"]
#     )
# )

# print(
#     nn.functional.conv2d(
#         test_input_real, 
#         dequantize_per_tensor_sy(
#             quant_weight,
#                 conv0.__dict__["_dmc"]["quantization"]["weight_scale"],
#             ),
#         stride=5
#         # quantize_per_tensor_sy(
#         #     conv0.weight, 
#         #     conv0.__dict__["_dmc"]["quantization"]["weight_scale"],
#         #     conv0.__dict__["_dmc"]["quantization"]["bitwidth"]
#         # ).to(torch.int32),
#         # stride=5,
#     )

# )
# next_layer.__dict__["_dmc"]["quantization"]["output_zero_point"]
conv0.__dict__["_dmc"]["quantization"]["output_zero_point"]

torch.Size([1, 6, 28, 28])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.]], grad_fn=<SliceBackward0>)
tensor([[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3],
        [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3]], dtype=torch.int8)
torch.Size([1, 6, 14, 14])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       grad_fn=<SelectBackward0>)
tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], dtype=torch.int8)


tensor(3, dtype=torch.int8)

## Pruned Model 

### sparsity_per_layer = 0.1

In [None]:
 sparsity_per_layer = 0.25
RANGE = 10




if not os.path.exists(log_compression_details_file):
    with open(log_compression_details_file, "w") as file:
        file.write(f"sparsity, quantization_type, bitwidth, size, size_ratio, before acc, after acc, before acc_drop, after acc_drop\n")
        # file.write(f"sparsity, quantizaion_type, bitwidth, before acc, after acc\n")

for i in tqdm(range(1)):

    # s = random.choice([i/RANGE for i in range(0, RANGE+1, 1)])
    # q = random.choice([QUANTIZATION_NONE, DYNAMIC_QUANTIZATION_PER_TENSOR, DYNAMIC_QUANTIZATION_PER_TENSOR, DYNAMIC_QUANTIZATION_PER_TENSOR])
    # b = random.choice([4, 8])
    # print(f"sample number {i} ->  sparsity = {s}, q_type = {q}, bitwidth = {b}")

    s = 0.
    q = 1
    b = 8

    compression_config = {
        "prune_channel" :{
            "sparsity" : s
        },
        "quantization" : {
            "type" : q,
            "bitwidth" : b
        }

    }

    lenet5_mcu_model.cpu()
    compressed_lenet5_mcu_model = lenet5_mcu_model.compress(compression_config, input_shape=input_shape)
    compressed_lenet5_mcu_model.to(DEVICE)

    before_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100

    compressed_lenet5_mcu_model.cpu()
    size = compressed_lenet5_mcu_model.get_size_in_bits()//8
    compressed_lenet5_mcu_model.to(DEVICE)

    print(f"Before training, sparsity = {s}, q_type = {q}, bitwidth = {b} acc = {before_acc:.4f}")

    early_stopper = EarlyStopper(
        metric_name="train_loss",
        min_valid_diff=1e-5,
        mode="min",
        patience=4,
        restore_best_state_dict=True,
    )

    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(compressed_lenet5_mcu_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

    compressed_lenet5_mcu_model.fit(
        mnist_train_loader, 
        15, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=mnist_test_loader, 
        metrics={"acc": accuracy_fun},
        verbose = False,
        device=DEVICE,
        compression_config=compression_config,
        callbacks = [early_stopper]
    )
    after_acc = compressed_lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun, device=DEVICE)*100

    print(f"After training, sparsity = {i/RANGE:.2f}, bitwidth = {b} acc = {after_acc:.4f}")

    with open(log_compression_details_file, "a") as file:
        file.write(f"{s}, {q}, {b}, {size}, {size/original_size*100:9.4f}, {before_acc:9.4f}, {after_acc:9.4f}, {original_acc-before_acc:9.4f}, {original_acc-after_acc:9.4f}\n")
    # break

IndentationError: unexpected indent (1965599166.py, line 1)

In [None]:
 sparsity_per_layer = 0.1
lenet5_model.to("cpu")
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
# acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
# print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.to(DEVICE)
lenet5_mcu_model.fit(
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device=DEVICE,
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.2

In [None]:
sparsity_per_layer = 0.2
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.3

In [None]:
sparsity_per_layer = 0.3
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.4

In [None]:
sparsity_per_layer = 0.4
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.5

In [None]:
sparsity_per_layer = 0.5
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.6

In [None]:
sparsity_per_layer = 0.6
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.7

In [None]:
sparsity_per_layer = 0.7
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.8

In [None]:
sparsity_per_layer = 0.8
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### sparsity_per_layer = 0.9

In [None]:
sparsity_per_layer = 0.9
lenet5_mcu_model = lenet5_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
# size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")
# print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

criterion_fun = nn.CrossEntropyLoss()
optimizion_fun = optim.Adam(lenet5_mcu_model.parameters(), lr=1.e-2)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

lenet5_mcu_model.fit(
    # mnist_test_loader,
    mnist_train_loader, 
    15, 
    criterion_fun, optimizion_fun, lr_scheduler,
    validation_dataloader=mnist_test_loader, 
    metrics={"acc": accuracy_fun},
    device="cpu",
    compression_aware=True
)
lenet5_mcu_model = lenet5_mcu_model.prune_channel(sparsity_per_layer)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
print(f"The pruned model with sparsity {sparsity_per_layer} accuracy is {acc*100:.2f}%.")



# print(lenet5_mcu_model.test())
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
# lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Dynamic Quantized Per Tensor

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Dynamic Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")


## Static Quantized Per Tensor

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



## Static Quantized Per Channel

### 8 bit quantization

In [None]:
bitwidth = 8
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



### 4 bit quantization

In [None]:
bitwidth = 4
lenet5_mcu_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
acc = lenet5_mcu_model.evaluate(mnist_test_loader, accuracy_fun)
size = lenet5_mcu_model.get_size_in_bits()//8
print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")
print(lenet5_mcu_model.test())
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./Arduino Nano 33 BLE/src/", include_dir="./Arduino Nano 33 BLE/include/")
lenet5_mcu_model.convert_to_c(var_name="lenet5_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")



In [None]:
# lenet5_model.cpu()

# # PRUNED MODEL
# pruned_sparsity = [i/10 for i in range(10)]
# for sparsity in pruned_sparsity:
#     pruned_model = lenet5_model.prune_channel(sparsity)
#     acc = pruned_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = pruned_model.get_size_in_bits()//8
#     print(f"The pruned model with sparsity {sparsity} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

# quantization_bitwidth = [i for i in range(8, 0, -1)]

# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_tensor_model = lenet5_model.dynamic_quantize_per_tensor(bitwidth)
#     acc = dynamic_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # DYNAMIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     dynamic_quantized_per_channel_model = lenet5_model.dynamic_quantize_per_channel(bitwidth)
#     acc = dynamic_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = dynamic_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The dynamic quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_tensor_model = lenet5_model.static_quantize_per_tensor(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_tensor_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_tensor_model.get_size_in_bits()//8
#     print(f"The static quantized per tensor model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")


# # STATIC QUANTIZED MODEL PER TERSON
# for bitwidth in quantization_bitwidth:
#     static_quantized_per_channel_model = lenet5_model.static_quantize_per_channel(next(iter(mnist_test_loader))[0], bitwidth)
#     acc = static_quantized_per_channel_model.evaluate(mnist_test_loader, accuracy_fun)
#     size = static_quantized_per_channel_model.get_size_in_bits()//8
#     print(f"The static quantized per channel model with bitwidth {bitwidth} accuracy is {acc*100:.2f}%.")
#     print(f"The accurancy drop is {(original_acc - acc)*100:.2f}% and size drop is {(original_size - size)/original_size*100:.2f}%.")

