# # End-to-End: Build → Calibrate → (Re)Train → Compare Accuracy (16×16) → Compare Runtime (various SA)

This notebook follows the same pattern as before, but bundles the whole workflow:

1. **Build models** for two multipliers (`mul8s_acc` and `mul8s_1L2H`) and a fixed SA size (16×16).
2. **Calibrate** both models (percentile histogram, short pass).
3. **(Optional) Re-train** both models briefly (fine-tune) to reduce quantization/approx errors.
4. **Compare accuracy** between the two 16×16 models (calibrated or fine-tuned).
5. **Compare execution time** for a chosen multiplier across multiple SA sizes.


In [1]:
import os, time, timeit
import torch
import torchvision as tv
from torchvision import transforms as T
import pandas as pd
from tqdm import tqdm
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

# Safer dataloaders in constrained environments
torch.multiprocessing.set_sharing_strategy("file_system")

# ---- Global config ----
DEVICE = "cpu"
BATCH_SIZE = 64
NUM_CALIB_BATCHES = 2        # increase to 8–32 for better INT8 quality
SA_ROWS, SA_COLS = 16, 16    # fixed for the accuracy comparison
USE_EXACT = False            # True -> force exact mult; False -> use approx variants

# Two variants to compare (16×16 accuracy)
VARIANTS = ["mul8s_acc", "mul8s_1L2H"]

# (Optional) training settings
DO_FINETUNE = False          # set True to run a brief fine-tune 
FINETUNE_EPOCHS = 1
LR = 1e-4
WD = 0.0

# SA configs for runtime comparison 
SA_CONFIGS = [(8,8), (16,16), (32,8), (8,32)]
RUNTIME_MULT = "mul8s_acc"   # which multiplier to use for the runtime sweep

pd.set_option("display.max_colwidth", 160)


## Data loaders

In [2]:
def val_dataloader(mean = (0.4914, 0.4822, 0.4465), std = (0.2471, 0.2435, 0.2616)):

    transform = T.Compose(
        [
            T.ToTensor(),
            T.Normalize(mean, std),
        ]
    )
    dataset = CIFAR10(root="datasets/cifar10_data", train=False, download=True, transform=transform)
    dataloader = DataLoader(
        dataset,
        batch_size=128,
        num_workers=0,
        drop_last=True,
        pin_memory=False,
    )
    return dataloader

transform = T.Compose(
        [
            T.RandomCrop(32, padding=4),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            T.Normalize(mean = (0.4914, 0.4822, 0.4465), std = (0.2471, 0.2435, 0.2616)),
        ]
    )

dataset = CIFAR10(root="datasets/cifar10_data", train=True, download=True, transform=transform)

evens = list(range(0, len(dataset), 10))
trainset_1 = torch.utils.data.Subset(dataset, evens)

data = val_dataloader()

# data_t is used for calibration purposes and is a subset of train-set
data_t = DataLoader(trainset_1, batch_size=128,
                                            shuffle=False, num_workers=0)

Files already downloaded and verified
Files already downloaded and verified


## Helpers (evaluate, calibration, amax, finetune)

In [3]:
from pytorch_quantization.nn.modules.tensor_quantizer import TensorQuantizer
from pytorch_quantization import calib
from adapt.approx_layers.systolic_build import precompile_systolic_extensions
from adapt.approx_layers.systolic_utils import swap_to_systolic
import torch
from tqdm import tqdm
import contextlib

@torch.no_grad()
def evaluate(model, loader, device=DEVICE, desc="Eval"):
    model.eval(); model.to(device)
    correct = 0
    total = 0

    model.eval()
    start_time = timeit.default_timer()
    with torch.no_grad():
        for iteraction, (images, labels) in tqdm(enumerate(data), total=len(data)):
            images, labels = images.to("cpu"), labels.to("cpu")
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(timeit.default_timer() - start_time)
    print('Accuracy of the network on the 10000 test images: %.4f %%' % (
    100 * correct / total))
    return 100 * correct/total

@torch.no_grad()
def init_weight_amax_from_weights(model):
    n_set, n_total = 0, 0
    for _, m in model.named_modules():
        q_w = getattr(m, "quantizer_w", None)
        if isinstance(q_w, quant_nn.TensorQuantizer):
            n_total += 1
            if getattr(q_w, "amax", None) is None:
                W = getattr(m, "weight", None)
                if W is not None:
                    q_w._amax = torch.as_tensor(W.detach().abs().max(), dtype=torch.float32)
                    n_set += 1
    print(f"[init_weight_amax_from_weights] set {n_set}/{n_total} weight amax")

# 2) A small helper to attach pre-hooks that "touch" quantizers during calib
def _make_calib_pre_hook(mod):
    @torch.no_grad()
    def _pre(mod_, inputs):
        # inputs is a tuple; x is first positional input tensor
        if not inputs:
            return
        x = inputs[0]
        # Call activation quantizer to collect stats
        q = getattr(mod_, "quantizer", None)
        if isinstance(q, quant_nn.TensorQuantizer) and getattr(q, "_calibrator", None) is not None:
            q(x)
        # Touch weights as well so weight calibrator (if any) can record
        q_w = getattr(mod_, "quantizer_w", None)
        if isinstance(q_w, quant_nn.TensorQuantizer) and getattr(q_w, "_calibrator", None) is not None:
            W = getattr(mod_, "weight", None)
            if W is not None:
                q_w(W)
    return _pre

@contextlib.contextmanager
def attach_calibration_hooks(model):
    """Attach forward-pre-hooks to every systolic layer so quantizers see tensors."""
    hooks = []
    for _, m in model.named_modules():
        # We only hook layers that have our quantizers AND a forward
        has_any_q = isinstance(getattr(m, "quantizer", None), quant_nn.TensorQuantizer) or \
                    isinstance(getattr(m, "quantizer_w", None), quant_nn.TensorQuantizer)
        if has_any_q:
            try:
                h = m.register_forward_pre_hook(_make_calib_pre_hook(m))
                hooks.append(h)
            except Exception:
                pass
    try:
        yield
    finally:
        for h in hooks:
            try:
                h.remove()
            except Exception:
                pass

# 3) Fixed collect_stats using the hooks
def collect_stats(model, data_loader, num_batches=10, device="cpu"):
    """Collect activation histograms. Uses pre-hooks to ensure quantizers see tensors."""
    model.eval()
    model.to(device)

    # Enable calibration (disable quantization) so quantizers record histograms
    for _, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.disable_quant()
                module.enable_calib()
            else:
                module.disable()

    with torch.no_grad(), attach_calibration_hooks(model):
        for i, (image, _) in enumerate(data_loader):
            image = image.to(device, non_blocking=True)
            _ = model(image)  # pre-hooks will call quantizers here
            if i >= num_batches - 1:
                break

    # Disable calibration (enable quantization for inference)
    for _, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.enable_quant()
                module.disable_calib()
            else:
                module.enable()

    print("Calibration data collection complete.")

# 4) Compute and sanitize amax 
def compute_amax(model, method="percentile", percentile=99.99, strict=False, fallback=1.0):
    n_loaded, n_fixed = 0, 0
    for name, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                try:
                    if isinstance(module._calibrator, calib.MaxCalibrator):
                        module.load_calib_amax(strict=strict)
                    else:
                        module.load_calib_amax(method=method, percentile=percentile, strict=strict)
                    n_loaded += 1
                except RuntimeError:
                    pass
            # sanitize
            amax = getattr(module, "amax", None)
            if (amax is None) or (torch.isnan(amax)) or (float(amax) == 0.0):
                module._amax = torch.tensor(float(fallback), dtype=torch.float32)
                n_fixed += 1
            print(F"{name:40}: {module}")
    print(f"Loaded calibrated amax values. loaded={n_loaded}, sanitized={n_fixed}")
    model.cpu()

In [4]:
from pytorch_quantization import nn as quant_nn
from pytorch_quantization import calib
from adapt.references.classification.train import train_one_epoch, load_data
def finetune_one_epoch(model, loader, device=DEVICE, lr=1e-4, wd=0.0):
    from pytorch_quantization import nn as quant_nn
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
    
    # finetune the model for one epoch based on data_t subset 
    train_one_epoch(model, criterion, optimizer, loader, device, 0, 1)

## Accuracy comparison ( systolic array size 16×16) for two multipliers

In [5]:
# === Accuracy comparison @16x16 using resnet50_systolic ===
rows = []

try:
    from models.resnet_systolic import resnet50_systolic
except Exception:
    from examples.models.resnet_systolic import resnet50_systolic

for axx_mult in VARIANTS:  # z.B. ["mul8s_acc", "mul8s_1L2H"]
    print(f"=== resnet50_systolic @16x16: {axx_mult} ===")

    # 1) Precompile 
    precompile_systolic_extensions(
        axx_mult=axx_mult,
        use_exact_variants=(USE_EXACT,),
        sa_rows=16, sa_cols=16,
        verbose=False
    )

    model = resnet50_systolic(pretrained=True, axx_mult=axx_mult, use_exact=USE_EXACT)
    model.eval()  

    # 3) calibration
    with torch.no_grad():
        #init_weight_amax_from_weights(model)  
        stats = collect_stats(model, data_t, num_batches=2)
        amax = compute_amax(model, method="percentile", percentile=99.99)
    
    # optional - test different calibration methods
    #amax = compute_amax(model, method="mse")
    #amax = compute_amax(model, method="entropy")


    acc_cal = evaluate(model, data, DEVICE, desc=f"Eval resnet50_systolic ({axx_mult}) calibrated")
    rows.append({"Variant": axx_mult, "Type": "calibrated", "Accuracy %": acc_cal})

    import torch.nn as nn

    if DO_FINETUNE:
        
        for epoch in range(FINETUNE_EPOCHS):
            finetune_one_epoch(model, data_t, lr=LR, wd=WD)
        model.eval()
        acc_ft = evaluate(model, data, 'cpu',desc=f"Eval resnet50_systolic ({axx_mult}) finetuned")
        rows.append({"Variant": axx_mult, "Type": "finetuned", "Accuracy %": acc_ft})

df_acc = pd.DataFrame(rows).sort_values(by=["Variant", "Type"]).reset_index(drop=True)
df_acc


=== resnet50_systolic @16x16: mul8s_acc ===
Pre-compiling systolic extensions for mul8s_acc...
  Mode: approx
    • linear (r16×c16)
    • conv2d (r16×c16)
Pre-compilation complete! Models will now load instantly.
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_acc, exact=False, SA=

W1109 22:56:11.721399 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.722329 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.722698 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.723043 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.723437 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.723791 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.724147 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.724495 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.724864 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.725259 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.725633 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1109 22:56:11.756426 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.756732 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.757040 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.757330 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.757620 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.757904 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.758187 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.758488 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.758776 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.759063 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 22:56:11.759372 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1109 22:56:11.799958 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.800517 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.801116 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.801682 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.802239 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.802794 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.803343 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.803899 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.804484 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 22:56:11.805102 140511505602368

Calibration data collection complete.
conv1.quantizer                         : TensorQuantizer(8bit per-tensor amax=2.1255 calibrator=HistogramCalibrator quant)
conv1.quantizer_w                       : TensorQuantizer(8bit per-tensor amax=0.1625 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer                : TensorQuantizer(8bit per-tensor amax=0.6236 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0804 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2153 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0355 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2416 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0544 calibrat

100%|███████████████████████████████████████████| 78/78 [48:33<00:00, 37.35s/it]

2913.214075219992
Accuracy of the network on the 10000 test images: 93.6498 %
=== resnet50_systolic @16x16: mul8s_1L2H ===
Pre-compiling systolic extensions for mul8s_1L2H...
  Mode: approx
    • linear (r16×c16)





    • conv2d (r16×c16)
Pre-compilation complete! Models will now load instantly.
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exa

W1109 23:45:31.566247 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.566863 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.567243 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.567556 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.567908 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.568291 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.568665 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.569074 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.569480 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.569818 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.570209 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1109 23:45:31.608962 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.609355 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.609710 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.610060 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.610431 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.610807 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.611179 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.611558 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.611917 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.612283 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1109 23:45:31.612644 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1109 23:45:31.674472 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.675251 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.676008 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.676821 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.677601 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.678378 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.679055 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.679596 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.680127 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1109 23:45:31.680663 140511505602368

Calibration data collection complete.
conv1.quantizer                         : TensorQuantizer(8bit per-tensor amax=2.1255 calibrator=HistogramCalibrator quant)
conv1.quantizer_w                       : TensorQuantizer(8bit per-tensor amax=0.1625 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer                : TensorQuantizer(8bit per-tensor amax=0.6256 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0804 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2144 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0355 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2372 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0544 calibrat

100%|███████████████████████████████████████████| 78/78 [48:07<00:00, 37.02s/it]

2887.380255897995
Accuracy of the network on the 10000 test images: 82.6623 %





Unnamed: 0,Variant,Type,Accuracy %
0,mul8s_1L2H,calibrated,82.66226
1,mul8s_acc,calibrated,93.64984


## Runtime comparison across SA sizes (same multiplier)

In [6]:
# We compare elapsed time for a short pass (10 batches) and full evaluation.
results = []

for (r, c) in SA_CONFIGS:
    print(f"=== Runtime: {RUNTIME_MULT} @ SA {r}x{c} ===")
    precompile_systolic_extensions(axx_mult=RUNTIME_MULT, use_exact_variants=(USE_EXACT,),
                                   sa_rows=r, sa_cols=c, verbose=False)
    
    model = resnet50_systolic(pretrained=True, axx_mult=axx_mult, use_exact=USE_EXACT,sa_rows=r, sa_cols=c)
    model.eval() 

    # Calibrate quickly (reuse same approach for fair comparison)
    with torch.no_grad():
        stats = collect_stats(model, data_t, num_batches=2)
        amax = compute_amax(model, method="percentile", percentile=99.99)    

    # Warmup
    xb, yb = next(iter(data))
    _ = model(xb)

    # Short run (10 batches)
    iters = 10
    start = timeit.default_timer()
    with torch.no_grad():
        for i, (x, _) in enumerate(data):
            _ = model(x)
            if i >= iters - 1:
                break
    t_small = timeit.default_timer() - start

    # Full eval timing
    start = timeit.default_timer()
    acc = evaluate(model, data, desc=f"Eval runtime SA {r}x{c}")
    t_full = timeit.default_timer() - start

    results.append({
        "sa_rows": r,
        "sa_cols": c,
        "accuracy %": acc,
        "time_10_batches_sec": t_small,
        "time_full_eval_sec": t_full,
    })

df_rt = pd.DataFrame(results).sort_values(by=["sa_rows","sa_cols"]).reset_index(drop=True)
df_rt


=== Runtime: mul8s_acc @ SA 8x8 ===
Pre-compiling systolic extensions for mul8s_acc...
  Mode: approx
    • linear (r8×c8)
    • conv2d (r8×c8)
Pre-compilation complete! Models will now load instantly.
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x8
✓ Loaded systolic 

W1110 00:34:38.784360 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.785006 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.785623 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.785949 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.786425 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.786763 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.787105 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.787488 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.787899 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.788851 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.789342 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1110 00:34:38.847416 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.849559 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.850072 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.850532 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.850883 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.851232 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.851575 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.851920 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.852258 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.852573 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 00:34:38.852882 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1110 00:34:38.902639 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.903575 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.904795 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.905853 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.906650 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.907408 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.908141 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.909008 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.909872 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 00:34:38.910739 140511505602368

Calibration data collection complete.
conv1.quantizer                         : TensorQuantizer(8bit per-tensor amax=2.1255 calibrator=HistogramCalibrator quant)
conv1.quantizer_w                       : TensorQuantizer(8bit per-tensor amax=0.1625 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer                : TensorQuantizer(8bit per-tensor amax=0.6284 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0804 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2148 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0355 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2408 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0544 calibrat

100%|███████████████████████████████████████████| 78/78 [48:33<00:00, 37.35s/it]


2913.2589172069565
Accuracy of the network on the 10000 test images: 82.6322 %
=== Runtime: mul8s_acc @ SA 16x16 ===
Pre-compiling systolic extensions for mul8s_acc...
  Mode: approx
    • linear (r16×c16)
    • conv2d (r16×c16)
Pre-compilation complete! Models will now load instantly.
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=16x16
✓ Loaded systolic conv2d kernel: mul8s_1L2H

W1110 01:30:09.889962 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.890531 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.890904 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.891244 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.891607 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.891974 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.892364 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.893326 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.893754 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.894114 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.894486 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1110 01:30:09.934780 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.935157 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.935513 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.935805 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.936089 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.936408 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.936837 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.937220 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.937512 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.937847 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 01:30:09.938169 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1110 01:30:09.987018 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.987565 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.988165 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.988955 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.989707 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.990428 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.991122 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.991805 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.992480 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 01:30:09.993412 140511505602368

Calibration data collection complete.
conv1.quantizer                         : TensorQuantizer(8bit per-tensor amax=2.1255 calibrator=HistogramCalibrator quant)
conv1.quantizer_w                       : TensorQuantizer(8bit per-tensor amax=0.1625 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer                : TensorQuantizer(8bit per-tensor amax=0.6102 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0804 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2118 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0355 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2406 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0544 calibrat

100%|███████████████████████████████████████████| 78/78 [48:11<00:00, 37.07s/it]

2891.5949548639474
Accuracy of the network on the 10000 test images: 82.5321 %
=== Runtime: mul8s_acc @ SA 32x8 ===
Pre-compiling systolic extensions for mul8s_acc...
  Mode: approx
    • linear (r32×c8)





    • conv2d (r32×c8)
Pre-compilation complete! Models will now load instantly.
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=32x8
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=3

W1110 02:26:07.914930 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.915636 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.916093 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.916422 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.916798 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.917155 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.917540 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.917905 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.918276 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.918670 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.919056 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1110 02:26:07.956231 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.956597 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.957030 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.957439 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.957890 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.958314 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.958611 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.958884 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.959146 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.959413 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 02:26:07.959674 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1110 02:26:08.010156 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.010806 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.011392 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.011968 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.012531 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.013099 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.013670 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.014227 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.014784 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 02:26:08.015330 140511505602368

Calibration data collection complete.
conv1.quantizer                         : TensorQuantizer(8bit per-tensor amax=2.1255 calibrator=HistogramCalibrator quant)
conv1.quantizer_w                       : TensorQuantizer(8bit per-tensor amax=0.1625 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer                : TensorQuantizer(8bit per-tensor amax=0.6144 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0804 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2157 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0355 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2420 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0544 calibrat

100%|███████████████████████████████████████████| 78/78 [48:20<00:00, 37.19s/it]

2900.727436061017
Accuracy of the network on the 10000 test images: 82.5321 %
=== Runtime: mul8s_acc @ SA 8x32 ===
Pre-compiling systolic extensions for mul8s_acc...
  Mode: approx
    • linear (r8×c32)





    • conv2d (r8×c32)
Pre-compilation complete! Models will now load instantly.
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8x32
✓ Loaded systolic conv2d kernel: mul8s_1L2H, exact=False, SA=8

W1110 03:22:22.894865 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.895585 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.895973 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.896524 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.896927 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.897255 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.897606 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.898334 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.898712 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.899238 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.899705 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1110 03:22:22.932885 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.933247 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.933597 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.933949 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.934287 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.934620 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.934948 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.935307 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.935695 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.936330 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator
W1110 03:22:22.936858 140511505602368 tensor_quantizer.py:173] Disable HistogramCalibrator

W1110 03:22:22.990981 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.991612 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.992199 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.992830 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.993439 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.994024 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.994629 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.995213 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.995835 140511505602368 tensor_quantizer.py:237] Load calibrated amax, shape=torch.Size([]).
W1110 03:22:22.996454 140511505602368

Calibration data collection complete.
conv1.quantizer                         : TensorQuantizer(8bit per-tensor amax=2.1255 calibrator=HistogramCalibrator quant)
conv1.quantizer_w                       : TensorQuantizer(8bit per-tensor amax=0.1625 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer                : TensorQuantizer(8bit per-tensor amax=0.6045 calibrator=HistogramCalibrator quant)
layer1.0.conv1.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0804 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2141 calibrator=HistogramCalibrator quant)
layer1.0.conv2.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0355 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer                : TensorQuantizer(8bit per-tensor amax=0.2437 calibrator=HistogramCalibrator quant)
layer1.0.conv3.quantizer_w              : TensorQuantizer(8bit per-tensor amax=0.0544 calibrat

100%|███████████████████████████████████████████| 78/78 [48:20<00:00, 37.18s/it]

2900.0329998249654
Accuracy of the network on the 10000 test images: 82.5220 %





Unnamed: 0,sa_rows,sa_cols,accuracy %,time_10_batches_sec,time_full_eval_sec
0,8,8,82.632212,372.673052,2913.261563
1,8,32,82.522035,366.461684,2900.035655
2,16,16,82.532051,369.97651,2891.597569
3,32,8,82.532051,374.428506,2900.730295
