In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# ---------- Toy CNN ----------
class SmallCNN(nn.Module):
    """Toy conv -> batch-norm -> ReLU -> linear classifier for the PTQ demos.

    The submodule names ``conv``, ``bn``, ``relu`` and ``fc`` are kept stable
    because the fusion code in this notebook addresses them by name.

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the expected input, either a single int
            (square images) or a ``(height, width)`` pair. It is only used to
            size the linear head; the default of 224 reproduces the original
            ``16 * 112 * 112`` input features.

    Raises:
        ValueError: If a spatial dimension is smaller than 1.
    """

    def __init__(self, num_classes=10, input_size=224):
        super().__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        if height < 1 or width < 1:
            raise ValueError(f"input_size must be positive, got {input_size!r}")

        self.conv = nn.Conv2d(3, 16, 3, stride=2, padding=1)  # 224x224 -> 112x112
        self.bn   = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Size the head from the conv output instead of hard-coding 112 * 112.
        flat_features = 16 * self._conv_out(height) * self._conv_out(width)
        self.fc   = nn.Linear(flat_features, num_classes)

    @staticmethod
    def _conv_out(size):
        """Output length along one axis of the 3x3, stride-2, padding-1 conv."""
        return (size + 2 * 1 - 3) // 2 + 1

    def forward(self, x):
        x = self.relu(self.bn(self.conv(x)))
        x = x.flatten(1)
        return self.fc(x)


def calib_loader(n_batches=32, bs=4, shape=(3, 224, 224), device="cpu"):
    """Yield ``n_batches`` synthetic calibration batches.

    Each item is an ``(inputs, None)`` pair, where ``inputs`` is a standard-normal
    tensor of shape ``(bs, *shape)`` created on ``device``. The second element is
    a placeholder for labels, which calibration does not use.
    """
    batch_shape = (bs, *shape)
    for _step in range(n_batches):
        inputs = torch.randn(batch_shape, device=device)
        yield inputs, None


def fuse_smallcnn(m: nn.Module):
    """Put ``m`` in eval mode and fuse its ``conv``/``bn``/``relu`` submodules in place.

    Returns the same module object that was passed in.
    """
    fusion_groups = [["conv", "bn", "relu"]]
    m.eval()  # fusion is performed on the eval-mode module
    torch.ao.quantization.fuse_modules(m, fusion_groups, inplace=True)
    return m


# -------------------- A) Eager PTQ --------------------
def eager_ptq(model: nn.Module):
    """Post-training static quantization of ``model`` with the eager-mode API.

    The model is moved to CPU, switched to eval mode, fused in place by
    ``fuse_smallcnn``, wrapped between a quant stub and a dequant stub,
    calibrated on 32 batches from ``calib_loader`` and then converted in place.

    Note: this sets the process-wide ``torch.backends.quantized.engine`` to
    "fbgemm".

    Returns:
        The converted wrapper module (submodules ``q``, ``m`` and ``dq``).
    """
    import torch.ao.quantization as tq

    torch.backends.quantized.engine = "fbgemm"

    class QuantWrapper(nn.Module):
        """Brackets the wrapped module with quantize / dequantize stubs."""

        def __init__(self, m):
            super().__init__()
            self.q = tq.QuantStub()
            self.m = m
            self.dq = tq.DeQuantStub()

        def forward(self, x):
            quantized_in = self.q(x)
            inner_out = self.m(quantized_in)
            return self.dq(inner_out)

    float_model = model.to("cpu").eval()
    fuse_smallcnn(float_model)

    qmodel = QuantWrapper(float_model)
    qmodel.qconfig = tq.get_default_qconfig(torch.backends.quantized.engine)

    # Insert observers, then run calibration data through them.
    tq.prepare(qmodel, inplace=True)
    with torch.no_grad():
        for batch, _unused in calib_loader(n_batches=32):
            qmodel(batch)

    tq.convert(qmodel, inplace=True)
    return qmodel


# -------------------- B) FX PTQ --------------------
def fx_ptq(model: nn.Module):
    """Post-training static quantization of ``model`` with the FX graph-mode API.

    The model is moved to CPU and put in eval mode. If it exposes ``conv``,
    ``bn`` and ``relu`` submodules of the expected types they are fused in
    place. The model is then prepared with the default qconfig of the active
    engine, calibrated on 32 batches from ``calib_loader`` and converted.

    Note: this sets the process-wide ``torch.backends.quantized.engine`` to
    "fbgemm".

    Returns:
        The module produced by ``convert_fx``.
    """
    from torch.ao.quantization import get_default_qconfig
    from torch.ao.quantization.quantize_fx import QConfigMapping, convert_fx, prepare_fx

    torch.backends.quantized.engine = "fbgemm"
    float_model = model.to("cpu").eval()

    # Only fuse when the model really has the conv/bn/relu trio.
    expected_layers = (("conv", nn.Conv2d), ("bn", nn.BatchNorm2d), ("relu", nn.ReLU))
    if all(isinstance(getattr(float_model, attr, None), kind) for attr, kind in expected_layers):
        torch.ao.quantization.fuse_modules(float_model, [["conv", "bn", "relu"]], inplace=True)

    mapping = QConfigMapping().set_global(
        get_default_qconfig(torch.backends.quantized.engine)
    )
    sample_inputs = (torch.randn(1, 3, 224, 224),)
    observed = prepare_fx(float_model, mapping, example_inputs=sample_inputs)

    with torch.no_grad():
        for batch, _unused in calib_loader(n_batches=32):
            observed(batch)

    return convert_fx(observed)






# ---------- run demo ----------
if __name__ == "__main__":
    import copy

    torch.manual_seed(0)
    base = SmallCNN()

    # Eager-mode flow: quantize a private copy so `base` stays a float model.
    print("\nEager PTQ:")
    qm_eager = eager_ptq(copy.deepcopy(base))
    eager_out = qm_eager(torch.randn(1, 3, 224, 224))
    print("OK:", eager_out.shape)

    # FX graph-mode flow on another private copy.
    print("\nFX PTQ:")
    qm_fx = fx_ptq(copy.deepcopy(base))
    fx_out = qm_fx(torch.randn(1, 3, 224, 224))
    print("OK:", fx_out.shape)

    # Only the header is printed; no PT2E flow follows in this cell.
    print("\nPT2E PTQ:")




Eager PTQ:




OK: torch.Size([1, 10])

FX PTQ:
OK: torch.Size([1, 10])

PT2E PTQ:


In [2]:

from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e, convert_pt2e


In [6]:
import os, time
import torch
import torch.nn as nn

# Make timings more stable by using about half of the available CPUs.
# os.cpu_count() may return None when the count cannot be determined, so fall back to 1.
torch.set_num_threads(max(1, (os.cpu_count() or 1) // 2))
# Prefer "fbgemm" (x86); fall back to "qnnpack" (e.g. on ARM) when fbgemm is unavailable.
torch.backends.quantized.engine = (
    "fbgemm" if "fbgemm" in torch.backends.quantized.supported_engines else "qnnpack"
)

# ---------------- Model ----------------
class SmallCNN(nn.Module):
    """Toy conv -> batch-norm -> ReLU -> linear classifier used for benchmarking.

    The submodule names ``conv``, ``bn``, ``relu`` and ``fc`` are kept stable
    because ``fuse_smallcnn`` and ``fx_ptq`` address them by name.

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the expected input, either a single int
            (square images) or a ``(height, width)`` pair. It is only used to
            size the linear head; the default of 224 reproduces the original
            ``16 * 112 * 112`` input features.

    Raises:
        ValueError: If a spatial dimension is smaller than 1.
    """

    def __init__(self, num_classes=10, input_size=224):
        super().__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        if height < 1 or width < 1:
            raise ValueError(f"input_size must be positive, got {input_size!r}")

        self.conv = nn.Conv2d(3, 16, 3, stride=2, padding=1)  # 112x112 for 224x224 input
        self.bn   = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Size the head from the conv output instead of hard-coding 112 * 112.
        flat_features = 16 * self._conv_out(height) * self._conv_out(width)
        self.fc   = nn.Linear(flat_features, num_classes)

    @staticmethod
    def _conv_out(size):
        """Output length along one axis of the 3x3, stride-2, padding-1 conv."""
        return (size + 2 * 1 - 3) // 2 + 1

    def forward(self, x):
        x = self.relu(self.bn(self.conv(x)))
        x = x.flatten(1)
        return self.fc(x)

def fuse_smallcnn(m: nn.Module):
    """Switch ``m`` to eval mode and fuse conv-bn-relu in place; return ``m``.

    The fused conv+relu form is the quantization-friendly pattern.
    """
    layers_to_fuse = ["conv", "bn", "relu"]
    m.eval()
    torch.ao.quantization.fuse_modules(m, [layers_to_fuse], inplace=True)
    return m

# ------------- Calibration data -------------
def calib_loader(n_batches=32, bs=4, shape=(3, 224, 224), device="cpu"):
    """Generate ``n_batches`` random calibration batches as ``(inputs, None)`` pairs.

    ``inputs`` is drawn from a standard normal distribution with shape
    ``(bs, *shape)`` on ``device``; ``None`` stands in for the unused labels.
    """
    full_shape = (bs, *shape)
    for _index in range(n_batches):
        random_batch = torch.randn(full_shape, device=device)
        yield random_batch, None

# ------------- FX static PTQ -------------
def fx_ptq(model: nn.Module):
    """Statically quantize ``model`` with FX graph-mode post-training quantization.

    Steps: move to CPU / eval mode, fuse conv-bn-relu via ``fuse_smallcnn`` when
    the model has those submodules, prepare with the default qconfig of the
    currently selected quantized engine, calibrate the observers on 32 batches
    from ``calib_loader``, then convert.

    Returns:
        The module produced by ``convert_fx``.
    """
    from torch.ao.quantization import get_default_qconfig
    from torch.ao.quantization.quantize_fx import QConfigMapping, convert_fx, prepare_fx

    float_model = model.to("cpu").eval()

    # Optional fusion (important for the best int8 patterns).
    has_conv = isinstance(getattr(float_model, "conv", None), nn.Conv2d)
    has_bn = has_conv and isinstance(getattr(float_model, "bn", None), nn.BatchNorm2d)
    has_relu = has_bn and isinstance(getattr(float_model, "relu", None), nn.ReLU)
    if has_relu:
        fuse_smallcnn(float_model)

    engine_qconfig = get_default_qconfig(torch.backends.quantized.engine)
    mapping = QConfigMapping().set_global(engine_qconfig)

    sample_inputs = (torch.randn(1, 3, 224, 224),)
    observed = prepare_fx(float_model, mapping, example_inputs=sample_inputs)

    # Calibrate observers.
    with torch.no_grad():
        for batch, _unused in calib_loader(n_batches=32):
            observed(batch)

    return convert_fx(observed)

# ------------- Benchmark -------------
@torch.inference_mode()
def benchmark(model: nn.Module, bs=8, iters=100, warmup=10, device="cpu", shape=(3, 224, 224)):
    """Measure mean forward latency and throughput of ``model`` on random input.

    Args:
        model: Module to time; it is moved to ``device`` and put in eval mode.
        bs: Batch size of the synthetic input.
        iters: Number of timed forward passes; must be at least 1.
        warmup: Number of untimed forward passes run before timing.
        device: Device on which the model and the input are placed.
        shape: Per-sample input shape; the input tensor is ``(bs, *shape)``.

    Returns:
        ``(latency_ms, throughput)``: mean milliseconds per forward pass and
        samples processed per second.

    Raises:
        ValueError: If ``iters`` is smaller than 1.
    """
    # Validate first: iters == 0 would divide by zero, negative values give nonsense.
    if iters < 1:
        raise ValueError(f"iters must be >= 1, got {iters}")

    target = torch.device(device)
    model = model.to(target).eval()
    x = torch.randn(bs, *shape, device=target)

    def _sync():
        # CUDA kernels run asynchronously; wait for them so the timer sees real work.
        if target.type == "cuda":
            torch.cuda.synchronize(target)

    # warmup
    for _ in range(warmup):
        model(x)
    _sync()

    # measure
    t0 = time.perf_counter()
    for _ in range(iters):
        model(x)
    _sync()
    dt = time.perf_counter() - t0

    latency_ms = (dt / iters) * 1000.0
    throughput = (bs * iters) / dt
    return latency_ms, throughput

if __name__ == "__main__":
    import copy

    torch.manual_seed(0)

    base = SmallCNN().eval().to("cpu")

    # FP32
    fp32_lat, fp32_ips = benchmark(base, bs=8, iters=100, warmup=10)
    print(f"FP32   : {fp32_lat:7.2f} ms/iter   |  {fp32_ips:8.1f} imgs/s")

    # INT8 (FX PTQ): quantize a deep copy of `base` so both models share the same
    # weights. fx_ptq fuses its argument in place, so `base` itself must not be passed.
    int8_model = fx_ptq(copy.deepcopy(base))
    int8_lat, int8_ips = benchmark(int8_model, bs=8, iters=100, warmup=10)
    print(f"INT8 PTQ: {int8_lat:7.2f} ms/iter   |  {int8_ips:8.1f} imgs/s")

    # Quick sanity: same output shape, checked on the same input for both models.
    probe = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        out_fp32 = base(probe)
        out_int8 = int8_model(probe)
    print("Output shapes:", out_fp32.shape, out_int8.shape)


FP32   :   10.01 ms/iter   |     798.9 imgs/s
INT8 PTQ:    8.35 ms/iter   |     957.9 imgs/s
Output shapes: torch.Size([1, 10]) torch.Size([1, 10])


In [4]:
from checkpoints_utils.checkpoint_load import load_model_from_checkpoint
from  .pruning.tensor_prunning  import global_unstructured_prune,make_pruning_permanent
from .report_utils.measure_latency import measure_latency
from .report_utils.report_sparsity import report_sparsity
from .pruning.channel_pruning import prune_model_channels
from .pruning.tensor_prunning import threshold_prune
from  omegaconf import OmegaConf
from .pruning.channel_pruning import progressive_channel_pruning
# Hyper-parameter file to load into `cfg`.
HPARAMS_PATH = "/home/temp/MyDir/Projects/aspdfpwjfpwejfwpefwef/Drons/tb_logs_big/efficientnet/version_6/hparams.yaml"
cfg = OmegaConf.load(HPARAMS_PATH)

# NOTE(review): the hparams path points at version_6 while the checkpoint path
# points at version_0 - confirm that this pairing is intended.
CHECKPOINT_PATH = "tb_logs_big/efficientnet/version_0/checkpoints/epoch=1-step=88.ckpt"
model = load_model_from_checkpoint(checkpoint_path=CHECKPOINT_PATH)

ModuleNotFoundError: No module named 'Drons'

Значит, EfficientNet с квантизацией по скорости как MobileNet с имитацией квантизации; если получится выбить то же качество на MobileNet, что и на EfficientNet, то победа.