# Post-Training Quantization (PTQ)

This tutorial demonstrates how to quantize a trained YOLO-NAS model to INT8 using PTQ with calibration data.

## Prerequisites

You need a trained checkpoint and calibration data (typically the validation set).

In [None]:
from pathlib import Path
from modern_yolonas import yolo_nas_s
from modern_yolonas.data import YOLODetectionDataset, load_dataset_config
from modern_yolonas.data.transforms import Compose, LetterboxResize, Normalize
from modern_yolonas.training import extract_model_state_dict, DetectionDataModule
from modern_yolonas.quantization import (
    prepare_model_ptq, run_calibration, convert_quantized, export_quantized_onnx,
)

# Load trained model
# Built with `pretrained=True`; uncomment the two lines below to load
# weights extracted from a training checkpoint into the same model.
model = yolo_nas_s(pretrained=True)  # or load from checkpoint
# sd = extract_model_state_dict("runs/hardhat/last.ckpt")
# model.load_state_dict(sd)
model.eval()

# Setup calibration data
# Replace the placeholder "path/to/data.yaml" with a real dataset config and
# uncomment this section to build `val_loader`, which the calibration call
# further down takes as its data source. `val_ds` is passed as both the train
# and val dataset; only `val_dataloader()` is used here.
# cfg = load_dataset_config("path/to/data.yaml")
# val_transforms = Compose([LetterboxResize(target_size=640), Normalize()])
# val_ds = YOLODetectionDataset(root=cfg.root, split=cfg.val_split, transforms=val_transforms)
# data_module = DetectionDataModule(train_dataset=val_ds, val_dataset=val_ds, batch_size=8, num_workers=2)
# val_loader = data_module.val_dataloader()

# Prepare model for PTQ (inserts observers)
# `ptq_model` is the object the following cell converts to INT8.
ptq_model = prepare_model_ptq(model)
print("PTQ model prepared with observers")

# Calibrate on validation set
# NOTE(review): as shipped, calibration is commented out, so the next cell
# would convert a model whose observers have presumably seen no data.
# Uncomment this (together with the calibration-data section above) before
# running the conversion cell — TODO confirm how convert_quantized behaves
# on an uncalibrated model.
# run_calibration(ptq_model, val_loader, num_batches=20, device="cpu")
# print("Calibration complete")

In [None]:
# Turn the observed model from the previous cell into its quantized form
quantized_model = convert_quantized(ptq_model)
print("Model converted to INT8")

# Write the quantized network to an ONNX file in the working directory
ptq_onnx_path = "model_ptq_int8.onnx"
export_quantized_onnx(
    quantized_model,
    ptq_onnx_path,
    input_size=640,
)

# Report the output location and its on-disk size (bytes / 1024^2)
bytes_per_mb = 1024 * 1024
ptq_size = Path(ptq_onnx_path).stat().st_size / bytes_per_mb
print(f"\nPTQ ONNX exported to: {ptq_onnx_path}")
print(f"PTQ model size: {ptq_size:.1f} MB")