# Quantized PTQ4SAM Inference (No PTQ)
Load a quantized SAM predictor from a `.pth` file and run inference/evaluation on a small subset of the COCO val2017 split (used here as the test set), without re-running the PTQ pipeline.

## 1. Import Libraries and Configure Paths

In [5]:
import os
import json
import glob
import subprocess
from pathlib import Path

# Paths
# REPO_ROOT may be overridden with the PTQ4SAM_ROOT environment variable so the
# notebook is not tied to one user's home directory; when the variable is unset
# (or empty) the original checkout location is used unchanged.
REPO_ROOT = os.environ.get("PTQ4SAM_ROOT") or "/home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM"
# Config file passed to the evaluation script via --config.
CONFIG_PATH = os.path.join(REPO_ROOT, "projects/configs/yolox/yolo_l-sam-vit-b.py")
# Saved quantized SAM predictor that the evaluation and ONNX export load.
PREDICTOR_PTH = os.path.join(REPO_ROOT, "result/yolox_l_vitb_w6a6/quant_sam_predictor.pth")
# Directory passed as --work-dir; eval_*.json reports and generated helper
# scripts are read from / written to it by later cells.
WORK_DIR = os.path.join(REPO_ROOT, "result/yolox_l_vitb_w6a6")
# Conda environment prefix used with `conda run -p`.
CONDA_ENV = os.path.join(REPO_ROOT, ".ptq4sam")

# Subset settings
# Number of leading COCO images kept in the evaluation subset.
NUM_IMAGES = 10
# Annotation file of the subset; its name encodes NUM_IMAGES.
SUBSET_ANN = os.path.join(WORK_DIR, f"instances_val2017_{NUM_IMAGES}.json")

print("Repo:", REPO_ROOT)
print("Predictor:", PREDICTOR_PTH)
print("Work dir:", WORK_DIR)
print("Subset ann:", SUBSET_ANN)


Repo: /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM
Predictor: /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/quant_sam_predictor.pth
Work dir: /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6
Subset ann: /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/instances_val2017_10.json


## 2. Locate the Quantized Predictor (`.pth`) and Build the Evaluation Command

In [None]:
import os
import urllib.request
import http.cookiejar

# Verify the quantized predictor exists (no PTQ here)
GDRIVE_FILE_ID = "103Y5SadTarO4obWzNnWb0jVDnVzrGn3j"


def _download_gdrive(file_id, dest_path, chunk_size=1024 * 1024):
    url = f"https://drive.google.com/uc?export=download&id={file_id}"
    cookie_jar = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie_jar))

    response = opener.open(url)
    token = None
    for c in cookie_jar:
        if c.name.startswith("download_warning"):
            token = c.value
            break
    if token:
        response = opener.open(url + f"&confirm={token}")

    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
    with open(dest_path, "wb") as f:
        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                break
            f.write(chunk)


# Make sure the pre-quantized predictor is on disk, fetching it from Google
# Drive on first use; the file must exist afterwards or the cell aborts.
predictor_ready = os.path.isfile(PREDICTOR_PTH)
if not predictor_ready:
    print("Quantized predictor not found. Downloading from Google Drive...")
    _download_gdrive(GDRIVE_FILE_ID, PREDICTOR_PTH)
    predictor_ready = os.path.isfile(PREDICTOR_PTH)

if not predictor_ready:
    raise FileNotFoundError(f"Missing predictor pth: {PREDICTOR_PTH}")

# Assemble the evaluation command: the launcher (python inside the ptq4sam conda
# env) followed by the test script and its flag/value pairs. The script path is
# relative, so the command has to be run from the repository root.
eval_options = {
    "--config": CONFIG_PATH,
    "--load_sam_path": PREDICTOR_PTH,
    "--work-dir": WORK_DIR,
    "--eval": "segm",
    # Point the test dataset at the small subset annotation file.
    "--cfg-options": f"data.test.ann_file={SUBSET_ANN}",
}
cmd = ["conda", "run", "-p", CONDA_ENV, "python", "ptq4sam/solver/test_quant.py"]
for flag, value in eval_options.items():
    cmd.extend((flag, value))

print("Eval command:", " ".join(cmd), sep="\n")


Eval command:
conda run -p /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/.ptq4sam python ptq4sam/solver/test_quant.py --config /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/projects/configs/yolox/yolo_l-sam-vit-b.py --load_sam_path /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/quant_sam_predictor.pth --work-dir /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6 --eval segm --cfg-options data.test.ann_file=/home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/instances_val2017_10.json


## 3. Prepare COCO val2017 and Build the Evaluation Subset

In [None]:
# Quick dataset sanity checks (COCO val2017) + make a 10-image subset
import urllib.request
import zipfile

ann_path = os.path.join(REPO_ROOT, "data/coco/annotations/instances_val2017.json")
img_dir = os.path.join(REPO_ROOT, "data/coco/val2017")

data_root = os.path.join(REPO_ROOT, "data/coco")
ann_zip = os.path.join(data_root, "annotations_trainval2017.zip")
val_zip = os.path.join(data_root, "val2017.zip")


def _fetch_and_extract(url, zip_path, extract_root, label):
    """Download ``url`` to ``zip_path`` (if absent) and unpack it into ``extract_root``.

    The archive is downloaded to ``zip_path + ".part"`` and renamed only once
    the transfer has finished, so an interrupted download is not mistaken for a
    complete archive on the next run.

    Args:
        url: Location of the zip archive.
        zip_path: Local path where the archive is cached.
        extract_root: Directory the archive is extracted into (created if needed).
        label: Human-readable name used in the progress messages.
    """
    os.makedirs(extract_root, exist_ok=True)
    if not os.path.isfile(zip_path):
        print(f"Downloading COCO {label}...")
        part_path = zip_path + ".part"
        try:
            urllib.request.urlretrieve(url, part_path)
            os.replace(part_path, zip_path)
        except BaseException:
            # Also covers KeyboardInterrupt: drop the partial file, then re-raise.
            if os.path.exists(part_path):
                os.remove(part_path)
            raise
    print(f"Extracting {label}...")
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(extract_root)


def _write_coco_subset(full_ann_path, subset_ann_path, num_images):
    """Write a COCO annotation file restricted to the first ``num_images`` images.

    Only annotations whose ``image_id`` belongs to a kept image are retained;
    ``info``, ``licenses`` and ``categories`` are copied as they are. The file
    is written to a temporary name and renamed into place so that a failed
    write does not leave a truncated JSON behind.
    """
    with open(full_ann_path, "r") as f:
        coco = json.load(f)
    images = coco.get("images", [])[:num_images]
    image_ids = {img["id"] for img in images}
    annotations = [ann for ann in coco.get("annotations", []) if ann.get("image_id") in image_ids]
    subset = {
        "info": coco.get("info", {}),
        "licenses": coco.get("licenses", []),
        "images": images,
        "annotations": annotations,
        "categories": coco.get("categories", []),
    }
    subset_dir = os.path.dirname(subset_ann_path)
    if subset_dir:
        os.makedirs(subset_dir, exist_ok=True)
    part_path = subset_ann_path + ".part"
    try:
        with open(part_path, "w") as f:
            json.dump(subset, f)
        os.replace(part_path, subset_ann_path)
    except BaseException:
        if os.path.exists(part_path):
            os.remove(part_path)
        raise


# Auto-download COCO val2017 + annotations if missing
if not os.path.isfile(ann_path):
    _fetch_and_extract(
        "http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
        ann_zip,
        data_root,
        "annotations",
    )

if not os.path.isdir(img_dir):
    _fetch_and_extract(
        "http://images.cocodataset.org/zips/val2017.zip",
        val_zip,
        data_root,
        "val2017 images",
    )

# Fail early if extraction did not produce the expected layout.
if not os.path.isfile(ann_path):
    raise FileNotFoundError(f"Missing COCO annotations: {ann_path}")
if not os.path.isdir(img_dir):
    raise FileNotFoundError(f"Missing COCO images dir: {img_dir}")

# The subset file is built once and reused on later runs.
if not os.path.isfile(SUBSET_ANN):
    _write_coco_subset(ann_path, SUBSET_ANN, NUM_IMAGES)

print("COCO val2017 looks present.")
print(f"Subset annotations ready: {SUBSET_ANN}")


COCO val2017 looks present.
Subset annotations ready: /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/instances_val2017_10.json


## 4. Run Inference and Collect Predictions

In [8]:
# Evaluate the saved quantized predictor (no PTQ). The command is launched from
# the repository root with PYTHONPATH set to that root, and both output streams
# are captured so they can be printed below.
run_env = dict(os.environ, PYTHONPATH=REPO_ROOT)

result = subprocess.run(
    cmd,
    cwd=REPO_ROOT,
    env=run_env,
    capture_output=True,
    text=True,
)
print("Return code:", result.returncode)
for stream_label, stream_text in (("STDOUT", result.stdout), ("STDERR", result.stderr)):
    print(f"--- {stream_label} ---")
    print(stream_text)

# Stop the notebook here when the evaluation process reported a failure.
if result.returncode != 0:
    raise RuntimeError("Inference failed; see logs above.")


Return code: 0
--- STDOUT ---
projects.instance_segment_anything
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
loading annotations into memory...
Done (t=15.99s)
creating index...
index created!
the length of cali data is 32.
load checkpoint from local path: ./ckpt/yolox_l.pth
[                                                  ] 0/10, elapsed: 0s, ETA:
[>>>                               ] 1/10, 0.1 task/s, elapsed: 7s, ETA:    64s
[>>>>>>                            ] 2/10, 0.3 task/s, elapsed: 8s, ETA:    31s
[>>>>>>>>>>                        ] 3/10, 0.4 task/s, elapsed: 8s, ETA:    20s
[>>>>>>>>>>>>>                     ] 4/10, 0.4 task/s, elapsed: 9s, ETA:    14s
[>>>>>>>>>>>>>>>>                 ] 5/10, 0.5 task/s, elapsed: 10s, ETA:    10s
[>>>>>>>>>>>>>>>>>>>              ] 6/10, 0.6 task/s, elapsed: 10s, ETA:     7s
[>>>>>>>>>>>>>>>>>>>>>>>          ] 7/10, 0.6 task/s, elapsed: 11s, ETA:     5s
[>>>>>>>>>>>>>>>>>>>>>>>>>>       ] 8/10, 0.7 ta

## 5. Compute Evaluation Metrics

In [9]:
# Read the newest evaluation report from the work dir. "Newest" means the last
# file name in sorted order (names look like eval_20260211_190435.json).
json_files = glob.glob(os.path.join(WORK_DIR, "eval_*.json"))
json_files.sort()
if len(json_files) == 0:
    raise FileNotFoundError(f"No eval_*.json found in {WORK_DIR}")

latest_json = json_files[-1]
with open(latest_json, "r") as f:
    metrics = json.load(f)

print("Latest eval:", latest_json)
print(metrics)


Latest eval: /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/eval_20260211_190435.json
{'config': '/home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/projects/configs/yolox/yolo_l-sam-vit-b.py', 'metric': {'segm_mAP': 0.368, 'segm_mAP_50': 0.601, 'segm_mAP_75': 0.367, 'segm_mAP_s': 0.252, 'segm_mAP_m': 0.534, 'segm_mAP_l': 0.435, 'segm_mAP_copypaste': '0.368 0.601 0.367 0.252 0.534 0.435'}}


## 6. Preview Sample Predictions

In [10]:
# Nothing is rendered in this cell: test_quant.py writes the previews itself
# when it is started with --show-dir, for example
#   --show-dir result/yolox_l_vitb_w6a6/vis
print("To visualize results, re-run with", "--show-dir result/yolox_l_vitb_w6a6/vis")


To visualize results, re-run with --show-dir result/yolox_l_vitb_w6a6/vis


## 7. Export ONNX and Run with ONNX Runtime
This exports the quantized SAM image encoder from the saved predictor. (Exporting the full SAM pipeline requires a custom wrapper with fixed prompt inputs.)

In [14]:
import os
import subprocess
import textwrap

onnx_path = os.path.join(WORK_DIR, "quant_sam_image_encoder.onnx")
export_script = os.path.join(WORK_DIR, "export_onnx_image_encoder.py")

# Source of a standalone script that is run inside the ptq4sam conda env.
# Paths are embedded with !r so they are valid Python literals even if they
# contain quotes or backslashes; braces meant for the generated code are doubled.
export_code = f"""
import torch
import os
from ptq4sam.quantization.fake_quant import QuantizeBase

predictor_pth = {PREDICTOR_PTH!r}
onnx_path = {onnx_path!r}

# Reduce CPU memory pressure and enforce float32
torch.set_num_threads(1)
torch.set_default_dtype(torch.float32)

torch.set_grad_enabled(False)

# NOTE: torch.load unpickles a whole predictor object here, which can execute
# arbitrary code; only load checkpoints that come from a trusted source.
predictor = torch.load(predictor_pth, map_location="cpu")
image_encoder = predictor.model.image_encoder
image_encoder = image_encoder.float()
image_encoder.eval()

# Ensure required quant buffers exist for ONNX export
for m in image_encoder.modules():
    if isinstance(m, QuantizeBase):
        if not hasattr(m, "scale"):
            m.scale = torch.nn.Parameter(torch.tensor([1.0], dtype=torch.float32))
        else:
            m.scale.data = m.scale.data.float()
        if not hasattr(m, "zero_point"):
            m.zero_point = torch.tensor([0], dtype=torch.int64)
        else:
            m.zero_point = m.zero_point.to(torch.int64)

# SAM image encoder expects 1024x1024 (positional embedding size)
dummy = torch.randn(1, 3, 1024, 1024, dtype=torch.float32)

# Export image encoder only. The keyword arguments shared by both export
# attempts are defined once so the two calls cannot drift apart.
export_kwargs = dict(
    input_names=["image"],
    output_names=["embeddings"],
    opset_version=12,
    do_constant_folding=False,
    export_params=True,
    keep_initializers_as_inputs=False,
    # NOTE(review): height/width are declared dynamic although the dummy input
    # is fixed at 1024x1024 and the encoder is said to expect that size;
    # confirm other resolutions actually work before relying on these axes.
    dynamic_axes={{
        "image": {{2: "height", 3: "width"}},
        "embeddings": {{2: "grid_h", 3: "grid_w"}},
    }},
)
try:
    # Use external data to reduce peak RAM usage
    torch.onnx.export(
        image_encoder,
        dummy,
        onnx_path,
        use_external_data_format=True,
        **export_kwargs,
    )
except TypeError:
    # Fallback for PyTorch versions whose export() does not accept the
    # external-data flag
    torch.onnx.export(image_encoder, dummy, onnx_path, **export_kwargs)
print("Saved ONNX:", onnx_path)
"""

with open(export_script, "w") as f:
    f.write(textwrap.dedent(export_code))

# Run export inside ptq4sam env
export_cmd = [
    "conda", "run", "-p", CONDA_ENV,
    "python", export_script,
]

# The generated script imports ptq4sam, so the repository root must be importable.
run_env = os.environ.copy()
run_env["PYTHONPATH"] = REPO_ROOT

print("Export command:", " ".join(export_cmd))
result = subprocess.run(export_cmd, cwd=REPO_ROOT, env=run_env, capture_output=True, text=True)
print("Return code:", result.returncode)
print("--- STDOUT ---")
print(result.stdout)
print("--- STDERR ---")
print(result.stderr)

if result.returncode != 0:
    raise RuntimeError("ONNX export failed; see logs above.")


Export command: conda run -p /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/.ptq4sam python /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/export_onnx_image_encoder.py
Return code: 0
--- STDOUT ---
Saved ONNX: /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/quant_sam_image_encoder.onnx


--- STDERR ---
  if pad_h > 0 or pad_w > 0:
  x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C)
  self.scale.data.clamp_(min=self.eps.item())
  X, self.scale, self.zero_point.item(), self.quant_min, self.quant_max, grad_factor)
  max_rel_dist = int(2 * max(q_size, k_size) - 1)
  max_rel_dist = int(2 * max(q_size, k_size) - 1)
  if rel_pos.shape[0] != max_rel_dist:
  q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0)
  k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0)
  relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_siz

In [15]:
import os
import subprocess
import textwrap

onnx_path = os.path.join(WORK_DIR, "quant_sam_image_encoder.onnx")
run_script = os.path.join(WORK_DIR, "run_onnx_image_encoder.py")

# Source of a standalone script that is run inside the ptq4sam conda env.
# The ONNX path is embedded with !r so it is a valid Python literal; every
# brace that belongs to the generated code (including the one in the error
# message) is doubled so it is not interpolated by this outer f-string.
run_code = f"""
import numpy as np
import onnxruntime as ort
import os

onnx_path = {onnx_path!r}
if not os.path.isfile(onnx_path):
    raise FileNotFoundError(f"Missing ONNX file: {{onnx_path}}")

# Single-threaded CPU session
sess_opts = ort.SessionOptions()
sess_opts.intra_op_num_threads = 1
sess_opts.inter_op_num_threads = 1

sess = ort.InferenceSession(onnx_path, sess_options=sess_opts, providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name

# Random input: this only checks that the graph runs and reports output shapes
image = np.random.randn(1, 3, 1024, 1024).astype(np.float32)
outputs = sess.run(None, {{input_name: image}})
print("ONNX output shapes:", [o.shape for o in outputs])
"""

with open(run_script, "w") as f:
    f.write(textwrap.dedent(run_code))

run_cmd = [
    "conda", "run", "-p", CONDA_ENV,
    "python", run_script,
]

print("ONNXRuntime command:", " ".join(run_cmd))
result = subprocess.run(run_cmd, cwd=REPO_ROOT, capture_output=True, text=True)
print("Return code:", result.returncode)
print("--- STDOUT ---")
print(result.stdout)
print("--- STDERR ---")
print(result.stderr)

if result.returncode != 0:
    raise RuntimeError("ONNXRuntime inference failed; see logs above.")


ONNXRuntime command: conda run -p /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/.ptq4sam python /home/jovyan/video-proj-storage/mushfiq_files/PTQ4SAM/PTQ4SAM/result/yolox_l_vitb_w6a6/run_onnx_image_encoder.py
Return code: 0
--- STDOUT ---
ONNX output shapes: [(1, 256, 64, 64)]


--- STDERR ---

