# MobileNetV2: Load, Preprocess, and Run Inference

In [None]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import requests
from io import BytesIO
import os
import numpy as np
import time

In [None]:
model = models.mobilenet_v2(pretrained=True)
model.eval()

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 73.9MB/s]


MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [None]:
url = "https://upload.wikimedia.org/wikipedia/commons/2/26/YellowLabradorLooking_new.jpg"
response = requests.get(url)
img = Image.open(BytesIO(response.content))
img.show()

In [None]:
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
input_tensor = preprocess(img)
input_batch = input_tensor.unsqueeze(0)

In [None]:
with torch.no_grad():
    output = model(input_batch)
predicted_idx = torch.argmax(output[0]).item()

In [None]:
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt -O imagenet_classes.txt
with open("imagenet_classes.txt") as f:
    labels = [line.strip() for line in f.readlines()]
print(f"Predicted label: {labels[predicted_idx]}")

--2025-04-13 20:46:33--  https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10472 (10K) [text/plain]
Saving to: ‘imagenet_classes.txt’


2025-04-13 20:46:33 (16.0 MB/s) - ‘imagenet_classes.txt’ saved [10472/10472]

Predicted label: Labrador retriever


# Phase 2: Convert MobileNetV2 to ONNX and Analyze

In [11]:
try:
    import onnx
except ImportError:
    !pip install onnx
    import onnx


Collecting onnx
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.17.0


In [12]:
dummy_input = torch.randn(1, 3, 224, 224)
onnx_filename = "mobilenetv2.onnx"
torch.onnx.export(model, dummy_input, onnx_filename,
                  input_names=["input"], output_names=["output"],
                  dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
                  opset_version=11)
print("Exported model to ONNX format.")

Exported model to ONNX format.


In [14]:
file_size_mb = os.path.getsize(onnx_filename) / 1e6
print(f"ONNX model size: {file_size_mb:.2f} MB")

ONNX model size: 13.99 MB


In [15]:
!pip install onnxruntime
import onnxruntime as ort
ort_session = ort.InferenceSession(onnx_filename)

Collecting onnxruntime
  Downloading onnxruntime-1.21.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.21.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m66.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected pack

In [16]:
onnx_input = input_batch.numpy()
onnx_output = ort_session.run(None, {"input": onnx_input})
onnx_pred_idx = np.argmax(onnx_output[0])
print(f"ONNX Predicted label: {labels[onnx_pred_idx]}")

ONNX Predicted label: Labrador retriever


In [17]:
# Inference time comparison
start = time.perf_counter()
with torch.no_grad():
    _ = model(input_batch)
end = time.perf_counter()
print(f"PyTorch inference time: {end - start:.4f} sec")
start = time.perf_counter()
_ = ort_session.run(None, {"input": onnx_input})
end = time.perf_counter()
print(f"ONNX inference time: {end - start:.4f} sec")

PyTorch inference time: 0.1750 sec
ONNX inference time: 0.0693 sec


# Phase 3: Quantize ONNX Model for Edge Deployment

In [23]:
from onnxruntime.quantization import quantize_dynamic, QuantType

quantized_model_path = "mobilenetv2_quantized.onnx"
quantize_dynamic(onnx_filename, quantized_model_path, weight_type=QuantType.QUInt8)

print("Quantization complete.")




Quantization complete.


In [19]:
quantized_size_mb = os.path.getsize(quantized_model_path) / 1e6
print(f"Quantized ONNX model size: {quantized_size_mb:.2f} MB")

Quantized ONNX model size: 3.69 MB


In [24]:
# Load and run quantized model
quantized_session = ort.InferenceSession(quantized_model_path)

start = time.perf_counter()
_ = quantized_session.run(None, {"input": onnx_input})
end = time.perf_counter()

print(f"Quantized ONNX inference time: {end - start:.4f} sec")


Quantized ONNX inference time: 0.0715 sec


In [25]:
print("✅ All steps complete!")
print(f"Final Quantized Model Size: {quantized_size_mb:.2f} MB")
print(f"Final Inference Time (Quantized): {end - start:.4f} sec")
print(f"Predicted Label: {labels[onnx_pred_idx]}")

✅ All steps complete!
Final Quantized Model Size: 3.69 MB
Final Inference Time (Quantized): 0.0715 sec
Predicted Label: Labrador retriever
