In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install torch torchvision onnx onnxruntime psutil pillow numpy




In [3]:
import torch
import torchvision.models as models

# Load pre-trained MobileNetV2
model = models.mobilenet_v2(pretrained=True)

# Set model to evaluation mode
model.eval()

print("MobileNetV2 loaded successfully")




MobileNetV2 loaded successfully


In [4]:
import torchvision.transforms as transforms
from PIL import Image

def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])

    img = Image.open(image_path).convert("RGB")
    return transform(img).unsqueeze(0)

# Mount Google Drive (run once)
from google.colab import drive
drive.mount('/content/drive')

# Your image path
image_path = "/content/drive/MyDrive/sample.jpg"

# Load input image
input_tensor = preprocess_image(image_path)

print("Image preprocessed successfully")
print("Input shape:", input_tensor.shape)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Image preprocessed successfully
Input shape: torch.Size([1, 3, 224, 224])


In [5]:
import time
import os
import psutil
import numpy as np

def measure_inference_time(model, input_tensor, runs=10):
    times = []
    with torch.no_grad():
        for _ in range(runs):
            start = time.time()
            _ = model(input_tensor)
            end = time.time()
            times.append((end - start) * 1000)  # ms
    return np.mean(times)


In [6]:
process = psutil.Process(os.getpid())

# Memory before inference
mem_before = process.memory_info().rss / (1024 * 1024)

# Measure inference time
original_inference_time = measure_inference_time(model, input_tensor)

# Memory after inference
mem_after = process.memory_info().rss / (1024 * 1024)

print(f"Original Inference Time (ms): {original_inference_time:.2f}")
print(f"Original Memory Usage (MB): {mem_after - mem_before:.2f}")


Original Inference Time (ms): 50.78
Original Memory Usage (MB): 16.21


In [7]:
# Save model to measure size
torch.save(model.state_dict(), "original_model.pth")

original_model_size = os.path.getsize("original_model.pth") / (1024 * 1024)

print(f"Original Model Size (MB): {original_model_size:.2f}")


Original Model Size (MB): 13.60


In [8]:
print("Accuracy: ~71.8% (ImageNet Top-1, MobileNetV2)")


Accuracy: ~71.8% (ImageNet Top-1, MobileNetV2)


In [9]:
# Convert model to FP16
model_fp16 = model.half()

# Convert input tensor to FP16
input_tensor_fp16 = input_tensor.half()

print("FP16 quantization applied successfully")


FP16 quantization applied successfully


In [10]:
# Memory before optimized inference
process = psutil.Process(os.getpid())
mem_before_opt = process.memory_info().rss / (1024 * 1024)

# Measure inference time for FP16 model
optimized_inference_time = measure_inference_time(model_fp16, input_tensor_fp16)

# Memory after optimized inference
mem_after_opt = process.memory_info().rss / (1024 * 1024)

print(f"Optimized Inference Time (ms): {optimized_inference_time:.2f}")
print(f"Optimized Memory Usage (MB): {mem_after_opt - mem_before_opt:.2f}")


Optimized Inference Time (ms): 697.91
Optimized Memory Usage (MB): 9.09


In [11]:
# Save optimized model to measure size
torch.save(model_fp16.state_dict(), "optimized_model_fp16.pth")

optimized_model_size = os.path.getsize("optimized_model_fp16.pth") / (1024 * 1024)

print(f"Optimized Model Size (MB): {optimized_model_size:.2f}")


Optimized Model Size (MB): 6.85


In [13]:
!pip install --upgrade torch torchvision onnx onnxruntime




In [16]:
!pip install torch==2.0.1 torchvision==0.15.2 onnx==1.14.1 onnxruntime==1.15.1 onnxscript


[31mERROR: Could not find a version that satisfies the requirement torch==2.0.1 (from versions: 2.2.0, 2.2.1, 2.2.2, 2.3.0, 2.3.1, 2.4.0, 2.4.1, 2.5.0, 2.5.1, 2.6.0, 2.7.0, 2.7.1, 2.8.0, 2.9.0, 2.9.1)[0m[31m
[0m[31mERROR: No matching distribution found for torch==2.0.1[0m[31m
[0m

In [17]:
model_fp16
input_tensor_fp16


tensor([[[[0.3823, 0.4851, 0.2966,  ..., 1.2217, 1.0674, 1.0156],
          [0.3481, 0.4680, 0.5020,  ..., 1.2383, 1.0498, 0.9990],
          [0.9473, 1.0674, 1.0674,  ..., 1.2217, 1.0840, 1.0332],
          ...,
          [0.7593, 0.9644, 0.8618,  ..., 1.2217, 1.2217, 1.2725],
          [0.9302, 0.9644, 0.9473,  ..., 0.9644, 1.2725, 1.0840],
          [1.0156, 0.8447, 1.1533,  ..., 1.0156, 1.2725, 0.5195]],

         [[0.3628, 0.4502, 0.2402,  ..., 1.1504, 0.9932, 0.9404],
          [0.3276, 0.4502, 0.4678,  ..., 1.1680, 0.9580, 0.9053],
          [0.9404, 1.0635, 1.0459,  ..., 1.1504, 0.9932, 0.9404],
          ...,
          [0.6953, 0.9053, 0.8003,  ..., 1.2734, 1.3252, 1.3955],
          [0.8706, 0.9053, 0.8882,  ..., 1.0283, 1.3779, 1.2031],
          [0.9580, 0.7827, 1.0977,  ..., 1.0801, 1.3779, 0.6431]],

         [[0.4265, 0.4961, 0.3044,  ..., 1.0889, 0.9321, 0.8970],
          [0.3394, 0.4614, 0.4788,  ..., 1.1064, 0.9492, 0.8970],
          [0.8799, 1.0020, 1.0020,  ..., 1

In [20]:
!pip install onnxscript

torch.onnx.export(
    model_fp16,
    input_tensor_fp16,
    "optimized_model.onnx",
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"]
)

print("ONNX model exported successfully (legacy exporter)")

Collecting onnxscript
  Downloading onnxscript-0.5.7-py3-none-any.whl.metadata (13 kB)
Collecting onnx_ir<2,>=0.1.12 (from onnxscript)
  Downloading onnx_ir-0.1.13-py3-none-any.whl.metadata (3.2 kB)
Downloading onnxscript-0.5.7-py3-none-any.whl (693 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m693.4/693.4 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx_ir-0.1.13-py3-none-any.whl (133 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.1/133.1 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx_ir, onnxscript
Successfully installed onnx_ir-0.1.13 onnxscript-0.5.7


W0105 03:16:48.092000 7871 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 11 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnx/version_converter.py", line 39, in convert_version
    converted_model_str = C.convert_version(model_str, target_version)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: /github/workspace/onnx/version_converter/adapters/axes_input_to_attribute.h:65: adapt: Asserti

[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 104 of general pattern rewrite rules.
ONNX model exported successfully (legacy exporter)


In [21]:
onnx_model_size = os.path.getsize("optimized_model.onnx") / (1024 * 1024)
print(f"ONNX Model Size (MB): {onnx_model_size:.2f}")


ONNX Model Size (MB): 0.24


In [22]:
import onnxruntime as ort

session = ort.InferenceSession("optimized_model.onnx")

input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

# Convert input to numpy
input_numpy = input_tensor_fp16.cpu().numpy()

start = time.time()
_ = session.run([output_name], {input_name: input_numpy})
end = time.time()

print("ONNX Inference Time (ms):", (end - start) * 1000)


ONNX Inference Time (ms): 28.142213821411133
