<a href="https://colab.research.google.com/github/DavidAtRedpine/HuggingFaceImageNettoONNX/blob/main/HuggingFaceImageNettoONNX.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# HuggingFace ImageNet to ONNX

Convert a HuggingFace model trained on ImageNet into the ONNX format.

First, install the ONNX packages and import the necessary Python modules.

In [None]:
%%capture

! pip install onnx onnxruntime

In [None]:
import cv2
import torch
import torchvision.transforms as transforms
from transformers import AutoModelForImageClassification
from PIL import Image
import onnx
import onnxruntime as ort
from google.colab import files
import requests
from io import BytesIO


# Define your variables

Set the path to your HuggingFace model that was trained on ImageNet. In this example, we are using [resnet-50](https://huggingface.co/microsoft/resnet-50). Also set the width/height of the images (default 224px), provide a URL to a sample image to test that the ONNX model works, and choose the file name for the exported ONNX model.


In [None]:
# User-tunable settings for the whole notebook. The trailing `#@param`
# annotations are left exactly as written.

# Model identifier handed to AutoModelForImageClassification.from_pretrained().
pretrained_model_path = "microsoft/resnet-50" #@param {type:"string"}
# Side length in pixels; used for both the width and the height of the resized
# sample image and of the dummy tensor used for the export.
resolution = 224 #@param {type:"integer"}
# Sample image that is downloaded to check that the exported model runs.
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/f/f7/003_Olive-bellied_Sunbird_in_flight_at_Kibale_forest_National_Park_Photo_by_Giles_Laurent.jpg/1280px-003_Olive-bellied_Sunbird_in_flight_at_Kibale_forest_National_Park_Photo_by_Giles_Laurent.jpg" #@param {type:"string"}
# File name the ONNX model is written to, validated from, and downloaded as.
export_model_name = "model.onnx" #@param {type:"string"}



# Convert the model to ONNX

In [None]:
# Fetch the pretrained classifier and switch it to evaluation mode
# (`eval()` returns the module itself, so the call can be chained).
model = AutoModelForImageClassification.from_pretrained(pretrained_model_path).eval()

# Human-readable class names, taken from the model's configuration
labels = [label for label in model.config.id2label.values()]

# Per-channel (red, green, blue) statistics used to normalize the input:
# the mean pixel intensities and their standard deviations. These values are
# typically used with models trained on the ImageNet dataset.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize to a square, convert to a tensor, normalize
preprocess_steps = [
    transforms.Resize((resolution, resolution)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
preprocess = transforms.Compose(preprocess_steps)

# Names given to the graph's input and output in the exported file
input_names = ["input"]
output_names = ["output"]

# Random single-image batch that is passed to the exporter as the example input
dummy_input = torch.randn(1, 3, resolution, resolution)

# Axis 0 of both the input and the output is declared dynamic and labelled
# "batch_size"
batch_axis = {0: "batch_size"}
export_options = dict(
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=input_names,
    output_names=output_names,
    dynamic_axes={"input": dict(batch_axis), "output": dict(batch_axis)},
)

# Write the model to `export_model_name` in ONNX format
torch.onnx.export(model, dummy_input, export_model_name, **export_options)


# Validate the ONNX model works

In [None]:
def load_image_from_url(url, timeout=30):
    """Download an image over HTTP and open it with PIL.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The opened PIL image, or ``None`` if the download or the decoding
        failed (the error is printed in that case).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Fail on 4xx/5xx responses here; otherwise the body of the error page
        # would be handed to PIL and surface as a misleading decoding error.
        response.raise_for_status()
        return Image.open(BytesIO(response.content))
    except Exception as e:
        # Best-effort helper: report the problem and let the caller decide.
        print(f"Error loading {url}: {e}")
        return None


# Validate the ONNX model: load the exported file and run ONNX's checker on it
onnx_model = onnx.load(export_model_name)
onnx.checker.check_model(onnx_model)

# Create an ONNX Runtime session. The execution provider is named explicitly
# because some ONNX Runtime builds (those that ship additional providers)
# refuse to create a session without the `providers` argument.
ort_session = ort.InferenceSession(
    export_model_name, providers=["CPUExecutionProvider"]
)

# Prepare a sample input image
image = load_image_from_url(image_url)
if image is None:
    # load_image_from_url signals failure by returning None; stop here with a
    # clear message instead of failing on `None.convert` below.
    raise RuntimeError(f"Could not load the sample image from {image_url}")
# Force three colour channels, preprocess, and add the leading batch dimension
input_tensor = preprocess(image.convert("RGB")).unsqueeze(0)

# Run the ONNX model; passing None as the output list requests all outputs
ort_inputs = {input_names[0]: input_tensor.numpy()}
ort_outputs = ort_session.run(None, ort_inputs)
ort_output_logits = ort_outputs[0]

# Get the predicted label index (highest score of the single image in the
# batch), converted to a plain int for list indexing
predicted_idx = int(ort_output_logits.argmax(axis=1)[0])

# Get the predicted label text
predicted_label = labels[predicted_idx]

# Print the predicted label
print("Detected label:", predicted_label)

# Download the ONNX file

In [None]:
# Hand the exported ONNX file to Colab's `files.download` helper
# (`files` comes from `google.colab`).
files.download(export_model_name)