# Exporting ConvNeXt Large from PyTorch to ONNX format.

In [15]:
import torch
import torchvision
from torchvision.models import convnext_large, ConvNeXt_Large_Weights

## Load the pre-trained model

In [16]:
weights = ConvNeXt_Large_Weights.IMAGENET1K_V1
# Build the network through the module path instead of the imported
# `convnext_large` name: the assignment below rebinds that name to the model
# instance, so calling it again when this cell is re-run would invoke the
# model rather than the builder and fail.
# `.eval()` switches off training-only behaviour before the model is exported;
# it returns the module itself, so the binding is unchanged.
convnext_large = torchvision.models.convnext_large(weights=weights).eval()

## Create a random input tensor
<br>
ConvNeXt Large takes a batched input of shape (B, C, H, W). According to the PyTorch docs, images are resized to 232 and then center-cropped to 224. The model is therefore exported with a 224x224 input, and at inference time every image must be resized to 232x232 and center-cropped to 224x224 before it is fed to the model.

In [17]:
# Random stand-in for one RGB image (batch=1, channels=3, 224x224); it is the
# example input handed to the ONNX export.
input = torch.randn((1, 3, 224, 224))

## Export model to onnx format

In [18]:
# Export the model using the sample tensor and write the graph to
# "convnextlarge.onnx". The graph's input and output tensors are given
# explicit names so they are easy to identify in the exported file.
torch.onnx.export(
    convnext_large,
    (input,),
    "convnextlarge.onnx",
    input_names=["input"],
    output_names=["output"],
)

## Verify ONNX Model

In [19]:
import onnxruntime as ort

# Load the exported graph back to confirm it is readable. The execution
# provider is named explicitly: ONNX Runtime builds that ship more than one
# provider (e.g. onnxruntime-gpu) require `providers` to be set since ORT 1.9,
# and the CPU provider is all this sanity check needs.
model = ort.InferenceSession("convnextlarge.onnx", providers=["CPUExecutionProvider"])

# Only the first input/output is shown; the export declared exactly one of each.
print("Input names:", model.get_inputs()[0].name)
print("Output names:", model.get_outputs()[0].name)

Input names: input
Output names: output


## Verification Using Netron


<img src="input_convnextlarge.png"  width="1000" height="400">

<img src="output_convnextlarge.png"  width="1000" height="400">

Running ConvNeXt Large on the CPU involves several steps:

- Preparing the input tensor (preprocessing)
- Running inference with the CPU execution provider
- Converting class probabilities to class labels (postprocessing)

In [20]:
import numpy as np
import cv2




def preprocessing(image, input_is_bgr=True):
    """Convert an OpenCV image into the normalised NCHW tensor fed to the model.

    Steps: resize to 232x232, centre-crop to 224x224, reorder channels to RGB,
    scale to [0, 1], standardise with the ImageNet per-channel mean/std, move
    channels first and add a batch axis.

    Parameters
    ----------
    image : numpy.ndarray
        H x W x 3 image with values in the 0-255 range, e.g. the result of
        ``cv2.imread``.
    input_is_bgr : bool, default True
        ``cv2.imread`` returns channels in BGR order, while the mean/std below
        (and the pretrained weights) are defined for RGB. Leave this True for
        OpenCV images; pass False if ``image`` is already RGB.

    Returns
    -------
    numpy.ndarray
        C-contiguous float32 array of shape (1, 3, 224, 224).

    Raises
    ------
    ValueError
        If ``image`` is None (``cv2.imread`` returns None when a file cannot
        be read) or is not an H x W x 3 array.

    Notes
    -----
    The image is squashed to a 232x232 square before cropping. torchvision's
    reference transform resizes the shorter side to 232 and keeps the aspect
    ratio, so results can differ slightly for strongly non-square images.
    """
    if image is None:
        raise ValueError(
            "image is None - cv2.imread returns None when the file cannot be read"
        )
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(f"expected an H x W x 3 image, got shape {image.shape}")

    resize_size, crop_size = 232, 224
    img = cv2.resize(image, (resize_size, resize_size))

    # Centre crop: the same margin is removed on every side.
    offset = (resize_size - crop_size) // 2
    img = img[offset:offset + crop_size, offset:offset + crop_size]

    if input_is_bgr:
        # Reverse the channel axis (BGR -> RGB) so it matches the mean/std order.
        img = img[:, :, ::-1]

    img = np.asarray(img, np.float32) / 255

    # Per-channel standardisation, broadcast over the trailing (channel) axis.
    # The subtraction must happen before the division: (x - mean) / std.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    img = (img - mean) / std

    # HWC -> CHW, then prepend the batch axis. The transpose yields a strided
    # view, so copy it into contiguous memory before handing it to the runtime.
    img = np.transpose(img, [2, 0, 1])
    return np.ascontiguousarray(img[np.newaxis, ...])


# Separate names for the path, the decoded image and the final tensor, so a
# re-run of any later cell always sees `img` as the preprocessed tensor.
image_path = "snake.jpg"
image_bgr = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file; it returns None.
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image file: {image_path}")
img = preprocessing(image_bgr)

    

In [21]:
import onnxruntime as ort

session_options = ort.SessionOptions()
# The inference session has its own name so that it is not overwritten by the
# inference result and stays available for further runs.
cpu_session = ort.InferenceSession("convnextlarge.onnx", session_options, ["CPUExecutionProvider"])

input_name = cpu_session.get_inputs()[0].name
output_name = cpu_session.get_outputs()[0].name
ortvalue = ort.OrtValue.ortvalue_from_numpy(img)
cpu_outputs = cpu_session.run([output_name], {input_name: ortvalue})

# `session` is the name the post-processing step reads the output list from,
# so that binding is preserved; it holds the outputs, not the session object.
session = cpu_outputs

In [22]:

def read_classes(filepath):
    """Load class labels from a text file, one label per line.

    Each line is stripped of surrounding whitespace; blank lines are kept as
    empty strings so list positions keep matching line numbers.

    Parameters
    ----------
    filepath : str or path-like
        Text file to read.

    Returns
    -------
    list of str
        The labels in file order.
    """
    labels = []
    with open(filepath, 'r') as handle:
        for raw_line in handle:
            labels.append(raw_line.strip())
    return labels
        



def postprocessing(data, classes):
    """Pick the most probable class from raw model scores.

    A softmax is taken over every value in ``data`` (the maximum is subtracted
    first for numerical stability) and the position of the largest probability
    in the flattened result is used to index ``classes``.

    Parameters
    ----------
    data : array-like
        Raw scores; anything numpy can turn into an array.
    classes : sequence of str
        Labels indexed by class id.

    Returns
    -------
    tuple
        ``(label, index)`` of the winning class.
    """
    shifted_scores = data - np.max(data)
    exp_scores = np.exp(shifted_scores)
    probabilities = exp_scores / exp_scores.sum()

    # argmax without an axis works on the flattened array.
    best_index = np.argmax(probabilities)
    return classes[best_index], best_index
    

# Translate the CPU inference output into a human-readable label.
classes = read_classes("classes.txt")

cpu_prediction = postprocessing(session, classes)
predicted_class, predicted_index = cpu_prediction

print("Predicted Class:", predicted_class)
print("Predicted Index:", predicted_index)
    

Predicted Class: boa constrictor
Predicted Index: 61


Running ConvNeXt Large on the GPU. If you previously installed `onnxruntime`, uninstall it first and then install the GPU build with `pip install onnxruntime-gpu`.

In [23]:
import onnxruntime as ort
import torch

# Execution providers in priority order: CUDA first, CPU second.
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

session_options = ort.SessionOptions()
session_gpu = ort.InferenceSession("convnextlarge.onnx", session_options, providers)
input_name = session_gpu.get_inputs()[0].name
output_name = session_gpu.get_outputs()[0].name

# Copy the preprocessed image to CUDA device 0 once; the binding below passes
# ONNX Runtime the device pointer of this buffer instead of a host array.
ortvalue = ort.OrtValue.ortvalue_from_numpy(img, 'cuda', 0)

io_binding = session_gpu.io_binding()
io_binding.bind_input(
    name=input_name,
    device_type='cuda',
    device_id=0,
    element_type=np.float32,
    shape=ortvalue.shape(),
    buffer_ptr=ortvalue.data_ptr(),
)
# No buffer is supplied for the output; only the target device is named.
io_binding.bind_output(name=output_name, device_type='cuda', device_id=0)

session_gpu.run_with_iobinding(io_binding)

# Bring the results back to host memory as numpy arrays for post-processing.
output = io_binding.copy_outputs_to_cpu()
predicted_class, predicted_index = postprocessing(output, classes)

print("Predicted Class:", predicted_class)
print("Predicted Index:", predicted_index)

Predicted Class: boa constrictor
Predicted Index: 61
