# Exporting EfficientNetB2 from Pytorch to ONNX format. 

In [10]:
import torch 
from torchvision.models import efficientnet_b2, EfficientNet_B2_Weights

## Load the pre-trained model

In [11]:
weights = EfficientNet_B2_Weights.IMAGENET1K_V1
efficientnet_b2 = efficientnet_b2(weights=weights)

## Create a random input tensor

In [12]:
input = torch.randn(1, 3, 288, 288)

## Export model to onnx format

In [13]:
torch.onnx.export(efficientnet_b2,
                  input,
                  "efficientnetb2.onnx",
                  input_names=["input"],
                  output_names=["output"])

## Verify ONNX Model

In [14]:
import onnxruntime as ort

session = ort.InferenceSession("efficientnetb2.onnx")

print("Input names:", session.get_inputs()[0].name)
print("Output names:", session.get_outputs()[0].name)

Input names: input
Output names: output


## Verification Using Netron

<img src="input_efficientnetb2.png"  width="1000" height="400">

<img src="output_efficientnetb2.png"  width="1000" height="400">

## Using ONNXRUNTIME

Implementing EfficientNetB1 on the CPU involves severar implementation steps

- Preparation of the input tensor (preproceccing)
- Performing an inference in the CPU accelerator
- Converting class probabilities to class labels (postprocessing)

<img src="box.jpg" width="300" height="400">

In [15]:
import numpy as np
import cv2




def preprocessing(image): 
    
    img = cv2.resize(image, (288, 288))
    img = np.asarray(img, np.float32)
    img = np.transpose(img, [2, 0, 1])
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    for i in range(3):
        img[i,:,:] = (img[i,:,:]/255 - mean[i] / std[i])
    
    img = np.expand_dims(img, axis=0)
    
    return img


img = "box.jpg"
img = cv2.imread(img)
img = preprocessing(img)

    

In [16]:
import onnxruntime as ort

session_options = ort.SessionOptions()
session = ort.InferenceSession("efficientnetb2.onnx", session_options, ["CPUExecutionProvider"])


input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
ortvalue = ort.OrtValue.ortvalue_from_numpy(img)
session = session.run([output_name], {input_name:ortvalue})

In [17]:

def read_classes(filepath):
    with open(filepath, 'r') as file: 
        classes = [line.strip() for line in file]
    return classes
        



def postprocessing(data, classes):
    
    data = np.exp(data - np.max(data))
    prob = data/data.sum()
    pred_index = np.argmax(prob)
    predicted_class = classes[pred_index]
    
    return predicted_class, pred_index
    

classes = read_classes("classes.txt")

predicted_class, predicted_index = postprocessing(session, classes)

print("Predicted Class:", predicted_class)
print("Predicted Index:", predicted_index)
    

Predicted Class: studio couch
Predicted Index: 831


Implementation of EfficientNetB2 on the GPU. In case you installed `onnxruntime`, you need uninstall onnxruntime and install `pip install onnxruntime-gpu`. If you are facing problems with libs and failures, install `onnxruntime-gpu` from other [source](https://github.com/microsoft/onnxruntime/issues/20944). In Addition import `torch`to avoid errors. 

```bash

python3 -m pip install --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ onnxruntime_gpu

 ```

In [18]:
import onnxruntime as ort
import torch 

providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']



session_options = ort.SessionOptions()
session_gpu = ort.InferenceSession("efficientnetb2.onnx", session_options, providers)
input_name = session_gpu.get_inputs()[0].name
output_name = session_gpu.get_outputs()[0].name
io_binding = session_gpu.io_binding()
ortvalue = ort.OrtValue.ortvalue_from_numpy(img, 'cuda', 0)
io_binding.bind_input(name=input_name,
                        device_type='cuda',
                        device_id=0,
                        element_type=np.float32,
                        shape = ortvalue.shape(),
                        buffer_ptr=ortvalue.data_ptr()
                        )

io_binding.bind_output(name= output_name,
                             device_type='cuda',
                             device_id=0)

session_gpu.run_with_iobinding(io_binding)

output = io_binding.copy_outputs_to_cpu()
predicted_class, predicted_index = postprocessing(output, classes)

print("Predicted Class:", predicted_class)
print("Predicted Index:", predicted_index)

Predicted Class: studio couch
Predicted Index: 831
