In [1]:
## Connect to gg driver
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd /content/drive/MyDrive/AI Tutor/CV/Topic2: Object Classification/day06

/content/drive/.shortcut-targets-by-id/1g_hBCGxmI5lTFXyvD-igJcroxCjDzObt/AI Tutor/CV/Topic2: Object Classification/day06


### Common function usage


In [3]:
%%capture
!pip install onnxruntime
!pip install onnxruntime-gpu
!pip install netron
!pip install onnx
!pip install pyngrok

In [4]:
import torch
import onnxruntime
import numpy as np
import time
def load_onnx_model(path_onnx, providers=['CUDAExecutionProvider', 'CPUExecutionProvider']):
    # Create an ONNX Runtime inference session for the ONNX model
    ort_session = onnxruntime.InferenceSession(
        path_onnx,
        providers=providers
        )
    return ort_session

def onnx_infer(ort_session, input_data):
    ort_inputs = {ort_session.get_inputs()[0].name: input_data}
    ort_output = ort_session.run(None, ort_inputs)
    return ort_output

## Estimate perfomances

In [5]:
model = torch.load("models/torch/resnet18.pth")
onnx_float16 = load_onnx_model('models/onnx/vgg19_float16bit.onnx')
onnx_float32 = load_onnx_model('models/onnx/vgg19_float32bit.onnx')

In [6]:
list_batch_size = [2,4,8,16,32,64]
n_times = 100
total_time = 0
for batch_size in list_batch_size:
    input_data = torch.randn(batch_size, 3, 224, 224)
    for _ in range(n_times):
        start = time.time()
        input_16bit = input_data.half()
        input_numpy = input_16bit.numpy()
        onnx_infer(onnx_float16, input_numpy)
        total_time+= time.time() - start
    print(f"Batch size {batch_size}: {total_time/100*1000:.4f} ms")

Batch size 2: 12.4135 ms
Batch size 4: 18.5043 ms
Batch size 8: 26.2775 ms
Batch size 16: 40.5384 ms
Batch size 32: 61.6622 ms
Batch size 64: 104.7756 ms


In [7]:
list_batch_size = [2,4,8,16,32,64]
n_times = 100
total_time = 0
for batch_size in list_batch_size:
    input_data = torch.randn(batch_size, 3, 224, 224)
    for _ in range(n_times):
        start = time.time()
        input_numpy = input_data.numpy()
        onnx_infer(onnx_float32, input_numpy)
        total_time+= time.time() - start
    print(f"Batch size {batch_size}: {total_time/100*1000:.4f} ms")

Batch size 2: 6.1749 ms
Batch size 4: 15.2343 ms
Batch size 8: 27.6548 ms
Batch size 16: 43.9141 ms
Batch size 32: 72.5729 ms
Batch size 64: 125.6837 ms


In [8]:
list_batch_size = [2,4,8,16,32,64]
n_times = 100
total_time = 0
model.to('cuda')
with torch.no_grad():
    for batch_size in list_batch_size:
        input_data = torch.randn(batch_size, 3, 224, 224)
        for _ in range(n_times):
            start = time.time()
            model(input_data.to('cuda'))
            total_time+= time.time() - start
        print(f"Batch size {batch_size}: {total_time/100*1000:.4f} ms")

Batch size 2: 3.1616 ms
Batch size 4: 8.5229 ms
Batch size 8: 18.4456 ms
Batch size 16: 33.9361 ms
Batch size 32: 63.1430 ms
Batch size 64: 120.2191 ms


## Estimate time infer in CPU

In [9]:
model = torch.load("models/torch/resnet18.pth").cpu()
onnx_float16 = load_onnx_model('models/onnx/vgg19_float16bit.onnx', ['CPUExecutionProvider'])
onnx_float32 = load_onnx_model('models/onnx/vgg19_float32bit.onnx', ['CPUExecutionProvider'])

In [10]:
list_batch_size = [2,4,8,16,32,64]
n_times = 10
total_time = 0
for batch_size in list_batch_size:
    input_data = torch.randn(batch_size, 3, 224, 224)
    for _ in range(n_times):
        start = time.time()
        input_16bit = input_data.half()
        input_numpy = input_16bit.numpy()
        onnx_infer(onnx_float16, input_numpy)
        total_time+= time.time() - start
    print(f"Batch size {batch_size}: {total_time/n_times*1000:.4f} ms")

Batch size 2: 114.1074 ms
Batch size 4: 314.1515 ms
Batch size 8: 733.3618 ms
Batch size 16: 1664.7996 ms
Batch size 32: 3378.2198 ms
Batch size 64: 6863.7208 ms


In [11]:
list_batch_size = [2,4,8,16,32,64]
n_times = 10
total_time = 0
for batch_size in list_batch_size:
    input_data = torch.randn(batch_size, 3, 224, 224)
    for _ in range(n_times):
        start = time.time()
        input_numpy = input_data.numpy()
        onnx_infer(onnx_float32, input_numpy)
        total_time+= time.time() - start
    print(f"Batch size {batch_size}: {total_time/n_times*1000:.4f} ms")

Batch size 2: 113.3912 ms
Batch size 4: 321.8349 ms
Batch size 8: 627.6034 ms
Batch size 16: 1113.3701 ms
Batch size 32: 2289.8710 ms
Batch size 64: 4464.4668 ms


In [12]:
list_batch_size = [2,4,8,16,32,64]
n_times = 10
total_time = 0
model.to('cpu')
with torch.no_grad():
    for batch_size in list_batch_size:
        input_data = torch.randn(batch_size, 3, 224, 224)
        for _ in range(n_times):
            start = time.time()
            model(input_data.to('cpu'))
            total_time+= time.time() - start
        print(f"Batch size {batch_size}: {total_time/n_times*1000:.4f} ms")

Batch size 2: 106.6412 ms
Batch size 4: 386.0994 ms
Batch size 8: 830.8327 ms
Batch size 16: 1551.9976 ms
Batch size 32: 3491.1518 ms
Batch size 64: 7336.3019 ms
