<a href="https://colab.research.google.com/github/Lednik7/CLIP-ONNX/blob/main/examples/RuCLIP_onnx_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Allowed Resources
import multiprocessing
import torch
from psutil import virtual_memory

ram_gb = round(virtual_memory().total / 1024**3, 1)

print('CPU:', multiprocessing.cpu_count())
print('RAM GB:', ram_gb)
print("PyTorch version:", torch.__version__)
print("CUDA version:", torch.version.cuda)
print("cuDNN version:", torch.backends.cudnn.version())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:", device.type)

!nvidia-smi

CPU: 2
RAM GB: 12.7
PyTorch version: 1.10.0+cu111
CUDA version: 11.1
cuDNN version: 8005
device: cuda
Wed Jan 19 22:10:10 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8     9W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                  

## Restart colab session after installation
Reload the session if something doesn't work

In [2]:
%%capture
!pip install git+https://github.com/Lednik7/CLIP-ONNX.git
!pip install ruclip==0.0.1rc7
!pip install onnxruntime-gpu

In [3]:
%%capture
!wget -c -O CLIP.png https://github.com/openai/CLIP/blob/main/CLIP.png?raw=true

In [4]:
import onnxruntime

# priority device (if available)
print(onnxruntime.get_device())

GPU


## RuCLIP
WARNING: specific RuCLIP like forward "model(text, image)" instead of classic(OpenAI CLIP) "model(image, text)"

In [1]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
import ruclip

# onnx cannot export with cuda
model, processor = ruclip.load("ruclip-vit-base-patch32-384", device="cpu")

In [3]:
from PIL import Image
import numpy as np

# simple input
pil_images = [Image.open("CLIP.png")]
labels = ['диаграмма', 'собака', 'кошка']
dummy_input = processor(text=labels, images=pil_images,
                        return_tensors='pt', padding=True)

# batch first
image = dummy_input["pixel_values"] # torch tensor [1, 3, 384, 384]
image_onnx = dummy_input["pixel_values"].cpu().detach().numpy().astype(np.float32)

# batch first
text = dummy_input["input_ids"] # torch tensor [3, 77]
text_onnx = dummy_input["input_ids"].cpu().detach().numpy()[::-1].astype(np.int64)

In [4]:
#RuCLIP output
logits_per_image, logits_per_text = model(text, image)
probs = logits_per_image.softmax(dim=-1).detach().cpu().numpy()

print("Label probs:", probs)  # prints: [[0.9885839  0.00894288 0.0024732 ]]

Label probs: [[0.9885839  0.00894288 0.0024732 ]]


## Convert RuCLIP model to ONNX

In [5]:
from clip_onnx import clip_onnx

visual_path = "clip_visual.onnx"
textual_path = "clip_textual.onnx"

onnx_model = clip_onnx(model, visual_path=visual_path, textual_path=textual_path)
onnx_model.convert2onnx(image, text, verbose=True)

[CLIP ONNX] Start convert visual model
[CLIP ONNX] Start check visual model
[CLIP ONNX] Start convert textual model
[CLIP ONNX] Start check textual model
[CLIP ONNX] Models converts successfully


## [ONNX] CPU inference mode

In [6]:
# ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
onnx_model.start_sessions(providers=["CPUExecutionProvider"]) # cpu mode

In [7]:
image_features = onnx_model.encode_image(image_onnx)
text_features = onnx_model.encode_text(text_onnx)

logits_per_image, logits_per_text = onnx_model(image_onnx, text_onnx)
probs = logits_per_image.softmax(dim=-1).detach().cpu().numpy()

print("Label probs:", probs)  # prints: Label probs: [[0.90831375 0.07174418 0.01994203]]

Label probs: [[0.90831375 0.07174418 0.01994203]]


In [8]:
%timeit onnx_model.encode_text(text_onnx) # text representation

1 loop, best of 5: 285 ms per loop


In [9]:
%timeit onnx_model.encode_image(image_onnx) # image representation

1 loop, best of 5: 412 ms per loop


## [ONNX] GPU inference mode

In [10]:
onnx_model.start_sessions(providers=["CUDAExecutionProvider"]) # cuda mode

In [11]:
%timeit onnx_model.encode_text(text_onnx) # text representation

The slowest run took 5.07 times longer than the fastest. This could mean that an intermediate result is being cached.
100 loops, best of 5: 6.89 ms per loop


In [12]:
%timeit onnx_model.encode_image(image_onnx) # image representation

The slowest run took 699.84 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 5: 18.9 ms per loop
