In [None]:
import random
from pathlib import Path

import matplotlib.pyplot as plt
from transformers import AutoImageProcessor, AutoModel
from PIL import Image

# Root directory for exported models; later cells write "<name>/1/model.onnx" below it.
MODELS_DIR = Path("../infra/models")
# Directory holding the COCO-2017 training images used as sample inputs.
IMAGES_DATA_TRAIN = Path("../infra/images/coco-2017/train/data")
assert IMAGES_DATA_TRAIN.exists(), f"image directory not found: {IMAGES_DATA_TRAIN}"

# Sanity check: every dotted entry in the directory must carry the .jpg suffix.
extensions = {filename.suffix for filename in IMAGES_DATA_TRAIN.glob("*.*")}
assert extensions == {".jpg"}, f"expected only .jpg files, found: {sorted(extensions)}"

# glob() yields entries in arbitrary, filesystem-dependent order; sort so the list
# (and therefore any seeded random.choice over it) is stable between runs.
IMAGES = sorted(IMAGES_DATA_TRAIN.glob("*.jpg"))

In [None]:
import torch

# The dataset may contain non-RGB (e.g. greyscale) files; force three channels so
# every later cell that consumes `image` sees an RGB picture.
image = Image.open(random.choice(IMAGES)).convert("RGB")

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoImageProcessor.from_pretrained('facebook/dinov2-base')
model = AutoModel.from_pretrained('facebook/dinov2-base').to(device)

inputs = processor(images=image, return_tensors="pt")
# Inference only: disable autograd so no graph is recorded for the forward pass.
with torch.no_grad():
    outputs = model(**inputs.to(device))
last_hidden_states = outputs.last_hidden_state

In [None]:
import torch

# Random stand-in for a single 3x224x224 image, used to trace the model during export.
dummy_input = torch.randn(1, 3, 224, 224)  # Adjust dimensions as needed
model_path = MODELS_DIR / "dinov2/1/model.onnx"
# Make sure the target directory exists before the exporter tries to write the file.
model_path.parent.mkdir(parents=True, exist_ok=True)

# Export the model to ONNX
torch.onnx.export(
    model,
    # Put the example input on whichever device the model's weights live on.
    dummy_input.to(next(model.parameters()).device),
    str(model_path),
    input_names=['input'],
    output_names=['last_hidden_state', 'pooler_output'],  # Names given to the exported outputs
)

In [None]:
import onnxruntime as ort
import torchvision

def preprocess_image(
    image_path,
    size=(224, 224),
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
):
    """Load an image file and convert it into a normalized channel-first array.

    Args:
        image_path: Path of the image file to load.
        size: Target ``(width, height)`` handed to ``PIL.Image.resize``.
        mean: Per-channel mean subtracted after the pixels are scaled by ``ToTensor``.
        std: Per-channel standard deviation each channel is divided by.

    Returns:
        A numpy array of shape ``(3, height, width)`` with no batch dimension;
        callers add one themselves (e.g. ``array[None, ...]``).
    """
    # NOTE(review): a plain resize ignores the aspect ratio and may not match what
    # AutoImageProcessor does for the same checkpoint - confirm before comparing outputs.
    # The context manager releases the file once convert() has copied the pixels out.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB").resize(size)

    # ToTensor already yields a CPU tensor in CHW layout, so no device move or
    # squeeze is needed before handing the data to numpy.
    image_tensor = torchvision.transforms.ToTensor()(image)
    image_tensor = torchvision.transforms.Normalize(mean, std)(image_tensor)

    return image_tensor.numpy()

# `model_path` must still point at the DINOv2 export here; a later cell rebinds the
# same name to the YOLO model, so run the cells in order.
# Pass a plain string, as the export cell does, so this also works with onnxruntime
# releases that do not accept pathlib.Path objects.
session = ort.InferenceSession(str(model_path))

# Preprocess one random image and add the leading batch dimension the model expects.
sample_batch = preprocess_image(random.choice(IMAGES))[None, ...]
result = session.run(None, {"input": sample_batch})
result[0].shape

In [None]:
# Undo the normalisation so the processed tensor can be displayed as an image.
# Assumes the processor normalised with these statistics - TODO confirm against
# processor.image_mean / processor.image_std.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# Drop the batch dimension and move channels last (HWC), the layout imshow expects.
img = inputs['pixel_values'].to("cpu").squeeze().numpy().transpose(1, 2, 0)
# Floating-point round-off can push values slightly outside [0, 1]; clip them so
# imshow does not warn about out-of-range RGB data.
img = (img * std + mean).clip(0.0, 1.0)
plt.imshow(img);

In [None]:
from ultralytics import YOLO
import shutil

# Directory that holds the YOLO checkpoint and receives the exported ONNX file.
yolo_dir = MODELS_DIR / "yolo/1"
model_path = yolo_dir / "model.onnx"

# Load the checkpoint and export it to ONNX (dynamic input shapes, opset 17).
model = YOLO(yolo_dir / "yolov8x.pt")
model.export(format="onnx", dynamic=True, opset=17)

# Assumes the exporter wrote "yolov8x.onnx" next to the checkpoint; give it the
# generic "model.onnx" name used for the other exported model.
shutil.move(yolo_dir / "yolov8x.onnx", model_path)

In [None]:
import onnxruntime as ort
import torch
import torchvision

# Pass a plain string, as the DINOv2 export cell does, so this also works with
# onnxruntime releases that do not accept pathlib.Path objects.
session = ort.InferenceSession(str(model_path))

In [None]:
# ToTensor keeps the channel count of the PIL image, so force RGB first to
# guarantee a three-channel tensor.
# NOTE: the name `input` shadows the builtin; it is kept because the next cell reads it.
input = torchvision.transforms.ToTensor()(image.convert("RGB"))
# Resize to 640x640 (aspect ratio is not preserved) and add the batch dimension.
input = torchvision.transforms.Resize((640, 640))(input)[None, ...]

In [None]:

# Run the exported YOLO model on the prepared batch; "images" is the input name
# this cell feeds, and passing None requests every output.
yolo_feed = {"images": input.numpy()}
result = session.run(None, yolo_feed)
# Show the shape of the first output.
result[0].shape