In [1]:
import tensorrt as trt
import pycuda.driver as cuda
import torchvision.transforms as transforms
import pycuda.autoinit  # initializes CUDA driver
import numpy as np
import time
import torch
import cv2 as cv

In [2]:
# Logger handed to the TensorRT runtime (constructed with the WARNING severity level).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# 1. Load the serialized engine from disk
with open("checkpoints/arcface-r100-glint360k_fp16.engine", "rb") as f:
    runtime = trt.Runtime(TRT_LOGGER)
    engine = runtime.deserialize_cuda_engine(f.read())

# Deserialization reports failure (e.g. an engine built with another TensorRT
# version) by returning None; fail here instead of with an AttributeError below.
if engine is None:
    raise RuntimeError(
        "Failed to deserialize TensorRT engine: "
        "checkpoints/arcface-r100-glint360k_fp16.engine"
    )

# 2. Create the execution context
context = engine.create_execution_context()
if context is None:
    raise RuntimeError("Failed to create a TensorRT execution context")

# 3. Allocate one page-locked host buffer and one device buffer per I/O tensor.
#    `inputs` / `outputs` hold (host_buffer, device_buffer) pairs, `bindings` holds
#    the raw device addresses in engine tensor order.
inputs, outputs, bindings, stream = [], [], [], cuda.Stream()

for i in range(engine.num_io_tensors):
    tensor_name = engine.get_tensor_name(i)

    dims = engine.get_tensor_shape(tensor_name)
    # A dynamic dimension is reported as -1, which would make the element count
    # below meaningless (negative, or silently wrong when two dims are dynamic).
    if any(dim < 0 for dim in dims):
        raise ValueError(
            f"Tensor '{tensor_name}' has dynamic shape {tuple(dims)}; "
            "buffers here are sized from the static engine shape"
        )

    size = trt.volume(dims)
    dtype = trt.nptype(engine.get_tensor_dtype(tensor_name))

    host_mem = cuda.pagelocked_empty(size, dtype)
    device_mem = cuda.mem_alloc(host_mem.nbytes)

    bindings.append(int(device_mem))
    if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT:
        inputs.append((host_mem, device_mem))
    else:
        outputs.append((host_mem, device_mem))

In [3]:
def infer(input_numpy):
    """Run the TensorRT engine on preprocessed input and return the output buffers.

    Parameters
    ----------
    input_numpy : numpy.ndarray
        Preprocessed input data. If it has exactly one dimension fewer than the
        engine's "input" tensor it is treated as a single sample and a leading
        batch axis is added. Its element count must match the pre-allocated
        host input buffer.

    Returns
    -------
    list
        The module-level ``outputs`` list of ``(host_buffer, device_buffer)``
        tuples. ``outputs[0][0]`` is the flat host copy of the first output.
        The buffers are reused between calls, so copy the data (e.g. with
        ``.tolist()`` or ``.copy()``) before calling ``infer`` again.

    Raises
    ------
    RuntimeError
        If TensorRT rejects the requested input shape.
    """
    # Accept a single un-batched sample as well as an already batched array.
    expected_rank = len(engine.get_tensor_shape("input"))
    if input_numpy.ndim == expected_rank - 1:
        input_numpy = input_numpy[np.newaxis, ...]

    # set_input_shape reports failure through its return value; continuing
    # would run inference with whatever shape the context had before.
    if not context.set_input_shape("input", input_numpy.shape):
        raise RuntimeError(
            f"TensorRT rejected input shape {input_numpy.shape} for tensor 'input'"
        )

    host_in, device_in = inputs[0]
    host_out, device_out = outputs[0]

    # Copy input data to the page-locked host buffer
    np.copyto(host_in, input_numpy.ravel())

    # Transfer to GPU
    cuda.memcpy_htod_async(device_in, host_in, stream)
    # The upload is only enqueued on `stream`, while execute_v2 is not issued on
    # that stream; wait for the copy so the engine never reads a stale input.
    stream.synchronize()

    # Execute
    context.execute_v2(bindings)

    # Transfer outputs back
    cuda.memcpy_dtoh_async(host_out, device_out, stream)
    stream.synchronize()
    return outputs

In [4]:
def normalize(img):
    """Shift and scale a channel-first image with per-channel mean 0.5 and std 0.5.

    Parameters
    ----------
    img : array-like broadcastable against shape (3, 1, 1), e.g. (3, H, W).

    Returns
    -------
    numpy.ndarray
        ``(img - 0.5) / 0.5`` computed channel-wise.
    """
    per_channel = (3, 1, 1)
    offset = np.full(per_channel, 0.5, dtype=np.float32)
    scale = np.full(per_channel, 0.5, dtype=np.float32)
    centered = img - offset
    return centered / scale

In [28]:
def measure_inference_time(repetitions=100, batch_size=10, warmup=10):
    """Measure the average wall-clock time of one ``infer`` call on a random batch.

    Parameters
    ----------
    repetitions : int
        Number of timed ``infer`` calls; must be positive.
    batch_size : int
        Number of random 3x112x112 float32 samples in the batch passed to
        ``infer`` on every call.
    warmup : int
        Number of untimed calls made before the measurement starts.

    Returns
    -------
    float
        Average seconds per ``infer`` call (per batch, not per sample).

    Raises
    ------
    ValueError
        If ``repetitions`` is not positive.
    """
    if repetitions <= 0:
        raise ValueError("repetitions must be a positive integer")

    # Only one batch is ever fed to the model, so allocate exactly that batch
    # instead of `repetitions` samples.
    input_batch = np.random.randn(batch_size, 3, 112, 112).astype(np.float32)

    # warmup
    for _ in range(warmup):
        infer(input_batch)

    # perf_counter is the clock intended for measuring short durations.
    start = time.perf_counter()
    for _ in range(repetitions):
        infer(input_batch)
    elapsed = time.perf_counter() - start

    return elapsed / repetitions

print("Avg inference time:", measure_inference_time() * 1000, "ms")


Avg inference time: 6.216003894805908 ms


In [5]:
from src.constants import QDRANT_PORT, QDRANT_HOST
from qdrant_client import QdrantClient
from qdrant_client.http import models
import os

# Name of the Qdrant collection that the enrollment loop upserts face embeddings into.
collection_name = "faces"
# NOTE(review): only grpc_port is overridden here; presumably prefer_grpc=True is
# also required for the client to actually talk gRPC on that port — confirm against
# the qdrant_client documentation and the meaning of QDRANT_PORT.
client = QdrantClient(QDRANT_HOST, grpc_port=QDRANT_PORT)

In [12]:
# Enroll every image in the faces directory: embed it with `infer` and upsert the
# vector into Qdrant with the file's base name as payload.
faces_dir = "./testbench/img/faces"

# os.listdir returns names in arbitrary order; sorting keeps the id -> file
# mapping reproducible from one run to the next.
for i, filename in enumerate(sorted(os.listdir(faces_dir))):
    # Strip only the final extension so dotted names ("john.doe.jpg") survive.
    name = os.path.splitext(filename)[0]

    image = cv.imread(os.path.join(faces_dir, filename))
    if image is None:
        # cv.imread returns None (it does not raise) for unreadable or
        # non-image files; skip them instead of crashing in cvtColor.
        print(f"Skipping unreadable file: {filename}")
        continue

    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    # Resize to 112x112, map 0..255 to -1..1, reorder HWC -> CHW.
    image = np.transpose(cv.resize(image, (112, 112)) / 127.5 - 1.0, (2, 0, 1)).astype(np.float32)
    vec = infer(image)[0][0].tolist()

    # Ids follow the position in the sorted listing (1-based); skipped files
    # leave gaps rather than shifting the ids of later files.
    client.upsert(
        collection_name=collection_name,
        points=[models.PointStruct(id=i + 1, vector=vec, payload={"name": name})],
    )

In [None]:
def cosine(a, b):
    """Return the cosine similarity of two vectors as a Python float.

    Parameters
    ----------
    a, b : array-like
        Vectors with the same number of elements. Inputs are flattened, so a
        (1, D) embedding is accepted as well as a (D,) one.

    Returns
    -------
    float
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero norm
        (the similarity is undefined there; 0.0 avoids a NaN and a
        divide-by-zero warning).
    """
    a = np.asarray(a).ravel()
    b = np.asarray(b).ravel()
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return float(np.dot(a, b) / denom)

In [37]:
import onnxruntime as ort
import numpy as np

# Load the ONNX model into an onnxruntime session
session = ort.InferenceSession("checkpoints/model.onnx")

# Name of the model's first input, used as the feed key in session.run
input_name = session.get_inputs()[0].name


# NOTE: this definition replaces the TensorRT-based `infer` defined earlier in the
# notebook; every later call to `infer` goes through onnxruntime.
def infer(x):
    """Run the ONNX model on ``x`` and return the list of all model outputs.

    Parameters
    ----------
    x : numpy.ndarray
        Preprocessed input. If it has exactly one dimension fewer than the
        model's first input it is treated as a single sample and a leading
        batch axis is added; otherwise it is passed through unchanged, so an
        already batched array also works.

    Returns
    -------
    list
        The result of ``session.run`` for all outputs; ``result[0][0]`` is the
        first output for the first sample.
    """
    x = np.asarray(x)
    expected_rank = len(session.get_inputs()[0].shape)
    if x.ndim == expected_rank - 1:
        x = np.expand_dims(x, axis=0)
    return session.run(None, {input_name: x})

In [38]:
# Embed the image 0.jpg and keep its embedding in `vec`.
image_path = "./testbench/img/faces/0.jpg"
image = cv.imread(image_path)
if image is None:
    # cv.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
# Resize to 112x112, map 0..255 to -1..1, reorder HWC -> CHW.
image = np.transpose(cv.resize(image, (112, 112)) / 127.5 - 1.0, (2, 0, 1)).astype(np.float32)
# First output, first sample.
vec = infer(image)[0][0]
# client.query_points(collection_name, vec)

In [48]:
# Embed the image amir.jpg and keep its embedding in `vec1`.
image_path = "./testbench/img/faces/amir.jpg"
image = cv.imread(image_path)
if image is None:
    # cv.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
# Resize to 112x112, map 0..255 to -1..1, reorder HWC -> CHW.
image = np.transpose(cv.resize(image, (112, 112)) / 127.5 - 1.0, (2, 0, 1)).astype(np.float32)
# First output, first sample.
vec1 = infer(image)[0][0]
# client.query_points(collection_name, vec1)

In [49]:
# Sanity check: after the (2, 0, 1) transpose the image should be channel-first (C, H, W).
image.shape

(3, 112, 112)

In [41]:
# Cosine similarity between the embeddings of 0.jpg (`vec`) and amir.jpg (`vec1`).
cosine(vec, vec1)

0.1491871178150177

In [26]:
# NOTE(review): this displays the entire embedding; consider vec1.shape or vec1[:8]
# to keep the notebook output short.
vec1

array([-6.94335938e-01, -8.62304688e-01, -1.18164062e+00, -3.05664062e-01,
       -1.21875000e+00, -5.81054688e-02, -1.13378906e+00, -9.06738281e-01,
       -5.72753906e-01,  1.40747070e-01,  2.54638672e-01,  6.98730469e-01,
        9.62402344e-01,  7.99316406e-01,  5.92773438e-01, -1.05102539e-01,
        2.05566406e-01, -1.27050781e+00,  2.84179688e-01, -1.65771484e-01,
       -9.03808594e-01,  6.68945312e-01, -1.18457031e+00,  2.39379883e-01,
        7.56835938e-01, -6.51855469e-01,  5.49316406e-01,  3.01757812e-01,
       -2.90771484e-01, -7.55615234e-02,  2.35156250e+00, -3.53271484e-01,
        2.92480469e-01,  5.76660156e-01,  4.74243164e-02, -5.79833984e-02,
       -1.99127197e-03,  9.35058594e-01, -2.26562500e-01,  1.01464844e+00,
        3.38867188e-01, -5.00976562e-01, -9.06250000e-01, -5.02929688e-02,
       -4.84130859e-01, -1.59765625e+00, -1.41845703e-01, -7.61718750e-01,
        7.56347656e-01,  1.90234375e+00,  1.27441406e+00, -6.23535156e-01,
       -1.38549805e-02,  