In [None]:
import io

import numpy as np
import torch
import tritonclient.http as httpclient
from PIL import Image

In [None]:
def inference_with_random_torch_image(
    url="localhost:8900",
    model_name="ensemble_trt",
    image_shape=(224, 224, 3),
    seed=None,
):
    """Send one random JPEG-encoded image to a Triton model and return its output.

    A random float tensor is generated, scaled to 8-bit, encoded as a JPEG in
    memory, and the raw JPEG bytes are sent as a ``(1, n_bytes)`` UINT8 tensor
    named ``raw_image``. The ``predictions`` output of the response is returned.

    Args:
        url: ``host:port`` of the Triton HTTP endpoint.
        model_name: Name of the model to invoke on the server.
        image_shape: Shape of the random image tensor. The default is
            ``(224, 224, 3)``; other shapes must be something PIL can build an
            image from and save as JPEG (not validated here).
        seed: Optional integer seed. When given, the random image is drawn from
            a dedicated ``torch.Generator`` so repeated calls send the same
            image without touching the global torch RNG state.

    Returns:
        The ``predictions`` output as a numpy array, or ``None`` if building
        the request or running inference raised an exception (the error is
        printed rather than propagated).
    """
    print(f"Creating random torch tensor with shape: {image_shape}")
    generator = None
    if seed is not None:
        generator = torch.Generator().manual_seed(seed)
    torch_image = torch.rand(image_shape, generator=generator, dtype=torch.float32)

    # .byte() already produces uint8, so no further numpy cast is needed.
    image_np = (torch_image * 255).byte().numpy()

    # Encode to JPEG in memory; the model input is the compressed byte stream,
    # not the decoded pixel array.
    buffer = io.BytesIO()
    Image.fromarray(image_np).save(buffer, format="JPEG")
    image_input = np.frombuffer(buffer.getvalue(), dtype=np.uint8)

    # Note: these statistics describe the encoded JPEG bytes, not pixel values.
    print(f"Input image byte length: {image_input.shape[0]}")
    print(f"Data type: {image_input.dtype}")
    print(f"Value range: [{image_input.min()}, {image_input.max()}]")

    # Add the leading batch dimension: one image per request.
    batched_input = image_input.reshape(1, -1)
    batch_size = batched_input.shape[0]

    client = httpclient.InferenceServerClient(url=url)
    try:
        infer_input = httpclient.InferInput("raw_image", batched_input.shape, "UINT8")
        infer_input.set_data_from_numpy(batched_input)
        requested_output = httpclient.InferRequestedOutput("predictions")

        print(f"Input shape being sent: {batched_input.shape}")
        print(f"Batch size: {batch_size}")
        print("Running inference...")

        response = client.infer(
            model_name=model_name,
            inputs=[infer_input],
            outputs=[requested_output],
        )
        predictions = response.as_numpy("predictions")

        print(f"Predictions shape: {predictions.shape}")
        print(f"Predictions dtype: {predictions.dtype}")
        print(f"First 10 predictions: {predictions.flatten()[:10]}")
        print(f"Last 10 predictions: {predictions.flatten()[-10:]}")

        return predictions

    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with None
        # so the notebook cell itself does not raise.
        print(f"Inference failed: {e}")
        return None

    finally:
        # Release the client's connections on both success and failure.
        client.close()

In [51]:
# Run a single end-to-end request; the result is None if inference failed.
# NOTE(review): the recorded output contains negative values, so these look like
# raw scores rather than probabilities despite the name `probs` — confirm.
probs = inference_with_random_torch_image()

Creating random torch tensor with shape: (224, 224, 3)
Input image byte length: 30743
Data type: uint8
Value range: [0, 255]
Input shape being sent: (1, 30743)
Batch size: 1
Running inference...
Predictions shape: (1, 589)
Predictions dtype: float32
First 10 predictions: [-0.06658936 -0.01670837  0.02011108 -0.13464355 -0.05551147  0.00735474
  0.00852966 -0.02246094  0.05340576 -0.08770752]
Last 10 predictions: [ 0.07836914  0.07098389  0.16088867 -0.10070801  0.01976013 -0.03790283
 -0.01708984 -0.06860352  0.07781982  0.00190735]


In [52]:
# Index of the highest-scoring entry along the last axis (one index per batch row).
# Raises AttributeError if the previous cell returned None after a failed request.
probs.argmax(-1)

array([360])