In [3]:
import tritonclient.grpc as grpcclient
import numpy as np

# Server endpoint and model to query.
# NOTE: Triton's default gRPC port is 8001, not 8004; presumably this
# deployment remaps it -- adjust the port to match your server.
url = "localhost:8004"
model_name = "detection_model_1"  # Replace with your model's name

# Create a Triton gRPC client
client = grpcclient.InferenceServerClient(url=url)

# Check server and model health. Both results are printed first so the
# diagnostics are always visible, then we stop early with a clear message
# instead of letting a later inference call fail with a less obvious error.
server_live = client.is_server_live()
print("Is the server live?", server_live)
model_ready = client.is_model_ready(model_name)
print("Is the model ready?", model_ready)
if not (server_live and model_ready):
    raise RuntimeError(
        f"Triton at {url!r} is not ready to serve {model_name!r} "
        f"(server live: {server_live}, model ready: {model_ready})"
    )

# Prepare the input data (e.g., image data).
# Here we simulate a batch of one 3x512x512 float32 tensor with values in [0, 1).
# The generator is seeded so that re-running the notebook sends the same input
# and the results shown in later cells can be reproduced.
INPUT_SEED = 0
input_data = np.random.default_rng(INPUT_SEED).random((1, 3, 512, 512), dtype=np.float32)

# Describe the "images" input tensor (FP32, same shape as the array), attach
# the NumPy data to it, and collect it in the list of request inputs.
images_input = grpcclient.InferInput("images", input_data.shape, "FP32")
images_input.set_data_from_numpy(input_data)
inputs = [images_input]

# Request the "output1" tensor from the model.
outputs = [grpcclient.InferRequestedOutput("output1")]

# Send the inference request and read the requested tensor back as a NumPy array.
response = client.infer(
    model_name=model_name,
    inputs=inputs,
    outputs=outputs,
)
output_data = response.as_numpy("output1")

# Report the shape of the returned tensor.
print("Inference result:", output_data.shape)


Is the server live? True
Is the model ready? True
Inference result: (1, 5, 5376)


In [16]:
# Positions (first axis, last axis) where the final row of the output exceeds 0.35.
# NOTE(review): the final row is presumably a confidence score -- confirm against the model.
np.nonzero(output_data[:, -1, :] > 0.35)

(array([0, 0]), array([5203, 5219]))

In [17]:
# Show every row of the output for the columns whose final-row value exceeds 0.35.
# The column indices are computed from the data rather than hard-coded from a
# previous run's output, which would go stale whenever the input or model changes.
# NOTE(review): taking the last-axis indices assumes a batch of one, as in this notebook.
output_data[:, :, np.where(output_data[:, -1, :] > 0.35)[1]]

array([[[1.0799946e+02, 1.1105086e+02],
        [2.2697418e+02, 2.2704559e+02],
        [2.1463503e+02, 2.1988284e+02],
        [4.5843933e+02, 4.8306689e+02],
        [3.9507312e-01, 3.9106569e-01]]], dtype=float32)