In [2]:
import numpy as np
import requests
import json

import numpy as np
import tritonclient.http as httpclient


In [2]:


# Sample input: one observation with a single feature, stored as float32
# to match the "FP32" datatype declared in the payload below.
input_data = np.array([[2.5]], dtype=np.float32)

# Model name and version to query.
model_name = "linear_regression_model"  # specified in config.pbtxt
model_version = "1"

# Inference URL on the Triton server, built from the model name and version.
url = f"http://localhost:8000/v2/models/{model_name}/versions/{model_version}/infer"

# Seconds to wait for the server before giving up. Without a timeout,
# requests waits forever and an unreachable server hangs the kernel.
request_timeout_s = 10

# JSON request body describing the single input tensor.
payload = {
    "inputs": [
        {
            "name": "INPUT__0",  # what you named the input in config.pbtxt
            "datatype": "FP32",
            "shape": list(input_data.shape),  # explicit list for the JSON body
            "data": input_data.tolist(),
        }
    ]
}

# Send the request; raise_for_status() turns HTTP error codes into exceptions
# instead of letting a failed call fall through to the JSON parsing below.
response = requests.post(url, data=json.dumps(payload), timeout=request_timeout_s)
response.raise_for_status()

# Parse the JSON response and pull the first output's data into a numpy array.
inference_result = response.json()
output_data = np.array(inference_result["outputs"][0]["data"])
output_data

array([6.16193581])

In [3]:
# Display the full parsed JSON response from the REST call above
# (model name/version plus each output's name, datatype, shape and data).
inference_result

{'model_name': 'linear_regression_model',
 'model_version': '1',
 'outputs': [{'name': 'OUTPUT__0',
   'datatype': 'FP32',
   'shape': [1, 1],
   'data': [6.161935806274414]}]}

In [4]:

# Set up the Triton HTTP inference client, pointed at localhost:8000.
# The following cells reuse this `client` for inference and metadata calls.
client = httpclient.InferenceServerClient(url="localhost:8000")

In [5]:
# Describe the input tensor (name, shape, datatype) and attach the data.
inputs = httpclient.InferInput(
    name="INPUT__0",
    shape=input_data.shape,
    datatype="FP32",
)
inputs.set_data_from_numpy(input_data)  # tensor contents come from the numpy array
inputs

<tritonclient.http._infer_input.InferInput at 0x71bb9e1c6590>

In [6]:
# Output configuration: request the tensor named "OUTPUT__0" in the response.
outputs = httpclient.InferRequestedOutput("OUTPUT__0")
# Bare expression so the notebook displays the request object's repr.
outputs

<tritonclient.http._requested_output.InferRequestedOutput at 0x71bb9e1c6f50>

In [7]:
# Sample inference against version "1" of the linear regression model,
# using the input and requested-output objects prepared above.
res = client.infer(
    model_name="linear_regression_model",
    inputs=[inputs],
    outputs=[outputs],
    model_version="1",
)
# Read the OUTPUT__0 tensor out of the response as a numpy array.
inference_output = res.as_numpy("OUTPUT__0")
inference_output

array([[6.161936]], dtype=float32)

In [8]:
# Inspect the class of the object returned by client.infer.
type(res)

tritonclient.http._infer_result.InferResult

In [9]:
%%time

for i in range(100):
    res = client.infer(model_name = "linear_regression_model", inputs=[inputs], outputs=[outputs], model_version="1")

CPU times: user 12.5 ms, sys: 2.01 ms, total: 14.5 ms
Wall time: 29.3 ms


In [10]:
# Ask the server for its own metadata; the result shown below lists the
# server name, version, and the extensions it reports.
client.get_server_metadata()

{'name': 'triton',
 'version': '2.37.0',
 'extensions': ['classification',
  'sequence',
  'model_repository',
  'model_repository(unload_dependents)',
  'schedule_policy',
  'model_configuration',
  'system_shared_memory',
  'cuda_shared_memory',
  'binary_tensor_data',
  'parameters',
  'statistics',
  'trace',
  'logging']}

In [11]:
# Fetch metadata for the regression model; the result shown below includes
# its versions, platform, and the name/datatype/shape of each input and output.
client.get_model_metadata(model_name="linear_regression_model")

{'name': 'linear_regression_model',
 'versions': ['1'],
 'platform': 'pytorch_libtorch',
 'inputs': [{'name': 'INPUT__0', 'datatype': 'FP32', 'shape': [1, 1]}],
 'outputs': [{'name': 'OUTPUT__0', 'datatype': 'FP32', 'shape': [1, 1]}]}

## Sentiment Analysis

In [12]:
# Rebind `client` to a new client created with verbose=True. The captured
# outputs of the cells below include the HTTP request and response dumps.
client = httpclient.InferenceServerClient(url="localhost:8000", verbose=True)

In [13]:
# Confirm what the "sentiment" model declares as outputs by listing each
# output's name, datatype and shape from the model metadata.
metadata = client.get_model_metadata(model_name="sentiment")
print("Updated Model Outputs:")
for output_spec in metadata["outputs"]:
    print(f"  - {output_spec['name']}: {output_spec['datatype']} {output_spec['shape']}")

GET /v2/models/sentiment, headers {}
<HTTPSocketPoolResponse status=200 headers={'Content-Type': 'application/json', 'Content-Length': '222'}>
bytearray(b'{"name":"sentiment","versions":["1"],"platform":"python","inputs":[{"name":"text","datatype":"BYTES","shape":[1]}],"outputs":[{"name":"label","datatype":"BYTES","shape":[1]},{"name":"score","datatype":"FP32","shape":[1]}]}')
Updated Model Outputs:
  - label: BYTES [1]
  - score: FP32 [1]


In [14]:
# Test the improved sentiment model with separate outputs
test_texts = [
    "I am super happy right now",
    "This is terrible and I hate it",
    "It's okay, nothing special",
    "Absolutely amazing experience!",
    "Worst thing ever"
]

# Both outputs are requested on every call, so build the request objects once
# instead of on each iteration. A dedicated name is used so the regression
# cells' `outputs` object is not overwritten with a list (which would break
# re-running those cells, since they pass `outputs=[outputs]`).
sentiment_outputs = [
    httpclient.InferRequestedOutput("label"),
    httpclient.InferRequestedOutput("score"),
]

print("Sentiment Analysis Results:\n")
print(f"{'Text':<40} {'Label':<10} {'Confidence'}")
print("-" * 70)

for text in test_texts:
    # Prepare input: one string per request, sent as a BYTES tensor of shape [1].
    # Named `text_tensor` so the float32 `input_data` array used by the
    # regression cells is left untouched.
    text_tensor = np.array([text], dtype=np.object_)
    infer_input = httpclient.InferInput("text", [1], "BYTES")
    infer_input.set_data_from_numpy(text_tensor)

    # Run inference, requesting both outputs
    result = client.infer(
        model_name="sentiment",
        inputs=[infer_input],
        outputs=sentiment_outputs,
    )

    # Extract results: the label arrives as bytes and is decoded for display;
    # the score is formatted as a percentage below.
    label = result.as_numpy("label")[0].decode("utf-8")
    score = result.as_numpy("score")[0]

    print(f"{text:<40} {label:<10} {score:.1%}")

Sentiment Analysis Results:

Text                                     Label      Confidence
----------------------------------------------------------------------
POST /v2/models/sentiment/infer, headers {'Inference-Header-Content-Length': 210}
b'{"inputs":[{"name":"text","shape":[1],"datatype":"BYTES","parameters":{"binary_data_size":30}}],"outputs":[{"name":"label","parameters":{"binary_data":true}},{"name":"score","parameters":{"binary_data":true}}]}\x1a\x00\x00\x00I am super happy right now'
<HTTPSocketPoolResponse status=200 headers={'Content-Type': 'application/octet-stream', 'Inference-Header-Content-Length': '226', 'Content-Length': '241'}>
bytearray(b'{"model_name":"sentiment","model_version":"1","outputs":[{"name":"label","datatype":"BYTES","shape":[1],"parameters":{"binary_data_size":11}},{"name":"score","datatype":"FP32","shape":[1],"parameters":{"binary_data_size":4}}]}')
I am super happy right now               5 stars    84.3%
POST /v2/models/sentiment/infer, headers {'I

In [15]:
# Look up the "text_ensemble" model's metadata to see which inputs and
# outputs it declares before building a request for it.
metadata = client.get_model_metadata(model_name="text_ensemble")
print("Model metadata:", metadata, sep="\n")

GET /v2/models/text_ensemble, headers {}
<HTTPSocketPoolResponse status=200 headers={'Content-Type': 'application/json', 'Content-Length': '183'}>
bytearray(b'{"name":"text_ensemble","versions":["1"],"platform":"ensemble","inputs":[{"name":"TEXT","datatype":"BYTES","shape":[-1]}],"outputs":[{"name":"LABEL","datatype":"BYTES","shape":[-1]}]}')
Model metadata:
{'name': 'text_ensemble', 'versions': ['1'], 'platform': 'ensemble', 'inputs': [{'name': 'TEXT', 'datatype': 'BYTES', 'shape': [-1]}], 'outputs': [{'name': 'LABEL', 'datatype': 'BYTES', 'shape': [-1]}]}


In [16]:
# Fresh client without verbose=True, so request/response dumps are not printed.
client = httpclient.InferenceServerClient("localhost:8000")

# Prepare your text inputs
texts = [
    "I am SO UPSET!",
    "I am super happy!!",
    "Life is great!"
]

# Triton expects BYTES tensors for string inputs. Named `text_bytes` so the
# float32 `input_data` array used by the regression cells is not overwritten.
text_bytes = np.array([t.encode("utf-8") for t in texts], dtype=object)

# Create an InferInput matching the ensemble's config.pbtxt
infer_input = httpclient.InferInput("TEXT", [len(texts)], "BYTES")
infer_input.set_data_from_numpy(text_bytes)

# Perform inference on your ensemble
response = client.infer(
    model_name="text_ensemble",
    inputs=[infer_input]
)

# Retrieve outputs
#   - LABEL if the ensemble includes the postprocess stage
#   - LOGITS if postprocess was skipped
# as_numpy() returns None (it does not raise KeyError) when the response has
# no tensor with the given name, so test for None instead of catching KeyError.
labels = response.as_numpy("LABEL")
if labels is not None:
    print([raw_label.decode("utf-8") for raw_label in labels])
else:
    logits = response.as_numpy("LOGITS")
    if logits is None:
        raise RuntimeError("text_ensemble returned neither LABEL nor LOGITS")
    print(logits)

['NEGATIVE', 'POSITIVE', 'POSITIVE']


In [20]:
# test resnet50
client = httpclient.InferenceServerClient("localhost:8000")

# Seeded generator so the random test image is the same on every run.
rng = np.random.default_rng(0)

# Random image batch of shape (1, 3, 224, 224). The upper bound is exclusive,
# so high=256 is needed to cover the full 0..255 pixel range.
image = rng.integers(low=0, high=256, size=(1, 3, 224, 224))
image = image.astype(np.float32)  # sent as an FP32 tensor below

infer_input = httpclient.InferInput("INPUT__0", image.shape, "FP32")
infer_input.set_data_from_numpy(image)

# No outputs are requested explicitly; OUTPUT__0 is read from the response.
response = client.infer(model_name="resnet50", inputs=[infer_input])

logits = response.as_numpy("OUTPUT__0")


In [21]:
# Print the shape of the logits array returned by the resnet50 request.
print(logits.shape)

(1, 1000)


In [None]:
# test resnet_ensemble

client = httpclient.InferenceServerClient("localhost:8000")

# Seeded generator so the random test image (and hence the predicted label)
# is the same on every run.
rng = np.random.default_rng(0)

# Random uint8 image of shape (3, 224, 224) with no batch dimension. The upper
# bound is exclusive, so high=256 is needed to cover the full 0..255 range.
image = rng.integers(low=0, high=256, size=(3, 224, 224))
image = image.astype(np.uint8)  # sent as a UINT8 tensor below
infer_input = httpclient.InferInput("INPUT__0", image.shape, "UINT8")
infer_input.set_data_from_numpy(image)
response = client.infer(model_name="resnet_ensemble", inputs=[infer_input])

# Get labels and decode from bytes to string
labels_bytes = response.as_numpy("LABELS")
labels = [label.decode("utf-8") for label in labels_bytes]
print(f"Predicted labels: {labels}")

Predicted labels: ['bucket']
