In [48]:
import tritonclient.http as httpclient
import numpy as np
from transformers import AutoTokenizer

# Tokenizer for the embedding model, plus an HTTP client pointed at the Triton server
tokenizer = AutoTokenizer.from_pretrained("WhereIsAI/UAE-Large-V1")
client = httpclient.InferenceServerClient(url="213.192.2.120:40196")

# Tokenize one example sentence into NumPy arrays
text = "a black thing"
inputs = tokenizer(text, return_tensors="np")

# The tokenizer may not emit token_type_ids; fall back to zeros shaped like input_ids
if "token_type_ids" not in inputs:
    inputs["token_type_ids"] = np.zeros_like(inputs["input_ids"], dtype=np.int64)

# Every tensor sent in the request is declared as INT64 below, so cast all three explicitly
for tensor_name in ("input_ids", "attention_mask", "token_type_ids"):
    inputs[tensor_name] = inputs[tensor_name].astype(np.int64)

# Declare the three request tensors (name, shape, datatype) ...
input_ids = httpclient.InferInput(
    "input_ids", inputs["input_ids"].shape, "INT64"
)
attention_mask = httpclient.InferInput(
    "attention_mask", inputs["attention_mask"].shape, "INT64"
)
token_type_ids = httpclient.InferInput(
    "token_type_ids", inputs["token_type_ids"].shape, "INT64"
)

# ... and attach the tokenized data to each of them
for infer_input, tensor_name in (
    (input_ids, "input_ids"),
    (attention_mask, "attention_mask"),
    (token_type_ids, "token_type_ids"),
):
    infer_input.set_data_from_numpy(inputs[tensor_name])

# Ask the server to return only the "last_hidden_state" output
outputs = httpclient.InferRequestedOutput("last_hidden_state")

# Run inference against the "UAE-Large-V1" model
response = client.infer(
    "UAE-Large-V1",
    inputs=[input_ids, attention_mask, token_type_ids],
    outputs=[outputs],
)

# Show the vector at token position 0 of the first (only) sequence
print(response.as_numpy("last_hidden_state")[0, 0])


[-0.3409501  -0.7467628  -0.06714214 ...  0.0640513  -0.00992554
  0.11812457]


In [19]:
# Shape of the returned hidden states; the output below shows three axes
np.shape(response.as_numpy("last_hidden_state"))

(1, 32, 1024)

In [42]:
import tritonclient.http as httpclient
import numpy as np
from transformers import AutoTokenizer

# Load the tokenizer for the embedding model (no Triton client is created in this cell)
tokenizer = AutoTokenizer.from_pretrained(
    "WhereIsAI/UAE-Large-V1",
)

In [52]:
import pandas as pd
# Load the product table; the columns used below are `title`,
# `llava_generated_image_caption` and `category_name`.
df_products = pd.read_csv("dataset/textual/complete/final_products.csv")

# Build one text per product, in row order, combining title, image caption and category.
# Iterating the three columns in lockstep avoids the slow row-wise `apply(axis=1)` path
# and yields an empty list (instead of failing on `.tolist()`) when the frame has no rows.
# NOTE(review): missing values are rendered as the literal text "nan", as before — confirm
# that is acceptable for the embedding input.
result_array = [
    f"Title of Product: {title}\nProduct Image Description: {caption}\nProduct Category: {category}"
    for title, caption, category in zip(
        df_products["title"],
        df_products["llava_generated_image_caption"],
        df_products["category_name"],
    )
]

# Preview the first two generated texts
result_array[:2]

['Title of Product: Original Replacement Dell 130W Laptop Charger USB C Slim AC Power Adapter for Dell Xps 17,Precision 5550 5530 2in1,XPS 15 2in1 9575，DA130PM170 HA130PM170 0K00F5 K00F5 0M0H25 M0H25 T4V18\nProduct Image Description: A black power bank, which is a portable charger used to charge electronic devices.\nProduct Category: Laptop Accessories',
 'Title of Product: Griffin Elevator Stand for Laptops - Lift Your Laptop to a Comfortable Viewing Height, Space Grey\nProduct Image Description: A laptop computer sitting on a stand or a docking station.\nProduct Category: Laptop Accessories']

In [None]:
# HTTP client pointed at the Triton server
client = httpclient.InferenceServerClient(url="213.192.2.120:40196")

# Example input texts (batch of up to 128 product descriptions)
texts = result_array[:128]

# Tokenize the whole batch at once: pad to the longest sequence in the batch and
# truncate over-long sequences so every row has the same length
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="np")

# The tokenizer may not emit token_type_ids; fall back to zeros shaped like input_ids
if "token_type_ids" not in inputs:
    inputs["token_type_ids"] = np.zeros_like(inputs["input_ids"], dtype=np.int64)

# Every tensor sent in the request is declared as INT64 below, so cast all three explicitly
for tensor_name in ("input_ids", "attention_mask", "token_type_ids"):
    inputs[tensor_name] = inputs[tensor_name].astype(np.int64)

# Triton input tensors (name, shape, datatype)
input_ids = httpclient.InferInput("input_ids", inputs["input_ids"].shape, "INT64")
attention_mask = httpclient.InferInput("attention_mask", inputs["attention_mask"].shape, "INT64")
token_type_ids = httpclient.InferInput("token_type_ids", inputs["token_type_ids"].shape, "INT64")

# Attach the tokenized data to the request tensors
input_ids.set_data_from_numpy(inputs["input_ids"])
attention_mask.set_data_from_numpy(inputs["attention_mask"])
token_type_ids.set_data_from_numpy(inputs["token_type_ids"])

# Ask the server to return only the "last_hidden_state" output
outputs = httpclient.InferRequestedOutput("last_hidden_state")

# Send the batched inference request
response = client.infer(
    "UAE-Large-V1",
    inputs=[input_ids, attention_mask, token_type_ids],
    outputs=[outputs],
)

# Hidden states for the batch; the first axis is indexed per input text below
embeddings = response.as_numpy("last_hidden_state")

# Fail loudly if the server did not return exactly one row per input text
assert embeddings.shape[0] == len(texts), (
    f"expected {len(texts)} rows, got {embeddings.shape[0]}"
)

# Print the vector at token position 0 for each input. Iterating the sliced array
# directly avoids rebinding the notebook-level `text` variable with an unused loop target.
for first_token_vector in embeddings[:, 0]:
    print(first_token_vector)


[-0.61314815 -0.907887    0.3607005  ... -0.17837471  0.14217992
 -0.47099596]
[-0.39419082 -0.7121229   0.25297365 ... -0.5527055  -0.31108326
 -1.112491  ]
[ 0.20883867 -0.9983984   0.3888584  ...  0.00119227  0.40656745
 -0.0418727 ]
[-0.0975924   0.13179001  0.38385504 ...  0.2698132   0.26328212
 -0.1391977 ]
[-0.05747946 -0.47024244  0.00462383 ... -0.5454299  -0.3143
 -0.7572694 ]
[-0.43295807 -1.3614261  -0.14244504 ... -0.31081048 -0.18380916
 -1.1935176 ]
[-0.6850639  -0.7084948  -0.1809722  ...  0.1180134  -0.19899745
 -0.59653825]
[ 0.14423041 -0.43330437 -0.1065068  ... -0.01727496 -0.7225023
 -0.37565914]
[-0.10993994 -1.0743271   0.12810062 ...  0.20471972  0.17565478
 -0.44025752]
[-0.54397696 -0.31881618  0.74187464 ... -1.0271527  -0.36887744
 -0.7572328 ]
[-0.3144735  -0.7858785  -0.01049728 ... -0.16123521 -0.08738542
 -0.22701284]
[ 0.05019723 -0.42346582 -0.1955241  ... -0.62749755  0.37207773
 -0.54100025]
[-0.6767123  -0.55413455  0.48771986 ...  0.09059568 -0.6