In [None]:
from angle_emb import Prompts
from sentence_transformers import SentenceTransformer
from scipy import spatial

model = SentenceTransformer("WhereIsAI/UAE-Large-V1").cuda()

qv = model.encode(Prompts.C.format(text='a black thing'))
doc_vecs = model.encode([
    'Title of Product: Original Replacement Dell 130W Laptop Charger USB C Slim AC Power Adapter for Dell Xps 17,Precision 5550 5530 2in1,XPS 15 2in1 9575，DA130PM170 HA130PM170 0K00F5 K00F5 0M0H25 M0H25 T4V18\nProduct Image Description: A black power bank, which is a portable charger used to charge electronic devices.\nProduct Category: Laptop Accessories',
    'Title of Product: Griffin Elevator Stand for Laptops - Lift Your Laptop to a Comfortable Viewing Height, Space Grey\nProduct Image Description: A laptop computer sitting on a stand or a docking station.\nProduct Category: Laptop Accessories'
])

for dv in doc_vecs:
    print(1 - spatial.distance.cosine(qv, dv))


In [None]:
doc_vecs

In [None]:
# Save to ONNX

In [None]:
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForFeatureExtraction

model_checkpoint = "WhereIsAI/UAE-Large-V1"
save_directory = "onnx/"

In [4]:
# Load a model from transformers and export it to ONNX
ort_model = ORTModelForFeatureExtraction.from_pretrained(model_checkpoint, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Save the onnx model and tokenizer
ort_model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

('onnx/tokenizer_config.json',
 'onnx/special_tokens_map.json',
 'onnx/vocab.txt',
 'onnx/added_tokens.json',
 'onnx/tokenizer.json')

In [None]:
# Load ONNX - get embeddings

In [59]:
import pandas as pd
df_products = pd.read_csv("final_products.csv")
result_array = df_products.apply(lambda row: f"Title of Product: {row['title']}\nProduct Image Description: {row['llava_generated_image_caption']}\nProduct Category: {row['category_name']}", axis=1).tolist()
result_array[:2]

['Title of Product: Original Replacement Dell 130W Laptop Charger USB C Slim AC Power Adapter for Dell Xps 17,Precision 5550 5530 2in1,XPS 15 2in1 9575，DA130PM170 HA130PM170 0K00F5 K00F5 0M0H25 M0H25 T4V18\nProduct Image Description: A black power bank, which is a portable charger used to charge electronic devices.\nProduct Category: Laptop Accessories',
 'Title of Product: Griffin Elevator Stand for Laptops - Lift Your Laptop to a Comfortable Viewing Height, Space Grey\nProduct Image Description: A laptop computer sitting on a stand or a docking station.\nProduct Category: Laptop Accessories']

In [61]:
from transformers import AutoTokenizer
from onnxruntime import InferenceSession



tokenizer = AutoTokenizer.from_pretrained("WhereIsAI/UAE-Large-V1")

outputs = []
inputs = [
    'Title of Product: Original Replacement Dell 130W Laptop Charger USB C Slim AC Power Adapter for Dell Xps 17,Precision 5550 5530 2in1,XPS 15 2in1 9575，DA130PM170 HA130PM170 0K00F5 K00F5 0M0H25 M0H25 T4V18\nProduct Image Description: A black power bank, which is a portable charger used to charge electronic devices.\nProduct Category: Laptop Accessories',
    'Title of Product: Griffin Elevator Stand for Laptops - Lift Your Laptop to a Comfortable Viewing Height, Space Grey\nProduct Image Description: A laptop computer sitting on a stand or a docking station.\nProduct Category: Laptop Accessories'
]

session = InferenceSession("onnx/model.onnx")

query_string = tokenizer(str("a black thing"), return_tensors="np")
query_embedding = session.run(output_names=["last_hidden_state"], input_feed=dict(query_string))

for item in inputs:
    input_tokens = tokenizer(str(item), return_tensors="np")
    # print(item)
    outputs.append(session.run(output_names=["last_hidden_state"], input_feed=dict(input_tokens)))



In [None]:
item_number = 0
outputs[item_number][0][0][0]

In [None]:
query_embedding[0][0][0]

In [64]:
from scipy import spatial

for dv in outputs:
    print(1 - spatial.distance.cosine(query_embedding[0][0][0], dv[0][0][0]))
    # print(dv[0][0][0])

0.5139134
0.5181762
