In [1]:
from transformers import AutoTokenizer, AutoModel
from optimum.onnxruntime import ORTModelForFeatureExtraction
import torch
import torch.nn.functional as F
from tqdm.notebook import tqdm

In [3]:
# Tokenizer and ONNX Runtime model are both loaded from the same local
# directory; keeping the path in one name stops the two calls drifting apart.
model_dir = "./embedding_model_onnx"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = ORTModelForFeatureExtraction.from_pretrained(model_dir)

In [27]:
# Embed a single sentence: tokenize it, run the model, take a mask-weighted
# average of the model's first output along dim 1, then L2-normalise.
sentence = "I eat rice and fish"
encoded_input = tokenizer(sentence, padding=True, truncation=True, return_tensors='pt')
model_output = model(**encoded_input)
token_embeddings = model_output[0]  # presumably per-token vectors shaped (batch, seq, dim) — TODO confirm
attention_mask = encoded_input['attention_mask']

# Expand the mask to the embedding shape so positions where the mask is 0
# contribute nothing to the sum below.
input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.shape).float()
masked_sum = torch.sum(token_embeddings * input_mask_expanded, dim=1)
# The clamp keeps the divisor strictly positive even if a row's mask sums to 0.
mask_total = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
pooled_embeddings = masked_sum / mask_total

# squeeze(0) removes only the leading (batch) axis; a bare squeeze() would
# also collapse any other axis that happened to have size 1.
# `sentence_embeddings` is bound once, to the final plain-Python list, rather
# than first to a tensor and then to a list.
sentence_embeddings = F.normalize(pooled_embeddings, p=2, dim=1).squeeze(0).tolist()
sentence_embeddings

[0.04873916879296303,
 -0.0033005699515342712,
 0.04165685549378395,
 0.013030610047280788,
 0.00454717269167304,
 -0.01666136272251606,
 0.00416590366512537,
 0.008379707112908363,
 0.0415741503238678,
 -0.029379332438111305,
 0.022685937583446503,
 -0.06565552949905396,
 0.04701996594667435,
 -0.02536451816558838,
 0.013791220262646675,
 0.027440842241048813,
 -0.006900622509419918,
 0.026875711977481842,
 -0.040057942271232605,
 0.04048649221658707,
 0.02033095993101597,
 -0.010068368166685104,
 -0.015600644052028656,
 -0.02507505938410759,
 -0.015955261886119843,
 0.04058422893285751,
 -0.00997125543653965,
 0.06667803227901459,
 -0.04183228313922882,
 -0.010172998532652855,
 -0.024342017248272896,
 -0.008960658684372902,
 -0.0338151790201664,
 0.01572391390800476,
 -0.11281583458185196,
 0.015541750937700272,
 -0.021957436576485634,
 -0.03959431126713753,
 -0.0339304618537426,
 -0.02152763493359089,
 -0.01853657141327858,
 0.03966949135065079,
 -0.04733573645353317,
 0.04490980505