In [None]:
from sentence_transformers import SentenceTransformer
import os
import transformers
# print("CWD:", os.getcwd())
# print("Cache:", os.getenv("HF_HOME", "~/.cache/huggingface"))
# print("Transformers path:", transformers.__file__)


# Reference run: embed one sample passage with sentence-transformers so the
# result can be compared against the hand-built pipeline further down.
# The irregular spacing inside the text is part of the sample and is kept
# exactly as-is; altering it would change the resulting embedding.
input_texts = [
    "passage: As a general guideline, the CDC's average requirement of protein "
    "for women ages 19 to 70 i     s 46 grams per day. But, as you can see from "
    "this chart, you'll need to increase that if you're expecting or traini     ng "
    "for a marathon. Check out the chart below to see how much protein you should "
    "be eating each day.",
]

model = SentenceTransformer('intfloat/multilingual-e5-small')

# normalize_embeddings=True asks the library to return normalized vectors.
embeddings = model.encode(input_texts, normalize_embeddings=True)

# Shape first, then the raw values, one per line.
print(embeddings.shape, embeddings, sep="\n")



  from .autonotebook import tqdm as notebook_tqdm
W0604 12:12:14.369000 48724 site-packages/torch/distributed/elastic/multiprocessing/redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


CWD: /Users/arifdikici/Documents/Squirrel/LeafraSDK/sdk/utility/model_conversion
Cache: ~/.cache/huggingface
Model location:
Transformers path: /Users/arifdikici/.pyenv/versions/3.12.10/lib/python3.12/site-packages/transformers/__init__.py
{'prompts': {}, 'default_prompt_name': None, '_similarity_fn_name': None, 'trust_remote_code': False, 'truncate_dim': None, 'model_card_data': SentenceTransformerModelCardData(language=[], license=None, model_name=None, model_id=None, train_datasets=[], eval_datasets=[], task_name='semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more', tags=['sentence-transformers', 'sentence-similarity', 'feature-extraction'], generate_widget_examples='deprecated', base_model='intfloat/multilingual-e5-small', base_model_revision='c007d7ef6fd86656326059b28395a7a03a7c5846', non_default_hyperparameters={}, all_hyperparameters={}, eval_results_dict={}, training_logs=[], widget=[], predict_example=None, label_example_

In [None]:
import torch
from torch.fx import symbolic_trace
import torch.nn as nn
import executorch.exir as exir
from executorch.extension.pybindings.portable_lib import _load_for_executorch
from transformers import AutoModel, AutoTokenizer

class E5EmbeddingModel(nn.Module):
    """Transformer encoder followed by masked mean pooling and L2 normalisation.

    Wraps a Hugging Face ``AutoModel`` together with its tokenizer so that a
    single ``forward`` call turns token ids into one unit-length vector per
    input row.
    """

    def __init__(self, model_name):
        """Load the pretrained encoder and tokenizer identified by ``model_name``."""
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def forward(self, input_ids, attention_mask):
        """Return the L2-normalised, mask-weighted mean of the last hidden state.

        Args:
            input_ids: token id tensor passed straight to the encoder.
            attention_mask: tensor with non-zero entries for real tokens and
                zeros for padding; padded positions do not contribute to the
                mean.

        Returns:
            Tensor with the sequence dimension (dim 1) reduced away and each
            row scaled to unit L2 norm.
        """
        hidden = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
        ).last_hidden_state

        # Broadcast the mask over the hidden dimension so it can weight tokens.
        weights = attention_mask.unsqueeze(-1).expand_as(hidden).to(torch.float32)

        weighted_sum = (hidden * weights).sum(dim=1)
        # Clamp so that a row with an all-zero mask does not divide by zero.
        token_count = torch.clamp(weights.sum(dim=1), min=1e-9)

        mean_pooled = weighted_sum / token_count
        return torch.nn.functional.normalize(mean_pooled, p=2, dim=1)

# ---------------------------------------------------------------------------
# Eager PyTorch reference -> torch.export -> ExecuTorch (.pte) -> parity check
# ---------------------------------------------------------------------------
MODEL_NAME = 'intfloat/multilingual-e5-small'
MAX_SEQ_LEN = 512            # tokenizer pads/truncates every input to this length
PTE_PATH = "e5_complete.pte"  # written by the export step, read back by the runtime


def _print_input_summary(encoded):
    """Print dtype and shape of the tokenized tensors handed to the model."""
    print(encoded['input_ids'].dtype)         # torch.int64 in the recorded run
    print(encoded['attention_mask'].dtype)    # torch.int64 in the recorded run
    print(encoded['input_ids'].shape)         # torch.Size([1, 512]) because padding="max_length"
    print(encoded['attention_mask'].shape)    # same shape as input_ids


# 1. Create the complete model (encoder + pooling + normalisation) in eval mode.
complete_model = E5EmbeddingModel(MODEL_NAME)
complete_model.eval()

# 2. Sample text. The irregular spacing is part of the sample and is kept
#    exactly as-is; altering it would change the resulting embedding.
input_texts = [
    "passage: As a general guideline, the CDC's average requirement of protein "
    "for women ages 19 to 70 i     s 46 grams per day. But, as you can see from "
    "this chart, you'll need to increase that if you're expecting or traini     ng "
    "for a marathon. Check out the chart below to see how much protein you should "
    "be eating each day.",
]

# 3. Tokenize the text. padding="max_length" fixes the sequence length to
#    MAX_SEQ_LEN, which is also the shape the model is exported with below.
inputs = complete_model.tokenizer(
    input_texts,
    return_tensors="pt",
    padding="max_length",
    truncation=True,
    max_length=MAX_SEQ_LEN,
)
# The tokenizer returns a mapping that contains input_ids and attention_mask.
print("Tokenizer's output:")
for key, value in inputs.items():
    print(f"{key}: {value.shape}\n")

print("Running the Pytorch Embeddings Neural Network program...")
print("\n\n")
_print_input_summary(inputs)

# 4. Generate the reference embedding with the eager PyTorch model.
with torch.no_grad():
    embedding = complete_model(inputs["input_ids"], inputs["attention_mask"])

# 5. Print the reference embedding.
print("PYTORCH: Embedding shape:", embedding.shape)
print("PYTORCH: Embedding:", embedding)

# 6. Export to ExecuTorch. No dynamic shapes are passed, so the exported
#    program is specialised to the example input shapes printed here; inputs
#    at runtime must be padded to the same length.
print("Export-time input shape:", inputs['input_ids'].shape)
print("Export-time attention shape:", inputs['attention_mask'].shape)
with torch.no_grad():
    exported_program = torch.export.export(
        complete_model,
        (inputs['input_ids'], inputs['attention_mask'])
    )

# Print the exported program's graph
# print("Exported Program Graph:")
# print(exported_program.graph_module.graph)

edge_program = exir.to_edge(exported_program)
executorch_program = edge_program.to_executorch()

with open(PTE_PATH, "wb") as f:
    executorch_program.write_to_file(f)

print("Exported to ExecuTorch successfully!")
print("Running the Executorch Neural Network program...")
print("\n\n")
_print_input_summary(inputs)

# 7. Load the serialized program and run it on the same inputs.
#    NOTE: `model` is rebound here to the ExecuTorch module; any earlier
#    object of the same name in the kernel is no longer reachable through it.
model = _load_for_executorch(PTE_PATH)

with torch.no_grad():
    # forward() returns a sequence of outputs; the embedding is the first one.
    embedding_et = model.forward((inputs['input_ids'], inputs['attention_mask']))[0]

# 8. Print the ExecuTorch embedding (previously mislabelled as "PYTORCH").
print("EXECUTORCH: Embedding shape:", embedding_et.shape)
print("EXECUTORCH: Embedding:", embedding_et)

# 9. Verify numerically that the exported program reproduces the eager result
#    instead of relying on eyeballing two printed tensors.
max_abs_diff = (embedding_et - embedding).abs().max().item()
print("Max abs difference (ExecuTorch vs PyTorch):", max_abs_diff)
torch.testing.assert_close(embedding_et, embedding, rtol=1e-3, atol=1e-4)



  import pkg_resources


Tokenizer's output:
input_ids: torch.Size([1, 512])

attention_mask: torch.Size([1, 512])

Running the Pytorch Embeddings Neural Network program...



torch.int64
torch.int64
torch.Size([1, 512])
torch.Size([1, 512])
PYTORCH: Embedding shape: torch.Size([1, 384])
PYTORCH: Embedding: tensor([[-0.0277, -0.0381, -0.0398, -0.0576,  0.0945, -0.0044, -0.0012,  0.0480,
          0.1129, -0.0282, -0.0084,  0.0028,  0.0521, -0.0479, -0.0644,  0.0887,
          0.0600, -0.0555,  0.0106, -0.1062, -0.0007, -0.0265, -0.0085,  0.0753,
          0.0632,  0.0184,  0.0418,  0.0191,  0.0122, -0.0426, -0.0527, -0.0450,
          0.0724, -0.0334,  0.0465, -0.0118, -0.0824, -0.0484,  0.0593, -0.0503,
          0.0183,  0.0564,  0.0120,  0.0451,  0.0249,  0.0746, -0.0640,  0.0574,
          0.0025, -0.0241, -0.0417,  0.0636,  0.0223,  0.0485,  0.0758, -0.0490,
         -0.0148, -0.0472, -0.0805, -0.0555,  0.0634, -0.0657, -0.0106,  0.0055,
          0.0594,  0.0679, -0.0216,  0.0235, -0.0662, -0.0533, -0.05

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[program.cpp:136] InternalConsistency verification requested but not available


Exported to ExecuTorch successfully!
Running the Executorch Neural Network program...



torch.int64
torch.int64
torch.Size([1, 512])
torch.Size([1, 512])
EXECUTORCH: Embedding shape: torch.Size([1, 384])
PYTORCH: Embedding: tensor([[-0.0277, -0.0381, -0.0398, -0.0576,  0.0945, -0.0044, -0.0012,  0.0480,
          0.1129, -0.0282, -0.0084,  0.0028,  0.0521, -0.0479, -0.0644,  0.0887,
          0.0600, -0.0555,  0.0106, -0.1062, -0.0007, -0.0265, -0.0085,  0.0753,
          0.0632,  0.0184,  0.0418,  0.0191,  0.0122, -0.0426, -0.0527, -0.0450,
          0.0724, -0.0334,  0.0465, -0.0118, -0.0824, -0.0484,  0.0593, -0.0503,
          0.0183,  0.0564,  0.0120,  0.0451,  0.0249,  0.0746, -0.0640,  0.0574,
          0.0025, -0.0241, -0.0417,  0.0636,  0.0223,  0.0485,  0.0758, -0.0490,
         -0.0148, -0.0472, -0.0805, -0.0555,  0.0634, -0.0657, -0.0106,  0.0055,
          0.0594,  0.0679, -0.0216,  0.0235, -0.0662, -0.0533, -0.0586,  0.0497,
          0.0275, -0.0397,  0.0723,  0.0005,  

In [10]:
import sys

# Show which interpreter the kernel is running and where it looks for modules;
# useful when the kernel and the shell disagree about installed packages.
for label, value in (
    ("Python executable:", sys.executable),
    ("sys.path:", sys.path),
):
    print(label, value)

Python executable: /Users/arifdikici/.pyenv/versions/3.12.10/bin/python
sys.path: ['/Users/arifdikici/.pyenv/versions/3.12.10/lib/python312.zip', '/Users/arifdikici/.pyenv/versions/3.12.10/lib/python3.12', '/Users/arifdikici/.pyenv/versions/3.12.10/lib/python3.12/lib-dynload', '', '/Users/arifdikici/.pyenv/versions/3.12.10/lib/python3.12/site-packages', '/var/folders/2v/7m66d7dj3q71w4mx7vh6xgf40000gn/T/tmp82gshfo_', '/Users/arifdikici/.pyenv/versions/3.12.10/lib/python3.12/site-packages/setuptools/_vendor']
