In [None]:
#REFERENCE EMBEDDING OUTPUT FROM ORIGINAL MODEL EXECUTION USING TRANSFORMERS LIBRARY
import torch
from transformers import AutoModel, AutoTokenizer
import torch.nn.functional as F
import numpy as np
# AD NOTE: I had to drop sentence_transformers because of a Keras incompatibility,
# but I was able to get the same output as sentence-transformers by using the mean pooling method.
# Print floats with 8 decimals, without scientific notation, on wide lines,
# so the embedding vector printed at the end is easy to compare by eye.
torch.set_printoptions(sci_mode=False, precision=8, threshold=1000, linewidth=200)
np.set_printoptions(suppress=True, precision=8, threshold=1000, linewidth=200)

# Checkpoint under test and its matching tokenizer; eval() puts the module
# in evaluation mode before inference.
model_name = 'intfloat/multilingual-e5-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()

# NOTE(review): the run of spaces inside "traini     ng" looks like a paste
# artifact. It is left untouched on purpose: the output recorded for this cell
# presumably corresponds to this exact string -- confirm before cleaning it up.
input_texts = [
    "passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or traini     ng for a marathon. Check out the chart below to see how much protein you should be eating each day.",
]

# Turn the text into PyTorch tensors, truncated to at most 512 tokens and
# padded so every row of the batch has the same length.
inputs = tokenizer(
    input_texts,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors='pt',
)

# Inference only: no autograd bookkeeping is needed.
with torch.no_grad():
    outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    attention_mask = inputs['attention_mask']

    # Attention-masked mean pooling: broadcast the mask to the shape of the
    # token embeddings so masked-out positions contribute zero to the sum.
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    masked_sum = (token_embeddings * input_mask_expanded).sum(1)
    # The clamp guards the division when a row has no unmasked tokens.
    token_counts = input_mask_expanded.sum(1).clamp(min=1e-9)

    # L2-normalise each pooled vector.
    embeddings = F.normalize(masked_sum / token_counts, p=2, dim=1)

# NumPy array, i.e. the same output format as sentence-transformers.
embeddings_np = embeddings.numpy()

print(embeddings_np.shape)
print(embeddings_np)



(1, 384)
[[-0.02603657 -0.04028142 -0.0407016  -0.0567059   0.09808024 -0.00689326  0.00147932  0.04843688  0.11323299 -0.02740479 -0.00866384  0.0041722   0.05230619 -0.04768711 -0.06603136  0.08920389
   0.06334062 -0.0531108   0.00967995 -0.10728    -0.00384981 -0.02598385 -0.00927944  0.07551413  0.06361946  0.01656309  0.04170515  0.01730528  0.01455702 -0.04344152 -0.05670433 -0.04429421
   0.07144866 -0.03361619  0.04803946 -0.00959462 -0.08393569 -0.04850754  0.05855654 -0.05139397  0.01839359  0.05547391  0.00980077  0.04608278  0.02681234  0.07292694 -0.06347434  0.05774028
   0.00521451 -0.0223504  -0.04456337  0.06401621  0.0201432   0.04503602  0.07350688 -0.04566628 -0.01399929 -0.04260228 -0.08010492 -0.05667777  0.06421689 -0.0662206  -0.01281161  0.00306563
   0.06230233  0.06887282 -0.02185547  0.02037258 -0.06924744 -0.05492327 -0.05856651  0.04827426  0.02585801 -0.04206208  0.07226781 -0.00066223  0.02808696 -0.04768767 -0.02578073 -0.0346549
  -0.05184536 -0.02213

In [None]:
#TFLITE CONVERSION AND EXECUTION OF CONVERTED MODEL

import torch
from torch.fx import symbolic_trace
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
import numpy as np
# Display settings common to torch and numpy: 8 decimals, 200-character lines,
# and summarisation only for arrays with more than 1000 elements.
_PRINT_OPTS = dict(precision=8, linewidth=200, threshold=1000)
torch.set_printoptions(sci_mode=False, **_PRINT_OPTS)  # fixed-point, no scientific notation
np.set_printoptions(suppress=True, **_PRINT_OPTS)      # fixed-point, no scientific notation
class E5EmbeddingModel(nn.Module):
    """Encoder plus attention-masked mean pooling plus L2 normalisation in one module.

    Wrapping the pooling and normalisation inside ``forward`` means a single
    call (or a single exported graph) maps token ids directly to the final
    sentence embedding.
    """

    def __init__(self, model_name):
        """Load the pretrained encoder and its tokenizer for ``model_name``."""
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        # Kept on the module for convenience; forward() itself never uses it.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def forward(self, input_ids, attention_mask):
        """Return one L2-normalised, mean-pooled embedding per input row.

        Args:
            input_ids: token ids, passed through to the wrapped encoder.
            attention_mask: mask with one entry per token; entries equal to 0
                are excluded from the mean.

        Returns:
            Tensor obtained by averaging ``last_hidden_state`` over dim 1
            (weighted by the mask) and normalising along dim 1.
        """
        hidden = self.model(
            input_ids=input_ids, attention_mask=attention_mask
        ).last_hidden_state

        # Broadcast the mask to the shape of the hidden states so that
        # masked-out positions contribute zero to the sum.
        weights = attention_mask.unsqueeze(-1).expand(hidden.size()).float()
        weighted_sum = (hidden * weights).sum(dim=1)

        # The clamp guards the division when a row has no unmasked tokens.
        token_count = torch.clamp(weights.sum(dim=1), min=1e-9)

        mean_vec = weighted_sum / token_count
        return torch.nn.functional.normalize(mean_vec, p=2, dim=1)

# Create the complete model (encoder + pooling + normalisation) in evaluation mode
complete_model = E5EmbeddingModel('intfloat/multilingual-e5-small')
complete_model.eval()


# Keep this string byte-identical to the one used in the reference
# (transformers-only) cell, including its internal run of spaces: any
# difference in the text changes the tokens and makes the two embeddings
# incomparable.
input_texts = [
    "passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or traini     ng for a marathon. Check out the chart below to see how much protein you should be eating each day.",
]

# 3. Tokenize the text
# padding="max_length" always pads to 512 tokens -- presumably so the
# export/conversion step sees one fixed input shape (TODO confirm).
inputs = complete_model.tokenizer(
    input_texts,
    return_tensors="pt",
    padding="max_length",
    truncation=True,
    max_length=512
)
# tokenizer outputs a dictionary with input_ids and attention_mask
print("Tokenizer's output:")
for key, value in inputs.items():
    print(f"{key}: {value.shape}\n")


print ("Running the Pytorch Embeddings Neural Network program...")
print("\n\n")

print(inputs['input_ids'].dtype)         # Should be torch.int64
print(inputs['attention_mask'].dtype)    # Should be torch.int64
print(inputs['input_ids'].shape)         # torch.Size([1, 512]) because of padding="max_length"
print(inputs['attention_mask'].shape)    # Same

# 4. Generate embedding (inference only, so no autograd bookkeeping)
with torch.no_grad():
    embedding = complete_model(inputs["input_ids"], inputs["attention_mask"])


# 5. Print or use the embedding
print("PYTORCH: Embedding shape:", embedding.shape)  # shape: (1, hidden_size)
print("PYTORCH: Embedding:", embedding)

print("Export-time input shape:", inputs['input_ids'].shape)
print("Export-time attention shape:", inputs['attention_mask'].shape)
# Export to ExecuTorch -- currently disabled. The whole call, including its
# closing parenthesis, must stay commented out together; a stray ")" makes
# the cell fail to parse.
# with torch.no_grad():
#     exported_program = torch.export.export(
#         complete_model,
#         (inputs['input_ids'], inputs['attention_mask'])
#     )

# Print the exported program's graph
# print("Exported Program Graph:")
# print(exported_program.graph_module.graph)

# TODO: Convert to TFLite
# TODO: Run the TFLite model and assign its output to `embedding_et`.
# Until both steps exist, `embedding_et` stays None so the cell runs end to end
# and never reports a conversion that did not happen.
embedding_et = None

if embedding_et is None:
    print("Converted-model run skipped: TFLite conversion/execution is still TODO.")
else:
    print ("Exported to TFLite successfully!")
    print ("Running the TFLite Neural Network program...")
    print("\n\n")

    print(inputs['input_ids'].dtype)         # Should be torch.int64
    print(inputs['attention_mask'].dtype)    # Should be torch.int64
    print(inputs['input_ids'].shape)         # torch.Size([1, 512]) because of padding="max_length"
    print(inputs['attention_mask'].shape)    # Same

    print("EXECUTORCH: Embedding shape:", embedding_et.shape)  # shape: (1, hidden_size)
    print("EXECUTORCH: Embedding:", embedding_et)

