In [2]:
from sentence_transformers import SentenceTransformer
import os
import transformers
# print("CWD:", os.getcwd())
# print("Cache:", os.getenv("HF_HOME", "~/.cache/huggingface"))
# print("Transformers path:", transformers.__file__)


# Reference run: embed one sample passage with sentence-transformers and print
# the resulting array's shape followed by its values.
# NOTE(review): the passage contains runs of spaces ("i     s", "traini     ng")
# that look like copy/paste artefacts; the text is kept verbatim here.
input_texts = [
    "passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 i     s 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or traini     ng for a marathon. Check out the chart below to see how much protein you should be eating each day.",
]

model = SentenceTransformer('intfloat/multilingual-e5-small')

# normalize_embeddings=True is passed through to encode() unchanged.
embeddings = model.encode(input_texts, normalize_embeddings=True)
print(embeddings.shape, embeddings, sep="\n")



ModuleNotFoundError: No module named 'sentence_transformers'

In [None]:
import torch
from torch.fx import symbolic_trace
import torch.nn as nn
import executorch.exir as exir
from executorch.extension.pybindings.portable_lib import _load_for_executorch
from transformers import AutoModel, AutoTokenizer

class E5EmbeddingModel(nn.Module):
    """Pretrained encoder plus masked mean pooling and L2 normalisation.

    The tokenizer is loaded next to the encoder and kept as a plain attribute
    so callers can tokenize text with the matching vocabulary.
    """

    def __init__(self, model_name):
        """Load the encoder and its tokenizer for ``model_name``."""
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def forward(self, input_ids, attention_mask):
        """Return one L2-normalised, mask-weighted mean embedding per input row.

        Args:
            input_ids: Token id tensor, forwarded to the encoder unchanged.
            attention_mask: Mask tensor; presumably 1 for real tokens and 0
                for padding (TODO confirm against the tokenizer in use).

        Returns:
            The encoder's ``last_hidden_state`` averaged over dim 1 using the
            mask as weights, then normalised with p=2 along dim 1.
        """
        hidden = self.model(
            input_ids=input_ids, attention_mask=attention_mask
        ).last_hidden_state
        # Give the mask a trailing axis and expand it to the hidden-state size
        # so it can weight every feature of every token.
        weights = attention_mask.unsqueeze(-1).expand(hidden.size()).float()
        weighted_total = (hidden * weights).sum(dim=1)
        # The clamp keeps the divisor non-zero when a row's mask is all zeros.
        token_count = weights.sum(dim=1).clamp(min=1e-9)
        return torch.nn.functional.normalize(weighted_total / token_count, p=2, dim=1)

# Build the wrapper around the pretrained checkpoint and switch it to eval mode.
complete_model = E5EmbeddingModel('intfloat/multilingual-e5-small')
complete_model.eval()


# Sample passage to embed.
# NOTE(review): the runs of spaces inside the text look like copy/paste
# artefacts; the string is kept verbatim here.
input_texts = [
    "passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 i     s 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or traini     ng for a marathon. Check out the chart below to see how much protein you should be eating each day.",
]

# Tokenize. padding="max_length" together with max_length=512 asks for
# fixed-length tensors, so the shapes printed below are expected to be
# (1, 512) rather than the length of the text.
inputs = complete_model.tokenizer(
    input_texts,
    max_length=512,
    truncation=True,
    padding="max_length",
    return_tensors="pt",
)

# The tokenizer returns a mapping of field name -> tensor.
print("Tokenizer's output:")
for name, tensor in inputs.items():
    print(f"{name}: {tensor.shape}\n")


print ("Running the Pytorch Embeddings Neural Network program...")
print("\n\n")

# Print both dtypes first, then both shapes.
for attribute in ("dtype", "shape"):
    for field in ("input_ids", "attention_mask"):
        print(getattr(inputs[field], attribute))

# Eager PyTorch forward pass without autograd bookkeeping.
with torch.no_grad():
    embedding = complete_model(inputs["input_ids"], inputs["attention_mask"])


# Show the pooled embedding produced by eager PyTorch.
print("PYTORCH: Embedding shape:", embedding.shape)
print("PYTORCH: Embedding:", embedding)

# These are the shapes that the export step further down is given as examples.
print("Export-time input shape:", inputs['input_ids'].shape)
print("Export-time attention shape:", inputs['attention_mask'].shape)
# Export the wrapper with the tokenized tensors as example inputs, lower it to
# the edge dialect, convert it to an ExecuTorch program and write it to disk.
with torch.no_grad():
    exported_program = torch.export.export(
        complete_model,
        tuple(inputs[field] for field in ("input_ids", "attention_mask")),
    )

# Optional debugging aid: dump the exported graph.
# print("Exported Program Graph:")
# print(exported_program.graph_module.graph)

edge_program = exir.to_edge(exported_program)
executorch_program = edge_program.to_executorch()


# Serialise the program next to the notebook.
with open("e5_complete.pte", "wb") as pte_file:
    executorch_program.write_to_file(pte_file)


print ("Exported to ExecuTorch successfully!")
print ("Running the Executorch Neural Network program...")
print("\n\n")

# Same tensors that were used for the PyTorch run and for the export.
print(inputs['input_ids'].dtype)
print(inputs['attention_mask'].dtype)
print(inputs['input_ids'].shape)
print(inputs['attention_mask'].shape)



# Load the serialised program into the ExecuTorch Python runtime.
model = _load_for_executorch("e5_complete.pte")

with torch.no_grad():
    # forward() takes the inputs as one tuple; the first returned element is
    # the embedding.
    embedding_et = model.forward((inputs['input_ids'], inputs['attention_mask']))[0]

# Report the ExecuTorch result under its own label (it used to be printed as
# "PYTORCH", which made the two runs indistinguishable in the output).
print("EXECUTORCH: Embedding shape:", embedding_et.shape)
print("EXECUTORCH: Embedding:", embedding_et)

# Quantify agreement with the eager PyTorch embedding computed earlier in this cell.
print("Max abs difference vs PYTORCH:", (embedding - embedding_et).abs().max().item())



In [None]:
import torch
from torch.fx import symbolic_trace
import torch.nn as nn
import executorch.exir as exir
from executorch.extension.pybindings.portable_lib import _load_for_executorch
from transformers import AutoModel, AutoTokenizer
import sys 
class E5EmbeddingModel(nn.Module):
    """Pretrained encoder plus masked mean pooling and L2 normalisation.

    The tokenizer is loaded next to the encoder and kept as a plain attribute
    so callers can tokenize text with the matching vocabulary.
    """

    def __init__(self, model_name):
        """Load the encoder and its tokenizer for ``model_name``."""
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def forward(self, input_ids, attention_mask):
        """Return one L2-normalised, mask-weighted mean embedding per input row.

        Args:
            input_ids: Token id tensor, forwarded to the encoder unchanged.
            attention_mask: Mask tensor; presumably 1 for real tokens and 0
                for padding (TODO confirm against the tokenizer in use).

        Returns:
            The encoder's ``last_hidden_state`` averaged over dim 1 using the
            mask as weights, then normalised with p=2 along dim 1.
        """
        hidden = self.model(
            input_ids=input_ids, attention_mask=attention_mask
        ).last_hidden_state
        # Give the mask a trailing axis and expand it to the hidden-state size
        # so it can weight every feature of every token.
        weights = attention_mask.unsqueeze(-1).expand(hidden.size()).float()
        weighted_total = (hidden * weights).sum(dim=1)
        # The clamp keeps the divisor non-zero when a row's mask is all zeros.
        token_count = weights.sum(dim=1).clamp(min=1e-9)
        return torch.nn.functional.normalize(weighted_total / token_count, p=2, dim=1)

# Build the wrapper and put it in eval mode before exporting.
complete_model = E5EmbeddingModel('intfloat/multilingual-e5-small')
complete_model.eval()

# A short passage is enough here: only the exported graph is inspected.
input_texts = ["passage: test text for analysis"]

# Fixed-length tokenization (padded / truncated to 512 tokens).
inputs = complete_model.tokenizer(
    input_texts,
    max_length=512,
    truncation=True,
    padding="max_length",
    return_tensors="pt",
)

print("=== ANALYZING OPERATORS NEEDED ===")

# Export with the tokenized tensors as example inputs; the resulting program
# is what the operator listings below walk over.
with torch.no_grad():
    exported_program = torch.export.export(
        complete_model,
        tuple(inputs[field] for field in ("input_ids", "attention_mask")),
    )

def _call_function_targets(graph_module):
    """Return the set of str(node.target) for every call_function node."""
    return {
        str(node.target)
        for node in graph_module.graph.nodes
        if node.op == 'call_function'
    }


print("\n=== ALL EXPORTED PROGRAM OPERATORS ===")
# Unique operators referenced by the exported program, listed alphabetically.
ops_used = _call_function_targets(exported_program.graph_module)
sorted_ops = sorted(ops_used)
for i, op in enumerate(sorted_ops, start=1):
    print(f"{i:2d}. {op}")

print(f"\nTotal unique operators: {len(ops_used)}")

print("\n=== CONVERTING TO EDGE ===")
edge_program = exir.to_edge(exported_program)

print("\n=== ALL EDGE PROGRAM OPERATORS ===")
# Same listing for the edge-dialect program.
edge_ops_used = _call_function_targets(edge_program.exported_program().graph_module)
sorted_edge_ops = sorted(edge_ops_used)
for i, op in enumerate(sorted_edge_ops, start=1):
    print(f"{i:2d}. {op}")
    # Flush after each line so the listing appears incrementally.
    sys.stdout.flush()

print(f"\nTotal unique edge operators: {len(edge_ops_used)}")

print("\n=== CHECKING FOR PROBLEMATIC OPERATORS ===")
# Name fragments of operators that might not be supported by the portable
# kernels; an edge operator is flagged when its string contains a fragment.
problematic_ops = [
    'aten.scaled_dot_product_attention',
    'aten.gelu',
    'aten.silu',
    'aten.baddbmm',
    'aten._native_batch_norm_legit',
    'aten.native_batch_norm',
    'aten.group_norm',
    'aten.instance_norm',
]

# One entry per (operator, fragment) hit, in operator order then fragment order.
found_problematic = [
    edge_op
    for edge_op in sorted_edge_ops
    for fragment in problematic_ops
    if fragment in edge_op
]

if not found_problematic:
    print("No obviously problematic operators found")
else:
    print("Found potentially problematic operators:")
    for flagged in found_problematic:
        print(f"  - {flagged}")

print("\n=== CONVERTING TO EXECUTORCH ===")
# Each stage (convert -> save -> load -> run) has its own try/except so a
# failure is reported against the stage that actually failed. Previously the
# file write shared a handler with the conversion, so a save error was
# reported as "to_executorch() failed".

# Stage 1: convert the edge program.
executorch_program = None
try:
    executorch_program = edge_program.to_executorch()
    print("SUCCESS: Model converted to ExecuTorch")
except Exception as e:
    print(f"FAILED: to_executorch() failed: {e}")

# Stage 2: write the program to disk (only if conversion worked).
model_saved = False
if executorch_program is not None:
    try:
        with open("analysis_test.pte", "wb") as f:
            executorch_program.write_to_file(f)
        model_saved = True
        print("SUCCESS: Model saved to file")
    except Exception as e:
        print(f"FAILED: Model saving failed: {e}")

# Stage 3: load the saved file with the ExecuTorch Python runtime.
model = None
if model_saved:
    try:
        model = _load_for_executorch("analysis_test.pte")
        print("SUCCESS: Model loaded in ExecuTorch runtime")
    except Exception as e:
        print(f"FAILED: Model loading failed: {e}")

# Stage 4: run the loaded program on the tokenized example.
if model is not None:
    try:
        with torch.no_grad():
            result = model.forward((inputs['input_ids'], inputs['attention_mask']))
        print("SUCCESS: Model executed successfully")
        print(f"Output shape: {result[0].shape}")
    except Exception as e:
        print(f"FAILED: Model execution failed: {e}")

print("\n=== ANALYSIS COMPLETE ===")

In [None]:
#!/usr/bin/env python3
"""
Check what operators are available in Apple prebuilt ExecuTorch libraries
"""

import subprocess
import os
import sys

def run_command(cmd):
    """Execute ``cmd`` through the shell and report what happened.

    Returns:
        tuple: ``(stdout, stderr, returncode)`` with both streams decoded as
        text. If launching the command raises, ``("", <error text>, 1)`` is
        returned instead of propagating the exception.
    """
    try:
        completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    except Exception as exc:
        return "", str(exc), 1
    return completed.stdout, completed.stderr, completed.returncode

def check_library_exists(lib_path):
    """Return True when ``lib_path`` points at an existing regular file.

    A directory with the same name is rejected: the callers hand the path to
    tools that expect a library file, and with a directory those tools would
    fail silently after a misleading "found" message.
    """
    return os.path.isfile(lib_path)

def analyze_library_symbols(lib_path, lib_name):
    """Print a symbol report for one library and return the operator fragments found.

    The report contains the `file` description, the number of lines printed by
    `nm`, which of a fixed list of operator name fragments occur in the `nm`
    output (case-insensitive substring match), a sample of lines containing
    ``aten::`` and the first archive members listed by `ar -t`.

    Args:
        lib_path: Path of the library file to inspect.
        lib_name: Label used in the report header.

    Returns:
        list[str]: The fragments that matched at least one `nm` line. An empty
        list is returned when the library is missing, so callers always
        receive a list.
    """
    import shlex  # local import: only needed to quote the path for the shell

    print(f"\n=== ANALYZING {lib_name} ===")
    print(f"Path: {lib_path}")

    if not check_library_exists(lib_path):
        print(f"❌ Library not found: {lib_path}")
        return []

    print("✅ Library found")

    # Quote once so paths with spaces or quote characters reach the shell intact.
    quoted_path = shlex.quote(lib_path)

    # Get file info
    stdout, _, code = run_command(f"file {quoted_path}")
    if code == 0:
        print(f"File type: {stdout.strip()}")

    # Dump the symbol table a single time and filter it in Python instead of
    # re-running `nm | grep` for every check.
    nm_stdout, _, _ = run_command(f"nm {quoted_path} 2>/dev/null")
    symbol_lines = nm_stdout.splitlines()
    print(f"Total symbols: {len(symbol_lines)}")

    # Lower-case name fragments of the operators the model needs.
    # 'to_dim_order_copy' is included so that every operator listed by
    # check_specific_operators() has a fragment that can match it.
    operators_to_check = [
        'gelu',
        'layer_norm',
        'bmm',
        'softmax',
        'addmm',
        'embedding',
        'to_dim_order_copy',
    ]

    print("\nChecking for required operators:")
    found_operators = []

    for op in operators_to_check:
        matches = [line for line in symbol_lines if op in line.lower()]
        if matches:
            found_operators.append(op)
            print(f"  ✅ {op}: FOUND")
            # Show first few matches
            for line in matches[:3]:
                print(f"    {line}")
        else:
            print(f"  ❌ {op}: NOT FOUND")

    # Check for aten namespace symbols.
    # NOTE(review): nm is run without demangling, so C++ symbols may never
    # contain a literal 'aten::' -- confirm whether demangled output was intended.
    aten_lines = [line for line in symbol_lines if 'aten::' in line][:5]
    if aten_lines:
        print("\nSample aten:: symbols found:")
        for line in aten_lines:
            print(f"  {line}")
    else:
        print("\n❌ No aten:: symbols found")

    # List object files in the archive
    stdout, _, code = run_command(f"ar -t {quoted_path} | head -10")
    if code == 0 and stdout.strip():
        print("\nSample object files in archive:")
        for line in stdout.strip().split('\n'):
            print(f"  {line}")

    return found_operators

def check_specific_operators():
    """Print a numbered list of the operators the model needs and return it.

    Returns:
        list[str]: The required operator names, in the order they are printed.
    """
    banner = "=" * 60
    required_ops = [
        "aten.gelu.default",
        "aten.native_layer_norm.default",
        "aten.bmm.default",
        "aten._softmax.default",
        "aten.addmm.default",
        "aten.embedding.default",
        "dim_order_ops._to_dim_order_copy.default",
    ]

    print(f"\n{banner}")
    print("OPERATORS REQUIRED BY YOUR MODEL:")
    print(banner)

    for position, op_name in enumerate(required_ops, start=1):
        print(f"{position:2d}. {op_name}")

    return required_ops

def main():
    """Analyze the prebuilt Apple libraries and summarise operator coverage.

    Runs analyze_library_symbols() on each configured library, prints the
    required operator list, then reports which required operators have no
    matching fragment among those found. Coverage is a case-insensitive
    substring test of each found fragment against each required name, so a
    required operator for which no fragment is searched is always reported
    as missing. Finally reports whether `nm` and `ar` are on the PATH.
    """
    print("🔍 EXECUTORCH APPLE LIBRARY ANALYZER")
    print("="*50)

    # Library label -> path, relative to the current working directory.
    libraries = {
        "ExecuTorch Main": "../../corecpp/third_party/prebuilt/executorch/apple/executorch.xcframework/macos-arm64/libexecutorch_macos.a",
        "Portable Kernels": "../../corecpp/third_party/prebuilt/executorch/apple/kernels_portable.xcframework/macos-arm64/libkernels_portable_macos.a",
        "Optimized Kernels": "../../corecpp/third_party/prebuilt/executorch/apple/kernels_optimized.xcframework/macos-arm64/libkernels_optimized_macos.a"
    }

    all_found_operators = []

    # Analyze each library; a falsy result (nothing found / library missing)
    # contributes nothing.
    for lib_name, lib_path in libraries.items():
        found_ops = analyze_library_symbols(lib_path, lib_name)
        if found_ops:
            all_found_operators.extend(found_ops)

    # Show required operators
    required_ops = check_specific_operators()

    # Summary
    print(f"\n{'='*60}")
    print("SUMMARY:")
    print(f"{'='*60}")

    # Sorted so the summary is printed in the same order on every run
    # (iterating a set of strings directly is not order-stable across runs).
    unique_found = sorted(set(all_found_operators))
    print(f"✅ Found operator types: {len(unique_found)}")
    for op in unique_found:
        print(f"   - {op}")

    print(f"\n❓ Required operator types: {len(required_ops)}")

    # A required operator is covered when any found fragment occurs in its name.
    missing_ops = [
        req_op
        for req_op in required_ops
        if not any(found_op.lower() in req_op.lower() for found_op in unique_found)
    ]

    if missing_ops:
        print("\n❌ LIKELY MISSING OPERATORS:")
        for op in missing_ops:
            print(f"   - {op}")
        print("\n💡 RECOMMENDATION: Build ExecuTorch from source to get full operator support")
    else:
        print("\n✅ All required operator types appear to be available!")
        print("💡 The error might be due to a different issue (memory, model format, etc.)")

    # Additional checks
    print(f"\n{'='*60}")
    print("ADDITIONAL DIAGNOSTICS:")
    print(f"{'='*60}")

    # Check if nm command is available
    _, _, code = run_command("which nm")
    if code != 0:
        print("❌ 'nm' command not found. Install Xcode Command Line Tools:")
        print("   xcode-select --install")
    else:
        print("✅ 'nm' command available")

    # Check if ar command is available
    _, _, code = run_command("which ar")
    if code != 0:
        print("❌ 'ar' command not found")
    else:
        print("✅ 'ar' command available")

if __name__ == "__main__":
    main()

In [None]:
#!/usr/bin/env python3
"""
Check ExecuTorch version and build details
"""

import executorch
import torch
import sys

# subprocess/os are imported unconditionally here because the code further
# down in this cell also uses subprocess; they used to be imported inside a
# try block.
import os
import subprocess

print("=== EXECUTORCH VERSION INFORMATION ===")

# Check ExecuTorch version: prefer the module attribute, fall back to the
# installed distribution's metadata, and only then report "Unknown".
try:
    print(f"ExecuTorch version: {executorch.__version__}")
except AttributeError:
    try:
        from importlib.metadata import version as _dist_version
        print(f"ExecuTorch version: {_dist_version('executorch')} (from package metadata)")
    except Exception:
        print("ExecuTorch version: Unknown (no __version__ attribute)")

# Check if we can import key modules
try:
    from executorch.exir import to_edge
    print("✅ executorch.exir module available")
except ImportError as e:
    print(f"❌ executorch.exir import failed: {e}")

try:
    from executorch.extension.pybindings.portable_lib import _load_for_executorch
    print("✅ executorch.extension.pybindings.portable_lib available")
except ImportError as e:
    print(f"❌ portable_lib import failed: {e}")

# Check PyTorch version
print(f"PyTorch version: {torch.__version__}")

# Check installation path
print(f"ExecuTorch installed at: {executorch.__file__}")

# Try to get the git commit of the installed package, if its directory is
# inside a git checkout.
try:
    executorch_path = os.path.dirname(executorch.__file__)
    result = subprocess.run(['git', 'rev-parse', 'HEAD'],
                            cwd=executorch_path,
                            capture_output=True,
                            text=True)
    if result.returncode == 0:
        print(f"Git commit: {result.stdout.strip()}")
    else:
        print("Not in a git repository or git not available")
except Exception as e:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate,
    # and the reason is shown instead of being discarded.
    print(f"Could not determine git commit: {e}")

# Imported here so this section does not depend on an earlier try block
# having imported subprocess.
import subprocess

print("\n=== SOURCE CODE VERSION ===")
# Report the commit and current branch of the vendored ExecuTorch source tree.
# The branch is only printed; it is not compared against a particular release.
try:
    source_path = "../../corecpp/third_party/executorch"
    result = subprocess.run(['git', 'rev-parse', 'HEAD'],
                            cwd=source_path,
                            capture_output=True,
                            text=True)
    if result.returncode == 0:
        print(f"Source git commit: {result.stdout.strip()}")

        result = subprocess.run(['git', 'branch', '--show-current'],
                                cwd=source_path,
                                capture_output=True,
                                text=True)
        if result.returncode == 0:
            # `git branch --show-current` prints nothing on a detached HEAD;
            # say so instead of printing an empty branch name.
            branch_name = result.stdout.strip()
            print(f"Source branch: {branch_name or '(detached HEAD)'}")
    else:
        print("Source: Not in a git repository")
except Exception as e:
    # Also reached when source_path does not exist or git is not installed.
    print(f"Could not check source version: {e}")

print("\n=== TESTING BASIC EXPORT ===")
# Smoke test: export a trivial add model, save it, load it with the Python
# runtime and check that the runtime result matches eager PyTorch.
try:
    # Imports stay inside the try so an import failure is reported as part of
    # this test rather than aborting the cell.
    import torch
    from executorch.exir import to_edge

    class SimpleModel(torch.nn.Module):
        """Adds its two inputs."""

        def forward(self, x, y):
            return x + y

    model = SimpleModel()
    example_inputs = (torch.ones(2, 2), torch.ones(2, 2))

    # Export
    exported_program = torch.export.export(model, example_inputs)
    edge_program = to_edge(exported_program)
    executorch_program = edge_program.to_executorch()

    # Save test model
    with open("version_test.pte", "wb") as f:
        executorch_program.write_to_file(f)

    print("✅ Simple export successful - created version_test.pte")

    # Try to load it
    from executorch.extension.pybindings.portable_lib import _load_for_executorch
    test_model = _load_for_executorch("version_test.pte")
    result = test_model.forward(example_inputs)

    # Only claim success when the runtime output agrees with eager PyTorch;
    # previously the result was never inspected.
    expected = model(*example_inputs)
    if torch.allclose(result[0], expected):
        print("✅ Python runtime execution successful")
    else:
        print("❌ Python runtime output does not match eager PyTorch output")

except Exception as e:
    print(f"❌ Export/execution failed: {e}")
    import traceback
    traceback.print_exc()