In [2]:
!pip install -q sentence-transformers transformers torch

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m96.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m84.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
!pip install onnx


Collecting onnx
  Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.18.0


In [4]:
import torch
from transformers import AutoTokenizer, AutoModel
import os
from pathlib import Path

def convert_sentence_transformer_to_onnx(
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
    output_dir="./onnx_model",
    max_length: int = 256,
    opset_version: int = 14,
) -> None:
    """Export a Hugging Face encoder to ONNX and save its tokenizer next to it.

    Called with no arguments this converts all-MiniLM-L6-v2 and writes
    ``model.onnx`` plus the tokenizer files into ``./onnx_model``.

    The exported graph takes ``input_ids`` and ``attention_mask`` (batch and
    sequence axes are dynamic) and names its first output
    ``last_hidden_state``. Only the bare ``AutoModel`` is exported: no pooling
    or normalisation step is added here.
    NOTE(review): consumers that need one vector per sentence presumably have
    to apply the model's pooling themselves -- confirm against the model card.

    Args:
        model_name: Hub id or local path passed to ``from_pretrained``.
        output_dir: Directory that receives ``model.onnx`` and the tokenizer
            files. Created (including parents) if missing.
        max_length: Truncation length used when tokenizing the dummy input.
        opset_version: ONNX opset passed to ``torch.onnx.export``.

    Returns:
        None. Progress and the list of written files are printed.
    """
    print(f"Loading model: {model_name}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    model.eval()

    # Create dummy input for export; its concrete shape does not matter because
    # both axes are declared dynamic below.
    dummy_text = "This is a sample sentence for ONNX conversion"
    dummy_input = tokenizer(
        dummy_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length
    )

    # Create output directory (accept str or Path from the caller)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Export to ONNX
    output_path = output_dir / "model.onnx"

    print("Converting to ONNX...")
    dynamic_batch_and_seq = {0: 'batch_size', 1: 'sequence'}
    # Export is inference-only, so run it with autograd disabled.
    with torch.no_grad():
        torch.onnx.export(
            model,
            (dummy_input['input_ids'], dummy_input['attention_mask']),
            str(output_path),
            input_names=['input_ids', 'attention_mask'],
            output_names=['last_hidden_state'],
            dynamic_axes={
                'input_ids': dict(dynamic_batch_and_seq),
                'attention_mask': dict(dynamic_batch_and_seq),
                'last_hidden_state': dict(dynamic_batch_and_seq)
            },
            opset_version=opset_version,
            verbose=False
        )

    # Save tokenizer files
    tokenizer.save_pretrained(str(output_dir))

    print("✅ Model converted successfully!")
    print(f"📁 Files saved to: {output_dir}/")

    # List created files (sorted: iterdir() order is filesystem-dependent)
    for file in sorted(output_dir.iterdir()):
        if file.is_file():
            print(f"   - {file.name}")

# Run the conversion when this cell/script is executed directly.
if __name__ == "__main__":
    convert_sentence_transformer_to_onnx()

# Zip the model folder for download (writes onnx_model.zip in the working directory)
import shutil
shutil.make_archive("onnx_model", 'zip', "onnx_model")

# google.colab is only importable on a Colab runtime. Elsewhere, keep the
# archive on disk instead of failing after the work is already done.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; archive left at onnx_model.zip")
else:
    files.download("onnx_model.zip")

Loading model: sentence-transformers/all-MiniLM-L6-v2
Converting to ONNX...
✅ Model converted successfully!
📁 Files saved to: onnx_model/
   - model.onnx
   - special_tokens_map.json
   - tokenizer.json
   - tokenizer_config.json
   - vocab.txt


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>