# OVOS Intent Classification — Multilingual Model
## Training, ONNX Export & Dependency-Free Inference

This notebook covers the full pipeline for the **complete multilingual** OVOS intents dataset:
1. **Train** classifiers on all languages using `model2vec`
2. **Export** trained classifiers to ONNX (embeddings + MLP head in one file)
3. **Inference** using only `onnxruntime` + `numpy` (no `model2vec`/`torch`/`tokenizers` needed)

**Dataset:** [`OpenVoiceOS/ovos-intents-train-latest`](https://huggingface.co/datasets/OpenVoiceOS/ovos-intents-train-latest)  
**Subset:** Full dataset — all 11 languages (en, de, it, ca, gl, es, fr, nl, da, eu, pt)  
**Task:** Multi-class intent classification (~160 intent labels)

---
## 0. Installs & Imports

In [1]:
# !pip install model2vec datasets torch onnx onnxruntime numpy

In [2]:
import gc
import re
import json
import datetime
import unicodedata
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
from datasets import load_dataset
from model2vec.train import StaticModelForClassification

# The original line carried a stray leading indent, which is an IndentationError.
# pynvml is not part of the install line above, so it is imported as an optional
# dependency. NOTE(review): confirm no later cell uses pynvml without a None check.
try:
    import pynvml  # type: ignore[import]
except ImportError:
    pynvml = None


---
## 1. Configuration

In [3]:
# Dataset identifier passed to `load_dataset` for the train/validation/test splits.
DATASET_NAME = "OpenVoiceOS/ovos-intents-train-latest"
# Root folder for ONNX exports; the training loop writes one sub-folder per model.
EXPORT_DIR = Path("exported_models_multilingual")
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# model2vec checkpoints that are trained and compared, in this order.
# The per-language accuracy cell evaluates MODELS[-1], so keep the model meant
# for that breakdown as the last entry.
# NOTE(review): an earlier comment described the first entries as "English
# baselines", but their names suggest multilingual checkpoints — confirm.
MODELS = [
    "Jarbas/m2v-256-LaBSE",
    "Jarbas/m2v-256-bert-base-multilingual-cased",
    "Jarbas/m2v-256-bge-m3",
    "Jarbas/m2v-256-paraphrase-multilingual-MiniLM-L12-v2",
    "minishlab/potion-multilingual-128M",
]

# Filled by the training loop: model name -> {"raw", "parsed", "onnx_dir"}.
RESULTS = {}

---
## 2. Load Dataset (full multilingual)

The full dataset is ~11 MB on disk (~122 k rows).  
We stream each split into plain Python lists to keep memory usage predictable
and avoid double-buffering from the HF cache.

In [4]:
def stream_split(dataset_name: str, split: str) -> dict:
    """Materialise one streamed dataset split as column-wise Python lists.

    Args:
        dataset_name: Dataset identifier handed to ``load_dataset``.
        split: Name of the split to read (e.g. ``"train"``).

    Returns:
        Dict with the keys ``"sentence"``, ``"label"`` and ``"lang"``; each
        value is a list with one entry per row, in iteration order.
    """
    columns = {"sentence": [], "label": [], "lang": []}
    for record in load_dataset(dataset_name, split=split, streaming=True):
        # Copy the three fields of this row into their respective columns.
        for field, values in columns.items():
            values.append(record[field])
    return columns


print("Streaming dataset splits ...")
train_data = stream_split(DATASET_NAME, "train")
val_data = stream_split(DATASET_NAME, "validation")
test_data = stream_split(DATASET_NAME, "test")

# Split sizes plus the label / language inventory of the training split.
print(
    f"Train:      {len(train_data['sentence'])}\n"
    f"Validation: {len(val_data['sentence'])}\n"
    f"Test:       {len(test_data['sentence'])}\n"
    f"Labels:     {len(set(train_data['label']))} unique\n"
    f"Languages:  {sorted(set(train_data['lang']))}"
)

# Show the first training row as a quick sanity check of the schema.
print(
    "\nSample: sentence={!r}  label={!r}  lang={!r}".format(
        train_data["sentence"][0],
        train_data["label"][0],
        train_data["lang"][0],
    )
)

# Per-language breakdown, most frequent language first.
from collections import Counter
lang_counts = Counter(train_data["lang"])
print("\nTrain samples per language:")
for lang, count in lang_counts.most_common():
    print(f"  {lang}: {count:>6}")

Streaming dataset splits ...
Train:      96646
Validation: 24162
Test:       1370
Labels:     160 unique
Languages:  ['ca', 'da', 'de', 'en', 'es', 'eu', 'fr', 'gl', 'it', 'nl', 'pt']

Sample: sentence='quants llengües saps entendre?'  label='ovos-skill-diagnostics.openvoiceos:query_langs.intent'  lang='ca'

Train samples per language:
  ca:  34712
  gl:  13878
  en:   9515
  de:   8899
  it:   6617
  da:   5042
  nl:   4513
  pt:   4419
  es:   4141
  fr:   3632
  eu:   1278


---
## 3. Helpers

In [5]:
def parse_classification_report(report_str: str) -> dict:
    """Parse a plain-text classification report into a dictionary.

    The expected layout is a header line containing
    ``precision  recall  f1-score  support`` followed by one row per label,
    an optional ``accuracy`` row and optional averaged rows (for example
    ``macro avg``), which are parsed like ordinary label rows.

    Args:
        report_str: The report text. Lines before the header are ignored.

    Returns:
        A dict mapping each row label to a dict with the keys ``"precision"``,
        ``"recall"``, ``"f1-score"`` (floats) and ``"support"`` (int). When an
        accuracy row is present, ``"accuracy"`` (float) and
        ``"accuracy_support"`` (int) are added as top-level keys. Lines that
        match neither pattern are skipped, so an empty or header-less report
        yields ``{}``.
    """
    header_re = re.compile(r"precision\s+recall\s+f1-score\s+support")
    # The label is matched lazily as *any* text: a character whitelist would
    # silently drop rows whose label contains e.g. "/" or parentheses. The four
    # trailing numeric columns anchored at "$" still pin down where it ends.
    row_re = re.compile(
        r"^(?P<label>.+?)\s+"
        r"(?P<precision>\d\.\d+|\d)\s+"
        r"(?P<recall>\d\.\d+|\d)\s+"
        r"(?P<f1>\d\.\d+|\d)\s+"
        r"(?P<support>\d+)$"
    )
    # The accuracy row carries a single score followed by the support count.
    acc_re = re.compile(r"^accuracy\s+(?P<value>\d+(?:\.\d+)?)\s+(?P<support>\d+)$")

    data = {}
    header_found = False
    for raw_line in report_str.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if header_re.search(line):
            header_found = True
            continue
        if not header_found:
            # Ignore anything printed before the column header.
            continue

        row = row_re.match(line)
        if row:
            data[row.group("label").strip()] = {
                "precision": float(row.group("precision")),
                "recall": float(row.group("recall")),
                "f1-score": float(row.group("f1")),
                "support": int(row.group("support")),
            }
            continue

        accuracy = acc_re.match(line)
        if accuracy:
            data["accuracy"] = float(accuracy.group("value"))
            data["accuracy_support"] = int(accuracy.group("support"))
    return data

---
## 4. ONNX Export Utilities

In [6]:
class OnnxExportWrapper(nn.Module):
    """Adapter module exposing only the classifier's logits.

    The wrapped classifier is called with ``input_ids`` and returns a pair;
    this module forwards the first element (the logits) and discards the
    second, so the exported graph has a single output.
    """

    def __init__(self, classifier: StaticModelForClassification):
        super().__init__()
        self.classifier = classifier

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        """Run the wrapped classifier on ``input_ids`` and return its logits."""
        outputs = self.classifier(input_ids)
        head_logits, _discarded = outputs
        return head_logits

In [7]:
def export_classifier_to_onnx(
    classifier: StaticModelForClassification,
    save_dir: Path,
) -> None:
    """Write a trained classifier to ``save_dir`` as an ONNX bundle.

    Three files are produced: ``classifier.onnx`` (the classifier wrapped so
    that it returns logits only), ``config.json`` (labels, pad id and the
    multilabel flag) and ``tokenizer.json`` (saved by the classifier's
    tokenizer).

    Args:
        classifier: Trained model; it is switched to eval mode in place.
        save_dir: Target directory, created if it does not exist yet.
    """
    save_dir.mkdir(parents=True, exist_ok=True)
    classifier.eval()

    export_module = OnnxExportWrapper(classifier)
    export_module.eval()

    # Example batch handed to the exporter; both of its axes are declared
    # dynamic below, so its concrete size is not baked into the graph.
    example_ids = classifier.tokenize(["hello", "hello world"])
    print(f"  Dummy input shape: {example_ids.shape}  (batch, max_seq_len)")

    onnx_path = save_dir / "classifier.onnx"
    export_options = dict(
        export_params=True,
        opset_version=18,
        do_constant_folding=True,
        input_names=["input_ids"],
        output_names=["logits"],
        dynamic_axes={
            "input_ids": {0: "batch_size", 1: "seq_len"},
            "logits": {0: "batch_size"},
        },
    )
    torch.onnx.export(export_module, (example_ids,), str(onnx_path), **export_options)

    # Metadata stored next to the ONNX file.
    labels = list(classifier.classes_)
    pad_id = int(classifier.pad_id)
    config = {
        "labels": labels,
        "pad_id": pad_id,
        "multilabel": bool(classifier.multilabel),
    }
    config_path = save_dir / "config.json"
    config_path.write_text(json.dumps(config, indent=2))
    classifier.tokenizer.save(str(save_dir / "tokenizer.json"))

    summary_lines = (
        f"  Exported: {onnx_path}",
        f"  Labels:   {len(labels)} classes",
        f"  Pad ID:   {pad_id}",
        "  Files:    classifier.onnx, config.json, tokenizer.json",
    )
    for summary_line in summary_lines:
        print(summary_line)

---
## 5. Train & Export Loop

Each model is loaded, trained on the **full multilingual** train set, evaluated and exported to ONNX.

In [8]:
# Train, evaluate, save and ONNX-export every checkpoint in MODELS, one at a time.
for model_name in MODELS:
    print(f"\n{'='*60}")
    print(f"Model: {model_name}")
    print(f"{'='*60}")

    # Fresh classifier per checkpoint, fitted on the train split with the
    # validation split passed as X_val / y_val.
    classifier = StaticModelForClassification.from_pretrained(model_name=model_name)
    classifier.fit(
        max_epochs=25,
        X=train_data["sentence"],
        y=train_data["label"],
        X_val=val_data["sentence"],
        y_val=val_data["label"],
    )

    # The evaluation result is printed as-is and also handed to the text parser,
    # so it is treated as a plain-text report here.
    raw_report = classifier.evaluate(test_data["sentence"], test_data["label"])
    print(raw_report)
    parsed = parse_classification_report(raw_report)

    # Repo owner prefix is dropped; the remainder names both the saved pipeline
    # and the ONNX sub-folder.
    short_name = model_name.split("/")[-1]
    pipeline_name = f"ovos-intents-multilingual-{short_name}"
    pipeline = classifier.to_pipeline()
    # presumably writes to a local directory named `pipeline_name` — TODO confirm
    pipeline.save_pretrained(pipeline_name)

    onnx_dir = EXPORT_DIR / short_name
    print(f"\nExporting to ONNX...")
    export_classifier_to_onnx(classifier, onnx_dir)

    # Keep both the raw text and the parsed metrics for the summary cell.
    RESULTS[model_name] = {
        "raw": raw_report,
        "parsed": parsed,
        "onnx_dir": str(onnx_dir),
    }

    # Free memory before next model: drop the references, run the garbage
    # collector, and release cached CUDA memory when a GPU is in use.
    del classifier, pipeline
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print("\nAll models trained and exported!")


Model: Jarbas/m2v-256-LaBSE


model.safetensors:   0%|          | 0.00/257M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/288 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/13.6M [00:00<?, ?B/s]

Your vectors are torch.float16 precision, converting to to torch.float32 to avoid compatibility issues.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2026-02-14 18:44:22.512859: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-14 18:44:22.877917: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00, 28.47it/s]


                                                                              precision    recall  f1-score   support

                                                   common_query:common_query       0.61      1.00      0.76        11
                                                                    ocp:play       0.71      1.00      0.83        10
                               ovos-skill-alerts.openvoiceos:AddListSubitems       1.00      0.88      0.93         8
                                  ovos-skill-alerts.openvoiceos:CalendarList       1.00      1.00      1.00         6
                                   ovos-skill-alerts.openvoiceos:CancelAlert       0.88      0.88      0.88         8
                              ovos-skill-alerts.openvoiceos:ChangeProperties       1.00      0.75      0.86         8
                                   ovos-skill-alerts.openvoiceos:CreateAlarm       1.00      1.00      1.00         8
                                   ovos-skill-alerts.op

  torch.onnx.export(


[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 7 of general pattern rewrite rules.
  Exported: exported_models_multilingual/m2v-256-LaBSE/classifier.onnx
  Labels:   160 classes
  Pad ID:   0
  Files:    classifier.onnx, config.json, tokenizer.json

Model: Jarbas/m2v-256-bert-base-multilingual-cased


model.safetensors:   0%|          | 0.00/61.2M [00:00<?, ?B/s]

'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 086c0e27-33f7-427e-8208-dab62735fdae)')' thrown while requesting HEAD https://huggingface.co/api/resolve-cache/models/Jarbas/m2v-256-bert-base-multilingual-cased/985a6ca9107b9272c3852e29d5fe282dc2e41511/README.md
Retrying in 1s [Retry 1/5].


README.md: 0.00B [00:00, ?B/s]

'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 6d7a26b9-6a37-459b-b5bb-4aa5aa630950)')' thrown while requesting HEAD https://huggingface.co/api/resolve-cache/models/Jarbas/m2v-256-bert-base-multilingual-cased/985a6ca9107b9272c3852e29d5fe282dc2e41511/config.json
Retrying in 1s [Retry 1/5].


config.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Your vectors are torch.float16 precision, converting to to torch.float32 to avoid compatibility issues.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type                         | Params | Mode 
-----------------------------------------------------------------------
0 | model         | StaticModelForClassification | 31.0 M | train
1 | loss_function | CrossEntropyLoss             | 0      | train
-----------------------------------------------------------------------
30.9 M    Trainable params
119 K     Non-trainable params
31.0 M    Total params
124.122   Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=25` reached.
100%|██████████| 2/2 [00:00<00:00, 40.49it/s]


                                                                              precision    recall  f1-score   support

                                                   common_query:common_query       0.55      1.00      0.71        11
                                                                    ocp:play       0.62      1.00      0.77        10
                               ovos-skill-alerts.openvoiceos:AddListSubitems       1.00      0.88      0.93         8
                                  ovos-skill-alerts.openvoiceos:CalendarList       1.00      1.00      1.00         6
                                   ovos-skill-alerts.openvoiceos:CancelAlert       1.00      0.88      0.93         8
                              ovos-skill-alerts.openvoiceos:ChangeProperties       1.00      1.00      1.00         8
                                   ovos-skill-alerts.openvoiceos:CreateAlarm       0.80      1.00      0.89         8
                                   ovos-skill-alerts.op

  torch.onnx.export(


[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 7 of general pattern rewrite rules.
  Exported: exported_models_multilingual/m2v-256-bert-base-multilingual-cased/classifier.onnx
  Labels:   160 classes
  Pad ID:   0
  Files:    classifier.onnx, config.json, tokenizer.json

Model: Jarbas/m2v-256-bge-m3


model.safetensors:   0%|          | 0.00/128M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/272 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

Your vectors are torch.float16 precision, converting to to torch.float32 to avoid compatibility issues.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type                         | Params | Mode 
-----------------------------------------------------------------------
0 | model         | StaticModelForClassification | 64.7 M | train
1 | loss_function | CrossEntropyLoss             | 0      | train
-----------------------------------------------------------------------
64.5 M    Trainable params
249 K     Non-trainable params
64.7 M    Total params
258.854   Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=25` reached.
100%|██████████| 2/2 [00:00<00:00, 34.63it/s]


                                                                              precision    recall  f1-score   support

                                                   common_query:common_query       0.44      1.00      0.61        11
                                                                    ocp:play       0.59      1.00      0.74        10
                               ovos-skill-alerts.openvoiceos:AddListSubitems       1.00      0.88      0.93         8
                                  ovos-skill-alerts.openvoiceos:CalendarList       1.00      0.83      0.91         6
                                   ovos-skill-alerts.openvoiceos:CancelAlert       1.00      0.88      0.93         8
                              ovos-skill-alerts.openvoiceos:ChangeProperties       1.00      0.88      0.93         8
                                   ovos-skill-alerts.openvoiceos:CreateAlarm       0.80      1.00      0.89         8
                                   ovos-skill-alerts.op

  torch.onnx.export(


[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 7 of general pattern rewrite rules.
  Exported: exported_models_multilingual/m2v-256-bge-m3/classifier.onnx
  Labels:   160 classes
  Pad ID:   112632
  Files:    classifier.onnx, config.json, tokenizer.json

Model: Jarbas/m2v-256-paraphrase-multilingual-MiniLM-L12-v2


model.safetensors:   0%|          | 0.00/128M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/320 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

Your vectors are torch.float16 precision, converting to to torch.float32 to avoid compatibility issues.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type                         | Params | Mode 
-----------------------------------------------------------------------
0 | model         | StaticModelForClassification | 64.7 M | train
1 | loss_function | CrossEntropyLoss             | 0      | train
-----------------------------------------------------------------------
64.5 M    Trainable params
249 K     Non-trainable params
64.7 M    Total params
258.854   Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00, 32.09it/s]


                                                                              precision    recall  f1-score   support

                                                   common_query:common_query       0.50      0.91      0.65        11
                                                                    ocp:play       0.67      1.00      0.80        10
                               ovos-skill-alerts.openvoiceos:AddListSubitems       1.00      0.88      0.93         8
                                  ovos-skill-alerts.openvoiceos:CalendarList       1.00      1.00      1.00         6
                                   ovos-skill-alerts.openvoiceos:CancelAlert       1.00      0.88      0.93         8
                              ovos-skill-alerts.openvoiceos:ChangeProperties       1.00      1.00      1.00         8
                                   ovos-skill-alerts.openvoiceos:CreateAlarm       0.89      1.00      0.94         8
                                   ovos-skill-alerts.op

  torch.onnx.export(


[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 7 of general pattern rewrite rules.
  Exported: exported_models_multilingual/m2v-256-paraphrase-multilingual-MiniLM-L12-v2/classifier.onnx
  Labels:   160 classes
  Pad ID:   112632
  Files:    classifier.onnx, config.json, tokenizer.json

Model: minishlab/potion-multilingual-128M


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type                         | Params | Mode 
-----------------------------------------------------------------------
0 | model         | StaticModelForClassification | 129 M  | train
1 | loss_function | CrossEntropyLoss             | 0      | train
-----------------------------------------------------------------------
128 M     Trainable params
500 K     Non-trainable params
129 M     Total params
517.219   Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/home/miro/PycharmProjects/wakeHuBert/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00, 35.47it/s]


                                                                              precision    recall  f1-score   support

                                                   common_query:common_query       0.69      1.00      0.81        11
                                                                    ocp:play       0.67      1.00      0.80        10
                               ovos-skill-alerts.openvoiceos:AddListSubitems       1.00      0.88      0.93         8
                                  ovos-skill-alerts.openvoiceos:CalendarList       1.00      1.00      1.00         6
                                   ovos-skill-alerts.openvoiceos:CancelAlert       1.00      1.00      1.00         8
                              ovos-skill-alerts.openvoiceos:ChangeProperties       1.00      1.00      1.00         8
                                   ovos-skill-alerts.openvoiceos:CreateAlarm       0.89      1.00      0.94         8
                                   ovos-skill-alerts.op

  torch.onnx.export(


[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `OnnxExportWrapper([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 7 of general pattern rewrite rules.
  Exported: exported_models_multilingual/potion-multilingual-128M/classifier.onnx
  Labels:   160 classes
  Pad ID:   0
  Files:    classifier.onnx, config.json, tokenizer.json

All models trained and exported!


---
## 6. Results Summary

In [9]:
# Size the name column to the longest model name (never narrower than the
# previous fixed 50 characters) so long names no longer push the metric
# columns out of alignment.
name_width = max([50, *(len(name) for name in RESULTS)])
# A metric that could not be parsed is shown as "nan" instead of a 0.0000
# that would look like a genuine score.
missing = float("nan")

print(f"{'Model':<{name_width}} {'Accuracy':>10} {'F1 (macro)':>12}")
print("-" * (name_width + 25))
for model_name, result in RESULTS.items():
    parsed = result["parsed"]
    acc = parsed.get("accuracy", missing)
    macro_f1 = parsed.get("macro avg", {}).get("f1-score", missing)
    print(f"{model_name:<{name_width}} {acc:>10.4f} {macro_f1:>12.4f}")

Model                                                Accuracy   F1 (macro)
---------------------------------------------------------------------------
Jarbas/m2v-256-LaBSE                                   0.9700       0.9700
Jarbas/m2v-256-bert-base-multilingual-cased            0.9500       0.9500
Jarbas/m2v-256-bge-m3                                  0.9400       0.9500
Jarbas/m2v-256-paraphrase-multilingual-MiniLM-L12-v2     0.9500       0.9600
minishlab/potion-multilingual-128M                     0.9700       0.9700


---
## 6b. Per-Language Accuracy Breakdown (ONNX)

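This evaluates the ONNX export of the last model in `MODELS` (`potion-multilingual-128M`,
tied with `m2v-256-LaBSE` for the highest accuracy in the summary above)
per language on the test set, so you can see where the multilingual model
excels and where it struggles. Run the Part 2 cells first: the code below relies on
`OnnxClassifier`, which is defined there.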

In [17]:
# Use the ONNX export of the last entry in MODELS for the per-language breakdown.
# Note that "best" here simply means MODELS[-1]; it is not selected from RESULTS.
best_model = MODELS[-1]
best_onnx_dir = EXPORT_DIR / best_model.split("/")[-1]

# OnnxClassifier is not defined by any cell above this one (it is expected from
# Part 2). On a fresh kernel the NameError branch runs, a hint is printed, and
# the evaluation below is skipped until this cell is re-run.
try:
    clf_multi = OnnxClassifier(str(best_onnx_dir))
except NameError:
    print("OnnxClassifier not yet defined — run Part 2 cells first, then re-run this cell.")
    clf_multi = None

if clf_multi is not None:
    from collections import defaultdict
    # Per-language counters; missing languages start at 0.
    lang_correct = defaultdict(int)
    lang_total = defaultdict(int)

    # Predict over the test sentences in fixed-size batches.
    batch_size = 256
    all_preds = []
    for i in range(0, len(test_data["sentence"]), batch_size):
        batch = test_data["sentence"][i : i + batch_size]
        all_preds.extend(clf_multi.predict(batch))

    # Predictions are paired with labels/languages by position.
    # assumes predict() returns one label per input sentence, in order — TODO confirm
    for pred, true_label, lang in zip(all_preds, test_data["label"], test_data["lang"]):
        lang_total[lang] += 1
        if pred == true_label:
            lang_correct[lang] += 1

    print(f"\nPer-language accuracy ({best_model}):")
    print(f"{'Lang':<6} {'Correct':>8} {'Total':>8} {'Accuracy':>10}")
    print("-" * 35)
    # Every key in lang_total was incremented at least once, so the division is safe.
    for lang in sorted(lang_total.keys()):
        acc = lang_correct[lang] / lang_total[lang]
        print(f"{lang:<6} {lang_correct[lang]:>8} {lang_total[lang]:>8} {acc:>10.4f}")


Per-language accuracy (minishlab/potion-multilingual-128M):
Lang    Correct    Total   Accuracy
-----------------------------------
ca          130      135     0.9630
da           91       93     0.9785
de          143      148     0.9662
en          152      157     0.9682
es          123      129     0.9535
eu           86       90     0.9556
fr          121      124     0.9758
gl          130      136     0.9559
it          130      136     0.9559
nl           90       92     0.9783
pt          127      130     0.9769


---
---
# PART 2: Dependency-Free ONNX Inference

Everything below needs **only `onnxruntime` and `numpy`** — no `torch`, `model2vec`, `transformers`, or `tokenizers`.

You can copy the code from sections 8–10 into a standalone `inference.py` for production deployment.

---
## 8. Pure-Python Tokenizer (auto-detects WordPiece / BPE / Unigram)

Reads the HuggingFace `tokenizer.json` and auto-detects the model type.
Supports the three main tokenizer types used by model2vec models:
- **WordPiece** (BERT-style)
- **BPE** (GPT-style)
- **Unigram** (SentencePiece-style) — used by `potion-multilingual-128M`

In [12]:
import json
import math
import re
import unicodedata
from pathlib import Path

import numpy as np
import onnxruntime as ort


class MinimalTokenizer:
    """
    Pure-Python tokenizer that reads a HuggingFace tokenizer.json.
    Auto-detects WordPiece, BPE, or Unigram model type.
    No dependencies beyond the standard library + numpy.

    Pipeline used by ``encode``: normalizer -> pre-tokenizer -> model
    (Unigram / WordPiece / BPE). No special tokens are added.
    """

    # Penalty subtracted from the lowest vocabulary score to price an
    # unknown-character edge in the Unigram lattice, so that any path made of
    # real pieces is preferred over one going through an unknown character.
    _UNK_PENALTY = 10.0

    def __init__(self, tokenizer_json_path: str):
        """Load ``tokenizer.json`` and build the lookup tables for its model type.

        Raises:
            ValueError: if the model type is not Unigram, WordPiece or BPE.
        """
        with open(tokenizer_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        self.model_type = data["model"]["type"]  # "WordPiece", "BPE", "Unigram"
        self.model_data = data["model"]
        self.normalizer_config = data.get("normalizer")
        self.pre_tokenizer_config = data.get("pre_tokenizer")

        # Build vocab lookup based on model type
        if self.model_type == "Unigram":
            self._init_unigram()
        elif self.model_type == "WordPiece":
            self._init_wordpiece()
        elif self.model_type == "BPE":
            self._init_bpe()
        else:
            raise ValueError(f"Unsupported tokenizer model type: {self.model_type}")

    # ================================================================
    # UNIGRAM (SentencePiece)
    # ================================================================
    def _init_unigram(self):
        """Build ``piece -> (id, score)`` from the Unigram ``vocab`` list."""
        vocab_list = self.model_data["vocab"]
        self.unk_id = self.model_data.get("unk_id", 0)
        self.vocab = {}
        for idx, (piece, score) in enumerate(vocab_list):
            self.vocab[piece] = (idx, score)
        # default= keeps an empty vocabulary from raising in max()/min().
        self.max_piece_len = max((len(p) for p, _ in vocab_list), default=0)
        min_score = min((score for _, score in vocab_list), default=0.0)
        self.unk_score = min_score - self._UNK_PENALTY

    def _tokenize_unigram(self, text: str) -> list[int]:
        """Viterbi decoding for Unigram tokenization.

        A character that is not itself a vocabulary piece gets a one-character
        "unknown" edge scored ``min_score - _UNK_PENALTY``; consecutive unknown
        characters are fused into a single ``unk_id``. This keeps one
        out-of-vocabulary character (e.g. an emoji) from degrading the
        segmentation of the rest of the chunk.
        """
        n = len(text)
        if n == 0:
            return []
        NEG_INF = float("-inf")
        best_score = [NEG_INF] * (n + 1)
        best_prev = [0] * (n + 1)
        best_is_unk = [False] * (n + 1)  # True when the best edge ending here is an unk edge
        best_score[0] = 0.0
        allow_unk = self.unk_id is not None

        for i in range(n):
            if best_score[i] == NEG_INF:
                continue
            max_len = min(self.max_piece_len, n - i)
            for length in range(1, max_len + 1):
                entry = self.vocab.get(text[i : i + length])
                if entry is None:
                    continue
                new_score = best_score[i] + entry[1]
                if new_score > best_score[i + length]:
                    best_score[i + length] = new_score
                    best_prev[i + length] = i
                    best_is_unk[i + length] = False
            # No single-character piece starts here: add a penalised unk edge
            # so the lattice stays connected.
            if allow_unk and text[i] not in self.vocab:
                new_score = best_score[i] + self.unk_score
                if new_score > best_score[i + 1]:
                    best_score[i + 1] = new_score
                    best_prev[i + 1] = i
                    best_is_unk[i + 1] = True

        if best_score[n] == NEG_INF:
            # Only reachable when there is no unk id to bridge the gap.
            return self._unigram_fallback(text)

        ids = []
        pos = n
        prev_was_unk = False
        while pos > 0:
            prev = best_prev[pos]
            if best_is_unk[pos]:
                # Fuse a run of unknown characters into one unk token.
                if not prev_was_unk:
                    ids.append(self.unk_id)
                prev_was_unk = True
            else:
                ids.append(self.vocab[text[prev:pos]][0])
                prev_was_unk = False
            pos = prev
        ids.reverse()
        return ids

    def _unigram_fallback(self, text: str) -> list[int]:
        """Character-by-character lookup used when no segmentation exists.

        Characters missing from the vocabulary map to ``unk_id``; when there is
        no unk id they are dropped so that ``None`` never ends up in the ids.
        """
        ids = []
        for ch in text:
            if ch in self.vocab:
                ids.append(self.vocab[ch][0])
            elif self.unk_id is not None:
                ids.append(self.unk_id)
        return ids

    # ================================================================
    # WORDPIECE
    # ================================================================
    def _init_wordpiece(self):
        """Read the WordPiece vocab (``token -> id``) and its options."""
        self.vocab = self.model_data["vocab"]
        self.unk_token = self.model_data.get("unk_token", "[UNK]")
        self.unk_id = self.vocab.get(self.unk_token)
        self.continuing_subword_prefix = self.model_data.get(
            "continuing_subword_prefix", "##"
        )
        self.max_input_chars_per_word = self.model_data.get(
            "max_input_chars_per_word", 100
        )

    def _tokenize_wordpiece(self, text: str) -> list[int]:
        """Split ``text`` on whitespace/punctuation and WordPiece-encode each word."""
        words = self._pre_tokenize_whitespace_punct(text)
        ids = []
        for word in words:
            ids.extend(self._wordpiece_single(word))
        return ids

    def _wordpiece_single(self, word: str) -> list[int]:
        """Greedy longest-match-first WordPiece encoding of one word.

        Returns ``[unk_id]`` (or ``[]`` when there is no unk id) if the word is
        longer than ``max_input_chars_per_word`` or any part cannot be matched.
        """
        if len(word) > self.max_input_chars_per_word:
            return [self.unk_id] if self.unk_id is not None else []
        token_ids = []
        start = 0
        while start < len(word):
            end = len(word)
            found = False
            while start < end:
                substr = word[start:end]
                if start > 0:
                    substr = self.continuing_subword_prefix + substr
                if substr in self.vocab:
                    token_ids.append(self.vocab[substr])
                    found = True
                    break
                end -= 1
            if not found:
                return [self.unk_id] if self.unk_id is not None else []
            start = end
        return token_ids

    # ================================================================
    # BPE
    # ================================================================
    def _init_bpe(self):
        """Read the BPE vocab and build ``(left, right) -> rank`` from ``merges``.

        Accepts both serialisations of a merge entry: the legacy string
        ``"left right"`` and the two-element list ``["left", "right"]``.
        """
        self.vocab = self.model_data["vocab"]
        self.merges = self.model_data.get("merges", [])
        self.unk_token = self.model_data.get("unk_token", "<unk>")
        self.unk_id = self.vocab.get(self.unk_token)
        self.merge_ranks = {}
        for rank, merge in enumerate(self.merges):
            if isinstance(merge, str):
                parts = merge.split(" ", 1)
            else:
                parts = list(merge)
            if len(parts) == 2:
                self.merge_ranks[(parts[0], parts[1])] = rank

    def _tokenize_bpe(self, text: str) -> list[int]:
        """Split ``text`` on whitespace/punctuation and BPE-encode each word."""
        words = self._pre_tokenize_whitespace_punct(text)
        ids = []
        for word in words:
            ids.extend(self._bpe_single(word))
        return ids

    def _bpe_single(self, word: str) -> list[int]:
        """Apply BPE merges to one word, lowest rank first, and map to ids.

        Symbols missing from the vocab map to ``unk_id`` (or 0 when there is
        no unk id).
        """
        INF = float("inf")
        symbols = list(word)
        while len(symbols) > 1:
            best_pair = None
            best_rank = INF
            for i in range(len(symbols) - 1):
                pair = (symbols[i], symbols[i + 1])
                rank = self.merge_ranks.get(pair, INF)
                if rank < best_rank:
                    best_rank = rank
                    best_pair = pair
            if best_pair is None:
                break
            merged = best_pair[0] + best_pair[1]
            new_symbols = []
            i = 0
            while i < len(symbols):
                if (
                    i < len(symbols) - 1
                    and symbols[i] == best_pair[0]
                    and symbols[i + 1] == best_pair[1]
                ):
                    new_symbols.append(merged)
                    i += 2
                else:
                    new_symbols.append(symbols[i])
                    i += 1
            symbols = new_symbols
        fallback_id = self.unk_id if self.unk_id is not None else 0
        return [self.vocab.get(s, fallback_id) for s in symbols]

    # ================================================================
    # NORMALIZER
    # ================================================================
    def _normalize(self, text: str) -> str:
        """Apply the normalizer from tokenizer.json, if any."""
        if not self.normalizer_config:
            return text
        return self._apply_normalizer(text, self.normalizer_config)

    @staticmethod
    def _strip_accents(text: str) -> str:
        """Remove combining marks (category ``Mn``) after NFD decomposition."""
        return "".join(
            c
            for c in unicodedata.normalize("NFD", text)
            if unicodedata.category(c) != "Mn"
        )

    def _apply_normalizer(self, text: str, cfg: dict) -> str:
        """Apply one normalizer config node (recursing into ``Sequence``).

        Unrecognised normalizer types leave the text unchanged.
        """
        ntype = cfg.get("type", "")

        if ntype == "Sequence":
            for sub in cfg.get("normalizers", []):
                text = self._apply_normalizer(text, sub)
            return text

        if ntype == "Precompiled":
            # The precompiled character map is not interpreted here; NFKC is
            # used as a stand-in.
            return unicodedata.normalize("NFKC", text)

        if ntype == "Replace":
            pattern = cfg.get("pattern", {})
            content = cfg.get("content", "")
            if "String" in pattern:
                text = text.replace(pattern["String"], content)
            elif "Regex" in pattern:
                # NOTE(review): the pattern is evaluated with Python's `re`;
                # confirm it is compatible with the syntax tokenizer.json uses.
                text = re.sub(pattern["Regex"], content, text)
            return text

        if ntype in ("NFC", "NFD", "NFKC", "NFKD"):
            return unicodedata.normalize(ntype, text)

        if ntype in ("BertNormalizer", "Lowercase"):
            lowercase = cfg.get("lowercase", True)
            strip_accents = cfg.get("strip_accents", False)
            # BertNormalizer: an explicit null for strip_accents means
            # "follow the lowercase setting".
            if ntype == "BertNormalizer" and strip_accents is None:
                strip_accents = lowercase
            if lowercase:
                text = text.lower()
            if strip_accents:
                text = self._strip_accents(text)
            return text

        if ntype == "StripAccents":
            return self._strip_accents(text)

        if ntype == "Strip":
            return text.strip()

        return text

    # ================================================================
    # PRE-TOKENIZER
    # ================================================================
    def _pre_tokenize(self, text: str) -> list[str]:
        """Split normalized text into chunks using the configured pre-tokenizer."""
        if not self.pre_tokenizer_config:
            return [text]
        return self._apply_pre_tokenizer(text, self.pre_tokenizer_config)

    def _apply_pre_tokenizer(self, text: str, cfg: dict) -> list[str]:
        """Apply one pre-tokenizer config node (recursing into ``Sequence``).

        Unrecognised pre-tokenizer types return the text as a single chunk.
        """
        ptype = cfg.get("type", "")

        if ptype == "Sequence":
            chunks = [text]
            for sub in cfg.get("pretokenizers", []):
                new_chunks = []
                for chunk in chunks:
                    new_chunks.extend(self._apply_pre_tokenizer(chunk, sub))
                chunks = new_chunks
            return chunks

        if ptype == "Metaspace":
            replacement = cfg.get("replacement", "\u2581")
            if "prepend_scheme" in cfg:
                prepend = cfg["prepend_scheme"]
            elif cfg.get("add_prefix_space") is False:
                # Legacy config key: no prefix marker requested.
                prepend = "never"
            else:
                prepend = "always"
            text = text.replace(" ", replacement)
            if prepend in ("always", "first"):
                if not text.startswith(replacement):
                    text = replacement + text
            # The text is kept as one chunk; the model segments across it.
            return [text]

        if ptype == "WhitespaceSplit":
            return text.split()

        if ptype == "BertPreTokenizer":
            return self._pre_tokenize_whitespace_punct(text)

        if ptype == "Split":
            pattern = cfg.get("pattern", {})
            if "Regex" in pattern:
                # NOTE(review): delimiters are dropped unless the regex
                # captures them; the config's `behavior` field is not consulted.
                parts = re.split(pattern["Regex"], text)
                return [p for p in parts if p]
            return [text]

        return [text]

    def _pre_tokenize_whitespace_punct(self, text: str) -> list[str]:
        """Split on whitespace (dropped) and punctuation (kept as own tokens)."""
        tokens = []
        current = []
        for ch in text:
            if ch.isspace():
                if current:
                    tokens.append("".join(current))
                    current = []
            elif _is_punctuation(ch):
                if current:
                    tokens.append("".join(current))
                    current = []
                tokens.append(ch)
            else:
                current.append(ch)
        if current:
            tokens.append("".join(current))
        return tokens

    # ================================================================
    # PUBLIC API
    # ================================================================
    def encode(self, text: str) -> list[int]:
        """Normalize, pre-tokenize and encode ``text`` into token ids."""
        text = self._normalize(text)
        chunks = self._pre_tokenize(text)

        if self.model_type == "Unigram":
            tokenize_chunk = self._tokenize_unigram
        elif self.model_type == "WordPiece":
            tokenize_chunk = self._tokenize_wordpiece
        elif self.model_type == "BPE":
            tokenize_chunk = self._tokenize_bpe
        else:
            return []

        ids = []
        for chunk in chunks:
            ids.extend(tokenize_chunk(chunk))
        return ids

    def encode_batch(
        self, texts: list[str], max_length: int = 512
    ) -> list[list[int]]:
        """Encode each text, truncating every id list to ``max_length``."""
        return [self.encode(t)[:max_length] for t in texts]

    def encode_and_pad(
        self,
        texts: list[str],
        pad_id: int = 0,
        max_length: int = 512,
    ) -> np.ndarray:
        """Encode ``texts`` into an int64 array right-padded with ``pad_id``.

        The array has one row per text and as many columns as the longest
        encoded sequence, with a minimum of one column.
        """
        encoded = self.encode_batch(texts, max_length=max_length)
        max_len = max((len(ids) for ids in encoded), default=1)
        max_len = max(max_len, 1)
        padded = np.full((len(encoded), max_len), pad_id, dtype=np.int64)
        for i, ids in enumerate(encoded):
            padded[i, : len(ids)] = ids
        return padded


def _is_punctuation(ch: str) -> bool:
    cp = ord(ch)
    if (33 <= cp <= 47) or (58 <= cp <= 64) or (91 <= cp <= 96) or (123 <= cp <= 126):
        return True
    return unicodedata.category(ch).startswith("P")

---
## 9. ONNX Classifier (inference-only)

In [13]:
class OnnxClassifier:
    """
    Full inference pipeline using only onnxruntime + numpy.

    Expected directory layout:
        model_dir/
            classifier.onnx
            config.json       (labels, pad_id, multilabel)
            tokenizer.json    (HuggingFace tokenizers format)
    """

    def __init__(self, model_dir: str):
        """Load the ONNX session, label config and tokenizer from ``model_dir``."""
        model_dir = Path(model_dir)
        self.session = ort.InferenceSession(str(model_dir / "classifier.onnx"))

        # Read as UTF-8 explicitly (as is done for tokenizer.json) so the
        # result does not depend on the platform's default encoding.
        with open(model_dir / "config.json", "r", encoding="utf-8") as f:
            config = json.load(f)
        self.labels = config["labels"]
        self.pad_id = config["pad_id"]
        self.multilabel = config.get("multilabel", False)

        self.tokenizer = MinimalTokenizer(str(model_dir / "tokenizer.json"))

    def _run(self, sentences: list[str], max_length: int = 512) -> np.ndarray:
        """Tokenize and pad ``sentences`` and return the model's ``logits`` output."""
        input_ids = self.tokenizer.encode_and_pad(
            sentences, pad_id=self.pad_id, max_length=max_length
        )
        logits = self.session.run(
            ["logits"], {"input_ids": input_ids}
        )[0]
        return logits

    def predict(
        self,
        sentences: list[str],
        max_length: int = 512,
        threshold: float = 0.5,
    ) -> list:
        """Predict labels for ``sentences``.

        Returns one label per sentence (argmax of the logits), or, for
        multilabel models, one list per sentence holding every label whose
        sigmoid probability exceeds ``threshold``. An empty input returns
        ``[]`` without invoking the model.
        """
        if len(sentences) == 0:
            return []
        logits = self._run(sentences, max_length)
        if self.multilabel:
            probs = _sigmoid(logits)
            return [
                [self.labels[j] for j, v in enumerate(row) if v > threshold]
                for row in probs
            ]
        indices = np.argmax(logits, axis=1)
        return [self.labels[i] for i in indices]

    def predict_proba(
        self, sentences: list[str], max_length: int = 512
    ) -> list[dict[str, float]]:
        """Return one ``{label: probability}`` dict per sentence.

        Uses sigmoid for multilabel models and softmax otherwise. An empty
        input returns ``[]`` without invoking the model.
        """
        if len(sentences) == 0:
            return []
        logits = self._run(sentences, max_length)
        probs = _sigmoid(logits) if self.multilabel else _softmax(logits)
        return [
            {label: float(p) for label, p in zip(self.labels, row)}
            for row in probs
        ]


def _softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)


def _sigmoid(x: np.ndarray) -> np.ndarray:
    return 1.0 / (1.0 + np.exp(-x))

---
## 10. Test ONNX Inference (multilingual examples)

In [14]:
# Inference demo: load the exported ONNX model for the last entry in MODELS.
test_model_name = MODELS[-1]  # potion-multilingual-128M
test_onnx_dir = EXPORT_DIR / test_model_name.rsplit("/", 1)[-1]

print(f"Loading ONNX classifier from: {test_onnx_dir}")
clf = OnnxClassifier(str(test_onnx_dir))

# Demo utterances grouped by language; flattened in declaration order below.
_demo_utterances = {
    "English": [
        "what time is it",
        "play some jazz music",
        "set a timer for five minutes",
        "what's the weather like tomorrow",
    ],
    "German": ["wie spät ist es", "spiel etwas Jazzmusik"],
    "Spanish": ["qué hora es", "pon algo de música jazz"],
    "Italian": ["che ore sono"],
    "French": ["quelle heure est-il"],
    "Portuguese": ["que horas são"],
    "Dutch": ["hoe laat is het"],
    "Catalan": ["quina hora és"],
    # Stop command (multiple languages)
    "Stop": ["stop", "arrête", "para"],
}
test_sentences = [
    utterance for group in _demo_utterances.values() for utterance in group
]

print("\nPredictions:")
predictions = clf.predict(test_sentences)
for utterance, label in zip(test_sentences, predictions):
    print(f"  [{label}]  {utterance}")

print("\nTop-3 Probabilities:")
probs = clf.predict_proba(test_sentences)
for utterance, label_probs in zip(test_sentences, probs):
    # Highest-probability labels first; keep the top three.
    ranked = sorted(label_probs.items(), key=lambda item: item[1], reverse=True)
    top3 = ranked[:3]
    prob_str = "  ".join(f"{k}: {v:.4f}" for k, v in top3)
    print(f"  {utterance}")
    print(f"    {prob_str}")

Loading ONNX classifier from: exported_models_multilingual/potion-multilingual-128M

Predictions:
  [ovos-skill-date-time.openvoiceos:what.time.is.it.intent]  what time is it
  [ocp:play]  play some jazz music
  [ovos-skill-alerts.openvoiceos:CreateTimer]  set a timer for five minutes
  [ovos-skill-weather.openvoiceos:daily_forecast.intent]  what's the weather like tomorrow
  [ovos-skill-date-time.openvoiceos:what.time.is.it.intent]  wie spät ist es
  [ocp:play]  spiel etwas Jazzmusik
  [ovos-skill-date-time.openvoiceos:what.time.is.it.intent]  qué hora es
  [ocp:play]  pon algo de música jazz
  [ovos-skill-date-time.openvoiceos:what.time.is.it.intent]  che ore sono
  [ovos-skill-date-time.openvoiceos:what.time.is.it.intent]  quelle heure est-il
  [ovos-skill-date-time.openvoiceos:what.time.is.it.intent]  que horas são
  [ovos-skill-date-time.openvoiceos:what.time.is.it.intent]  hoe laat is het
  [ovos-skill-date-time.openvoiceos:what.time.is.it.intent]  quina hora és
  [stop:stop]  st

---
## 11. Validate: ONNX vs Original Model on Full Test Set

In [15]:
# Score the ONNX classifier on the full test split and compare its accuracy
# with the accuracy recorded in RESULTS for the same model.
test_texts = test_data["sentence"]
test_labels = test_data["label"]
batch_size = 256

print(f"Running ONNX inference on {len(test_texts)} test samples...")

onnx_preds = []
for start in range(0, len(test_texts), batch_size):
    onnx_preds.extend(clf.predict(test_texts[start : start + batch_size]))

onnx_correct = sum(
    1 for predicted, expected in zip(onnx_preds, test_labels) if predicted == expected
)
onnx_acc = onnx_correct / len(test_labels)

original_acc = RESULTS[test_model_name]["parsed"].get("accuracy", None)

print(f"\nONNX Accuracy:     {onnx_acc:.4f}")
if original_acc is not None:
    print(f"Original Accuracy: {original_acc:.4f}")
    diff = abs(onnx_acc - original_acc)
    # Accuracies within 0.005 of each other are reported as a match.
    verdict = (
        f"Match: YES (diff = {diff:.4f})"
        if diff < 0.005
        else f"Match: NO  (diff = {diff:.4f}) -- check tokenizer alignment"
    )
    print(verdict)

Running ONNX inference on 1370 test samples...

ONNX Accuracy:     0.9657
Original Accuracy: 0.9700
Match: YES (diff = 0.0043)


---
## 12. Verify Tokenizer Alignment

In [16]:
# Compare: HuggingFace tokenizer vs MinimalTokenizer
# We reload a lightweight classifier just for tokenizer comparison

try:
    from model2vec.train import StaticModelForClassification
    check_model = MODELS[-1]  # multilingual
    check_classifier = StaticModelForClassification.from_pretrained(model_name=check_model)
    hf_tokenizer = check_classifier.tokenizer
except Exception:
    # Best-effort check: if the reference tokenizer cannot be loaded, skip it.
    hf_tokenizer = None
    print("Could not load classifier for tokenizer comparison -- skip this cell.")

if hf_tokenizer is not None:
    tok_dir = EXPORT_DIR / check_model.split("/")[-1]
    tokenizer_path = tok_dir / "tokenizer.json"
    if not tokenizer_path.exists():
        print(f"No tokenizer.json found at {tok_dir} -- was this model exported?")
    else:
        mini_tok = MinimalTokenizer(str(tokenizer_path))
        print(f"Tokenizer type: {mini_tok.model_type}")

        # Compare token ids on the first 20 test sentences.
        sample_texts = test_texts[:20]
        mismatches = 0
        for text in sample_texts:
            hf_ids = hf_tokenizer.encode(text, add_special_tokens=False).ids
            mini_ids = mini_tok.encode(text)
            if hf_ids == mini_ids:
                continue
            mismatches += 1
            print(f"MISMATCH: '{text[:60]}...'")
            print(f"  HF:   {hf_ids[:15]}")
            print(f"  Mini: {mini_ids[:15]}")

        if mismatches:
            print(f"\n{mismatches}/{len(sample_texts)} mismatches.")
        else:
            print(f"All {len(sample_texts)} samples match!")

    # Release the reference classifier once the comparison is done.
    del check_classifier
    gc.collect()

Tokenizer type: Unigram
All 20 samples match!


---
## 13. Deployment Summary

Copy the code cells from sections 8–9 (tokenizer and ONNX classifier) into `inference.py`.

### Directory layout
```
your_model_dir/
    classifier.onnx
    config.json
    tokenizer.json
inference.py
```

### Dependencies
```
onnxruntime
numpy
```

### Usage
```python
from inference import OnnxClassifier

clf = OnnxClassifier("your_model_dir")

# Works across all trained languages
clf.predict(["what time is it"])            # -> ["ovos-skill-date-time.openvoiceos:what.time.is.it.intent"]
clf.predict(["wie spät ist es"])            # -> ["ovos-skill-date-time.openvoiceos:what.time.is.it.intent"]
clf.predict(["pon algo de música jazz"])    # -> ["ocp:play"]
clf.predict_proba(["arrête le minuteur"])   # -> [{"stop:stop": 0.87, ...}]
```