# Export to ONNX (and Quantize)

This notebook exports a trained token-classification model to ONNX, (optionally) applies dynamic INT8 quantization, and validates the exported ONNX model.

Prerequisites (dev-only):
- `onnx` and `optimum[onnxruntime]` for export
- `onnxruntime` + `onnx` for quantization

In [None]:
import os
import subprocess
import sys
from pathlib import Path

# AI_WAREHOUSE 3.0 cache layout (avoid $HOME/.cache).
# setdefault() keeps any value that is already exported in the environment.
os.environ.setdefault('HF_HOME', '/mnt/c/ai_cache/huggingface')
os.environ.setdefault('TRANSFORMERS_CACHE', os.environ['HF_HOME'])
os.environ.setdefault('TORCH_HOME', '/mnt/c/ai_cache/torch')
os.environ.setdefault('XDG_CACHE_HOME', '/mnt/c/ai_cache')
os.environ.setdefault('PIP_CACHE_DIR', '/mnt/c/ai_cache/pip')

# Make sure every cache directory exists before any tool tries to write to it.
for key in ('HF_HOME', 'TORCH_HOME', 'XDG_CACHE_HOME', 'PIP_CACHE_DIR'):
    Path(os.environ[key]).expanduser().mkdir(parents=True, exist_ok=True)

MODEL_DIR = '/mnt/data/training/runs/edge_deid/synthetic-ner-demo'
EXPORT_DIR = '/mnt/c/ai_models/detection/edge_deid/synthetic-ner-demo'

# Export a model directory to ONNX (copies tokenizer/config into the output dir by default).
# - sys.executable runs the script with the kernel's own interpreter rather than
#   whatever `python` happens to be first on PATH.
# - check=True raises CalledProcessError on a non-zero exit, so "Run All" stops here
#   instead of continuing with a missing or stale model.onnx.
# - 'scripts/...' and PYTHONPATH='src' are relative: the kernel's working directory
#   must be the directory that contains them.
subprocess.run(
    [
        sys.executable, 'scripts/export_token_classifier_onnx.py',
        '--model-dir', MODEL_DIR,
        '--output-dir', EXPORT_DIR,
        '--file-name', 'model.onnx',
    ],
    env={**os.environ, 'PYTHONPATH': 'src'},
    check=True,
);

In [None]:
# Optional: quantize weights to INT8
import os
import subprocess
import sys
from pathlib import Path

EXPORT_DIR = Path('/mnt/c/ai_models/detection/edge_deid/synthetic-ner-demo')

# Run with the kernel's interpreter and fail loudly (check=True) if quantization
# exits non-zero, so a broken run cannot be mistaken for a successful one.
# 'scripts/...' and PYTHONPATH='src' are relative to the kernel's working directory.
subprocess.run(
    [
        sys.executable, 'scripts/quantize_onnx_model.py',
        '--input', str(EXPORT_DIR / 'model.onnx'),
        '--output', str(EXPORT_DIR / 'model.int8.onnx'),
    ],
    env={**os.environ, 'PYTHONPATH': 'src'},
    check=True,
);

In [None]:
# Validate the exported ONNX model
import os
import subprocess
import sys
from pathlib import Path

EXPORT_DIR = Path('/mnt/c/ai_models/detection/edge_deid/synthetic-ner-demo')
PYTORCH_MODEL_DIR = '/mnt/data/training/runs/edge_deid/synthetic-ner-demo'

# check=True turns a non-zero exit from the validation script into a
# CalledProcessError, so a failed validation visibly fails the cell instead of
# being silently ignored as it would be with a `!` shell escape.
# 'scripts/...' and PYTHONPATH='src' are relative to the kernel's working directory.
subprocess.run(
    [
        sys.executable, 'scripts/validate_onnx_token_classifier.py',
        '--onnx-model', str(EXPORT_DIR / 'model.onnx'),
        '--tokenizer-dir', str(EXPORT_DIR),
        '--pytorch-model-dir', PYTORCH_MODEL_DIR,
    ],
    env={**os.environ, 'PYTHONPATH': 'src'},
    check=True,
);