<a href="https://colab.research.google.com/github/MMillward2012/deepmind_internship/blob/main/notebooks/7_benchmarks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import packages

In [5]:
# %cd ..
!ls

README.md        [34mmodels[m[m           [34mresults[m[m
[34mdata[m[m             [34mnotebooks[m[m        [34msrc[m[m
[34mfigures[m[m          requirements.txt [34mvenv-py311[m[m


In [1]:
import os
import time
import numpy as np
import pandas as pd
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from transformers.onnx import export
from transformers.onnx.features import FeaturesManager
import onnxruntime as ort
from onnxruntime.quantization import quantize_dynamic, QuantType
import torch


In [2]:
BASE_DIR = Path("models")
EXAMPLE_INPUT = "Stocks surged after the company reported record earnings."
MAX_LENGTH = 128
ONNX_OPSET = 13
BENCHMARK_ITERATIONS = 100

In [6]:
model_dirs = [d for d in BASE_DIR.iterdir() if d.is_dir()]
print("Found models:", [m.name for m in model_dirs])

Found models: ['all-MiniLM-L6-v2-financial-sentiment', 'distilbert-financial-sentiment', 'tinybert-financial-classifier', 'mobilebert-uncased-financial-sentiment']


In [None]:
def export_to_onnx(model_dir, onnx_path, task="sequence-classification"):
    config = AutoConfig.from_pretrained(model_dir)
    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    
    model_kind, onnx_config_class = FeaturesManager.check_supported_model_or_raise(config, task=task)
    onnx_config = onnx_config_class(config)
    
    export(
        preprocessor=tokenizer,
        model=model,
        config=onnx_config,
        opset=ONNX_OPSET,
        output=onnx_path
    )

In [None]:
def benchmark_onnx(onnx_model_path, tokenizer, quantised=False):
    sess = ort.InferenceSession(str(onnx_model_path), providers=["CPUExecutionProvider"])
    inputs = tokenizer(EXAMPLE_INPUT, return_tensors="np", max_length=MAX_LENGTH, padding="max_length", truncation=True)

    # Warm-up
    for _ in range(10):
        sess.run(None, {"input_ids": inputs["input_ids"]})

    # Benchmark
    times = []
    for _ in range(BENCHMARK_ITERATIONS):
        start = time.time()
        sess.run(None, {"input_ids": inputs["input_ids"]})
        times.append((time.time() - start) * 1000)

    return {
        "avg_latency_ms": np.mean(times),
        "p99_latency_ms": np.percentile(times, 99),
        "quantised": quantised
    }

In [None]:
results = []

for model_dir in model_dirs:
    print(f"\n⏳ Processing {model_dir.name}...")
    
    onnx_dir = model_dir / "onnx"
    onnx_dir.mkdir(exist_ok=True)
    onnx_model_path = onnx_dir / "model.onnx"
    quantised_model_path = onnx_dir / "model-int8.onnx"

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_dir)

    # Export ONNX if not already done
    if not onnx_model_path.exists():
        print("📦 Exporting to ONNX...")
        export_to_onnx(model_dir, onnx_model_path)
    else:
        print("✅ ONNX already exists.")

    # Benchmark original
    print("🧪 Benchmarking original model...")
    result_fp32 = benchmark_onnx(onnx_model_path, tokenizer, quantised=False)
    result_fp32["model"] = model_dir.name
    result_fp32["size_mb"] = onnx_model_path.stat().st_size / 1e6
    results.append(result_fp32)

    # Quantise if not already done
    if not quantised_model_path.exists():
        print("⚙️  Quantising...")
        quantize_dynamic(str(onnx_model_path), str(quantised_model_path), weight_type=QuantType.QInt8)
    else:
        print("✅ Quantised model already exists.")

    # Benchmark quantised
    print("🧪 Benchmarking quantised model...")
    result_int8 = benchmark_onnx(quantised_model_path, tokenizer, quantised=True)
    result_int8["model"] = model_dir.name + " (INT8)"
    result_int8["size_mb"] = quantised_model_path.stat().st_size / 1e6
    results.append(result_int8)

In [None]:
df = pd.DataFrame(results)
df = df[["model", "size_mb", "avg_latency_ms", "p99_latency_ms", "quantised"]]
df = df.sort_values(by="avg_latency_ms")
df.reset_index(drop=True, inplace=True)

df.style.format({
    "size_mb": "{:.1f}",
    "avg_latency_ms": "{:.2f}",
    "p99_latency_ms": "{:.2f}"
})