In [42]:
import evaluate
import numpy as np
import optuna
import os
import torch
import torch.nn as nn

from benchmark import PerformanceBenchmark
from datasets import load_dataset
from pathlib import Path
from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions
from psutil import cpu_count
from scipy.special import softmax
from torch.quantization import quantize_dynamic
from training import KDTrainer, KDTrainingArguments
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, pipeline
from transformers.convert_graph_to_onnx import convert

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load model and dataset

In [None]:
# Checkpoints: the fine-tuned teacher and the pretrained student
teacher_ckpt = "transformersbook/bert-base-uncased-finetuned-clinc"
student_ckpt = "distilbert-base-uncased"

# Teacher wrapped in a text-classification pipeline
pipe = pipeline("text-classification", model=teacher_ckpt)

# Dataset: "plus" configuration of clinc_oos
clinc = load_dataset("clinc_oos", "plus")

# Tokenizer that matches the student checkpoint
student_tokenizer = AutoTokenizer.from_pretrained(student_ckpt)

# Evaluation metric
accuracy_score = evaluate.load("accuracy")

# Query reused by the benchmarks further down
bench_query = "What is the pin number for my account?"

# Prefer the GPU whenever torch can see one
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

## Inspect dataset

In [None]:
# Pull one test example and decode its integer intent into a readable name.
test_split = clinc["test"]
intents = test_split.features["intent"]

sample = test_split[42]
print("Sample", sample)
print("Label", intents.int2str(sample["intent"]))

## Run sample queries through the teacher model

In [None]:
# Hand-written utterances used to sanity-check the teacher pipeline.
query = [
    "Hey, I'd like to rent a vehicle from Nov 1st to Nov 15th in Paris and I need a 15 passenger van",
    "Please transfer 100 Dollars from my account."
]
results = pipe(query)
# Map every predicted label name back to its integer ID. Iterating over the
# results (instead of indexing [0] and [1]) keeps this correct when queries
# are added to or removed from the list above.
label_ids = [intents.str2int(result["label"]) for result in results]
print("Results", results)
print("Label IDs", label_ids)

## Run Benchmarks against baseline model

In [None]:
# Baseline numbers: benchmark the teacher pipeline on the test split.
bench = PerformanceBenchmark(
    pipeline=pipe,
    dataset=clinc["test"],
    query=bench_query,
    class_labels=intents,
)
bench.run_benchmark()

## Prepare Dataset

In [None]:
def tokenize_text(batch):
    """Tokenize the ``text`` field of a batch with the student tokenizer.

    Truncation is enabled; the tokenizer's output is returned unchanged.
    """
    texts = batch["text"]
    return student_tokenizer(texts, truncation=True)

# Tokenize in batches, drop the raw text column, and rename the target
# column from `intent` to `labels`.
clinc_enc = clinc.map(
    tokenize_text,
    batched=True,
    remove_columns=["text"],
).rename_column("intent", "labels")
clinc_enc

## Model Training

In [None]:
def compute_metrics(pred):
    """Compute accuracy for an evaluation step.

    Args:
        pred: A pair that unpacks into ``(predictions, labels)``; the
            predicted class is the argmax of ``predictions`` along axis 1.

    Returns:
        Whatever ``accuracy_score.compute`` returns for the predicted classes
        against the reference labels.
    """
    scores, references = pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy_score.compute(predictions=predicted_ids, references=references)

finetuned_ckpt = "distilbert-base-uncased-finetuned-clinc"
batch_size = 4

training_args = KDTrainingArguments(
    # Output location and hub settings
    output_dir=f"../../checkpoints/{finetuned_ckpt}",
    push_to_hub=False,
    # Optimisation schedule
    num_train_epochs=10,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Distillation settings: presumably the loss mixing weight and the softmax
    # temperature; confirm against training.KDTrainingArguments.
    alpha=0.12,
    temperature=7,
)

# Copy the teacher's label mappings into the student configuration.
teacher_config = pipe.model.config
id2label, label2id = teacher_config.id2label, teacher_config.label2id

num_labels = intents.num_classes
student_config = AutoConfig.from_pretrained(
    student_ckpt,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

def student_init():
    """Build a student model from the pretrained checkpoint and move it to ``device``.

    Passed to the trainer as ``model_init``.
    """
    student = AutoModelForSequenceClassification.from_pretrained(student_ckpt, config=student_config)
    return student.to(device)

# Distillation trainer: the student comes from `student_init`, the teacher is
# the model inside the baseline pipeline.
trainer = KDTrainer(
    args=training_args,
    model_init=student_init,
    teacher_model=pipe.model,
    tokenizer=student_tokenizer,
    train_dataset=clinc_enc["train"],
    eval_dataset=clinc_enc["validation"],
    compute_metrics=compute_metrics,
)

In [None]:
# Run training with the KDTrainer built from `training_args` and `student_init`.
trainer.train()

In [None]:
# Wrap the trained student in a pipeline and benchmark it on the same test
# split and query as the baseline.
pipe_student = pipeline(
    "text-classification",
    model=trainer.model,
    tokenizer=student_tokenizer,
)
bench_student = PerformanceBenchmark(
    pipeline=pipe_student,
    dataset=clinc["test"],
    query=bench_query,
    class_labels=intents,
    optim_type="DistilBERT",
)
bench_student.run_benchmark()

## Hyperparameter Tuning

In [None]:
def hp_space(trial):
    """Describe the hyperparameter search space for one trial.

    Args:
        trial: Object exposing ``suggest_int(name, low, high)`` and
            ``suggest_float(name, low, high)`` (e.g. an Optuna trial).

    Returns:
        Dict with the sampled ``num_train_epochs`` (5 to 10), ``alpha``
        (0 to 1) and ``temperature`` (2 to 20).
    """
    epochs = trial.suggest_int("num_train_epochs", 5, 10)
    alpha = trial.suggest_float("alpha", 0, 1)
    temperature = trial.suggest_int("temperature", 2, 20)
    return {
        "num_train_epochs": epochs,
        "alpha": alpha,
        "temperature": temperature,
    }

# Run 20 trials over `hp_space`, maximizing the search objective.
best_run = trainer.hyperparameter_search(
    hp_space=hp_space,
    direction="maximize",
    n_trials=20,
)
print("BEST RUN", best_run)

## Model Quantization

In [None]:
# Dynamic quantization is a CPU feature, so move the trained student to the
# CPU before quantizing its nn.Linear layers to int8. Note that `.to("cpu")`
# moves `trainer.model` itself; `quantize_dynamic` then returns a quantized copy.
model_quantized = quantize_dynamic(trainer.model.to("cpu"), {nn.Linear}, dtype=torch.qint8)
pipe_quantized = pipeline("text-classification", model=model_quantized, tokenizer=student_tokenizer)
# Use a distinct optim_type so these results are not confused with the
# un-quantized student benchmark, which is labelled "DistilBERT".
bench_quantized = PerformanceBenchmark(
    pipeline=pipe_quantized,
    dataset=clinc["test"],
    class_labels=intents,
    query=bench_query,
    optim_type="DistilBERT + quantization",
)
bench_quantized.run_benchmark()

## Export to ONNX

In [None]:
# Thread settings exported through the environment before the ONNX export.
os.environ["OMP_NUM_THREADS"] = str(cpu_count())
os.environ["OMP_WAIT_POLICY"] = "ACTIVE"

# Export the trained student to an ONNX graph at this path.
model_onnx_path = Path("../../checkpoints/export-onnx/distilbert.onnx")
convert(
    framework="pt",
    model=trainer.model,
    tokenizer=student_tokenizer,
    output=model_onnx_path,
    opset=12,
    pipeline_name="text-classification",
)

In [41]:
def create_model_for_provider(model_path, provider="CPUExecutionProvider"):
    """Open an ONNX Runtime inference session for the model at ``model_path``.

    Args:
        model_path: Location of the ONNX file; converted with ``str()`` before use.
        provider: Name of the single execution provider to use.

    Returns:
        An ``InferenceSession`` with all graph optimizations enabled, one
        intra-op thread, and provider fallback disabled.
    """
    session_options = SessionOptions()
    session_options.intra_op_num_threads = 1
    session_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

    ort_session = InferenceSession(str(model_path), session_options, providers=[provider])
    ort_session.disable_fallback()
    return ort_session

# Open a session on the exported graph. The file at `model_onnx_path` must
# already exist, so the ONNX export cell has to run before this one.
onnx_model = create_model_for_provider(model_onnx_path)

NoSuchFile: [ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from ../../checkpoints/export-onnx/distilbert.onnx failed:Load model ../../checkpoints/export-onnx/distilbert.onnx failed. File doesn't exist

## ONNX Inference Pipeline

In [None]:
class OnnxPipeline:
    """Minimal text-classification pipeline around an ONNX inference session.

    Args:
        model: Object with a ``run(output_names, input_feed)`` method.
        tokenizer: Callable that encodes a query into a mapping of tensors.
    """

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def __call__(self, query):
        """Classify ``query`` and return ``[{"label": ..., "score": ...}]``.

        Only the first row of the first model output is used, so a single
        prediction is returned per call.
        """
        encoded = self.tokenizer(query, return_tensors="pt")
        # The session is fed NumPy arrays, so convert every tensor first.
        feed = {}
        for name, tensor in encoded.items():
            feed[name] = tensor.cpu().detach().numpy()
        outputs = self.model.run(None, feed)
        logits = outputs[0][0, :]
        probs = softmax(logits)
        best = np.argmax(probs).item()
        # Label names come from the notebook-level `intents` feature.
        label = intents.int2str(best)
        return [{"label": label, "score": probs[best]}]
    
pipe_onnx = OnnxPipeline(onnx_model, student_tokenizer)
# Query the ONNX-backed pipeline itself (not the teacher `pipe`) so this cell
# actually exercises the exported model.
pipe_onnx(bench_query)

## ONNX Performance Benchmark

In [None]:
class OnnxPerformanceBenchmark(PerformanceBenchmark):
    """PerformanceBenchmark that reports model size from a file on disk.

    Args:
        *args, **kwargs: Forwarded unchanged to ``PerformanceBenchmark``.
        model_path: Keyword-only path of the model file whose size is measured.
    """

    def __init__(self, *args, model_path, **kwargs):
        super().__init__(*args, **kwargs)
        self.model_path = model_path

    def compute_size(self):
        """Return ``{"size_mb": ...}`` with the file size in MiB, rounded to 3 decimals."""
        n_bytes = Path(self.model_path).stat().st_size
        size_mb = np.round(n_bytes / (1024 * 1024), 3)
        return {"size_mb": size_mb}
    

# Pass the same query and class labels as every other benchmark in this
# notebook so the ONNX results are measured under identical conditions.
bench_onnx = OnnxPerformanceBenchmark(
    pipeline=pipe_onnx,
    dataset=clinc["test"],
    query=bench_query,
    class_labels=intents,
    model_path=model_onnx_path,
    optim_type="ORT",
)
bench_onnx.run_benchmark()