## Model Optimization And Quantization

In [5]:
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoModelForSequenceClassification
import torch

In [2]:
# Location of the model checkpoint that the cells below load
model_path = "./tinybert-sentiment"

In [3]:
# Load the checkpoint through Optimum's ONNX Runtime wrapper.
# export=True requests conversion to ONNX while loading, and the provider
# argument selects ONNX Runtime's CPU execution provider.
model = ORTModelForSequenceClassification.from_pretrained(
    model_path, export=True, provider="CPUExecutionProvider"
)

In [4]:
# Write the ONNX model held in `model` to its own directory,
# separate from the original checkpoint at `model_path`
model.save_pretrained("./tinybert-sentiment-onnx")

In [6]:
# Reload the checkpoint as a regular transformers (PyTorch) model.
# This rebinds `model`, which until now referred to the ONNX Runtime
# model created earlier; the quantization step that follows uses this
# PyTorch model, not the ONNX one.
model = AutoModelForSequenceClassification.from_pretrained(model_path)

In [7]:
# Alternative to the ONNX export: dynamic quantization in PyTorch.
# Only torch.nn.Linear modules are selected, and they are quantized to qint8.
# torch.ao.quantization is where this API is documented; the older
# torch.quantization path is kept as a compatibility alias.
quantized_model = torch.ao.quantization.quantize_dynamic(
    model,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)

In [8]:
# Persist only the quantized model's state_dict (not the module object itself).
# NOTE(review): to reload this file, the same quantized architecture likely has
# to be rebuilt first (load the base model, apply quantize_dynamic again) before
# calling load_state_dict -- confirm the loading code does this.
torch.save(quantized_model.state_dict(), "./tinybert-sentiment-quantized.pt")