## Create a lightweight inference class & perform Benchmarking

In [6]:

import onnxruntime as ort
from transformers import AutoTokenizer
import numpy as np
import torch
import re
import time
from tqdm import tqdm
import pandas as pd

In [2]:
# Preprocessing function
def preprocess_text(text):
    """Normalize raw text before it is handed to the tokenizer.

    The input is coerced to ``str`` and lowercased, then dollar signs
    (ticker prefixes) and URLs are dropped, every character that is not a
    word character, whitespace or one of ``. , ! ?`` is removed, and the
    result is stripped of leading/trailing whitespace.

    Args:
        text: Any object; it is converted with ``str()`` first.

    Returns:
        The cleaned string.
    """
    lowered = str(text).lower()
    # Ticker symbols such as "$BYND" keep their letters but lose the "$".
    without_tickers = lowered.replace('$', '')
    # Both "http(s)://..." and bare "www...." links are removed.
    without_urls = re.sub(r'https?://\S+|www\.\S+', '', without_tickers)
    # Only word characters, whitespace and basic punctuation survive.
    cleaned = re.sub(r'[^\w\s.,!?]', '', without_urls)
    return cleaned.strip()


In [3]:
class SentimentAnalyzer:
    """Sentiment classifier that runs an ONNX model through ONNX Runtime.

    The inference path uses only NumPy for post-processing, so no deep
    learning framework is needed at prediction time.

    Args:
        model_path: Directory containing ``model.onnx`` and the tokenizer
            files loadable by ``AutoTokenizer.from_pretrained``.
        max_length: Sequence length every input is padded/truncated to.
            Defaults to 128.
    """

    def __init__(self, model_path="./tinybert-sentiment-onnx", max_length=128):
        self.session = ort.InferenceSession(f"{model_path}/model.onnx")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        # NOTE(review): label order is hard-coded and assumed to match the
        # model's output index order - confirm against the training config.
        self.labels = ["negative", "neutral", "positive"]  # Hard-coded labels
        self.max_length = max_length
        # Not every ONNX export declares token_type_ids; remember what this
        # model actually accepts so predict() only feeds those inputs.
        self._input_names = {node.name for node in self.session.get_inputs()}

    @staticmethod
    def _softmax(logits):
        """Return a numerically stable softmax over the last axis."""
        # Subtracting the row maximum keeps exp() from overflowing.
        shifted = logits - np.max(logits, axis=-1, keepdims=True)
        exponentials = np.exp(shifted)
        return exponentials / np.sum(exponentials, axis=-1, keepdims=True)

    def predict(self, text):
        """Classify a single piece of text.

        Args:
            text: Raw input; it is cleaned with ``preprocess_text`` first.

        Returns:
            dict with keys ``"sentiment"`` (the winning label),
            ``"confidence"`` (its probability as a float) and
            ``"probabilities"`` (label -> float for every label).
        """
        # Preprocess
        text = preprocess_text(text)

        # Tokenize
        inputs = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="np",
            return_token_type_ids=True
        )

        # Feed only the inputs the model declares, cast to int64 as before.
        feed = {
            name: inputs[name].astype(np.int64)
            for name in ("input_ids", "attention_mask", "token_type_ids")
            if name in self._input_names
        }

        # Run inference
        outputs = self.session.run(None, feed)

        # Get predictions
        logits = np.asarray(outputs[0])
        probabilities = self._softmax(logits)[0]
        predicted_class = int(np.argmax(logits, axis=-1)[0])

        return {
            "sentiment": self.labels[predicted_class],
            "confidence": float(probabilities[predicted_class]),
            "probabilities": {
                label: float(prob)
                for label, prob in zip(self.labels, probabilities)
            }
        }


In [4]:
# Example usage: classify one sample headline with the default model
analyzer = SentimentAnalyzer()
sample_headline = "$BYND - JPMorgan reels in expectations on Beyond Meat"
result = analyzer.predict(sample_headline)
print(result)

{'sentiment': 'negative', 'confidence': 0.6044331192970276, 'probabilities': {'negative': 0.6044331192970276, 'neutral': 0.045562803745269775, 'positive': 0.35000404715538025}}


## Performance Benchmarking

In [7]:
def benchmark_model(analyzer, dataset, n_runs=100, n_warmup=5, text_column="text"):
    """Measure per-call latency of ``analyzer.predict`` on rows of ``dataset``.

    Runs ``n_warmup`` untimed predictions on the first row, then times
    ``n_runs`` predictions, cycling through the rows in order. Only the
    ``predict`` call itself is inside the timed region. Prints the mean and
    95th-percentile latency.

    Args:
        analyzer: Object exposing ``predict(text)``.
        dataset: Table whose ``text_column`` supports ``.iloc`` and ``len``
            (e.g. a pandas DataFrame).
        n_runs: Number of timed predictions; must be positive.
        n_warmup: Number of untimed warmup predictions. Defaults to 5.
        text_column: Name of the column holding the input text.

    Returns:
        list of float: one latency per timed run, in milliseconds.

    Raises:
        ValueError: If ``n_runs`` is not positive or the dataset has no rows.
    """
    if n_runs <= 0:
        raise ValueError("n_runs must be a positive integer")

    # Look the column up once instead of on every iteration.
    texts = dataset[text_column]
    n_rows = len(texts)
    if n_rows == 0:
        raise ValueError("dataset must contain at least one row")

    # Warmup
    for _ in range(n_warmup):
        analyzer.predict(texts.iloc[0])

    # Latency test
    latencies = []
    for i in tqdm(range(n_runs)):
        text = texts.iloc[i % n_rows]
        start = time.perf_counter()
        analyzer.predict(text)
        end = time.perf_counter()
        latencies.append((end - start) * 1000)  # in ms

    avg_latency = np.mean(latencies)
    p95_latency = np.percentile(latencies, 95)

    print(f"Average latency: {avg_latency:.2f}ms")
    print(f"95th percentile latency: {p95_latency:.2f}ms")
    return latencies


In [9]:
# Load the test split that the benchmark below draws its inputs from
test_csv_path = '../processed-datasets/test.csv'
test_df = pd.read_csv(test_csv_path)

In [11]:
# Sanity check: show the loaded frame's (rows, columns)
test_df.shape

(5834, 2)

In [12]:
# Run benchmark with the default number of timed runs
print("Benchmarking ONNX model...")
latencies = benchmark_model(analyzer=analyzer, dataset=test_df)

Benchmarking ONNX model...


100%|██████████| 100/100 [00:00<00:00, 234.49it/s]

Average latency: 4.18ms
95th percentile latency: 5.80ms





In [13]:
# Check model size: on-disk size of the exported ONNX file
import os
bytes_per_mb = 1024 * 1024
onnx_file = "./tinybert-sentiment-onnx/model.onnx"
model_size = os.path.getsize(onnx_file) / bytes_per_mb
print(f"Model size: {model_size:.2f}MB")

Model size: 54.84MB
