In [None]:
import os
import time
import numpy as np
import pandas as pd
import torch
import onnx
import onnxruntime as ort
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler


In [None]:
# Locate the held-out test set; the NBA_TEST_DATA environment variable
# overrides the default file name.
csv_path = os.environ.get("NBA_TEST_DATA", "nba_test.csv")
df = pd.read_csv(csv_path)

# 'score_diff' is assumed to be the regression target; every remaining column
# is used as an input feature.
y = df['score_diff'].values
X = df.drop(columns=['score_diff']).values

# NOTE(review): the scaler is fitted on the test file itself. Confirm this
# matches the normalization applied at training time; otherwise the metrics
# below are computed on differently scaled inputs.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# float32 tensors; the target is shaped as a column vector (N, 1).
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)

# Fixed ordering (shuffle=False) so evaluation runs are repeatable.
test_dataset = TensorDataset(X_tensor, y_tensor)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)


In [None]:
# Load the trained PyTorch model on CPU and export it to ONNX.
model_path = "models/nba_model.pt"
device = torch.device("cpu")

# The checkpoint is a fully pickled nn.Module (we call .eval() on the result),
# so it must be loaded with weights_only=False: since PyTorch 2.6 the default
# is weights_only=True, which refuses to unpickle arbitrary module classes.
# SECURITY: unpickling can execute arbitrary code -- only load checkpoints
# that come from a trusted source.
model = torch.load(model_path, map_location=device, weights_only=False)
model.eval()

onnx_model_path = "models/nba_model.onnx"
# A single random row with the same feature count as the test data; only its
# shape/dtype matter for tracing the graph.
dummy_input = torch.randn(1, X.shape[1], dtype=torch.float32)

# Axis 0 is declared dynamic on both input and output so the exported graph
# accepts any batch size, not just the batch of 1 used for export.
torch.onnx.export(model, dummy_input, onnx_model_path,
                  export_params=True, opset_version=20,
                  do_constant_folding=True, input_names=['input'],
                  output_names=['output'], dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}})

print(f"ONNX model saved to {onnx_model_path}")

# Reload the file from disk and run the ONNX structural validator on it.
onnx_model = onnx.load(onnx_model_path)
onnx.checker.check_model(onnx_model)


In [None]:
# Path is restated here so this cell can be run without re-exporting the model.
onnx_model_path = "models/nba_model.onnx"

# Create the ONNX Runtime session, restricted to the CPU execution provider.
ort_session = ort.InferenceSession(
    onnx_model_path,
    providers=['CPUExecutionProvider'],
)


In [None]:
# Evaluate the ONNX model on the test loader and compute the mean absolute error.

# Resolve the graph's input name once instead of on every batch.
input_name = ort_session.get_inputs()[0].name

total_mae = 0  # running SUM of absolute errors (divided by `total` below)
total = 0      # number of samples seen
for features, labels in test_loader:
    targets = labels.numpy()
    outputs = ort_session.run(None, {input_name: features.numpy()})[0]
    # Guard against silent broadcasting: if the model emits shape (batch,)
    # while the targets are (batch, 1), `outputs - targets` would expand to
    # (batch, batch) and inflate the error. Align shapes when the element
    # counts agree; otherwise leave the arrays untouched.
    if outputs.shape != targets.shape and outputs.size == targets.size:
        outputs = outputs.reshape(targets.shape)
    mae = np.abs(outputs - targets).sum()
    total_mae += mae
    total += labels.size(0)

# NOTE: despite its name, `accuracy` holds the mean absolute error. The name
# is kept because later cells read it.
accuracy = total_mae / total


In [None]:
# Report the test-set MAE (stored in the variable named `accuracy`).
mae_report = f"Mean Absolute Error (MAE): {accuracy:.2f}"
print(mae_report)


In [None]:
# Size of the exported ONNX file in bytes, reported in decimal megabytes.
model_size = os.stat(onnx_model_path).st_size
size_in_mb = model_size / 1e6
print(f"Model Size on Disk: {size_in_mb:.2f} MB")


In [None]:
# Measure single-sample inference latency over `num_trials` runs.
num_trials = 100
single_sample = X_tensor[0].unsqueeze(0).numpy()  # first test row, as a batch of 1

# Build the feed dict once so the timed region contains only the inference
# call, not the input-name lookup.
input_name = ort_session.get_inputs()[0].name
feed = {input_name: single_sample}

# Warm-up
ort_session.run(None, feed)

latencies = []  # per-call latency in seconds
for _ in range(num_trials):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # benchmarking; time.time() can have coarse resolution and may jump when
    # the system clock is adjusted.
    start_time = time.perf_counter()
    ort_session.run(None, feed)
    latencies.append(time.perf_counter() - start_time)


In [None]:
# Summarise the per-call latencies (seconds) as milliseconds percentiles, plus
# the overall single-sample throughput.
p50, p95, p99 = np.percentile(latencies, [50, 95, 99])
single_fps = num_trials / np.sum(latencies)

print(f"Inference Latency (median): {p50 * 1000:.2f} ms")
print(f"Inference Latency (95th percentile): {p95 * 1000:.2f} ms")
print(f"Inference Latency (99th percentile): {p99 * 1000:.2f} ms")
print(f"Inference Throughput (single sample): {single_fps:.2f} FPS")


In [None]:
# Measure throughput when running inference on a batch of (up to) 32 rows.
num_batches = 50
batch_input = X_tensor[:32].numpy()

# Build the feed dict once so the timed region contains only the inference
# call, not the input-name lookup.
input_name = ort_session.get_inputs()[0].name
feed = {input_name: batch_input}

# Warm-up
ort_session.run(None, feed)

batch_times = []  # per-batch wall time in seconds
for _ in range(num_batches):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # benchmarking; time.time() can have coarse resolution and may jump when
    # the system clock is adjusted.
    start_time = time.perf_counter()
    ort_session.run(None, feed)
    batch_times.append(time.perf_counter() - start_time)

# Samples processed per second; uses the actual batch row count in case the
# test set has fewer than 32 rows.
batch_fps = (batch_input.shape[0] * num_batches) / np.sum(batch_times)
print(f"Batch Throughput: {batch_fps:.2f} FPS")


In [None]:
# Consolidated report of every metric computed above. Latencies are stored in
# seconds and shown in milliseconds; `accuracy` holds the MAE.
lat_p50, lat_p95, lat_p99 = np.percentile(latencies, [50, 95, 99])
summary_lines = [
    f"Mean Absolute Error (MAE): {accuracy:.2f}",
    f"Model Size on Disk: {model_size / 1e6:.2f} MB",
    f"Inference Latency (single sample, median): {lat_p50 * 1000:.2f} ms",
    f"Inference Latency (single sample, 95th percentile): {lat_p95 * 1000:.2f} ms",
    f"Inference Latency (single sample, 99th percentile): {lat_p99 * 1000:.2f} ms",
    f"Inference Throughput (single sample): {num_trials / np.sum(latencies):.2f} FPS",
    f"Batch Throughput: {batch_fps:.2f} FPS",
]
print(*summary_lines, sep="\n")