In [None]:
import os
import time
import onnx
import onnxruntime as ort
from utils import *

In [None]:
# Dataset root; override by exporting NBA_DATA_DIR before starting the kernel.
base_data_dir = os.getenv("NBA_DATA_DIR", "nba_data")


def _load_float_tensor(relative_path):
    """Read a CSV under ``base_data_dir`` and return its values as a float32 tensor."""
    frame = pd.read_csv(os.path.join(base_data_dir, relative_path))
    return torch.tensor(frame.values, dtype=torch.float32)


# Model-1 feature / target splits, loaded directly as float32 tensors.
X1_train = _load_float_tensor('train/X_train_model1.csv')
X1_test = _load_float_tensor('train/X_test_model1.csv')
Y1_train = _load_float_tensor('train/Y_train_model1.csv')
Y1_test = _load_float_tensor('train/Y_test_model1.csv')

# Full stats table is kept as a DataFrame; it supplies the game identifiers below.
full1_df = pd.read_csv(os.path.join(base_data_dir, 'train/full_stats.csv'))

# Pair features with targets and batch them; only the training loader shuffles.
train1_data = TensorDataset(X1_train, Y1_train)
test1_data = TensorDataset(X1_test, Y1_test)
train1_loader = DataLoader(train1_data, shuffle=True, batch_size=32)
test1_loader = DataLoader(test1_data, shuffle=False, batch_size=32)

# gameId column of the full stats table as a NumPy array.
game_ids = full1_df['gameId'].values


In [None]:
# Load the trained point-differential model and export it to ONNX.
model_path = "models/point_diff.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The loaded object is used directly as a module (.eval(), called on batches), so the
# checkpoint is a pickled model rather than a state_dict. PyTorch >= 2.6 defaults to
# weights_only=True, which rejects such files, hence the explicit flag.
# NOTE(review): unpickling executes arbitrary code -- only load trusted checkpoints.
model1 = torch.load(model_path, map_location=device, weights_only=False)
model1.eval()

onnx_model1_path = "models/model1.onnx"
# Tracing input: one row with the model-1 feature width. It is created on `device`
# because the model was mapped there; a CPU tensor would fail on a CUDA host.
dummy_input = torch.randn(1, X1_test.shape[1], dtype=torch.float32, device=device)

# Axis 0 of both input and output is marked dynamic so the exported graph accepts
# any batch size, not just the size-1 tracing input.
torch.onnx.export(model1, dummy_input, onnx_model1_path,
                  export_params=True, opset_version=20,
                  do_constant_folding=True, input_names=['input'],
                  output_names=['output'], dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}})

print(f"ONNX model saved to {onnx_model1_path}")

# Reload the exported file and run the ONNX structural checker on it.
onnx_model1 = onnx.load(onnx_model1_path)
onnx.checker.check_model(onnx_model1)

In [None]:
# Run model 1 over the model-1 test features in fixed-size batches.
predictions = []
batch_size = 32

with torch.no_grad():
    # Processing in batches
    for i in range(0, len(X1_test), batch_size):
        # Move the slice to the model's device; X1_test lives on CPU while the
        # model was loaded onto `device`, which may be CUDA.
        batch = X1_test[i:i + batch_size].to(device)

        # Forward pass, then bring the result back to CPU as a NumPy array
        batch_preds = model1(batch)
        predictions.append(batch_preds.cpu().numpy())

# Combine all predictions
all_predictions = np.concatenate(predictions, axis=0)

# One predicted point differential per game id.
# NOTE(review): game_ids comes from full_stats.csv while the predictions are over
# X1_test; pandas raises if the lengths differ -- confirm the rows correspond.
result_df = pd.DataFrame({
    'gameId': game_ids,
    'predicted_point_diff': all_predictions.flatten()
})

In [None]:
# Load the model-2 (attendance) splits.
X2_train = pd.read_csv(os.path.join(base_data_dir, 'train/X_train_model2.csv'))
X2_test = pd.read_csv(os.path.join(base_data_dir, 'test/X_test_model2.csv'))
Y2_train = pd.read_csv(os.path.join(base_data_dir, 'train/Y_train_model2.csv'))
Y2_test = pd.read_csv(os.path.join(base_data_dir, 'test/Y_test_model2.csv'))

# Attach model 1's predicted point differential as an extra feature, then drop the join key.
# NOTE(review): an inner join can drop or duplicate feature rows while the target frames
# are left untouched; TensorDataset below will reject a row-count mismatch -- confirm
# every gameId in the model-2 splits is present exactly once in result_df.
X2_train = X2_train.merge(result_df, on='gameId', how='inner')
X2_train = X2_train.drop('gameId', axis=1)
X2_test = X2_test.merge(result_df, on='gameId', how='inner')
X2_test = X2_test.drop('gameId', axis=1)
full2_df = pd.read_csv(os.path.join(base_data_dir, 'train/full_attendance.csv'))
full2_df = full2_df.merge(result_df, on='gameId', how='inner')

# TensorDataset only accepts tensors, and later cells call tensor methods on X2_test,
# so convert the frames to float32 tensors the same way the model-1 cell does.
X2_train = torch.tensor(X2_train.values, dtype=torch.float32)
X2_test = torch.tensor(X2_test.values, dtype=torch.float32)
Y2_train = torch.tensor(Y2_train.values, dtype=torch.float32)
Y2_test = torch.tensor(Y2_test.values, dtype=torch.float32)

train2_data = TensorDataset(X2_train, Y2_train)
test2_data = TensorDataset(X2_test, Y2_test)
train2_loader = DataLoader(train2_data, batch_size=32, shuffle=True)
test2_loader = DataLoader(test2_data, batch_size=32, shuffle=False)

In [None]:
# Load the trained attendance model and export it to ONNX.
model_path = "models/attendance_model.pth"

# The loaded object is used directly as a module (.eval()), so the checkpoint is a
# pickled model rather than a state_dict. PyTorch >= 2.6 defaults to weights_only=True,
# which rejects such files, hence the explicit flag.
# NOTE(review): unpickling executes arbitrary code -- only load trusted checkpoints.
model2 = torch.load(model_path, map_location=device, weights_only=False)
model2.eval()

onnx_model2_path = "models/model2.onnx"
# Tracing input: one row with the model-2 feature width, created on `device` because
# the model was mapped there; a CPU tensor would fail on a CUDA host.
dummy_input = torch.randn(1, X2_test.shape[1], dtype=torch.float32, device=device)

# Axis 0 of both input and output is marked dynamic so the exported graph accepts
# any batch size, not just the size-1 tracing input.
torch.onnx.export(model2, dummy_input, onnx_model2_path,
                  export_params=True, opset_version=20,
                  do_constant_folding=True, input_names=['input'],
                  output_names=['output'], dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}})

print(f"ONNX model saved to {onnx_model2_path}")

# Reload the exported file and run the ONNX structural checker on it.
onnx_model2 = onnx.load(onnx_model2_path)
onnx.checker.check_model(onnx_model2)

In [None]:
# On-disk footprint of each exported ONNX file, reported in megabytes (bytes / 1e6).
model1_size, model2_size = (
    os.path.getsize(onnx_path) for onnx_path in (onnx_model1_path, onnx_model2_path)
)
for size_in_bytes in (model1_size, model2_size):
    print(f"Model Size on Disk: {size_in_bytes / 1e6:.2f} MB")

In [None]:

# One CPU-only ONNX Runtime session per exported model. Session 2 must load the
# model-2 file; it previously pointed at the model-1 path, so every "model 2"
# benchmark was actually running model 1.
ort_session1 = ort.InferenceSession(onnx_model1_path, providers=['CPUExecutionProvider'])
ort_session2 = ort.InferenceSession(onnx_model2_path, providers=['CPUExecutionProvider'])

In [None]:
# Score both PyTorch models on their held-out loaders with a shared MSE criterion.
criterion = nn.MSELoss()

# Model 1: validate() yields (R^2, RMSE, loss) in that order.
model1_metrics = validate(model1, test1_loader, criterion, device)
test1_r2, test1_rmse, test1_loss = model1_metrics
print(f"Test R^2: {test1_r2:.4f}, Test RMSE: {test1_rmse:.4f}, Test Model MSE Loss: {test1_loss:.4f}")

# Model 2: same metrics on the model-2 test loader.
model2_metrics = validate(model2, test2_loader, criterion, device)
test2_r2, test2_rmse, test2_loss = model2_metrics
print(f"Test R^2: {test2_r2:.4f}, Test RMSE: {test2_rmse:.4f}, Test Model MSE Loss: {test2_loss:.4f}")

In [None]:
# Single-sample latency benchmark for the model-2 ONNX Runtime session.
num_trials = 100

# One-row float32 input. Slicing with [:1] keeps the batch axis, and
# np.ascontiguousarray accepts either a CPU tensor or a DataFrame, whereas
# X2_test[0].unsqueeze(0).numpy() only works when X2_test is a tensor.
single_sample = np.ascontiguousarray(X2_test[:1], dtype=np.float32)

# Look the input name up once so the timed region contains only the run() call.
ort_input_name = ort_session2.get_inputs()[0].name

# Warm-up
ort_session2.run(None, {ort_input_name: single_sample})

latencies = []
for _ in range(num_trials):
    # perf_counter is monotonic and high-resolution; time.time() can be too coarse
    # for sub-millisecond calls and may jump if the system clock is adjusted.
    start_time = time.perf_counter()
    ort_session2.run(None, {ort_input_name: single_sample})
    latencies.append(time.perf_counter() - start_time)


In [None]:
# Report the single-sample latency distribution in milliseconds (latencies are in seconds).
for label, quantile in (("median", 50), ("95th percentile", 95), ("99th percentile", 99)):
    print(f"Inference Latency ({label}): {np.percentile(latencies, quantile) * 1000:.2f} ms")

# Throughput = number of timed runs divided by their total duration.
total_latency = np.sum(latencies)
print(f"Inference Throughput (single sample): {num_trials / total_latency:.2f} FPS")


In [None]:
# Batched throughput benchmark for the model-2 ONNX Runtime session.
num_batches = 50

# Up to 32 rows as a float32 array. np.ascontiguousarray accepts either a CPU tensor
# or a DataFrame, whereas X2_test[:32].numpy() only works when X2_test is a tensor.
batch_input = np.ascontiguousarray(X2_test[:32], dtype=np.float32)

# Look the input name up once so the timed region contains only the run() call.
ort_input_name = ort_session2.get_inputs()[0].name

# Warm-up
ort_session2.run(None, {ort_input_name: batch_input})

batch_times = []
for _ in range(num_batches):
    # perf_counter is monotonic and high-resolution; time.time() can be too coarse
    # for short calls and may jump if the system clock is adjusted.
    start_time = time.perf_counter()
    ort_session2.run(None, {ort_input_name: batch_input})
    batch_times.append(time.perf_counter() - start_time)

# Samples processed per second; uses the actual row count in case fewer than 32 exist.
batch_fps = (batch_input.shape[0] * num_batches) / np.sum(batch_times)
print(f"Batch Throughput: {batch_fps:.2f} FPS")


In [None]:
# Consolidated report: size and accuracy for each model, followed by the ONNX Runtime
# latency / throughput figures measured earlier. Each entry is printed on its own call.
summary_lines = [
    'Model1 Stats: ',
    f"Model1 Size on Disk: {model1_size / 1e6:.2f} MB",
    f"Test R^2: {test1_r2:.4f}, Test RMSE: {test1_rmse:.4f}, Test Model MSE Loss: {test1_loss:.4f}",
    '\n',
    'Model2 Stats: ',
    f"Model2 Size on Disk: {model2_size / 1e6:.2f} MB",
    f"Test R^2: {test2_r2:.4f}, Test RMSE: {test2_rmse:.4f}, Test Model MSE Loss: {test2_loss:.4f}",
    f"Inference Latency (single sample, median): {np.percentile(latencies, 50) * 1000:.2f} ms",
    f"Inference Latency (single sample, 95th percentile): {np.percentile(latencies, 95) * 1000:.2f} ms",
    f"Inference Latency (single sample, 99th percentile): {np.percentile(latencies, 99) * 1000:.2f} ms",
    f"Inference Throughput (single sample): {num_trials / np.sum(latencies):.2f} FPS",
    f"Batch Throughput: {batch_fps:.2f} FPS",
]
for line in summary_lines:
    print(line)