In [1]:
!pip install onnxruntime

Collecting onnxruntime
  Downloading onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.9 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.5/16.5 MB[0m [31m39.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected pack

In [2]:
import numpy as np
import joblib
import onnxruntime as ort


In [3]:
# STEP 1: Load Saved Threshold
threshold = joblib.load("loan_default_threshold.pkl")
print(f"Loaded Threshold: {threshold:.4f}")

Loaded Threshold: 0.5070


In [4]:
# STEP 2: Load ONNX Model
def load_onnx_model(onnx_path):
    session = ort.InferenceSession(onnx_path)
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name
    return session, input_name, output_name


In [5]:
# STEP 3: Predict Function
def predict_onnx(onnx_path, test_cases):
    session, input_name, output_name = load_onnx_model(onnx_path)

    data = np.array(test_cases, dtype=np.float32)
    outputs = session.run([output_name], {input_name: data})

    # If probability scores returned (scikit-learn ONNX)
    if outputs[0].ndim > 1 and outputs[0].shape[1] > 1:
        probs = outputs[0][:,1]   # take probability of class 1
    else:
        # Spark ONNX often outputs only score for positive class
        probs = outputs[0].ravel()

    preds = (probs >= threshold).astype(int)
    return preds, probs

In [6]:
# STEP 4: Run Test
test_cases = [
    [25, 30000, 15000, 600, 12, 10.5, 0.50, 40],
    [45, 90000, 5000, 750, 120, 3.5, 0.10, 60],
    [56, 85994, 50587, 520, 80, 15.23, 0.44, 36],
    [28, 55000, 200000, 720, 36, 9.5, 0.25, 60],
    [45, 40000, 350000, 580, 12, 15.2, 0.60, 48],
    [27, 25000, 120000, 560, 6, 16.8, 0.68, 24]
]

In [7]:
# Test Scikit-learn ONNX
preds, probs = predict_onnx("Loan_default_prediction_model.onnx", test_cases)
print("\nScikit-learn ONNX Predictions:")
for i, (p, pr) in enumerate(zip(preds, probs)):
    print(f"Case {i+1}: Pred={p}, Prob={pr:.3f}")



Scikit-learn ONNX Predictions:
Case 1: Pred=1, Prob=1.000
Case 2: Pred=0, Prob=0.000
Case 3: Pred=0, Prob=0.000
Case 4: Pred=0, Prob=0.000
Case 5: Pred=1, Prob=1.000
Case 6: Pred=0, Prob=0.000


In [8]:
# Test Spark ONNX
preds, probs = predict_onnx("loan_default_spark_xgb_model.onnx", test_cases)
print("\nSpark ONNX Predictions:")
for i, (p, pr) in enumerate(zip(preds, probs)):
    print(f"Case {i+1}: Pred={p}, Prob={pr:.3f}")


Spark ONNX Predictions:
Case 1: Pred=1, Prob=1.000
Case 2: Pred=0, Prob=0.000
Case 3: Pred=0, Prob=0.000
Case 4: Pred=0, Prob=0.000
Case 5: Pred=1, Prob=1.000
Case 6: Pred=1, Prob=1.000
