# Net Benefit (Decision Curve Analysis)

Import libraries, then load the test data and the trained models

In [None]:
import sys
import os
sys.path.append(os.path.abspath('../'))
from src.build_dnn_model import build_nn_model
import matplotlib.pyplot as plt
from statkit.decision import NetBenefitDisplay
import pandas as pd
import joblib


## Test-set inputs
# Ground-truth outcome: the 'ORN' column of the raw test-split spreadsheet.
y_test = pd.read_excel('../data/raw/split/Raw_y_test.xlsx')['ORN']

# Transformed feature matrices (presumably one per model family: the nomogram
# has its own file, every other model shares the "ml" one -- confirm upstream).
nomo_X_test = pd.read_parquet('../data/processed/nomo_test_transformed.parquet')
ml_X_test = pd.read_parquet('../data/processed/ml_test_transformed.parquet')

## Fitted classifiers, restored from the joblib artifacts under ../models
# NOTE(review): joblib.load unpickles arbitrary objects -- only load artifacts
# produced by this project.
# NOTE(review): DNN.joblib presumably needs `src.build_dnn_model` importable
# while unpickling, hence the import at the top of the cell -- confirm.
stack_clf = joblib.load('../models/stack.joblib')
dnn_clf = joblib.load('../models/DNN.joblib')
knn_clf = joblib.load('../models/KNN.joblib')
lightgbm_clf = joblib.load('../models/LightGBM.joblib')
svc_clf = joblib.load('../models/SVC.joblib')
nomo_clf = joblib.load('../models/NLR.joblib')

Construct plot

In [None]:
# Decision curve analysis: draw the net-benefit curve of every model on one
# shared axis, recolor the model curves, tidy the legend, then save and show.

# Single source of truth for the nomogram's display name. The feature-set
# selection below compares against this exact string, so the label and the
# check can never drift apart (previously the check used "Nomogram", which
# never matched 'Nomogram (LR)', so the nomogram was scored on ml_X_test).
NOMOGRAM_NAME = 'Nomogram (LR)'

# Models and their display names, in legend order (index-aligned lists).
models = [stack_clf, dnn_clf, knn_clf, lightgbm_clf, svc_clf, nomo_clf]
model_names = ['Stack',
               'DNN',
               'KNN',
               'LightGBM',
               'SVC',
               NOMOGRAM_NAME]

# Palette; only the first len(model_names) entries are used.
color_list = [
    "tab:blue", "tab:orange", "tab:green", "tab:purple", "tab:red",
    "tab:olive", "tab:pink", "tab:gray", "tab:brown", "tab:cyan",
]
# Explicit name -> color mapping so recoloring does not depend on the order in
# which the lines happen to be stored on the axis.
color_by_name = dict(zip(model_names, color_list))

fig, ax = plt.subplots(figsize=(10, 8))

for model, name in zip(models, model_names):
    # The nomogram is scored on its own transformed features; every other
    # model is scored on the shared ML feature matrix.
    X_test = nomo_X_test if name == NOMOGRAM_NAME else ml_X_test
    # Column 1 of predict_proba is taken as the positive-class probability.
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    NetBenefitDisplay.from_predictions(
        y_test,
        y_pred_proba,
        name=name,
        ax=ax,
    )

# Recolor only the model curves (lines whose label is a model name); lines
# with any other label, such as the reference curves, keep their style.
for line in ax.get_lines():
    color = color_by_name.get(line.get_label())
    if color is not None:
        line.set_color(color)

# Collect legend entries; going through a dict collapses the entries that are
# repeated on every from_predictions call (the last handle per label wins).
handles, labels = ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))

# Models first, then the baselines; labels absent from the axis are skipped.
baseline_labels = ["Always act", "Never act", "Oracle"]
final_labels = [
    label for label in model_names + baseline_labels if label in by_label
]
ordered_handles = [by_label[label] for label in final_labels]

ax.legend(ordered_handles, final_labels, loc="upper right")

ax.set_title("Decision Curve Analysis: Model Comparison")
ax.set_ylim(-0.15, 0.27)

fig.savefig('../results/figures/DCA.pdf', bbox_inches='tight')
plt.show()
