In [1]:
import pandas as pd
import joblib
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    confusion_matrix,
    classification_report
)
import numpy as np
import shap

In [2]:
# Load the test features and test labels from their pickle files.
# NOTE(review): unpickling executes arbitrary code; only load artifacts you trust.
X_test_scaled, y_test = map(
    pd.read_pickle,
    ('X_ontest_scaled.pkl', 'y_ontest.pkl'),
)

# Load the persisted model and the decision threshold that was saved with it.
lr, opt_threshold = map(
    joblib.load,
    ('nfinal_logreg_model.pkl', 'nfinal_logreg_threshold.pkl'),
)


In [3]:
# Probability column 1 of the model output, thresholded with the saved cut-off
# instead of the default 0.5.
# NOTE(review): column 1 is presumably the positive ("HVZ") class — confirm via lr.classes_.
probs = lr.predict_proba(X_test_scaled)[:, 1]
y_pred_opt = (probs >= opt_threshold).astype(int)

# Threshold-dependent metrics use the hard predictions; ROC AUC uses the raw scores.
acc = accuracy_score(y_test, y_pred_opt)
auc = roc_auc_score(y_test, probs)
cm = confusion_matrix(y_test, y_pred_opt)
report = classification_report(y_test, y_pred_opt, target_names=['Geen HVZ','HVZ'])

# Single print call; sep="\n" yields the same text as one print per line.
# Confusion-matrix layout: rows are true labels, columns are predicted labels.
print(
    f"Accuracy:  {acc:.3f}",
    f"ROC AUC:   {auc:.3f}",
    "\nConfusion Matrix:",
    f"TN: {cm[0,0]}  FP: {cm[0,1]}",
    f"FN: {cm[1,0]}  TP: {cm[1,1]}",
    "\nClassification Report:",
    report,
    sep="\n",
)


Accuracy:  0.864
ROC AUC:   0.936

Confusion Matrix:
TN: 33  FP: 5
FN: 3  TP: 18

Classification Report:
              precision    recall  f1-score   support

    Geen HVZ       0.92      0.87      0.89        38
         HVZ       0.78      0.86      0.82        21

    accuracy                           0.86        59
   macro avg       0.85      0.86      0.86        59
weighted avg       0.87      0.86      0.87        59



### Testen op 1 deelnemer

In [4]:
# Reload the persisted artifacts (the same model/threshold files as the load cell above,
# plus the scaler).
# NOTE(review): `scaler` is loaded here but not used anywhere in this cell.
lr, opt_threshold, scaler = map(
    joblib.load,
    ('nfinal_logreg_model.pkl', 'nfinal_logreg_threshold.pkl', 'standard_scaler.pkl'),
)

# The feature pickle must already have all preprocessing applied.
X_test_scaled = pd.read_pickle('X_ontest_scaled.pkl')
y_test = pd.read_pickle('y_ontest.pkl')

# Pick one participant by position; the list indexer keeps a one-row DataFrame
# (1 x n_features), which is what predict_proba is given below.
idx = 0
x_row = X_test_scaled.iloc[[idx]]
y_true = y_test.iloc[idx]

# Row 0, column 1 of the probability matrix, compared against the saved threshold.
proba = lr.predict_proba(x_row)[0, 1]
pred = int(proba >= opt_threshold)

print("True label:            ", y_true)
print(f"Predicted probability: {proba:.3f}")
print(f"Predicted class (thr={opt_threshold:.3f}):", pred)


True label:             0
Predicted probability: 0.550
Predicted class (thr=0.672): 0


In [5]:
# Reload the model together with the training and test feature frames.
lr = joblib.load('nfinal_logreg_model.pkl')
X_train, X_test = map(
    pd.read_pickle,
    ('X_ontrain_scaled.pkl', 'X_ontest_scaled.pkl'),
)

# The row to explain, kept as a one-row DataFrame (1 x n_features).
idx = 0
x_row = X_test.iloc[[idx]]

# Linear SHAP explainer, with the training frame passed as background data.
explainer = shap.LinearExplainer(
    lr,
    X_train,
    feature_perturbation="interventional",
)
# NOTE(review): expected to be an array of shape (1, n_features) — confirm for the installed shap version.
shap_vals = explainer.shap_values(x_row)

# Per-feature contributions for that single row, labelled with the training columns.
features = X_train.columns
contrib = shap_vals[0]
df_contrib = pd.DataFrame({'Feature': features, 'Contribution': contrib})
df_contrib['Abs Contribution'] = np.abs(contrib)

# Rank by absolute contribution and keep the ten largest.
df_top = (
    df_contrib
    .sort_values('Abs Contribution', ascending=False)
    .head(10)
    .reset_index(drop=True)
)

# One print call; sep="\n" reproduces the blank line between heading and table.
print(
    "Top 10 feature contributions for this prediction:\n",
    df_top.to_string(index=False),
    sep="\n",
)


  from .autonotebook import tqdm as notebook_tqdm


Top 10 feature contributions for this prediction:

 Feature  Contribution  Abs Contribution
    cp_2     -0.490639          0.490639
    cp_4     -0.288981          0.288981
 oldpeak      0.211637          0.211637
     age      0.173509          0.173509
   exang     -0.149558          0.149558
     sex      0.139728          0.139728
 thalach     -0.062734          0.062734
    chol      0.051339          0.051339
     fbs     -0.030335          0.030335
trestbps      0.019350          0.019350


