In [1]:
import os
import sys
from pathlib import Path

# Environment diagnostics: print the working directory, each of its ancestor
# directories, and the first few sys.path entries.
print("Current working directory:", os.getcwd(), sep="\n")

cwd = Path.cwd()
print("\nParents of CWD:")
for depth, ancestor in enumerate(cwd.parents):
    print(f"{depth}: {ancestor}")

# Only the head of sys.path is listed to keep the output short.
print("\nInitial sys.path (first 5):")
for search_dir in sys.path[:5]:
    print(search_dir)
# The project root (wfa_xgb_cvd_prediction) is one of the parents listed above.

Current working directory:
c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction\notebooks

Parents of CWD:
0: c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction
1: c:\Users\dhanu\OneDrive\Desktop\CD_Main
2: c:\Users\dhanu\OneDrive\Desktop
3: c:\Users\dhanu\OneDrive
4: c:\Users\dhanu
5: c:\Users
6: c:\

Initial sys.path (first 5):
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\python311.zip
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\DLLs
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\Lib
C:\Users\dhanu\AppData\Local\Programs\Python\Python311
c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction\wfa_xgb_env


In [2]:
# ---- Project path fix (DO NOT SKIP) ----
# Puts the project root on sys.path so the project-local `src` package imports.
import sys
from pathlib import Path

# The notebook is expected to run from <project>/notebooks, so the project
# root is the immediate parent of the current working directory.
PROJECT_ROOT = Path.cwd().parent

# Fail fast when the kernel was started elsewhere: other cells use
# cwd-relative paths ("../data", "../experiments") that rely on this layout.
if not (PROJECT_ROOT / "src").exists():
    raise RuntimeError(f"'src' not found at {PROJECT_ROOT}")

# Keep the cell idempotent: remove any earlier copy of the root before putting
# it back at the front, so re-running never stacks duplicate sys.path entries
# while the project root still takes import priority.
_project_root_entry = str(PROJECT_ROOT)
sys.path[:] = [entry for entry in sys.path if entry != _project_root_entry]
sys.path.insert(0, _project_root_entry)

print("✅ Project root set to:", PROJECT_ROOT)

✅ Project root set to: c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction


In [3]:
from src.config.paths import (
    HEART_VERIFIED_CSV,
    BASELINE_RESULTS_CSV,
    WFA_FEATURE_WEIGHTS_CSV,
    FEATURE_AUGMENTED_WEIGHTS_CSV,
    BASELINE_MODEL_PKL,
    WFA_XGB_MODEL_JSON
)

In [4]:
import pandas as pd
import numpy as np

from xgboost import XGBClassifier

from src.data.load_data import load_dataset
from src.data.split_data import split_data
from src.features.mutual_information import compute_mutual_information
from src.features.shap_extractor import SHAPExtractor

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [5]:
# Load the processed CSV; load_dataset hands back the features (X) and the
# "target" column (y) as separate objects.
dataset_path = "../data/processed/heart_Verified.csv"
X, y = load_dataset(path=dataset_path, target_col="target")

# Display both shapes to eyeball that the row counts agree.
(X.shape, y.shape)

((1548, 11), (1548,))

In [6]:
# NOTE(review): pandas, numpy and xgboost are already imported by an earlier
# cell, so on a fresh kernel those imports run before this install does.
# The versions are also unpinned. Consider moving this to the top of the
# notebook with pinned versions, or dropping it in favour of a requirements file.
%pip install shap xgboost scikit-learn pandas numpy matplotlib

Note: you may need to restart the kernel to use updated packages.


In [7]:
# Hyper-parameters for the XGBoost classifier, gathered in one place so they
# are easy to scan and tweak.
xgb_params = {
    "n_estimators": 300,
    "max_depth": 4,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "eval_metric": "logloss",
    "random_state": 42,
}
xgb_model = XGBClassifier(**xgb_params)

# NOTE(review): the model is fitted on the full X, y with no hold-out split,
# even though split_data is imported — confirm this is intended.
xgb_model.fit(X, y)

In [8]:
# Wrap the fitted model and compute the global SHAP importance over X,
# coerced to float.
shap_extractor = SHAPExtractor(xgb_model)
global_importance = shap_extractor.global_importance(X).astype(float)

# Preview the ten largest importance values.
global_importance.sort_values(ascending=False).iloc[:10]

slope       0.896852
cp          0.685588
restecg     0.341422
oldpeak     0.331244
thalach     0.235060
exang       0.181895
age         0.172537
trestbps    0.163201
chol        0.151310
sex         0.125238
Name: shap_importance, dtype: float64

In [9]:
# Mutual information computed from X and y, coerced to float.
mi_table = compute_mutual_information(X, y).astype(float)

# Preview the ten largest mutual-information values.
mi_table.sort_values(ascending=False).iloc[:10]


slope       0.128619
cp          0.052018
exang       0.020089
oldpeak     0.019716
restecg     0.018155
trestbps    0.014690
sex         0.010855
age         0.000000
chol        0.000000
fbs         0.000000
Name: mutual_information, dtype: float64

In [10]:
# Blend weight: share of the final score taken from the SHAP importance; the
# remaining (1 - LAMBDA_WFA) comes from mutual information.
LAMBDA_WFA = 0.6


def _min_max_scale(scores, eps=1e-8):
    """Min-max rescale ``scores`` so the smallest value maps to 0.

    ``eps`` is added to the value range so a constant input produces zeros
    instead of a division by zero; the largest value therefore maps to just
    under 1.
    """
    lowest = scores.min()
    return (scores - lowest) / (scores.max() - lowest + eps)


# Series addition aligns on the index and silently yields NaN for labels that
# appear on only one side, so require both score tables to cover the same
# features before blending.
_unmatched_features = global_importance.index.symmetric_difference(mi_table.index)
if len(_unmatched_features) > 0:
    raise ValueError(
        "SHAP and mutual-information scores cover different features: "
        f"{list(_unmatched_features)}"
    )

shap_norm = _min_max_scale(global_importance)
mi_norm = _min_max_scale(mi_table)

# Convex combination of the two normalised scores.
feature_augmented_weights = LAMBDA_WFA * shap_norm + (1 - LAMBDA_WFA) * mi_norm
feature_augmented_weights.name = "wfa_weight"

feature_augmented_weights.sort_values(ascending=False).head(10)

slope       1.000000
cp          0.612684
restecg     0.264488
oldpeak     0.262157
exang       0.157920
thalach     0.132963
trestbps    0.127937
sex         0.089219
age         0.088840
chol        0.073860
Name: wfa_weight, dtype: float64

In [11]:
# Write the blended weights to CSV, creating the output directory first if it
# does not exist yet.
# NOTE(review): FEATURE_AUGMENTED_WEIGHTS_CSV is imported from src.config.paths
# in an earlier cell but is not used here — confirm whether it should replace
# this relative path.
weights_csv = "../experiments/feature_augmented_weights.csv"
os.makedirs(os.path.dirname(weights_csv), exist_ok=True)
feature_augmented_weights.to_csv(weights_csv)

print("✅ Feature-augmented WFA weights saved successfully.")


✅ Feature-augmented WFA weights saved successfully.
