In [1]:
import os
import sys
from pathlib import Path

print("Current working directory:")
print(os.getcwd())

cwd = Path.cwd()
print("\nParents of CWD:")
for i, p in enumerate(cwd.parents):
    print(f"{i}: {p}")

print("\nInitial sys.path (first 5):")
for p in sys.path[:5]:
    print(p)
# Resolve project root: wfa_xgb_cvd_prediction

Current working directory:
c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction\notebooks

Parents of CWD:
0: c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction
1: c:\Users\dhanu\OneDrive\Desktop\CD_Main
2: c:\Users\dhanu\OneDrive\Desktop
3: c:\Users\dhanu\OneDrive
4: c:\Users\dhanu
5: c:\Users
6: c:\

Initial sys.path (first 5):
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\python311.zip
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\DLLs
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\Lib
C:\Users\dhanu\AppData\Local\Programs\Python\Python311
c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction\wfa_xgb_env


In [2]:
# ---- Project path fix (DO NOT SKIP) ----
import sys
from pathlib import Path

PROJECT_ROOT = Path.cwd().parents[0]  # ✅ VERIFIED CORRECT

if not (PROJECT_ROOT / "src").exists():
    raise RuntimeError(f"'src' not found at {PROJECT_ROOT}")

sys.path.insert(0, str(PROJECT_ROOT))

print("✅ Project root set to:", PROJECT_ROOT)

✅ Project root set to: c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction


In [3]:
from src.config.paths import (
    HEART_VERIFIED_CSV,
    BASELINE_RESULTS_CSV,
    WFA_FEATURE_WEIGHTS_CSV,
    FEATURE_AUGMENTED_WEIGHTS_CSV,
    BASELINE_MODEL_PKL,
    WFA_XGB_MODEL_JSON
)


In [4]:
import pandas as pd
import numpy as np

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score
)

from xgboost import XGBClassifier

from src.data.load_data import load_dataset
from src.data.split_data import split_data
from src.features.mutual_information import compute_mutual_information
from src.features.shap_extractor import SHAPExtractor
from src.models.wfa_xgb import WFAXGB


Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [5]:
X, y = load_dataset(
    PROJECT_ROOT / "data/processed/heart_Verified.csv",
    target_col="target"
)

X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, y)


In [6]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]

    return {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred),
        "recall": recall_score(y_test, y_pred),
        "f1_score": f1_score(y_test, y_pred),
        "roc_auc": roc_auc_score(y_test, y_prob),
    }


In [7]:
baseline_xgb = XGBClassifier(
    n_estimators=300,
    max_depth=4,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric="logloss",
    random_state=42
)

baseline_xgb.fit(X_train, y_train)

results = []
res = evaluate_model(baseline_xgb, X_test, y_test)
res["variant"] = "Baseline XGB"
results.append(res)


In [8]:
mi_scores = compute_mutual_information(X_train, y_train)
mi_norm = (mi_scores - mi_scores.min()) / (mi_scores.max() - mi_scores.min() + 1e-8)

X_train_mi = X_train * mi_norm
X_test_mi = X_test * mi_norm

mi_xgb = XGBClassifier(**baseline_xgb.get_params())
mi_xgb.fit(X_train_mi, y_train)

res = evaluate_model(mi_xgb, X_test_mi, y_test)
res["variant"] = "MI-XGB"
results.append(res)


In [9]:
mi_scores = compute_mutual_information(X_train, y_train)
mi_norm = (mi_scores - mi_scores.min()) / (mi_scores.max() - mi_scores.min() + 1e-8)

X_train_mi = X_train * mi_norm
X_test_mi = X_test * mi_norm

mi_xgb = XGBClassifier(**baseline_xgb.get_params())
mi_xgb.fit(X_train_mi, y_train)

res = evaluate_model(mi_xgb, X_test_mi, y_test)
res["variant"] = "MI-XGB"
results.append(res)


In [10]:
wfa_xgb = WFAXGB()
wfa_xgb.fit(X_train, y_train)

res = evaluate_model(wfa_xgb, X_test, y_test)
res["variant"] = "WFA-XGB"
results.append(res)


Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.


In [11]:
ablation_df = pd.DataFrame(results)
ablation_df = ablation_df.set_index("variant")
ablation_df


Unnamed: 0_level_0,accuracy,precision,recall,f1_score,roc_auc
variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Baseline XGB,0.797794,0.824427,0.771429,0.797048,0.865855
MI-XGB,0.772059,0.791045,0.757143,0.773723,0.858063
MI-XGB,0.772059,0.791045,0.757143,0.773723,0.858063
WFA-XGB,0.485294,0.0,0.0,0.0,0.5


In [12]:
import os
os.makedirs(PROJECT_ROOT / "experiments", exist_ok=True)

ablation_df.to_csv(
    PROJECT_ROOT / "experiments/ablation_results.csv"
)

print("✅ Ablation study saved")


✅ Ablation study saved
