In [1]:
import os
import sys
from pathlib import Path

print("Current working directory:")
print(os.getcwd())

cwd = Path.cwd()
print("\nParents of CWD:")
for i, p in enumerate(cwd.parents):
    print(f"{i}: {p}")

print("\nInitial sys.path (first 5):")
for p in sys.path[:5]:
    print(p)
# Resolve project root: wfa_xgb_cvd_prediction


Current working directory:
c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction\notebooks

Parents of CWD:
0: c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction
1: c:\Users\dhanu\OneDrive\Desktop\CD_Main
2: c:\Users\dhanu\OneDrive\Desktop
3: c:\Users\dhanu\OneDrive
4: c:\Users\dhanu
5: c:\Users
6: c:\

Initial sys.path (first 5):
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\python311.zip
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\DLLs
C:\Users\dhanu\AppData\Local\Programs\Python\Python311\Lib
C:\Users\dhanu\AppData\Local\Programs\Python\Python311
c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction\wfa_xgb_env


In [2]:
# ---- Project path fix (DO NOT SKIP) ----
import sys
from pathlib import Path

PROJECT_ROOT = Path.cwd().parents[0]  # ✅ VERIFIED CORRECT

if not (PROJECT_ROOT / "src").exists():
    raise RuntimeError(f"'src' not found at {PROJECT_ROOT}")

sys.path.insert(0, str(PROJECT_ROOT))

print("✅ Project root set to:", PROJECT_ROOT)

✅ Project root set to: c:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction


In [3]:
from src.config.paths import (
    HEART_VERIFIED_CSV,
    BASELINE_RESULTS_CSV,
    WFA_FEATURE_WEIGHTS_CSV,
    FEATURE_AUGMENTED_WEIGHTS_CSV,
    BASELINE_MODEL_PKL,
    WFA_XGB_MODEL_JSON
)

In [4]:
import pandas as pd
import numpy as np

from src.data.load_data import load_dataset
from src.data.split_data import split_data
from src.models.wfa_xgb import WFAXGB
from src.models.baseline_models import BaselineModels

In [5]:
X, y = load_dataset(
    path="../data/processed/heart_Verified.csv",
    target_col="target"
)

In [6]:
X_train, X_val, X_test, y_train, y_val, y_test = split_data(
    X, y,
    test_size=0.2,
    stratify=True
)

X_train.shape, X_test.shape

((1238, 11), (272, 11))

In [7]:
feature_weights = pd.read_csv(
    "../experiments/feature_augmented_weights.csv",
    index_col=0
).squeeze()

feature_weights.head(10)

age         0.088840
sex         0.089219
cp          0.612684
trestbps    0.127937
chol        0.073860
fbs         0.000000
restecg     0.264488
thalach     0.132963
exang       0.157920
oldpeak     0.262157
Name: wfa_weight, dtype: float64

In [8]:
X_train_wfa = X_train * feature_weights
X_test_wfa = X_test * feature_weights


In [9]:
wfa_xgb = WFAXGB(
    lambda_wfa=0.6,
    random_state=42
)

wfa_xgb.fit(X_train, y_train)

<src.models.wfa_xgb.WFAXGB at 0x22df1e66890>

In [10]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score
)

y_pred = wfa_xgb.predict(X_test)
y_prob = wfa_xgb.predict_proba(X_test)[:, 1]

wfa_results = {
    "accuracy": accuracy_score(y_test, y_pred),
    "precision": precision_score(y_test, y_pred),
    "recall": recall_score(y_test, y_pred),
    "f1_score": f1_score(y_test, y_pred),
    "roc_auc": roc_auc_score(y_test, y_prob)
}

wfa_results

  _warn_prf(average, modifier, msg_start, len(result))


{'accuracy': 0.4852941176470588,
 'precision': 0.0,
 'recall': 0.0,
 'f1_score': 0.0,
 'roc_auc': 0.5}

In [11]:
baseline = BaselineModels()
baseline.train(X_train, y_train)

baseline_results = baseline.evaluate(X_test, y_test)

baseline_df = pd.DataFrame(baseline_results).T
baseline_df

Unnamed: 0,accuracy,precision,recall,f1_score,roc_auc
logistic_regression,0.727941,0.757812,0.692857,0.723881,0.783929
random_forest,0.8125,0.82963,0.8,0.814545,0.892208
xgboost,0.801471,0.830769,0.771429,0.8,0.876677


In [12]:
os.makedirs("../experiments", exist_ok=True)
os.makedirs("../models/baselines", exist_ok=True)

baseline_df.to_csv("../experiments/baseline_results.csv")
baseline.save("../models/baselines/baseline_models.pkl")

print("✅ All training artifacts saved successfully.")

✅ All training artifacts saved successfully.


In [13]:
from src.config.paths import WFA_XGB_MODEL_JSON
from pathlib import Path

# Ensure directory exists
Path(WFA_XGB_MODEL_JSON).parent.mkdir(parents=True, exist_ok=True)

# Save trained XGBoost model
wfa_xgb.model.save_model(WFA_XGB_MODEL_JSON)

print("✅ WFA-XGB model saved to:", WFA_XGB_MODEL_JSON)

✅ WFA-XGB model saved to: C:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction\models\wfa_xgb\wfa_xgb_model.json


In [14]:
import numpy as np

# feature_weights: Series indexed by feature name
# X_train: DataFrame

sample_weights = (
    X_train.abs()
    .mul(feature_weights, axis=1)
    .sum(axis=1)
)

# normalize
sample_weights = sample_weights / sample_weights.mean()

sample_weights.describe()
print("\nUpdated sys.path (first 5):")


Updated sys.path (first 5):


In [15]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

wfa_xgb_safe = XGBClassifier(
    n_estimators=600,
    max_depth=5,
    learning_rate=0.03,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric="logloss",
    random_state=42
)

wfa_xgb_safe.fit(
    X_train,
    y_train,
    sample_weight=sample_weights
)

y_pred = wfa_xgb_safe.predict(X_test)

wfa_safe_acc = accuracy_score(y_test, y_pred)
wfa_safe_acc

0.8125

In [16]:
# ============================================================
# FINAL MODEL EXPORT — DO NOT MODIFY ABOVE CELLS
# ============================================================

from xgboost import XGBClassifier
from src.config.paths import WFA_XGB_MODEL_JSON

# Re-train WFA-XGB on CURRENT dataset & weights
wfa_xgb_safe = XGBClassifier(
    n_estimators=600,
    max_depth=5,
    learning_rate=0.03,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric="logloss",
    random_state=42
)

wfa_xgb_safe.fit(
    X_train,
    y_train,
    sample_weight=sample_weights
)

# Persist model
wfa_xgb_safe.save_model(WFA_XGB_MODEL_JSON)

print("✅ WFA-XGB model retrained and saved to:", WFA_XGB_MODEL_JSON)


✅ WFA-XGB model retrained and saved to: C:\Users\dhanu\OneDrive\Desktop\CD_Main\wfa_xgb_cvd_prediction\models\wfa_xgb\wfa_xgb_model.json
