In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
from sklearn.calibration import CalibratedClassifierCV
import joblib
import os
import warnings


# Load dataset
df = pd.read_csv("C:\\Users\\jasmi\\Downloads\\framingham_heart_study.csv")
df.drop(columns=["education"], inplace=True)
df.dropna(inplace=True)

from sklearn.preprocessing import StandardScaler

# Define features and target
X = df.drop("TenYearCHD", axis=1)
y = df["TenYearCHD"]

# Scale and retain feature names
scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# Balance with SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)



# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

# Train LGBM with balanced class weights
base_model = LGBMClassifier(
    random_state=42,
    class_weight='balanced',
    n_estimators=200,
    max_depth=5,
    learning_rate=0.05
)

# Calibrate output using isotonic regression
calibrated_model = CalibratedClassifierCV(base_model, method='isotonic', cv=5)
calibrated_model.fit(X_train, y_train)

# Evaluate
y_pred = calibrated_model.predict(X_test)
y_proba = calibrated_model.predict_proba(X_test)[:, 1]

print(f"✅ Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print(f"✅ ROC AUC: {roc_auc_score(y_test, y_proba):.4f}")
print("\n🩺 Classification Report:")
print(classification_report(y_test, y_pred))

[LightGBM] [Info] Number of positive: 2034, number of negative: 2034
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000546 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2135
[LightGBM] [Info] Number of data points in the train set: 4068, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2034, number of negative: 2035
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000631 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2135
[LightGBM] [Info] Number of data points in the train set: 4069, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 2034, number of negative: 2035
[Lig

In [7]:
import os
import joblib

# Absolute path to your Desktop heart_risk folder
desktop_path = r"C:\Users\jasmi\Desktop\heart_risk"
os.makedirs(desktop_path, exist_ok=True)

# Save the model
joblib.dump(calibrated_model, os.path.join(desktop_path, "heart_model_v2.pkl"))

# ✅ Save the scaler
joblib.dump(scaler, os.path.join(desktop_path, "scaler_v2.pkl"))

print("✅ Model and scaler saved directly to Desktop heart_risk folder.")




✅ Model and scaler saved directly to Desktop heart_risk folder.
