In [6]:
# 📦 Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
import joblib
import pickle



In [7]:
# 📄 Step 2: Load dataset
df = pd.read_csv("framingham_heart_study.csv")
df.drop(columns=["education"], inplace=True)  # optional


In [8]:
# 🔍 Step 3: Handle missing values
df = df.dropna()  # Safe for now
print("✅ Final check - any missing values left?")
print(df.isnull().sum().sum())  # Should be 0


✅ Final check - any missing values left?
0


In [9]:
# 🎯 Step 4: Define features and target
X = df.drop("TenYearCHD", axis=1)
y = df["TenYearCHD"]


In [10]:
# ⚖️ Step 5: Balance dataset using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)


In [11]:
# 🧪 Step 6: Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42)


In [12]:
# 🚀 Step 7: Train LightGBM
model = LGBMClassifier(random_state=42)
model.fit(X_train, y_train)


[LightGBM] [Info] Number of positive: 2537, number of negative: 2549
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001337 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1911
[LightGBM] [Info] Number of data points in the train set: 5086, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498820 -> initscore=-0.004719
[LightGBM] [Info] Start training from score -0.004719


In [13]:
# 📊 Step 8: Evaluate
y_pred = model.predict(X_test)
print(f"✅ Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("\n🩺 Classification Report:")
print(classification_report(y_test, y_pred))


✅ Accuracy: 87.58%

🩺 Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.92      0.88       630
           1       0.91      0.84      0.87       642

    accuracy                           0.88      1272
   macro avg       0.88      0.88      0.88      1272
weighted avg       0.88      0.88      0.88      1272



In [15]:
# Save the trained LightGBM model to a file
with open("heart_model.pkl", "wb") as f:
    pickle.dump(model, f)