In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
# Use the correct relative path to the Data folder
data = pd.read_csv("../Data/heart_2020_cleaned.csv")

# Encode all object (string) columns automatically
le = LabelEncoder()
for col in data.select_dtypes(include=['object']).columns:
    data[col] = le.fit_transform(data[col])

# Drop rows with missing values if any
data = data.dropna()

data.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0,16.6,1,0,0,3,30,0,0,7,5,2,1,4,5,1,0,1
1,0,20.34,0,0,1,0,0,0,0,12,5,0,1,4,7,0,0,0
2,0,26.58,1,0,0,20,30,0,1,9,5,2,1,1,8,1,0,0
3,0,24.21,0,0,0,0,0,0,0,11,5,0,0,2,6,0,0,1
4,0,23.71,0,0,0,28,0,1,0,4,5,0,1,4,8,0,0,0


In [3]:
# Features & Target
X = data.drop('HeartDisease', axis=1)
y = data['HeartDisease']  # Already encoded as 0/1

# Ensure all features are numeric (for safety)
if not all([pd.api.types.is_numeric_dtype(X[col]) for col in X.columns]):
    print("Non-numeric columns detected. Converting...")
    for col in X.columns:
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = pd.factorize(X[col])[0]

# Print dtypes for debugging
print("Feature dtypes:\n", X.dtypes)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Feature dtypes:
 BMI                 float64
Smoking               int64
AlcoholDrinking       int64
Stroke                int64
PhysicalHealth        int64
MentalHealth          int64
DiffWalking           int64
Sex                   int64
AgeCategory           int64
Race                  int64
Diabetic              int64
PhysicalActivity      int64
GenHealth             int64
SleepTime             int64
Asthma                int64
KidneyDisease         int64
SkinCancer            int64
dtype: object
Accuracy: 0.9045169561750497
              precision    recall  f1-score   support

           0       0.92      0.98      0.95     58367
           1       0.36      0.12      0.18      5592

    accuracy                           0.90     63959
   macro avg       0.64      0.55      0.56     63959
weighted avg       0.87      0.90      0.88     63959



In [6]:
import tkinter as tk
from tkinter import messagebox

# --- GUI for Heart Disease Prediction ---

def predict():
    try:
        # Collect and validate user input
        vals = []
        for i, entry in enumerate(entries):
            val = entry.get()
            if val == "":
                raise ValueError(f"Missing value for {labels[i]}")
            vals.append(float(val))
        # Predict using the trained model
        pred = model.predict([vals])[0]
        result = "⚠️ High Risk of Heart Disease" if pred == 1 else "✅ Low Risk"
        messagebox.showinfo("Prediction", result)
    except ValueError as ve:
        messagebox.showerror("Input Error", str(ve))
    except Exception:
        messagebox.showerror("Error", "Please enter valid numbers for all fields.")

root = tk.Tk()
root.title("Heart Disease Predictor")
root.geometry(f"400x{30*len(X.columns)+60}")

labels = list(X.columns)
entries = []

frame = tk.Frame(root, padx=10, pady=10)
frame.pack()

# Create input fields for each feature
for i, label in enumerate(labels):
    tk.Label(frame, text=label, anchor="w", width=20).grid(row=i, column=0, sticky="w", pady=2)
    entry = tk.Entry(frame, width=20)
    entry.grid(row=i, column=1, pady=2)
    entries.append(entry)

tk.Button(frame, text="Predict", command=predict, bg="#4CAF50", fg="white", width=20).grid(row=len(labels), column=0, columnspan=2, pady=10)

root.mainloop()