In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load dataset
# Use the correct relative path to the Data folder
data = pd.read_csv("../Data/heart_2020_cleaned.csv")

# Drop rows with missing values *before* encoding, so that NaN never reaches
# LabelEncoder (where it would become its own category or fail to sort
# against the string labels).
data = data.dropna()

# Encode every object (string) column with its own LabelEncoder and keep the
# fitted encoders, so the integer codes can be mapped back to the original
# labels later (e.g. encoders['Sex'].classes_).
encoders = {}
le = LabelEncoder()  # rebound per column below; ends up as the last column's encoder
for col in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    encoders[col] = le

# Identify categorical and numeric columns
def get_column_types(df):
    """Split the columns of ``df`` into categorical and numeric names.

    A column counts as categorical when its dtype is ``object`` or when it
    holds fewer than 10 distinct values; every other column is numeric.

    Returns:
        tuple[list, list]: ``(cat_cols, num_cols)``, each in column order.
    """
    cat_cols = [
        name for name in df.columns
        if df[name].dtype == 'object' or df[name].nunique() < 10
    ]
    # Whatever was not classified as categorical is numeric.
    num_cols = [name for name in df.columns if name not in cat_cols]
    return cat_cols, num_cols

# Classify the feature columns only; the target 'HeartDisease' is excluded.
cat_cols, num_cols = get_column_types(data.drop(columns=['HeartDisease']))

# Prepare options for categorical columns
def get_options(df, col):
    """Return the distinct values of ``df[col]`` as strings, sorted as text."""
    return sorted(map(str, df[col].unique()))

# Feature names (every column except the target), in dataset order
labels = data.columns.drop('HeartDisease').tolist()
input_widgets = list()

# Train-test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(data.drop('HeartDisease', axis=1), data['HeartDisease'], test_size=0.2, random_state=42)

# Model: seed the forest as well, otherwise every run grows different trees
# and the reported metrics cannot be reproduced.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Persist the fitted model; the GUI cell loads it back from "model.pkl".
joblib.dump(model, "model.pkl")

# Evaluate on the held-out 20%
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.9046107662721431
              precision    recall  f1-score   support

           0       0.92      0.98      0.95     58367
           1       0.36      0.12      0.18      5592

    accuracy                           0.90     63959
   macro avg       0.64      0.55      0.56     63959
weighted avg       0.87      0.90      0.88     63959



In [7]:
# Heart Disease Predictor User Interface
# This section demonstrates a dynamic Tkinter GUI for heart disease prediction. The interface automatically generates dropdowns for categorical features (with Yes/No for binary) and entry fields for numeric features, based on the dataset.

import tkinter as tk
from tkinter import messagebox
import pandas as pd
import joblib

# Load the dataset. The form fields below are derived from it, so a missing
# or unreadable file must fail here with the real error instead of being
# swallowed and surfacing later as an AttributeError on None.
data = pd.read_csv("../Data/heart_2020_cleaned.csv")

# Load the model (expected to have been trained and saved as 'model.pkl').
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model = None
try:
    model = joblib.load("model.pkl")
except FileNotFoundError:
    # For UI demo purposes the GUI still runs when no model is present
    print("model.pkl not found - the GUI will run without a model.")
except Exception as exc:
    # Keep the best-effort behaviour, but say why the model is unavailable
    print(f"Could not load model.pkl ({exc!r}) - the GUI will run without a model.")

# Identify categorical and numeric columns
def get_column_types(df):
    """Partition the columns of ``df`` into categorical and numeric names.

    Columns with ``object`` dtype, or with fewer than 10 unique values, are
    reported as categorical; the remaining columns are reported as numeric.

    Returns:
        tuple[list, list]: ``(categorical names, numeric names)``.
    """
    categorical, numeric = [], []
    for name in df.columns:
        series = df[name]
        is_categorical = series.dtype == 'object' or series.nunique() < 10
        bucket = categorical if is_categorical else numeric
        bucket.append(name)
    return categorical, numeric

# Only the input features are classified; the 'HeartDisease' target is left out.
cat_cols, num_cols = get_column_types(data.drop(columns='HeartDisease'))

# Prepare options for categorical columns, with Yes/No mapping for binary
def get_options(df, col):
    """Build the dropdown choices for a categorical column.

    Args:
        df: DataFrame holding the raw (un-encoded) dataset.
        col: Name of the column to build choices for.

    Returns:
        tuple[list[str], dict]: ``(display_options, option_map)`` where
        ``option_map`` translates each display string into the value that is
        fed to the model:

        * numeric columns map to the original numeric value;
        * non-numeric (string) columns map to the position of the value in
          sorted order, which is the integer code ``LabelEncoder`` assigns
          when the training cell encodes that column.

        Two-valued columns holding 0/1 or yes/no (any casing) are displayed
        as ``"No"`` / ``"Yes"``.
    """
    vals = sorted(df[col].unique())

    if pd.api.types.is_numeric_dtype(df[col]):
        # Numeric features are passed to the model unchanged.
        codes = {v: v for v in vals}
    else:
        # The model was trained on label-encoded strings, not on the strings
        # themselves, so translate each category to its sorted-order code.
        codes = {v: i for i, v in enumerate(vals)}

    if len(vals) == 2:
        if set(vals) == {0, 1}:
            return ["No", "Yes"], {"No": 0, "Yes": 1}
        # Look the yes/no values up by their lower-cased form so the map
        # always points at a value that really occurs in the column.
        by_lower = {str(v).lower(): v for v in vals}
        if set(by_lower) == {"no", "yes"}:
            return ["No", "Yes"], {
                "No": codes[by_lower["no"]],
                "Yes": codes[by_lower["yes"]],
            }

    return [str(v) for v in vals], {str(v): codes[v] for v in vals}

# Feature names in dataset order (the 'HeartDisease' target is not an input)
labels = list(data.columns.drop('HeartDisease'))
# Filled in by build_gui(): one widget per label, plus the display-to-value
# map of every dropdown.
input_widgets, option_maps = [], {}

# Build the Tkinter GUI
def build_gui():
    """Build the prediction form and run the Tk main loop.

    One row is created per entry of the module-level ``labels``: a dropdown
    for columns listed in ``cat_cols`` (choices come from ``get_options``)
    and a free-text entry for every other column. The created widgets are
    stored in the module-level ``input_widgets`` and the dropdown value maps
    in ``option_maps``. Pressing "Predict" validates the inputs and, when the
    module-level ``model`` is loaded, shows its prediction in a message box.

    The call blocks until the window is closed.
    """
    # Start from a clean slate: without this, calling build_gui() a second
    # time would append to the lists of the previous (destroyed) window and
    # the widgets would no longer line up with ``labels``.
    input_widgets.clear()
    option_maps.clear()

    root = tk.Tk()
    root.title("Heart Disease Predictor")
    # Window height grows with the number of input rows.
    root.geometry(f"520x{44*len(labels)+140}")
    root.configure(bg="#f5f5f5")

    header = tk.Label(root, text="Heart Disease Predictor", font=("Arial", 20, "bold"), bg="#f5f5f5", fg="#1976D2")
    header.pack(pady=(20, 10))

    desc = tk.Label(root, text="Enter your health details below and click Predict.", font=("Arial", 12), bg="#f5f5f5")
    desc.pack(pady=(0, 10))

    frame = tk.Frame(root, padx=20, pady=10, bg="#f5f5f5")
    frame.pack()

    for i, label in enumerate(labels):
        tk.Label(frame, text=label, anchor="w", width=22, font=("Arial", 10), bg="#f5f5f5").grid(row=i, column=0, sticky="w", pady=4)
        if label in cat_cols:
            opts, opt_map = get_options(data, label)
            var = tk.StringVar(value="Select")
            opt = tk.OptionMenu(frame, var, *opts)
            opt.config(width=20, font=("Arial", 10), bg="#f5f5f5", relief="groove", highlightthickness=1)
            # Keep the variable on the widget so predict() can read the choice.
            opt.var = var
            opt.grid(row=i, column=1, pady=4)
            input_widgets.append(opt)
            option_maps[label] = opt_map
        else:
            entry = tk.Entry(frame, width=22, font=("Arial", 10))
            entry.grid(row=i, column=1, pady=4)
            input_widgets.append(entry)

    def animate_button(btn):
        """Briefly enlarge the button font as click feedback."""
        def grow():
            btn.config(font=("Arial", 14, "bold"))
            btn.after(120, shrink)
        def shrink():
            btn.config(font=("Arial", 12, "bold"))
        grow()

    def collect_inputs():
        """Read every widget; raise ValueError naming the first bad field."""
        vals = []
        for label, widget in zip(labels, input_widgets):
            if isinstance(widget, tk.Entry):
                raw = widget.get().strip()
                if raw == "":
                    raise ValueError(f"Missing value for {label}")
                try:
                    val = float(raw)
                except ValueError:
                    raise ValueError(f"Invalid number for {label}") from None
            else:  # OptionMenu
                choice = widget.var.get()
                if choice == "" or choice == "Select":
                    raise ValueError(f"Missing value for {label}")
                # Map display value to the value the model expects
                val = option_maps[label][choice]
            vals.append(val)
        return vals

    def predict():
        """Validate the form and show the model's prediction."""
        animate_button(predict_btn)
        try:
            vals = collect_inputs()
        except ValueError as ve:
            messagebox.showerror("Input Error", str(ve))
            return

        # Compare against None explicitly: truthiness of an estimator object
        # is not a reliable "is a model loaded" test.
        if model is None:
            messagebox.showinfo("Prediction", f"Inputs: {vals}\n(Model not loaded)")
            return

        try:
            # Pass a one-row frame carrying the feature names, matching how
            # the model was fitted, instead of an anonymous list.
            features = pd.DataFrame([vals], columns=labels)
            pred = model.predict(features)[0]
        except Exception as exc:
            # A failure here comes from the model, not from the form, so
            # report the real cause rather than blaming the user's input.
            messagebox.showerror("Error", f"Prediction failed: {exc}")
            return

        result = "⚠️ High Risk of Heart Disease" if pred == 1 else "✅ Low Risk"
        messagebox.showinfo("Prediction", result)

    predict_btn = tk.Button(
        root, text="Predict", command=predict,
        bg="#43A047", fg="white", font=("Arial", 12, "bold"),
        activebackground="#388E3C", activeforeground="white",
        width=20, height=2, bd=0, relief="ridge", cursor="hand2"
    )
    predict_btn.pack(pady=20)

    footer = tk.Label(root, text="Powered by Machine Learning", font=("Arial", 9), bg="#f5f5f5", fg="#888")
    footer.pack(side="bottom", pady=8)

    root.mainloop()

# Run the GUI. build_gui() ends by entering the Tk main loop, so this call
# only returns once that loop exits.
build_gui()