# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Importing Classification models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Page Configuration: browser-tab title and icon, full-width layout.
st.set_page_config(page_title="Heart Disease Predictor", page_icon="🫀", layout="wide")

st.title("🫀 Heart Disease Prediction & Model Comparison")
st.markdown("""
Upload your dataset to train the models, then use the sidebar to input your own data for a live prediction!
""")

# --- SIDEBAR: FILE UPLOAD & USER INPUT ---
st.sidebar.header("📁 1. Dataset Upload")
# Holds the uploaded CSV, or None while no file has been chosen; the main
# logic further down branches on this value.
uploaded_file = st.sidebar.file_uploader("Upload 'heart.csv'", type=["csv"])

st.sidebar.divider()
# Only the section header is drawn here; the per-column input widgets are
# created by get_user_input() once the dataset's columns are known.
st.sidebar.header("👤 2. Manual Patient Input")

def get_user_input(features):
    """Render one sidebar number field per feature and collect the entries.

    Columns named in the hard-coded tuple below get an integer field
    (default 0, step 1); every other column gets a float field
    (default 0.0).

    Args:
        features: Iterable of column names. One widget is created per name,
            in iteration order, labelled ``"Enter <name>"``.

    Returns:
        A one-row ``pandas.DataFrame`` (index ``[0]``) with one column per
        feature, holding the value currently entered in the sidebar.
    """
    integer_coded = ('sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal')
    entered = {}
    for name in features:
        # Pick the widget flavour first, then issue a single call with it.
        widget_args = {"value": 0, "step": 1} if name in integer_coded else {"value": 0.0}
        entered[name] = st.sidebar.number_input(f"Enter {name}", **widget_args)
    return pd.DataFrame(entered, index=[0])

# --- MAIN LOGIC ---
# Once a CSV is uploaded: preview it, validate it, collect the patient values
# from the sidebar and, when the button is pressed, train every model, rank
# them by accuracy on a 20% hold-out split, and classify the sidebar input
# with the top-ranked model.
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    st.subheader("📊 Dataset Preview")
    st.dataframe(df.head(), use_container_width=True)

    # Validate up front so unusable data yields a readable message instead of
    # a traceback from the scaler or one of the estimators.
    problem = None
    if 'target' not in df.columns:
        problem = "⚠️ Your CSV must contain a 'target' column."
    else:
        X = df.drop('target', axis=1)
        y = df['target']

        # The sidebar only offers numeric inputs and StandardScaler needs
        # numeric data, so text/date feature columns cannot be used.
        non_numeric_cols = X.select_dtypes(exclude=["number", "bool"]).columns.tolist()
        if non_numeric_cols:
            problem = (
                "⚠️ All feature columns must be numeric. Non-numeric columns: "
                + ", ".join(map(str, non_numeric_cols))
            )
        elif df.isna().to_numpy().any():
            problem = "⚠️ Your CSV contains missing values. Please clean or fill them before uploading."

    if problem is not None:
        st.error(problem)
    else:
        # Get User Input from Sidebar based on CSV columns
        user_input_df = get_user_input(X.columns)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Fit the scaler on the training split only, then reuse it for the
        # test split and for the manually entered patient row.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Scale the user input too!
        user_input_scaled = scaler.transform(user_input_df)

        models = {
            "Logistic Regression": LogisticRegression(),
            "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
            "Decision Tree": DecisionTreeClassifier(random_state=42),
            "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
            "Naive Bayes": GaussianNB(),
            # probability=True enables predict_proba; it is seeded like the
            # other models so the reported confidence is reproducible.
            "SVC": SVC(probability=True, random_state=42),
            "Gradient Boosting": GradientBoostingClassifier(random_state=42)
        }

        st.divider()

        if st.button("🚀 Train Models & Predict"):
            with st.spinner('Calculating...'):
                results = []
                trained_models = {}

                for name, model in models.items():
                    model.fit(X_train_scaled, y_train)
                    trained_models[name] = model  # Keep the fitted model for the prediction step

                    predictions = model.predict(X_test_scaled)
                    accuracy = accuracy_score(y_test, predictions)
                    results.append({"Model": name, "Accuracy (%)": round(accuracy * 100, 2)})

                results_df = pd.DataFrame(results).sort_values(by="Accuracy (%)", ascending=False)

                # --- UI: Display Comparison ---
                col1, col2 = st.columns(2)
                with col1:
                    st.write("### 🏆 Model Ranking")
                    st.dataframe(results_df, use_container_width=True)
                with col2:
                    st.write("### 📈 Accuracy Comparison")
                    st.bar_chart(results_df.set_index("Model")['Accuracy (%)'])

                # --- UI: Display Prediction for User Input ---
                st.divider()
                st.subheader("🔮 Prediction for Your Input")

                # We use the best performing model for the prediction
                best_model_name = results_df.iloc[0]['Model']
                best_model = trained_models[best_model_name]

                prediction = best_model.predict(user_input_scaled)
                prediction_proba = best_model.predict_proba(user_input_scaled)

                p_col1, p_col2 = st.columns(2)

                with p_col1:
                    st.metric(label="Model Used", value=best_model_name)
                    if prediction[0] == 1:
                        st.error("Result: Heart Disease Detected")
                    else:
                        st.success("Result: No Heart Disease Detected")

                with p_col2:
                    st.write("### Confidence Level")
                    # predict_proba columns follow best_model.classes_, so look
                    # up the predicted label's column rather than assuming the
                    # labels are exactly 0 and 1.
                    class_index = list(best_model.classes_).index(prediction[0])
                    conf = prediction_proba[0][class_index]
                    st.write(f"The model is **{round(conf*100, 2)}%** confident in this result.")

else:
    st.info("👈 Please upload your 'heart.csv' file and enter patient data in the sidebar.")