In [3]:
import pandas as pd

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"

cols = [
    "age","sex","cp","trestbps","chol","fbs","restecg",
    "thalach","exang","oldpeak","slope","ca","thal","target"
]

df = pd.read_csv(url, names=cols)

# Convert target to binary (0 = No Disease, 1 = Disease)
df["target"] = df["target"].apply(lambda x: 1 if x > 0 else 0)

# Replace ? with median
df = df.replace("?", pd.NA)
df = df.apply(pd.to_numeric, errors="coerce")
df = df.fillna(df.median())

df.to_csv("heart.csv", index=False)

print("heart.csv created successfully")
df.head()


heart.csv created successfully


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,1
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [4]:
# =========================
# ML Assignment - 6 Models
# =========================

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef, confusion_matrix, classification_report

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# -------------------------
# Load Dataset
# -------------------------
from google.colab import files

# Check if 'heart.csv' exists, if not, prompt for upload
try:
    with open('heart.csv', 'r') as f:
        pass
except FileNotFoundError:
    print("heart.csv not found. Please upload the file.")
    uploaded = files.upload()
    if 'heart.csv' not in uploaded:
        raise FileNotFoundError("heart.csv was not uploaded. Please ensure you upload the correct file.")

df = pd.read_csv("heart.csv")   # Put dataset in same folder

X = df.drop("target", axis=1)
y = df["target"]

# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling (important for LR & KNN)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# -------------------------
# Function to evaluate
# -------------------------
def evaluate_model(model, name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1]

    print("\n=========", name, "=========")

    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("AUC:", roc_auc_score(y_test, y_prob))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall:", recall_score(y_test, y_pred))
    print("F1 Score:", f1_score(y_test, y_pred))
    print("MCC:", matthews_corrcoef(y_test, y_pred))

    print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("\nClassification Report:\n", classification_report(y_test, y_pred))


# -------------------------
# Models
# -------------------------

# 1 Logistic Regression
evaluate_model(LogisticRegression(), "Logistic Regression")

# 2 Decision Tree
evaluate_model(DecisionTreeClassifier(), "Decision Tree")

# 3 KNN
evaluate_model(KNeighborsClassifier(n_neighbors=5), "KNN")

# 4 Naive Bayes
evaluate_model(GaussianNB(), "Naive Bayes")

# 5 Random Forest
evaluate_model(RandomForestClassifier(n_estimators=100), "Random Forest")

# 6 XGBoost
evaluate_model(XGBClassifier(use_label_encoder=False, eval_metric='logloss'), "XGBoost")



Accuracy: 0.8852459016393442
AUC: 0.9213362068965517
Precision: 0.8787878787878788
Recall: 0.90625
F1 Score: 0.8923076923076924
MCC: 0.7699801976524022

Confusion Matrix:
 [[25  4]
 [ 3 29]]

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.86      0.88        29
           1       0.88      0.91      0.89        32

    accuracy                           0.89        61
   macro avg       0.89      0.88      0.88        61
weighted avg       0.89      0.89      0.89        61


Accuracy: 0.7540983606557377
AUC: 0.7543103448275862
Precision: 0.7741935483870968
Recall: 0.75
F1 Score: 0.7619047619047619
MCC: 0.5080734913368186

Confusion Matrix:
 [[22  7]
 [ 8 24]]

Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.76      0.75        29
           1       0.77      0.75      0.76        32

    accuracy                           0.75        61
   macro avg       0.75 

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [5]:
import streamlit as st
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

st.title("Heart Disease Prediction - ML Models")

# Upload CSV
uploaded_file = st.file_uploader("Upload CSV dataset", type=["csv"])

if uploaded_file:
    df = pd.read_csv(uploaded_file)

    if "target" not in df.columns:
        st.error("Dataset must contain 'target' column")
    else:
        X = df.drop("target", axis=1)
        y = df["target"]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        model_name = st.selectbox(
            "Select Model",
            ["Logistic Regression", "Decision Tree", "KNN", "Naive Bayes", "Random Forest", "XGBoost"]
        )

        if model_name == "Logistic Regression":
            model = LogisticRegression()
        elif model_name == "Decision Tree":
            model = DecisionTreeClassifier()
        elif model_name == "KNN":
            model = KNeighborsClassifier()
        elif model_name == "Naive Bayes":
            model = GaussianNB()
        elif model_name == "Random Forest":
            model = RandomForestClassifier()
        else:
            model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:,1]

        st.subheader("Evaluation Metrics")

        st.write("Accuracy:", accuracy_score(y_test, y_pred))
        st.write("AUC:", roc_auc_score(y_test, y_prob))
        st.write("Precision:", precision_score(y_test, y_pred))
        st.write("Recall:", recall_score(y_test, y_pred))
        st.write("F1 Score:", f1_score(y_test, y_pred))
        st.write("MCC:", matthews_corrcoef(y_test, y_pred))

        st.subheader("Confusion Matrix")
        st.write(confusion_matrix(y_test, y_pred))


ModuleNotFoundError: No module named 'streamlit'