In [None]:
"""
================================================================================
Machine Learning Assignment - 2  
================================================================================
BITS ID: 2025aa05047
Name: BRAJESH KUMAR
Email: 2025aa05047@wilp.bits-pilani.ac.in
Date: 26-01-2026

================================================================================
"""

In [None]:
# Install Required Libraries and Import Libraries
!pip install pandas numpy scikit-learn xgboost joblib
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_score,
    recall_score, f1_score, matthews_corrcoef)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier


In [None]:
# Load Dataset
# Reads "heart.csv" via a relative path, so the file must sit in the
# notebook's current working directory.
df = pd.read_csv("heart.csv")

# Print the (rows, columns) shape, then preview the first rows as the cell output.
print(df.shape)
df.head()

In [None]:
# Separate the target column from the predictor columns
y = df["HeartDisease"]
X = df.drop(columns="HeartDisease")

# Partition predictor columns by dtype: object-typed columns are treated as
# categorical, every other column as numerical.
categorical_cols = X.select_dtypes(include="object").columns
numerical_cols = X.select_dtypes(exclude="object").columns

print("Categorical Columns:", categorical_cols)
print("Numerical Columns:", numerical_cols)

In [None]:
# One-hot encode the categorical columns (dropping the first level of each),
# then make a stratified 80/20 train-test split with a fixed seed.
X_encoded = pd.get_dummies(X, columns=categorical_cols, drop_first=True)
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Feature Scaling (IMPORTANT)
# The scaler is fitted on the training split only and then applied to both splits.
#
# This cell overwrites X_train / X_test with scaled NumPy arrays. Re-running it
# unguarded would re-fit the scaler on data that is already scaled, and that
# scaler is the one saved for the Streamlit app later. The split cell yields
# DataFrames, so a DataFrame here means "not scaled yet"; once scaled, a re-run
# of this cell is a no-op.
if isinstance(X_train, pd.DataFrame):
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

In [None]:
# STEP-BY-STEP MODEL IMPLEMENTATION

In [None]:
# Logistic Regression: build the estimator (iteration cap 1000) and fit it
# on the training split; the fitted estimator is shown as the cell output.
lr = LogisticRegression(max_iter=1000)
lr.fit(X=X_train, y=y_train)


In [None]:
# Decision Tree Classifier: seeded for reproducibility, fitted on the training split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X=X_train, y=y_train)


In [None]:
# K-Nearest Neighbor Classifier (KNN): votes over the 5 nearest training points.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X=X_train, y=y_train)


In [None]:
# Naive Bayes Classifier (Gaussian), fitted on the training split.
nb = GaussianNB()
nb.fit(X=X_train, y=y_train)


In [None]:
# Ensemble Model - Random Forest: 100 trees, seeded for reproducibility.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X=X_train, y=y_train)


In [None]:
# Ensemble Model - XGBoost: log-loss evaluation metric, seeded for reproducibility.
xgb = XGBClassifier(eval_metric="logloss", random_state=42)
xgb.fit(X=X_train, y=y_train)

In [None]:
# EVALUATION METRICS

In [None]:
def evaluate_model(model, X_test, y_test):
    """Score a fitted classifier on a held-out set.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Feature matrix to score.
        y_test: True labels aligned with ``X_test``.

    Returns:
        dict: Metric name -> value, in the order Accuracy, AUC, Precision,
        Recall, F1 Score, MCC. AUC is computed from the second column of
        ``predict_proba``; every other metric uses the hard predictions.
    """
    predicted_labels = model.predict(X_test)
    # Column 1 of predict_proba is used as the score for the AUC.
    positive_scores = model.predict_proba(X_test)[:, 1]

    scores = {"Accuracy": accuracy_score(y_test, predicted_labels)}
    scores["AUC"] = roc_auc_score(y_test, positive_scores)

    # The remaining metrics all share the (y_true, y_pred) call shape.
    label_based_metrics = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
        ("MCC", matthews_corrcoef),
    )
    for metric_name, metric_fn in label_based_metrics:
        scores[metric_name] = metric_fn(y_test, predicted_labels)

    return scores

In [None]:
# Re-train ALL models on the training split.
# Each estimator was already fitted in its own cell above, so this repeats that work.
for estimator in (lr, dt, knn, nb, rf):
    estimator.fit(X_train, y_train)

# Kept as the final expression so the fitted XGBoost estimator is the cell output.
xgb.fit(X_train, y_train)


In [None]:
# Evaluate ALL Models
# Display name -> fitted estimator. The names become the row labels of the
# results table and are reused for the saved file names.
models = {
    "Logistic Regression": lr,
    "Decision Tree": dt,
    "KNN": knn,
    "Naive Bayes": nb,
    "Random Forest": rf,
    "XGBoost": xgb,
}

# Score each estimator on the held-out test split.
results = {
    model_name: evaluate_model(estimator, X_test, y_test)
    for model_name, estimator in models.items()
}

# One row per model, one column per metric.
results_df = pd.DataFrame(results).transpose()
results_df

In [None]:
# Display Results Table (For README/PDF): plain-text dump rounded to 3 decimals
print(results_df.round(decimals=3))

In [None]:
# STEP-BY-STEP: SAVE MODELS (FOR STREAMLIT)
# Writes every fitted model plus the fitted scaler into the "model" directory,
# relative to the notebook's working directory.

MODEL_DIR = "model"
os.makedirs(MODEL_DIR, exist_ok=True)

# File names are the display names with spaces replaced by underscores,
# e.g. "Logistic Regression" -> model/Logistic_Regression.pkl
for name, model in models.items():
    joblib.dump(model, f"{MODEL_DIR}/{name.replace(' ', '_')}.pkl")

# The scaler is saved next to the models; the models were trained on its output.
joblib.dump(scaler, f"{MODEL_DIR}/scaler.pkl")


In [None]:
# Print the string representation of the fitted XGBoost estimator.
print(xgb)

In [None]:
!pip install streamlit

In [None]:
# Change into the project folder and launch the Streamlit app from there.
# NOTE(review): this is a hardcoded absolute Windows path, so the cell only works
# on a machine that has this exact folder — TODO confirm whether the notebook
# already runs from the project folder, in which case the %cd is unnecessary.
%cd "E:\ML-Assignment-2-Classification\project-folder"
# NOTE(review): presumably this keeps the cell running until the Streamlit server
# is stopped, which would stall a "Run All" — verify.
!streamlit run app.py
# Alternative launch (headless, port 8502), kept for reference:
#!streamlit run app.py --server.headless true --server.port 8502