In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
df = pd.read_csv("Heart_Disease_Prediction.csv")
df.columns = df.columns.str.strip() 

In [9]:
X = df.drop('Heart Disease', axis=1)
y = df['Heart Disease'].replace({'Absence': 0, 'Presence': 1})

In [11]:
X = pd.get_dummies(X, drop_first=True)

In [13]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [19]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

In [21]:
best_models = []

In [23]:
log_params = {'C': [0.01, 0.1, 1, 10, 100]}
log_grid = GridSearchCV(LogisticRegression(), log_params, cv=5)
log_grid.fit(X_train, y_train)
best_models.append(('Logistic Regression', log_grid.best_score_, log_grid.best_params_))


In [25]:
svm_params = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
svm_grid = GridSearchCV(SVC(), svm_params, cv=5)
svm_grid.fit(X_train, y_train)
best_models.append(('SVM', svm_grid.best_score_, svm_grid.best_params_))

In [27]:
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_acc = nb_model.score(X_test, y_test)
best_models.append(('Naive Bayes', nb_acc, 'Default'))

In [29]:
rf_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 10],
    'min_samples_split': [2, 5],
}
rf_grid = GridSearchCV(RandomForestClassifier(), rf_params, cv=5)
rf_grid.fit(X_train, y_train)
best_models.append(('Random Forest', rf_grid.best_score_, rf_grid.best_params_))

In [31]:
df_tuned = pd.DataFrame(best_models, columns=['Model', 'CV Accuracy', 'Best Params'])
print(df_tuned.sort_values(by='CV Accuracy', ascending=False))

                 Model  CV Accuracy  \
2          Naive Bayes     0.907407   
3        Random Forest     0.838478   
1                  SVM     0.829175   
0  Logistic Regression     0.824419   

                                         Best Params  
2                                            Default  
3  {'max_depth': 3, 'min_samples_split': 5, 'n_es...  
1                          {'C': 1, 'kernel': 'rbf'}  
0                                           {'C': 1}  
