In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler

# Define the evaluation function
def evaluation(Y_test, Y_pred):
    acc = accuracy_score(Y_test, Y_pred)
    rcl = recall_score(Y_test, Y_pred)
    f1 = f1_score(Y_test, Y_pred)

    metric_dict = {
        'accuracy': round(acc, 3),
        'recall': round(rcl, 3),
        'F1 score': round(f1, 3),
    }

    return print(metric_dict)

# Load the dataset
df = pd.read_csv("heart.csv")

# Preprocessing and feature scaling
scaler = MinMaxScaler()
features = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
df[features] = scaler.fit_transform(df[features])

# Create features and target variable
X = df.drop("target", axis=1).values
Y = df.target.values

# Split the data into train and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0, test_size=0.2)

# Define classifiers
classifiers = {
    'KNN': KNeighborsClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC()
}

# Hyperparameter tuning using GridSearchCV
best_models = {}
for name, clf in classifiers.items():
    param_grid = {}  # Add hyperparameter grid for each classifier

    # Perform GridSearchCV
    grid_search = GridSearchCV(clf, param_grid, cv=5, verbose=True)
    grid_search.fit(X_train, Y_train)
    
    # Save the best model
    best_models[name] = grid_search.best_estimator_

    # Evaluate the model
    Y_pred = grid_search.predict(X_test)
    print(f"Best model for {name}:")
    evaluation(Y_test, Y_pred)

# Save the best model as .h5 file
import joblib
joblib.dump(best_models['Random Forest'], 'best_model.joblib')


Fitting 5 folds for each of 1 candidates, totalling 5 fits
Best model for KNN:
{'accuracy': 0.859, 'recall': 0.832, 'F1 score': 0.86}
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Best model for Random Forest:
{'accuracy': 1.0, 'recall': 1.0, 'F1 score': 1.0}
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Best model for SVM:
{'accuracy': 0.902, 'recall': 0.907, 'F1 score': 0.907}


['best_model.h5']