In [41]:
#Importing libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score
from scipy.stats import randint

np.random.seed(42)
#Importing datasets
heart = pd.read_csv('heartFailure.csv')
X = heart.drop('HeartDisease', axis=1)
y = heart.HeartDisease.astype(int)

#Separating categorical and numerical features
category_features = [col for col in X.columns if X[col].dropna().apply(lambda x: isinstance(x, str)).all()]
numeric_features = [col for col in X.columns if X[col].dropna().apply(lambda x: isinstance(x, (int, float))).all()]

#Creating preprocessing for categorical and numerical features
categorical_transformer = Pipeline(steps= [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('oneHot', OneHotEncoder(handle_unknown='ignore'))])

numeric_transformer = Pipeline(steps= [
    ('imputer', SimpleImputer(strategy='mean'))])

#Combining preprocessor in ColumnTransformer
preprocessor = ColumnTransformer([
    ('cat', categorical_transformer, category_features),
    ('num', numeric_transformer, numeric_features)])

#Creating a Pipeline for Preprocessing and Model
model = Pipeline(steps= [
    ('preprocessor', preprocessor),
    ('model', RandomForestClassifier())])

#Splitting trianing and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
def model_evaluation(y_test, y_pred):
    print(f'AccuracyScore: {accuracy_score(y_test, y_pred)*100:.3f}%')
    print(f'PrecisionScore: {precision_score(y_test, y_pred):.3f}')
    print(f'RecallScore: {recall_score(y_test, y_pred):.3f}')
    print(f'F1-Score: {f1_score(y_test, y_pred):.3f}')
model_evaluation(y_test, y_pred)

AccuracyScore: 87.500%
PrecisionScore: 0.912
RecallScore: 0.869
F1-Score: 0.890


**Hyperparameter Tunning Using RandomSearchCV**

In [53]:
np.random.seed(42)
param_grids = {
    'model__n_estimators': randint(10, 1500),
    'model__max_depth': randint(2, 30),
    'model__max_features': ['sqrt', 'log2'],
    'model__bootstrap': [True, False],
    'model__min_samples_leaf': randint(2, 22),
    'model__min_samples_split': randint(3, 26),
    'preprocessor__num__imputer__strategy': ['mean', 'median']
}
random_clf = RandomizedSearchCV(estimator=model, param_distributions=param_grids, n_iter=22, cv=5, verbose=0, n_jobs=-1)
random_clf.fit(X_train, y_train)

In [54]:
best_param = random_clf.best_params_
y_random_pred = random_clf.predict(X_test)
best_param

{'model__bootstrap': False,
 'model__max_depth': 28,
 'model__max_features': 'sqrt',
 'model__min_samples_leaf': 2,
 'model__min_samples_split': 5,
 'model__n_estimators': 622,
 'preprocessor__num__imputer__strategy': 'mean'}

In [55]:
model_evaluation(y_test, y_random_pred)

AccuracyScore: 88.043%
PrecisionScore: 0.897
RecallScore: 0.897
F1-Score: 0.897


**Hyperparameter Tunning Using GridSearchCV**

In [57]:
param_grid = {
    'model__n_estimators': [356, 522, 789],
    'model__max_depth': [10, 12, 17],
    'model__max_features': ['sqrt', 'log2'],
    'model__bootstrap': [True, False],
    'model__min_samples_leaf': [3, 7],
    'model__min_samples_split': [11, 14],
    'preprocessor__num__imputer__strategy': ['mean', 'median']
}
grid_clf = GridSearchCV(estimator=model, param_grid=param_grid, scoring=None, n_jobs=-1, verbose=0)
grid_clf.fit(X_train, y_train)

  _data = np.array(data, dtype=dtype, copy=copy,


In [60]:
best_params = grid_clf.best_params_

In [61]:
y_grid_pred = grid_clf.predict(X_test)

In [62]:
model_evaluation(y_test, y_grid_pred)

AccuracyScore: 88.043%
PrecisionScore: 0.897
RecallScore: 0.897
F1-Score: 0.897
