In [27]:
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt 
import seaborn as sns 
import missingno as msno 
import os  

# Splitting Data
from sklearn.model_selection import train_test_split, GridSearchCV

## models
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score

## Pkl file
import joblib

In [4]:
# Build the dataset path relative to the directory the notebook is run from.
working_dir = os.getcwd()
File_Path = os.path.join(working_dir, 'heart_disease_uci_encoding.csv')

# Read the encoded dataset and preview its first rows as the cell output.
df = pd.read_csv(File_Path)
df.head()

Unnamed: 0,age,gender,dataset,chest_pain_type,resting_blood_pressure,fasting_blood_sugar,maximum_heart_rate_achieved,exercise_induced_angina,oldpeak,num,age_cholesterol_ratio,bp_cholesterol_ratio,heart_rate_stress,risk_index
0,63,1,0,3,145.0,1,150.0,0,2.3,0,0.269231,0.619658,2.34375,228.0
1,67,1,0,0,160.0,0,108.0,1,1.5,1,0.233449,0.557491,1.588235,338.0
2,67,1,0,0,120.0,0,129.0,1,2.6,1,0.291304,0.521739,1.897059,220.0
3,37,1,0,2,130.0,0,187.0,0,3.5,0,0.14741,0.517928,4.921053,193.0
4,41,0,0,1,130.0,0,172.0,0,1.4,0,0.2,0.634146,4.095238,162.0


### Splitting Data

In [7]:
# Separate the features (X) from the prediction target (y).
TARGET_COLUMN = 'num'

# The CSV carries an 'Unnamed: 0' column that simply mirrors the row number
# (see the df.head() preview). It holds no clinical information, so it must
# not be used as a feature: a model can latch onto row order instead of signal.
# NOTE(review): presumably a saved pandas index from an earlier to_csv call - confirm.
# Recorded outputs further down were produced with this column still included;
# re-run the notebook to refresh them.
leaked_index_columns = [col for col in df.columns if str(col).startswith('Unnamed:')]

X = df.drop(columns=[TARGET_COLUMN, *leaked_index_columns])
y = df[TARGET_COLUMN]

In [9]:
# Hold out 20% of the rows for testing; the fixed seed keeps the shuffled
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [11]:
# Report the dimensions of every split in the same order: X train, X test,
# y train, y test.
for split_name, split_data in (
    ('X train', X_train),
    ('X test', X_test),
    ('y train', y_train),
    ('y test', y_test),
):
    print(f'shape of {split_name} --> {split_data.shape}')

shape of X train --> (734, 13)
shape of X test --> (184, 13)
shape of y train --> (734,)
shape of y test --> (184,)


### Hyperparameter Tuning: Random Forest

In [19]:
# Candidate hyperparameter values to explore for the random forest
# (3 * 2 * 4 * 3 * 3 = 216 combinations).
param_grid = dict(
    n_estimators=[100, 200, 300],
    criterion=['gini', 'entropy'],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Template forest handed to the search; the seed is fixed for repeatability.
rand_forest = RandomForestClassifier(random_state=42)

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split, run in parallel (n_jobs=-1) with per-fit progress logging.
grid_search = GridSearchCV(
    estimator=rand_forest,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


In [20]:
# Take the best estimator found by the random-forest grid search.
# NOTE: `grid_search` is reassigned by the AdaBoost section later in the
# notebook, so this cell must run before that one.
best_rand_forest = grid_search.best_estimator_

In [21]:
# Bare expression: renders the selected random forest as the cell output.
best_rand_forest

In [22]:
# Predict labels for the held-out test split with the selected random forest.
y_pred_rand_forest = best_rand_forest.predict(X_test)

In [23]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_rand_forest = accuracy_score(y_test, y_pred_rand_forest)

# Same score expressed as a percentage for display.
accuracy_percent_rand_forest = accuracy_rand_forest * 100

# Report the score together with the winning hyperparameter combination.
print(
    f"Accuracy rand_forest: {accuracy_percent_rand_forest:.2f}%"
)
print(
    f"Best parameters: {grid_search.best_params_}"
)

Accuracy rand_forest: 81.52%
Best parameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}


### Hyperparameter Tuning: AdaBoost

In [34]:
# Weak learner that AdaBoost boosts. Its max_depth here is only a starting
# value: the grid below overrides it through 'estimator__max_depth'.
base_estimator = DecisionTreeClassifier(max_depth=1, random_state=42)

# Candidate hyperparameter values (3 * 3 * 3 = 27 combinations).
# NOTE: this reuses the name `param_grid` from the random-forest section.
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 1.0],
    estimator__max_depth=[1, 2, 3],
)

# Template booster handed to the search; the seed is fixed for repeatability.
AdaBoost = AdaBoostClassifier(estimator=base_estimator, random_state=42)

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split. NOTE: this rebinds `grid_search`, replacing the
# random-forest search object.
grid_search = GridSearchCV(
    estimator=AdaBoost,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [35]:
# Take the best estimator found by the AdaBoost grid search
# (`grid_search` now refers to the AdaBoost search, not the random-forest one).
best_AdaBoost = grid_search.best_estimator_

In [36]:
# Bare expression: renders the selected AdaBoost model as the cell output.
best_AdaBoost

In [37]:
# Predict labels for the held-out test split with the selected AdaBoost model.
y_pred_AdaBoost = best_AdaBoost.predict(X_test)

In [38]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_AdaBoost = accuracy_score(y_test, y_pred_AdaBoost)

# Same score expressed as a percentage for display.
accuracy_percent_AdaBoost = accuracy_AdaBoost * 100

# Report the score together with the winning hyperparameter combination.
print(
    f"Accuracy AdaBoost: {accuracy_percent_AdaBoost:.2f}%"
)
print(
    f"Best parameters: {grid_search.best_params_}"
)

Accuracy AdaBoost: 80.98%
Best parameters: {'estimator__max_depth': 2, 'learning_rate': 0.01, 'n_estimators': 200}


In [44]:
# Persist the tuned, fitted models so they can be loaded for inference later.
#
# Save `best_rand_forest` / `best_AdaBoost`, not `rand_forest` / `AdaBoost`:
# GridSearchCV fits clones of the estimator it is given, so those template
# objects are never fitted and a pickle of them cannot predict.

# Save Random Forest model
joblib.dump(best_rand_forest, "rand_forest_model.pkl")

# Save AdaBoost model (last expression: the written file name is the cell output)
joblib.dump(best_AdaBoost, "adaboost_model.pkl")

['adaboost_model.pkl']