In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [2]:
df = pd.read_csv('../Data/Raw/heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [3]:
df['cp'].unique()

array([0, 1, 2, 3], dtype=int64)

In [4]:
df['fbs'].unique()

array([0, 1], dtype=int64)

In [5]:
df['slope'].unique()

array([2, 0, 1], dtype=int64)

In [6]:
df['thal'].unique()

array([3, 2, 1, 0], dtype=int64)

In [7]:
df.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [8]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [9]:
X=df[['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal']]
y=df['target']

In [10]:
X_train,X_test , y_train, y_test = train_test_split(X, y, test_size=0.3 ,random_state= 228)

In [11]:
rand_forest= RandomForestClassifier()
rf_grid={
    
'n_estimators':[10,30,50,60,70,90],
'max_depth': [100,500,1000,2000,5000,10000]
}
RF_grid=GridSearchCV(rand_forest,rf_grid,cv=10)
RF_grid.fit(X,y)

In [12]:
RF_grid.best_params_

{'max_depth': 500, 'n_estimators': 10}

In [13]:
random_forest=RandomForestClassifier(max_depth=100, n_estimators=10)
random_forest.fit(X_train.values,y_train.values)
random_forest.score(X_test,y_test)



0.9837662337662337

In [14]:
random_forest.predict

<bound method ForestClassifier.predict of RandomForestClassifier(max_depth=100, n_estimators=10)>

In [15]:
import pickle
filename = '../Models/heart_model.pkl'
pickle.dump(random_forest, open(filename, 'wb'))

In [16]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
y_pred = random_forest.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1 score:', f1)

Accuracy: 0.9837662337662337
Precision: 1.0
Recall: 0.9659863945578231
F1 score: 0.9826989619377162


