Import libraries

In [1]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


Import datasets

In [2]:
dataset = pd.read_csv('heart_new.csv') 
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

Splitting into training and test sets

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, stratify=y)

Training the model

In [4]:
from sklearn.model_selection import GridSearchCV
n_trees = list(range(50,100,10))
depth = list(range(4,7))
split = list(range(2,4))

param_grid = {'max_depth':depth,'min_samples_split':split,'n_estimators':n_trees}
rf_grid = RandomForestClassifier(n_jobs=-1)
gird_search_rf = GridSearchCV(rf_grid,param_grid,cv=5,verbose=1)
gird_search_rf.fit(X_train,y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits


GridSearchCV(cv=5, estimator=RandomForestClassifier(n_jobs=-1),
             param_grid={'max_depth': [4, 5, 6], 'min_samples_split': [2, 3],
                         'n_estimators': [50, 60, 70, 80, 90]},
             verbose=1)

Make predictions

In [None]:
print("Best score: ",gird_search_rf.best_score_)
print("Best parameters: ",gird_search_rf.best_params_)

Best score:  0.8750026446630699
Best parameters:  {'max_depth': 5, 'min_samples_split': 3, 'n_estimators': 90}


In [None]:
rf_best = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=5, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, 
            min_samples_split=5, n_estimators=90, n_jobs=-1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
rf_best.fit(X_train, y_train)
print("Training set score : ",rf_best.score(X_train,y_train))
print("Test set score : ",rf_best.score(X_test,y_test))
y_pred = rf_best.predict(X_test)


Training set score :  0.9113372093023255
Test set score :  0.8782608695652174
Y pred:  [0 0 1 1 1 0 1 1 1 1 0 0 1 1 1 0 1 1 1 0 1 1 0 1 1 0 0 0 1 1 1 0 0 1 1 0 0
 1 1 1 0 0 0 1 0 1 0 1 0 1 1 1 0 1 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 1 1 0 1 1
 1 0 1 1 0 1 0 0 1 0 1 0 1 0 1 1 0 0 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 0
 1 1 1 0 0 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 0 0 0 0 0 1 1 0 1 0
 0 0 1 1 1 1 0 0 1 1 0 1 0 0 0 0 0 0 1 1 1 0 1 1 0 1 1 1 1 0 1 0 0 0 1 1 1
 1 1 1 1 0 1 1 0 0 0 1 0 0 1 0 0 1 0 1 1 0 1 1 0 1 1 0 0 0 0 0 0 1 1 1 0 0
 0 1 1 1 0 0 0 0]


Confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(accuracy_score(y_test, y_pred))  #86.08% / 87.82% accurate

[[ 89  14]
 [ 14 113]]
0.8782608695652174
