In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

### <div class='alert alert-info'>getting the data ready

In [2]:
df=pd.read_csv("heart-disease.csv")
df.head(2)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1


In [3]:
x=df.drop('target', axis=1)
y=df['target']

x.head(2)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2


In [4]:
y.head()

0    1
1    1
2    1
3    1
4    1
Name: target, dtype: int64

### <div class='alert alert-info'> Selecting the model </div>

In [5]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()

### <div class='alert alert-info'>training and testing the data</div>

In [6]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y , test_size=.1)

### <div class='alert alert-info'> Predicting the data</div>

- ### <font color='purple'> Finding the pattern within the data

In [7]:
model.fit(x_train, y_train);

- ### <font color='purple'>Making the predicting</font>

In [8]:
data_predict = model.predict(x_test)

In [9]:
data_predict

array([1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 0], dtype=int64)

### <div class='alert alert-info'>Evaluate the model</div>

In [10]:
model.score(x_train, y_train)

1.0

In [11]:
model.score(x_test, y_test)

0.8709677419354839

- ### <font color='purple'> Selecting the model for the evaluation </font>

In [12]:
from sklearn.metrics import accuracy_score, classification_report, precision_score

In [13]:
accuracy_score(y_test, data_predict)

0.8709677419354839

In [14]:
precision_score(y_test, data_predict)

0.9411764705882353

In [15]:
classification_report(y_test, data_predict)

'              precision    recall  f1-score   support\n\n           0       0.79      0.92      0.85        12\n           1       0.94      0.84      0.89        19\n\n    accuracy                           0.87        31\n   macro avg       0.86      0.88      0.87        31\nweighted avg       0.88      0.87      0.87        31\n'

### <div class='alert alert-info'>Improving the model</div>

In [16]:
np.random.seed(100)

for i in range(10, 100, 10):
    print(f'The testing is done one {i} estimators')
    model=RandomForestClassifier(n_estimators=i).fit(x_train, y_train)
    print(f'Model accuracy on test data set{model.score(x_test, y_test)}')

The testing is done one 10 estimators
Model accuracy on test data set0.8709677419354839
The testing is done one 20 estimators
Model accuracy on test data set0.8387096774193549
The testing is done one 30 estimators
Model accuracy on test data set0.9032258064516129
The testing is done one 40 estimators
Model accuracy on test data set0.9032258064516129
The testing is done one 50 estimators
Model accuracy on test data set0.9032258064516129
The testing is done one 60 estimators
Model accuracy on test data set0.8387096774193549
The testing is done one 70 estimators
Model accuracy on test data set0.8709677419354839
The testing is done one 80 estimators
Model accuracy on test data set0.9354838709677419
The testing is done one 90 estimators
Model accuracy on test data set0.9032258064516129


- #### <font color='purple'>The highest accuracy was when the n_estimators value was 60

### <div class='alert alert-info'>Saving a model then load </div>

- ### <font color='purple'>Saving into pickle format </font>

In [17]:
import pickle

pickle.dump(model, open("Basic_model_prediction_1.pkl", 'wb'))

- ### <font color='purple'>Loading the data</font>

In [18]:
loaded_model = pickle.load(open("Basic_model_prediction_1.pkl", 'rb'))
loaded_model.score(x_test, y_test)

0.9032258064516129