# 3. Fit the model/algorithm on our data and use it to make predictions

## 3.1 Fitting the model to the data

Different names for:
* `X` = features, feature variables, data
* `y` = labels, target, target variables

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

In [2]:
# Read the heart-disease CSV into a DataFrame and preview its first rows.
heart_disease = pd.read_csv(filepath_or_buffer="assets/csv/heart-disease.csv")
heart_disease.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
from sklearn.ensemble import RandomForestClassifier

# Seed NumPy's global RNG before any randomised step in this cell
# (presumably what makes the split and the forest repeatable on re-run).
np.random.seed(42)

# Separate the label column (y) from the remaining feature columns (X).
y = heart_disease["target"]
X = heart_disease.drop(columns="target")

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit a random forest with default hyperparameters on the training rows,
# then display its score on the held-out rows.
clf = RandomForestClassifier().fit(X_train, y_train)
clf.score(X_test, y_test)

0.8524590163934426

In [4]:
# Preview the feature matrix (every column except "target").
X.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2


In [5]:
# Preview the label Series (the "target" column).
y.head(n=5)

0    1
1    1
2    1
3    1
4    1
Name: target, dtype: int64

## 3.2 Make predictions using a machine learning model

In [14]:
# Show the predicted labels next to the true labels for the same test rows.
# NOTE(review): the output below lists 61 values, so the 62 bound appears to
# cover the whole test set -- confirm.
(
    clf.predict(X_test.iloc[:62]),
    y_test.iloc[:62].to_numpy(),
)

(array([0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0]),
 array([0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0,
        0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0]))

In [15]:
# Show the per-class probability estimates (one row per sample) next to the
# true labels for the same test rows.
(
    clf.predict_proba(X_test.iloc[:62]),
    y_test.iloc[:62].to_numpy(),
)

(array([[0.89, 0.11],
        [0.49, 0.51],
        [0.43, 0.57],
        [0.84, 0.16],
        [0.18, 0.82],
        [0.14, 0.86],
        [0.36, 0.64],
        [0.95, 0.05],
        [0.99, 0.01],
        [0.47, 0.53],
        [0.26, 0.74],
        [0.7 , 0.3 ],
        [0.11, 0.89],
        [0.95, 0.05],
        [0.03, 0.97],
        [0.02, 0.98],
        [0.01, 0.99],
        [0.84, 0.16],
        [0.95, 0.05],
        [0.98, 0.02],
        [0.51, 0.49],
        [0.89, 0.11],
        [0.38, 0.62],
        [0.29, 0.71],
        [0.26, 0.74],
        [0.34, 0.66],
        [0.2 , 0.8 ],
        [0.22, 0.78],
        [0.83, 0.17],
        [0.15, 0.85],
        [0.94, 0.06],
        [0.92, 0.08],
        [0.96, 0.04],
        [0.62, 0.38],
        [0.46, 0.54],
        [0.89, 0.11],
        [0.44, 0.56],
        [0.16, 0.84],
        [0.33, 0.67],
        [0.08, 0.92],
        [0.13, 0.87],
        [0.17, 0.83],
        [0.18, 0.82],
        [0.38, 0.62],
        [0.32, 0.68],
        [0