In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
heart_disease=pd.read_csv("data/heart-disease.csv")

X=heart_disease.drop('target',axis=1)
y=heart_disease["target"]

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier


np.random.seed(50)

clf=RandomForestClassifier()

cv_acc=cross_val_score(clf,X,y,cv=5)
cv_acc

array([0.83606557, 0.8852459 , 0.81967213, 0.85      , 0.75      ])

In [3]:
# Cross validated accuracy
print(f"The cross validation accuracy is: {np.mean(cv_acc)*100:.2f} %")

The cross validation accuracy is: 82.82 %


In [6]:
# Precision
np.random.seed(50)
cv_pre=cross_val_score(clf,X,y,cv=5,scoring='precision')
print(f"The precision is: {np.mean(cv_pre)*100:.2f} %")

The precision is: 83.01 %


In [5]:
# Recall
np.random.seed(50)
cv_recall=cross_val_score(clf,X,y,cv=5,scoring='recall')
print(f"The recall is: {np.mean(cv_recall)*100:.2f} %")

The recall is: 86.67 %


In [9]:
# F1 score
np.random.seed(50)
cv_f1=cross_val_score(clf,X,y,cv=5,scoring='f1')
print(f"The f1 score is: {np.mean(cv_f1):.2f} %")

The f1 score is: 0.85 %


### Regression model metrics using scoring paramater

In [15]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_boston

boston=load_boston()

X=pd.DataFrame(boston["data"],columns=boston["feature_names"])
y=boston["target"]

np.random.seed(42)
model=RandomForestRegressor();

R2_score=cross_val_score(model,X,y,scoring=None)
R2_score

array([0.77231143, 0.86035935, 0.74664002, 0.47632078, 0.26630379])

In [17]:
np.random.seed(42)
cross_val_score(model,X,y,scoring="neg_mean_absolute_error")

array([-2.11419608, -2.58716832, -3.33976238, -3.78563366, -3.32941584])

In [19]:
np.random.seed(42)
cross_val_score(model,X,y,scoring="neg_mean_squared_error")

array([ -7.86326927, -12.71243178, -20.29089194, -45.93287403,
       -19.50774726])

## Using evaluation metrics directly from scikit-learn function

### Classification 

In [68]:
from sklearn.metrics import accuracy_score,f1_score,recall_score,precision_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

heart_disease=pd.read_csv("data/heart-disease.csv")

X=heart_disease.drop('target',axis=1)
y=heart_disease["target"]

np.random.seed(69)

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)
clf=RandomForestClassifier().fit(X_train,y_train)

y_preds=clf.predict(X_test)

# Metrics
print("Classifier metrics:")
print(f"Accuracy: {accuracy_score(y_test,y_preds)*100:.3f}%")
print(f"Precision: {precision_score(y_test,y_preds):.3f}")
print(f"Recall: {recall_score(y_test,y_preds):.3f}")
print(f"F1 Score: {f1_score(y_test,y_preds):.3f}")

Classifier metrics:
Accuracy: 85.246%
Precision: 0.871
Recall: 0.844
F1 Score: 0.857


### Regression

In [70]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_boston

boston=load_boston()

np.random.seed(45)

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

X=pd.DataFrame(boston["data"],columns=boston["feature_names"])
y=boston["target"]


model=RandomForestRegressor().fit(X_train,y_train);

y_preds=model.predict(X_test)

# Metrics
print("Regressor metrics:")
print(f"R2 score: {r2_score(y_test,y_preds):.3f}")
print(f"Mean absolute Error: {mean_absolute_error(y_test,y_preds):.3f}")
print(f"Mean Squared Error: {mean_squared_error(y_test,y_preds):.3f}")

Regressor metrics:
R2 score: 0.907
Mean absolute Error: 2.377
Mean Squared Error: 10.081
