In [1]:
%autosave 1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

heart_disease = pd.read_csv("resources/heart-disease.csv")
heart_disease.head()

Autosaving every 1 seconds


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Using the `scoring` parameter

In [2]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

np.random.seed(42)

X = heart_disease.drop("target", axis=1)
y = heart_disease["target"]

clf = RandomForestClassifier(n_estimators=100)

In [3]:
np.random.seed(42)

# Cross-validation accuracy. Since it's none for classification models accuracy is used.
cv_acc = cross_val_score(clf, X, y, cv=5, scoring=None)
cv_acc

array([0.81967213, 0.90163934, 0.83606557, 0.78333333, 0.78333333])

In [4]:
print(f'The cross-validated accuracy is: {np.mean(cv_acc) * 100:.2f}%')

The cross-validated accuracy is: 82.48%


In [5]:
np.random.seed(42)

cv_acc = cross_val_score(clf, X, y, cv=5, scoring="accuracy")
cv_acc

array([0.81967213, 0.90163934, 0.83606557, 0.78333333, 0.78333333])

In [6]:
print(f'The cross-validated accuracy is: {np.mean(cv_acc) * 100:.2f}%')

The cross-validated accuracy is: 82.48%


In [7]:
np.random.seed(42)

cv_precision = cross_val_score(clf, X, y, cv=5, scoring="precision")
cv_precision

array([0.82352941, 0.93548387, 0.84848485, 0.79411765, 0.76315789])

In [8]:
print(f'The cross-validated precision is: {np.mean(cv_precision) * 100:.2f}%')

The cross-validated precision is: 83.30%


In [9]:
np.random.seed(42)

cv_recall = cross_val_score(clf, X, y, cv=5, scoring="recall")
cv_recall

array([0.84848485, 0.87878788, 0.84848485, 0.81818182, 0.87878788])

In [10]:
print(f'The cross-validated recall is: {np.mean(cv_recall) * 100:.2f}%')

The cross-validated recall is: 85.45%


### Regression Models

In [11]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import fetch_california_housing 

np.random.seed(42)

housing = fetch_california_housing()
housing_df = pd.DataFrame(housing["data"], columns=housing["feature_names"])

X = housing_df
y = housing["target"]

clf = RandomForestRegressor()

In [12]:
np.random.seed(42)

cv_r2 = cross_val_score(clf, X, y, cv=3, scoring=None)
cv_r2

array([0.62156985, 0.72075819, 0.62130937])

In [13]:
print(f'The cross-validated r^2 is: {np.mean(cv_r2) * 100:.2f}%')

The cross-validated r^2 is: 65.45%


In [14]:
np.random.seed(42)

cv_mae = cross_val_score(clf, X, y, cv=3, scoring="neg_mean_squared_error")
cv_mae

array([-0.51017222, -0.33368897, -0.5403795 ])

In [15]:
print(f'The cross-validated MAE is: {np.mean(cv_mae) * 100:.2f}%')

The cross-validated MAE is: -46.14%


In [16]:
np.random.seed(42)

cv_mse = cross_val_score(clf, X, y, cv=3, scoring="neg_mean_absolute_error")
cv_mse

array([-0.51754565, -0.42678172, -0.50640477])

In [17]:
print(f'The cross-validated MSE is: {np.mean(cv_mse) * 100:.2f}%')

The cross-validated MSE is: -48.36%
