# Machine Learning with Python Cookbook
# Ch 11: Model Evaluation

## 11.1 Cross-Validating Models
Create a pipeline that preprocesses the data, trains the model, and evaluates it using cross-validation:

In [1]:
# Load libraries
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [2]:
# Load scikit-learn's bundled digits dataset
digits = datasets.load_digits()

In [3]:
# Feature matrix: the `data` attribute of the loaded dataset
features = digits.data

In [4]:
# Target vector: the `target` attribute of the loaded dataset
target = digits.target

In [5]:
# Create the standardizer (unfitted here; it only learns its parameters when fit)
standardizer = StandardScaler()

In [6]:
# Create a logistic regression classifier with default settings
logit = LogisticRegression()

In [7]:
# Create a pipeline that standardizes, then runs logistic regression,
# so both steps are handled together as one estimator
pipeline = make_pipeline(standardizer, logit)

In [8]:
# Create the k-fold splitter: 10 folds, shuffled before splitting,
# with a fixed seed so the fold assignment is reproducible
kf = KFold(n_splits=10, shuffle=True, random_state=1)

In [9]:
# Conduct k-fold cross-validation; returns one score per fold
cv_results = cross_val_score(pipeline, # Pipeline
                             features, # Feature matrix
                             target, # Target vector
                             cv=kf, # Cross-validation splitter (the KFold object)
                             scoring="accuracy", # Performance metric
                             n_jobs=-1) # Use all CPU cores

In [10]:
# Mean accuracy across the 10 folds
cv_results.mean()

0.9693916821849783

In [11]:
# View the individual accuracy score of each of the 10 folds
cv_results

array([0.97777778, 0.98888889, 0.96111111, 0.94444444, 0.97777778,
       0.98333333, 0.95555556, 0.98882682, 0.97765363, 0.93854749])

When we `fit` our standardization object, `standardizer`, we calculate the mean and variance of only the training set. Then we apply that transformation (using `transform`) to both the training and test sets:

In [12]:
# Import library
from sklearn.model_selection import train_test_split

In [13]:
# Hold out 10% of the observations as a test set (seeded so the split is reproducible)
(features_train, features_test,
 target_train, target_test) = train_test_split(features,
                                               target,
                                               test_size=0.1,
                                               random_state=1)

In [14]:
# Fit standardizer to the training set only, so the scaling parameters
# are learned without looking at the test observations
standardizer.fit(features_train)

StandardScaler()

In [15]:
# Apply the training-set scaling to both the training and test sets
features_train_std = standardizer.transform(features_train)
features_test_std = standardizer.transform(features_test)

First, create a pipeline that preprocesses the data (e.g. `standardizer`) and then trains a model (logistic regression, `logit`):

In [16]:
# Create a pipeline that chains the standardizer and the logistic regression
# (rebinds `pipeline` using the same two step objects as the earlier pipeline cell)
pipeline = make_pipeline(standardizer, logit)

Then run k-fold cross-validation (KFCV) using that pipeline (scikit-learn does all the work for us):

In [17]:
# Do k-fold cross-validation; returns one score per fold
cv_results = cross_val_score(pipeline, # Pipeline
                             features, # Feature matrix
                             target, # Target vector
                             cv=kf, # Cross-validation splitter (the KFold object)
                             scoring="accuracy", # Performance metric
                             n_jobs=-1) # Use all CPU cores

## 11.2 Creating a Baseline Regression Model
Use sklearn's `DummyRegressor` to create a simple model to use as a baseline:

In [18]:
# Load libraries
from sklearn.datasets import load_boston
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import train_test_split

In [19]:
# Load the Boston housing data through scikit-learn's bundled loader
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell (and its import) fails on current releases. Either pin
# scikit-learn < 1.2 or switch to another regression dataset and re-run the outputs.
boston = load_boston()

In [20]:
# Create features
features, target = boston.data, boston.target

In [21]:
# Make test and training split
features_train, features_test, target_train, target_test = train_test_split(
    features, target, random_state=0)

In [22]:
# Create a dummy regressor with default settings
# (per the scikit-learn docs the default strategy predicts the training-target mean)
dummy = DummyRegressor()

In [23]:
# "Train" dummy regressor; it only needs the training targets to form its prediction
dummy.fit(features_train, target_train)

DummyRegressor()

In [24]:
# Get R-squared score on the held-out test set; this is the baseline to beat
dummy.score(features_test, target_test)

-0.001119359203955339

To compare, train our model and evaluate the performance score:

In [25]:
# Load library
from sklearn.linear_model import LinearRegression

In [26]:
# Train simple linear regression model on the same training split as the baseline
ols = LinearRegression()
ols.fit(features_train, target_train)

LinearRegression()

In [27]:
# Get R-squared score on the same test set, for comparison with the dummy baseline
ols.score(features_test, target_test)

0.6354638433202118

In [28]:
# Create dummy regressor that predicts 20's for everything
# (the variable is named `clf`, but it holds a regressor, not a classifier)
clf = DummyRegressor(strategy='constant', constant=20)
clf.fit(features_train, target_train)

DummyRegressor(constant=array(20), strategy='constant')

In [29]:
# Evaluate R-squared of the constant prediction on the test set
clf.score(features_test, target_test)

-0.06510502029325727

## 11.3 Creating a Baseline Classification Model
Use sklearn's `DummyClassifier` to create a simple baseline classifier to compare against your model:

In [30]:
# Load libraries
from sklearn.datasets import load_iris
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split

In [31]:
# Load data
iris = load_iris()

In [32]:
# Create target vector and feature matrix
features, target = iris.data, iris.target

In [33]:
# Split into training and test set
features_train, features_test, target_train, target_test = train_test_split(
    features, target, random_state=0)

In [34]:
# Create dummy classifier using the 'uniform' strategy (random guesses spread
# evenly over the classes, per the scikit-learn docs); seeded for reproducibility
dummy = DummyClassifier(strategy='uniform', random_state=1)

In [35]:
# "Train" model on the training split
dummy.fit(features_train, target_train)

DummyClassifier(random_state=1, strategy='uniform')

In [36]:
# Get accuracy score on the test set; this is the baseline a real classifier should beat
dummy.score(features_test, target_test)

0.42105263157894735

## 11.4 Evaluating Binary Classifier Predictions
Measure accuracy in five-fold (the default number of folds) cross-validation by setting `scoring='accuracy'`:

### Accuracy

In [37]:
# Load libraries
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification

In [38]:
# Generate features matrix and target vector
X, y = make_classification(n_samples=10000, 
                           n_features=3, 
                           n_informative=3, 
                           n_redundant=0, 
                           n_classes=2, 
                           random_state=1)

In [39]:
# Create logistic regression
logit = LogisticRegression()

In [40]:
# Cross-validate model using accuracy
cross_val_score(logit, X, y, scoring='accuracy')

array([0.9555, 0.95  , 0.9585, 0.9555, 0.956 ])

In [41]:
cross_val_score(logit, X, y, scoring='accuracy').mean()

0.9550999999999998

### Precision

In [42]:
# Cross-validate model using precision
cross_val_score(logit, X, y, scoring='precision')

array([0.95963673, 0.94820717, 0.9635996 , 0.96149949, 0.96060606])

In [43]:
cross_val_score(logit, X, y, scoring='precision').mean()

0.9587098102922853

### Recall

In [44]:
# Cross-validate model using recall
cross_val_score(logit, X, y, scoring='recall')

array([0.951, 0.952, 0.953, 0.949, 0.951])

In [45]:
cross_val_score(logit, X, y, scoring='recall').mean()

0.9511999999999998

### F<sub>1</sub> Score
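The F<sub>1</sub> score is the harmonic mean of precision and recall, $F_1 = 2 \times \frac{\text{precision} \times \text{recall}}{\text{precision} + \text{recall}}$. It balances the correctness of positive predictions (of observations predicted to be positive, how many are actually positive) against their completeness (of observations that are actually positive, how many were predicted to be positive):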

In [46]:
# Cross-validate model using F1
cross_val_score(logit, X, y, scoring='f1')

array([0.95529884, 0.9500998 , 0.95827049, 0.95520886, 0.95577889])

In [47]:
cross_val_score(logit, X, y, scoring='f1').mean()

0.954931376985931

If we already have the true y values and the predicted y values, we can calculate metrics like accuracy and recall directly:

In [48]:
# Load library
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [49]:
# Reserve 10% of the simulated observations for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=1
)

In [50]:
# Fit the model on the training split, then predict labels for the test features
y_hat = logit.fit(X_train, y_train).predict(X_test)

In [51]:
# Calculate accuracy of the predictions against the true test labels
accuracy_score(y_test, y_hat)

0.947