# Cross-Validation and Out of Bag Error

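Cross-validation and out-of-bag (OOB) error are techniques for evaluating a model on every observation in the dataset, rather than relying on a single train/test split. In both cases each observation is scored only by a model (or by trees) that did not see it during training.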

In [5]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict

In [6]:
# Read the iris data from disk and preview the first five rows
df = pd.read_csv(filepath_or_buffer='iris.csv')
df.head(n=5)

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### Separate our Predictors from our Label Column

In [7]:
# Predictors: every column except the label. `columns=` is used because passing
# the axis positionally (`df.drop('Name', 1)`) raises a TypeError in pandas >= 2.0.
X = df.drop(columns='Name')
# Label: the 'Name' column holding the class of each row
y = df['Name']

## Cross-Validation

In [9]:
# model -- a fixed random_state makes every run reproducible. Because an integer
# `cv` yields the same unshuffled stratified folds on each call, it also means
# cross_val_score() and cross_val_predict() below train identical forests on
# identical folds, so the confusion matrix corresponds to the printed scores.
forest = RandomForestClassifier(n_estimators=100, random_state=42)

# use cross_val_score() to get the score for each of the 10 train/test splits
scores = cross_val_score(forest, X, y, cv=10)
print(scores)
print()
print('Average score:', np.mean(scores))

# use cross_val_predict() to get the held-out prediction for every row,
# which is what is needed to build a confusion matrix
cv_predictions = cross_val_predict(forest, X, y, cv=10)

# confusion matrix
pd.crosstab(y, cv_predictions, rownames=['Actual'], colnames=['Predicted:'], margins=True)

[1.         0.93333333 1.         0.93333333 0.93333333 0.93333333
 0.86666667 1.         1.         1.        ]

Average score: 0.96


Predicted:,Iris-setosa,Iris-versicolor,Iris-virginica,All
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Iris-setosa,50,0,0,50
Iris-versicolor,0,47,3,50
Iris-virginica,0,3,47,50
All,50,50,50,150


## Out of Bag Error

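**Warning:** Out-of-bag error is only available for ensembles that train each member on a bootstrap sample of the data (e.g. random forests and bagging), because it is computed from the observations left out of each member's bootstrap sample.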

In [10]:
# model -- the only change is to set the 'oob_score' argument to 'True';
# random_state pins the bootstrap samples so the reported score is reproducible
oob_forest = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=42)

# train
oob_forest.fit(X, y)

# Out of bag score: accuracy measured on the rows each tree did not train on.
# The out of bag *error* is 1 - oob_score_.
oob_forest.oob_score_

0.96