# K fold

In [2]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import numpy as np

In [3]:
# Load the iris dataset bundle and pull out the feature matrix and the
# class-label vector used by every cross-validation example below.
iris = load_iris()
features, label = iris.data, iris.target

In [5]:
# Fixed seed so the decision tree is built the same way on every run.
DT_RANDOM_STATE = 156
dt_clf = DecisionTreeClassifier(random_state=DT_RANDOM_STATE)

In [7]:
# Use 5 folds (n_splits is the number of folds, not the size of each fold).
# shuffle=False is KFold's default and is only spelled out here for clarity.
kfold = KFold(n_splits=5, shuffle=False)
# Collects one accuracy value per fold.
cv_acc = list()

In [8]:
# Inspect the dimensions of the feature array before splitting it into folds.
features.shape

(150, 4)

In [9]:
# Reset the accumulator inside this cell so that re-running it does not append
# a second set of scores on top of the results from a previous execution.
cv_acc = []

for train_idx, test_idx in kfold.split(features):
    # The splitter yields index arrays; use them to select this fold's rows.
    X_train, X_test = features[train_idx], features[test_idx]
    y_train, y_test = label[train_idx], label[test_idx]

    # Train on this fold's training rows, then score on the held-out rows.
    dt_clf.fit(X_train, y_train)
    pred = dt_clf.predict(X_test)
    acc = accuracy_score(y_test, pred)
    # Per-fold accuracy is stored rounded to 4 decimals.
    cv_acc.append(np.round(acc, 4))

In [10]:
# Display the per-fold accuracies collected by the K-fold loop
# (each value was rounded to 4 decimals when it was appended).
cv_acc

[1.0, 0.9667, 0.8667, 0.9333, 0.7333]

In [11]:
# Average the per-fold accuracies into a single cross-validation score.
mean_acc = np.mean(cv_acc)
print('average accuracy: ', mean_acc)

average accuracy:  0.9


# Stratified K fold

In [12]:
from sklearn.model_selection import StratifiedKFold

In [13]:
# Stratified splitter with 3 folds; unlike KFold.split, its split() call is
# given the labels as well as the features.
skfold = StratifiedKFold(n_splits=3)
# Fresh accumulator: one accuracy value per fold.
cv_acc = []

for train_idx, test_idx in skfold.split(features, label):
    # Training rows for this fold.
    X_train, y_train = features[train_idx], label[train_idx]
    # Held-out rows for this fold.
    X_test, y_test = features[test_idx], label[test_idx]

    # fit() returns the estimator itself, so prediction can be chained onto it.
    pred = dt_clf.fit(X_train, y_train).predict(X_test)
    acc = accuracy_score(y_test, pred)
    cv_acc.append(np.round(acc, 4))

mean_acc = np.mean(cv_acc)
print('average accuracy: ', mean_acc)

average accuracy:  0.9666666666666667


# cross_val_score()

In [14]:
from sklearn.model_selection import cross_val_score, cross_validate

In [16]:
# cross_val_score performs the split / fit / predict / score loop internally
# and returns one accuracy value per fold (3 folds here).
scores = cross_val_score(
    estimator=dt_clf,
    X=features,
    y=label,
    scoring='accuracy',
    cv=3,
)
mean_score = np.round(scores.mean(), 4)
print('Accuracy per set: ', scores)
print('Average accuracy: ', mean_score)

Accuracy per set:  [0.98 0.94 0.98]
Average accuracy:  0.9667
