# *k*-Fold Cross Validation

In [None]:
from sklearn.model_selection import KFold

# X, y: feature and label arrays; expected to be defined before this cell
# and to support indexing with integer index arrays (e.g. NumPy arrays).

dt_clf = DecisionTreeClassifier(random_state=12345)

# 5-fold CV without shuffling.
kfold = KFold(n_splits=5, shuffle=False)

# Per-fold scores, collected so the mean can be reported after the loop.
cv_accuracy = []

# enumerate(start=1) numbers the folds for the printout, so no counter
# has to be incremented by hand.
for fold, (train_index, test_index) in enumerate(kfold.split(X), start=1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # The same estimator object is refit on every fold's training part
    # and scored on that fold's held-out part.
    dt_clf.fit(X_train, y_train)
    accuracy = dt_clf.score(X_test, y_test)

    print(f'{fold}번째 Cross Validation 정확도: {accuracy:.2%}')
    cv_accuracy.append(accuracy)

print(f'''-------------------------------------------
Cross Validation 정확도 평균: {np.mean(cv_accuracy):.2%}''')

# Stratified *k*-Fold Cross Validation

In [None]:
from sklearn.model_selection import StratifiedKFold

dt_clf = DecisionTreeClassifier(random_state=12345)

# Stratified 5-fold CV without shuffling.
skfold = StratifiedKFold(n_splits=5, shuffle=False)

# Per-fold scores, collected so the mean can be reported after the loop.
cv_accuracy = []

# The labels are passed to split() as well, since the stratified splitter
# needs them to build the folds. enumerate(start=1) numbers the folds.
for fold, (train_index, test_index) in enumerate(skfold.split(X, y), start=1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # The same estimator object is refit on every fold's training part
    # and scored on that fold's held-out part.
    dt_clf.fit(X_train, y_train)
    accuracy = dt_clf.score(X_test, y_test)

    print(f'{fold}번째 Cross Validation 정확도: {accuracy:.2%}')
    cv_accuracy.append(accuracy)

print(f'''-------------------------------------------
Cross Validation 정확도 평균: {np.mean(cv_accuracy):.2%}''')

# GridSearch Cross Validation

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid: 4 depths x 3 split sizes = 12 candidate settings.
parameters = {
    'max_depth': range(1, 5),
    'min_samples_split': range(2, 5),
}

# Exhaustive search over the grid with 3-fold CV; refit=True refits the
# estimator with the best setting once the search is done.
# NOTE(review): X_train / y_train are whatever was last assigned to those
# names -- in the visible cells that is the final fold of the CV loop above.
# Confirm this is the intended training set.
grid_dt = GridSearchCV(
    dt_clf,
    param_grid=parameters,
    cv=3,
    refit=True,
    return_train_score=True,
)
grid_dt.fit(X_train, y_train)

# Tabulate the search results and show only the columns of interest
# (the bare expression on the last line is displayed by the notebook).
result_columns = [
    'params', 'mean_test_score', 'rank_test_score',
    'split0_test_score', 'split1_test_score', 'split2_test_score',
]
scores_df = pd.DataFrame(grid_dt.cv_results_)
scores_df[result_columns]

In [None]:
# Report the best hyperparameter setting found by the grid search and its
# score. The value is interpolated into the f-string (the original used an
# f-string with no placeholder); the printed text is unchanged.
print(f'best params: {grid_dt.best_params_}')
print(f'best score {grid_dt.best_score_:.2%}')

# RandomSearch Cross Validation

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized search: tries n_iter=10 candidate settings taken from
# `parameters` (defined in an earlier cell), each evaluated with 3-fold CV.
# random_state fixes the sampling; n_jobs=-1 requests parallel execution.
# NOTE(review): X_train / y_train are whatever was last assigned to those
# names in earlier cells -- confirm this is the intended training set.
random_dt = RandomizedSearchCV(
    estimator=dt_clf,
    param_distributions=parameters,
    n_iter=10,
    cv=3,
    random_state=12345,
    n_jobs=-1,
)
random_dt.fit(X_train, y_train)

# Tabulate the search results and show only the columns of interest
# (the bare expression on the last line is displayed by the notebook).
result_columns = [
    'params', 'mean_test_score', 'rank_test_score',
    'split0_test_score', 'split1_test_score', 'split2_test_score',
]
scores_df = pd.DataFrame(random_dt.cv_results_)
scores_df[result_columns]

In [None]:
# Report the best hyperparameter setting found by the randomized search and
# its score. The value is interpolated into the f-string (the original used
# an f-string with no placeholder); the printed text is unchanged.
print(f'best params: {random_dt.best_params_}')
print(f'best score {random_dt.best_score_:.2%}')