# Chapter: 4
## Section: Model validation strategy

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
# importing different cross-validation functions
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# Compare three validation strategies for the same random forest on the
# breast-cancer dataset: a single hold-out split, plain k-fold CV and
# stratified k-fold CV. Each strategy reports ROC-AUC.

# Seed for the random forest itself. A separate seed drives the data splits
# so model randomness and split randomness can be varied independently.
model_random_state = 42
# Misspelled original name, kept as an alias in case other cells still reference it.
modle_random_state = model_random_state
# Single seed shared by the hold-out split and both CV splitters.
split_random_state = 10

X, y = load_breast_cancer(return_X_y=True)
rf_init = RF(random_state=model_random_state)

# validating using hold-out validation (30% of the samples held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=split_random_state)
# fit() returns the estimator itself, so rf_fit and rf_init refer to the same
# (now fitted) object.
rf_fit = rf_init.fit(X_train, y_train)
# ROC-AUC is computed from scores rather than hard labels: take the second
# column of predict_proba, i.e. the predicted probability of class label 1.
holdout_roc_auc = roc_auc_score(y_true = y_test, y_score = rf_fit.predict_proba(X_test)[:, 1])
print("ROC-AUC of RF using hold-out validation: {}".format(holdout_roc_auc))

# validating using k-fold (k=5) cross-validation
# cross_val_score fits clones of rf_init on each fold, so the hold-out fit
# above does not carry over into these scores.
kfold_cv = KFold(n_splits = 5, shuffle=True, random_state=split_random_state)
scores_kfold_cv = cross_val_score(rf_init, X, y, cv = kfold_cv, scoring = "roc_auc")
print("Average ROC-AUC of RF using kfold(k=5) cross-validation: {}".format(scores_kfold_cv.mean()))

# validating using stratified k-fold (k=5) cross-validation
# (each fold keeps approximately the same class proportions as y)
stratified_kfold_cv = StratifiedKFold(n_splits = 5, shuffle=True, random_state=split_random_state)
scores_strat_kfold_cv = cross_val_score(rf_init, X, y, cv = stratified_kfold_cv, scoring = "roc_auc")
# Label fixed: this line previously printed the same "kfold(k=5)" text as the
# plain k-fold result, making the two outputs indistinguishable.
print("Average ROC-AUC of RF using stratified kfold(k=5) cross-validation: {}".format(scores_strat_kfold_cv.mean()))

ROC-AUC of RF using hold-out validation: 0.9968220338983051
Average ROC-AUC of RF using kfold(k=5) cross-validation: 0.9862334952898
Average ROC-AUC of RF using stratified kfold(k=5) cross-validation: 0.9891855114320721
