In [11]:
from sklearn.datasets import make_classification
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score
from xgboost import XGBRFClassifier

In [8]:
# build the synthetic classification dataset
def get_dataset():
    """Generate the synthetic classification problem used in this notebook.

    Returns:
        tuple: the ``(X, y)`` pair produced by ``make_classification`` for
        1000 samples with 20 features (15 informative, 5 redundant), seeded
        with ``random_state=7``.
    """
    settings = {
        "n_samples": 1000,
        "n_features": 20,
        "n_informative": 15,
        "n_redundant": 5,
        "random_state": 7,
    }
    features, labels = make_classification(**settings)
    return features, labels

In [9]:
# get a list of models to evaluate
def get_models():
    """Build one XGBoost random-forest classifier per ``colsample_bynode`` value.

    The grid covers 0.1, 0.2, ..., 1.0. Each fraction is derived from an
    integer count of tenths rather than a floating-point ``arange`` step, so
    the function does not depend on a NumPy import having run beforehand and
    the values handed to the model carry no accumulated rounding error
    (0.3 rather than 0.30000000000000004).

    Returns:
        dict: maps the fraction formatted with one decimal ("0.1" ... "1.0")
        to an ``XGBRFClassifier`` configured with that fraction, inserted in
        increasing order.
    """
    models = {}
    for tenths in range(1, 11):
        # integer / 10 gives the closest float to each decimal fraction
        fraction = tenths / 10
        models["%.1f" % fraction] = XGBRFClassifier(
            n_estimators=100, subsample=0.9, colsample_bynode=fraction
        )
    return models

In [12]:
# evaluate a given model using cross-validation
def evaluate_model(model, X, y):
    """Cross-validate ``model`` on ``(X, y)`` and return its accuracy scores.

    Args:
        model: estimator handed to ``cross_val_score``.
        X: feature data.
        y: target labels.

    Returns:
        The scores returned by ``cross_val_score`` with ``scoring="accuracy"``.
    """
    # 10 splits repeated 3 times, with a fixed seed for the splitter
    splitter = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    return cross_val_score(
        model, X, y, scoring="accuracy", cv=splitter, n_jobs=-1
    )

In [17]:
# inspect the values of the 0.1-step grid; the import lives in this cell so it
# also runs on a fresh kernel, where no earlier cell has imported arange yet
from numpy import arange

arange(0.1, 1.1, 0.1)

array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [15]:
from numpy import arange, mean, std

# build the dataset and the candidate models
X, y = get_dataset()
models = get_models()

# per-model score arrays and their labels, kept in matching order
results = []
names = []
for name, model in models.items():
    # cross-validate this candidate
    scores = evaluate_model(model, X, y)
    names.append(name)
    results.append(scores)
    # report mean and standard deviation of the scores as each model finishes
    print(">%s %.3f (%.3f)" % (name, mean(scores), std(scores)))

>0.1 0.889 (0.032)
>0.2 0.891 (0.036)
>0.3 0.887 (0.032)
>0.4 0.886 (0.030)
>0.5 0.878 (0.033)
>0.6 0.874 (0.031)
>0.7 0.869 (0.027)
>0.8 0.867 (0.027)
>0.9 0.856 (0.023)
>1.0 0.846 (0.027)


In [22]:
# baseline: XGBRFClassifier with default hyperparameters, scored through the
# evaluate_model helper rather than a copy of its cross-validation setup
model = XGBRFClassifier()
score = evaluate_model(model, X, y)
mean(score)

0.8736666666666666

In [23]:
from xgboost import XGBClassifier

# comparison: XGBClassifier with default hyperparameters, scored through the
# evaluate_model helper rather than a copy of its cross-validation setup
model = XGBClassifier()
score = evaluate_model(model, X, y)
mean(score)

0.9246666666666666