In [1]:
# XGBoost hyperparameter search with hyperopt (TPE)
from functools import partial

import numpy as np
import pandas as pd
import xgboost as xgb

from hyperopt import hp, fmin, tpe, Trials, space_eval
from hyperopt.pyll.base import scope

from sklearn import metrics
from sklearn import model_selection

In [2]:
def optimize(params, x, y, n_splits=10):
    """Cross-validated objective for a hyperparameter search.

    Builds an ``xgb.XGBClassifier`` from ``params``, fits it on the training
    part of every stratified fold of ``(x, y)`` and scores the held-out part
    with accuracy.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded unchanged to ``xgb.XGBClassifier``.
    x : array-like
        Feature rows. Converted with ``np.asarray`` so that the positional
        fold indices can be used for row selection even when a list or a
        DataFrame is passed.
    y : array-like
        One label per row of ``x``; converted with ``np.asarray`` as well.
    n_splits : int, default 10
        Number of folds given to ``StratifiedKFold``.

    Returns
    -------
    float
        Mean fold accuracy multiplied by -1, so that a minimizer which
        lowers this value raises the accuracy.
    """
    # fold indices are positional integer arrays, so both inputs must
    # support NumPy-style fancy indexing
    x = np.asarray(x)
    y = np.asarray(y)

    # the same estimator instance is refit on every fold
    model = xgb.XGBClassifier(**params)
    kf = model_selection.StratifiedKFold(n_splits=n_splits)

    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        # fit on the training part of the current fold
        model.fit(x[train_idx], y[train_idx])
        # score the held-out part of the current fold
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))

    # return negative accuracy
    return -1 * np.mean(accuracies)

In [3]:
# read the training data
df = pd.read_csv("../input/train_10folds.csv")
# feature matrix: every column except "target" and "kfold"
X = df.drop(["target", "kfold"], axis=1).values
# and the targets
y = df.target.values

# define a parameter space; each sampled dict is passed as keyword
# arguments to xgb.XGBClassifier inside optimize()
param_space = {
    "eta": hp.uniform("eta", 0.01, 1),
    "gamma": hp.uniform("gamma", 0, 1),
    # quniform samples floats, scope.int casts them before the model sees them
    "max_depth": scope.int(hp.quniform("max_depth", 3, 25, 1)),
    "min_child_weight": scope.int(hp.quniform("min_child_weight", 1, 10, 1)),
    "subsample": hp.uniform("subsample", 0.5, 1),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1),
    "lambda": hp.uniform("lambda", 0, 1),
    "alpha": hp.uniform("alpha", 0, 1),
    "booster": hp.choice("booster", ["gbtree", "gblinear", "dart"]),
}

# bind the data so hyperopt only has to supply the sampled parameters
optimization_function = partial(optimize, x=X, y=y)

# initialize trials to keep logging information
trials = Trials()

# run hyperopt
hopt = fmin(
    fn=optimization_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=30,
    trials=trials,
)

# fmin reports hp.choice dimensions as the index of the selected option and
# quniform dimensions as raw floats; space_eval maps the result back through
# param_space so the printed values are the ones that were given to the model
best_params = space_eval(param_space, hopt)
print(best_params)

  0%|          | 0/30 [07:53<?, ?trial/s, best loss=?]



KeyboardInterrupt

