In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import GridSearchCV
from sklearn import metrics

import xgboost as xgb

## Setup data

In [2]:
fires = pd.read_csv("./forestfires.csv")

FEATURE_NAMES = [
    "DC",
    "temp",
    "RH",
    "wind",
]

X, y = fires[FEATURE_NAMES], np.log(fires["area"]+1)

In [3]:
CustomScorer = metrics.make_scorer(
    lambda y, y_pred: np.sqrt(metrics.mean_squared_error(y, y_pred)), greater_is_better=False
)

## Gamma Parameter Tuning

In [4]:
learning_rate_range = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 2.0, 3.0, 5.0, 10.0]
max_depth_range = [1, 2, 4, 5, 10, 15, 20, 25]
n_estimators_range = [10, 20, 50, 100, 150, 200, 500]

In [5]:
grid_search = GridSearchCV(
    xgb.XGBRegressor(),
    param_grid={"learning_rate": learning_rate_range,
                "max_depth": max_depth_range,
                "n_estimators": n_estimators_range},
    cv=10,
    n_jobs=1,
    scoring="neg_mean_absolute_error",
)

In [8]:
grid_search.fit(X, y)

KeyboardInterrupt: 

GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1),
       fit_params=None, iid='warn', n_jobs=1,
       param_grid={'learning_rate': [1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1], 'max_depth': [1, 2, 4, 5, 10, 15, 20, 25], 'n_estimators': [10, 20, 50, 100, 150, 200, 500]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_absolute_error', verbose=0)

In [70]:
grid_search.best_estimator_

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=1e-06, max_delta_step=0,
       max_depth=5, min_child_weight=1, missing=None, n_estimators=10,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [71]:
grid_search.best_score_

-1.098958515062574

In [72]:
grid_search.best_params_

{'learning_rate': 1e-06, 'max_depth': 5, 'n_estimators': 10}

In [73]:
grid_search.cv_results_



{'mean_fit_time': array([0.00147483, 0.00182149, 0.00292969, 0.00475845, 0.00654907,
        0.00840037, 0.01948376, 0.00163665, 0.00216699, 0.00383663,
        0.0066004 , 0.00938323, 0.01207702, 0.02880907, 0.00211241,
        0.00319686, 0.0064364 , 0.01176112, 0.01701694, 0.02230408,
        0.05422399, 0.00244315, 0.00384231, 0.00800335, 0.01493812,
        0.02195473, 0.02890706, 0.07047675, 0.00432918, 0.00763142,
        0.01757119, 0.0340116 , 0.05057852, 0.06710131, 0.16645107,
        0.00592382, 0.01081059, 0.02556813, 0.05004303, 0.07478821,
        0.09932196, 0.24776044, 0.00650826, 0.01198184, 0.02843435,
        0.05596809, 0.08347378, 0.1109426 , 0.27591693, 0.00663562,
        0.0122431 , 0.02910433, 0.05722723, 0.08527479, 0.11335111,
        0.28222024, 0.0014327 , 0.00177639, 0.00289495, 0.00474644,
        0.00671871, 0.00845411, 0.01945705, 0.00159936, 0.0021714 ,
        0.00382471, 0.00663269, 0.00940669, 0.01212924, 0.02887156,
        0.00212588, 0.00319564,