In [24]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

## Setup data

In [25]:
fires = pd.read_csv("./forestfires.csv")

FEATURE_NAMES = [
    "DC",
    "temp",
    "RH",
    "wind",
]

X, y = fires[FEATURE_NAMES], np.log(fires["area"]+1)

In [26]:
CustomScorer = metrics.make_scorer(
    lambda y, y_pred: -np.sqrt(metrics.mean_squared_error(y, y_pred)), greater_is_better=False
)

## Gamma Parameter Tuning

In [27]:
min_samples_split_range = [2, 3, 4, 5, 6, 7, 8, 9, 10]
max_depth_range = [1, 2, 4, 5, 10, 15, 20, 25]
n_estimators_range = [10, 20, 50, 100, 150, 200, 500]

In [28]:
grid_search = GridSearchCV(
    RandomForestRegressor(),
    param_grid={"min_samples_split": min_samples_split_range,
                "max_depth": max_depth_range,
                "n_estimators": n_estimators_range},
    cv=10,
    n_jobs=1,
    scoring=CustomScorer,
)

In [29]:
grid_search.fit(X, y)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=1,
       param_grid={'min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10], 'max_depth': [1, 2, 4, 5, 10, 15, 20, 25], 'n_estimators': [10, 20, 50, 100, 150, 200, 500]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=make_scorer(<lambda>, greater_is_better=False), verbose=0)

In [30]:
grid_search.best_estimator_

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=25,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=4,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [31]:
grid_search.best_score_

1.6207422471236623

In [32]:
grid_search.best_params_

{'max_depth': 25, 'min_samples_split': 4, 'n_estimators': 10}

In [33]:
grid_search.cv_results_



{'mean_fit_time': array([0.00403252, 0.00710762, 0.01647723, 0.03213117, 0.04813576,
        0.06549904, 0.15712316, 0.00399508, 0.00713642, 0.0164923 ,
        0.03212171, 0.04780881, 0.06328223, 0.15713854, 0.00398283,
        0.00711217, 0.016506  , 0.03208857, 0.04774134, 0.06333001,
        0.15911777, 0.0039757 , 0.00711911, 0.01651099, 0.03208933,
        0.04780815, 0.0633893 , 0.15698023, 0.00398643, 0.00710893,
        0.0165112 , 0.03211899, 0.0478183 , 0.06353128, 0.15763979,
        0.0040261 , 0.00712132, 0.01650047, 0.03217816, 0.04777281,
        0.06354251, 0.15722613, 0.00401769, 0.00713968, 0.01667142,
        0.03226748, 0.04796245, 0.06393661, 0.15786824, 0.00401492,
        0.00713658, 0.01651943, 0.03224642, 0.04785397, 0.06353347,
        0.15722923, 0.00399947, 0.00715098, 0.01655526, 0.03219023,
        0.047948  , 0.06336653, 0.15742383, 0.00445509, 0.00808711,
        0.0188355 , 0.0368305 , 0.05492227, 0.07282476, 0.18095264,
        0.00445554, 0.00807757,