In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

## Setup data

In [2]:
fires = pd.read_csv("./forestfires.csv")

FEATURE_NAMES = [
    "DC",
    "temp",
    "RH",
    "wind",
]

X, y = fires[FEATURE_NAMES], np.log(fires["area"]+1)

In [3]:
CustomScorer = metrics.make_scorer(
    lambda y, y_pred: np.sqrt(metrics.mean_squared_error(y, y_pred)), greater_is_better=False
)

## Gamma Parameter Tuning

In [4]:
min_samples_split_range = [2, 3, 4, 5, 6, 7, 8, 9, 10]
max_depth_range = [1, 2, 4, 5, 10, 15, 20, 25]
n_estimators_range = [10, 20, 50, 100, 150, 200, 500]

In [5]:
grid_search = GridSearchCV(
    RandomForestRegressor(),
    param_grid={"min_samples_split": min_samples_split_range,
                "max_depth": max_depth_range,
                "n_estimators": n_estimators_range},
    cv=10,
    n_jobs=-1,
    scoring="neg_mean_absolute_error",
)

In [6]:
grid_search.fit(X, y)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10], 'max_depth': [1, 2, 4, 5, 10, 15, 20, 25], 'n_estimators': [10, 20, 50, 100, 150, 200, 500]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_absolute_error', verbose=0)

In [7]:
grid_search.best_estimator_

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=1,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [8]:
grid_search.best_score_

-1.2023540259431234

In [9]:
grid_search.best_params_

{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 10}

In [10]:
grid_search.cv_results_



{'mean_fit_time': array([0.11997349, 0.11325822, 0.12440317, 0.1503252 , 0.18801982,
        0.21575167, 0.42991853, 0.14116151, 0.11642578, 0.12165899,
        0.15296421, 0.18270905, 0.21495047, 0.40539773, 0.1298645 ,
        0.11734149, 0.12017028, 0.14738836, 0.17487674, 0.20921221,
        0.43972161, 0.13626025, 0.11215501, 0.12211609, 0.1587378 ,
        0.19074671, 0.24939427, 0.4283812 , 0.15232954, 0.11273859,
        0.13733914, 0.15365798, 0.31836257, 0.59129639, 0.78966191,
        0.25946984, 0.26438568, 0.30855625, 0.33126864, 0.39476154,
        0.45915432, 0.71464713, 0.27258987, 0.27959101, 0.28427198,
        0.32473078, 0.39146383, 0.49093778, 0.78920741, 0.26587741,
        0.27452393, 0.26709127, 0.33917069, 0.39047682, 0.4500962 ,
        1.10730939, 0.29853914, 0.30340667, 0.32604797, 0.38706548,
        0.46768653, 0.53238668, 0.93429642, 0.30355496, 0.30503657,
        0.32801647, 0.38715575, 0.45374949, 0.54111261, 0.97834184,
        0.28395941, 0.30196602,

In [25]:
y_raw = fires["area"]

In [26]:
y_pred = grid_search.predict(X)

a = np.argmin(y_pred)
b = np.argmax(y_pred)

In [31]:
np.exp(y_pred[a])

2.6690294402535693

In [32]:
np.exp(y_pred[b])

4.806777542267727

In [33]:
y_raw[a]

0.0

In [34]:
y_raw[b]

26.43