# GridSearch

**Crea un 'iterador' de parámetros para el ajuste de modelos, permitiendo acceder a los valores más  óptimos**

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import numpy as np
boston = load_boston()
X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.25, random_state=33)

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

reg_tree = DecisionTreeRegressor()
param_grid = {
        'max_depth' : [2,4,6],
        'max_features' : [2, 4, 6, 8]
}

**sklearn.model_selection.GridSearchCV**

- **estimator:** This is assumed to implement the scikit-learn estimator interface. (model)
- **param_grid:** Dictionary with parameters names (string) as keys and lists of parameter settings to try as values
- **scoring:** Evaluate the predictions on the test set
- **cv:** Cross validation splitting strategy (default 3-fold cross validation)
- **return_train_score:** If _false_ the **cv_results_** attribute will not include training scores

In [6]:
grid_search = GridSearchCV(reg_tree, param_grid, scoring='neg_mean_squared_error', cv=5, return_train_score=True)

In [28]:
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': [2, 4, 6], 'max_features': [2, 4, 6, 8]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='neg_mean_squared_error', verbose=0)

## Propiedades del GridSearch

**GridSearchCV.best_estimator_:** estimator that was choosen by the search, i.e. estimator which gave the highest score (or smallest loss if specified) on left out data.

It returns the model with the best parameters for the prediction

In [8]:
grid_search.best_estimator_

DecisionTreeRegressor(criterion='mse', max_depth=6, max_features=8,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

**GridSearchCV.best_score_:** Mean cross-validated score of the **best_estimator**

In [9]:
grid_search.best_score_

-22.022719399037612

**GridSearch.best_params_:** Dictionary with the parameter setting that gave the best results on the hold data.

In [10]:
grid_search.best_params_

{'max_depth': 6, 'max_features': 8}

**grid_search.cv_results_['mean_train_score']:** returns all the mean values for the score in each combination of parameters

In [11]:
grid_search.cv_results_['mean_train_score']

array([-53.29220827, -39.36366908, -29.96967777, -25.27489449,
       -29.60333848, -17.64033311, -11.60024103, -12.45769363,
        -9.35656927,  -6.39640804,  -5.77371195,  -4.5890441 ])

**Now for example we can calculate the RMSE for the train set**

the negative sign is because we define the scoring parameter as **neg_mean_squared_error**

In [12]:
np.sqrt(-grid_search.cv_results_['mean_train_score'])

array([7.30015125, 6.27404726, 5.47445685, 5.02741429, 5.44089501,
       4.20003966, 3.40591266, 3.52954581, 3.05885097, 2.52911211,
       2.40285496, 2.14220543])

**We can also check the scores for the test set**

Is logic if the test scores are higher than the train scores, because thos eare new values that are beign evaluated

In [13]:
np.sqrt(-grid_search.cv_results_['mean_test_score'])

array([7.49109823, 7.15457377, 6.51338587, 5.44155004, 6.42544483,
       5.90538413, 5.04565996, 5.04168207, 5.82283041, 5.48117981,
       5.60662419, 4.69283703])

# RandomizedSearch

*************************************************
### Pregunta: Diferencia entre cv y n_iter      
*************************************************

**Muy similar al _GridSearch_, pero en este caso los valores son creados aleatoreamente desde un rango definido**

RandomizedSearchCV(estimator, param_distributions, n_iter=10, scoring=None, n_jobs=None, iid=’warn’, refit=True, cv=’warn’, verbose=0, pre_dispatch=‘2*n_jobs’, random_state=None, error_score=’raise-deprecating’, return_train_score=False)

In [14]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint

In [15]:
param_grid = {
    'max_depth' : sp_randint(2,6),
    'max_features' : sp_randint(2,8)
}

In [16]:
rand_search = RandomizedSearchCV(reg_tree, param_distributions=param_grid, cv=5, n_iter=10, scoring='neg_mean_squared_error', return_train_score=True)

In [29]:
rand_search.fit(X_train, y_train)

RandomizedSearchCV(cv=5, error_score='raise-deprecating',
          estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best'),
          fit_params=None, iid='warn', n_iter=10, n_jobs=None,
          param_distributions={'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001ECDE8625C0>, 'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001ECDE862048>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [18]:
rand_search.best_params_

{'max_depth': 5, 'max_features': 6}

In [19]:
rand_search.best_estimator_

DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=6,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [20]:
# Comparamos los resultados entre Randomizedsearch y GridSearch
print("Promedio GridSearch RMSE: ", np.sqrt(-grid_search.cv_results_['mean_test_score']).mean())
print("Promedio RandomizedSearch RMSE: ", np.sqrt(-rand_search.cv_results_['mean_test_score']).mean())

Promedio GridSearch RMSE:  5.885187528791412
Promedio RandomizedSearch RMSE:  5.782411573339254


# Cambiando el scoring de GridSearch (r2)

In [21]:
param_grid = {'max_depth' : [2,4,6], 'max_features' : [2,4,6,8]}
grid_search_r2 = GridSearchCV(reg_tree, param_grid, cv=5, scoring='r2', return_train_score=True)

In [30]:
grid_search_r2.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': [2, 4, 6], 'max_features': [2, 4, 6, 8]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='r2', verbose=0)

## Evaluación en test set

In [23]:
from sklearn.metrics import mean_squared_error

In [24]:
y_pred_gs = grid_search.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred_gs))

4.993633813631636

In [25]:
y_pred_rs = rand_search.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred_rs))

4.738218851253739

In [26]:
y_pred_r2 = grid_search_r2.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred_r2))

5.212817689546255