# 5.1 - Ajuste

### GridSearching  -  CrossValidation


![grid](images/grid.png)

![cv](images/cv.ppm)

In [1]:
import warnings
warnings.simplefilter('ignore')

import numpy as np

from sklearn.datasets import make_circles, load_boston
from sklearn.model_selection import train_test_split as tts

from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.ensemble import RandomForestClassifier as RFC

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [2]:
def grid(modelo, param):
    """Tune ``modelo`` by exhaustive grid search and return the best fitted estimator.

    Runs a 5-fold cross-validated ``GridSearchCV`` over ``param`` on the
    notebook-level ``X_train``/``y_train``, then prints the train score, the
    test score (on the notebook-level ``X_test``/``y_test``), the winning
    hyperparameters and their mean cross-validation score.

    Parameters
    ----------
    modelo : estimator
        scikit-learn estimator to tune; it is handed to ``GridSearchCV``.
    param : dict
        Maps each hyperparameter name to the list of values to try.

    Returns
    -------
    estimator
        ``best_estimator_`` of the search. It is the very model whose train
        and test scores were printed.
    """
    search = GridSearchCV(modelo,    # machine-learning model to tune
                          param,     # dict: key=hyperparameter name, value=list of values
                          cv=5,      # 5 folds, each validating on 20% of the training data
                          n_jobs=-1  # use every core of the machine
                          )

    search.fit(X_train, y_train)

    print('Acierto train: {:.2f}'.format(search.score(X_train, y_train)))
    print('Acierto test: {:.2f}'.format(search.score(X_test, y_test)))
    print('Mejores parametros: {}'.format(search.best_params_))
    print('Mejor acierto cv: {:.2f}'.format(search.best_score_))

    # refit=True (the default) has already retrained the best configuration on
    # all of X_train, so no extra .fit() is needed. Fitting again would train a
    # new random model that no longer matches the scores printed above.
    return search.best_estimator_


In [3]:
# return_X_y=True yields (data, target) from a single load, instead of
# calling load_boston() once per attribute.
X, y = load_boston(return_X_y=True)

In [4]:
# Fixed seed so the train/test partition is identical on every run.
X_train, X_test, y_train, y_test = tts(X, y, random_state=42)

In [5]:
# Baseline: random forest regressor with default hyperparameters.
rfr = RFR().fit(X_train, y_train)  # fit() returns the fitted estimator itself

# Score on the data it was trained on vs. on the held-out split.
train_score, test_score = rfr.score(X_train, y_train), rfr.score(X_test, y_test)

train_score, test_score

(0.9790460336137403, 0.8196245613215921)

In [15]:
# Grid explored for the regressor: hyperparameter name -> candidate values.
params = dict(
    n_estimators=[290, 300, 310],
    criterion=['mse', 'mae'],
    max_leaf_nodes=[9, 15, 25],
)

In [16]:
# Run the grid search; the best estimator it returns is shown as the cell output.
grid(rfr, params)

Acierto train: 0.95
Acierto test: 0.82
Mejores parametros: {'criterion': 'mse', 'max_leaf_nodes': 25, 'n_estimators': 300}
Mejopr acierto cv: 0.82


RandomForestRegressor(max_leaf_nodes=25, n_estimators=300)

In [17]:
# Run the search again, this time keeping the returned best estimator for later use.
modelo=grid(rfr, params)

Acierto train: 0.95
Acierto test: 0.82
Mejores parametros: {'criterion': 'mse', 'max_leaf_nodes': 25, 'n_estimators': 290}
Mejopr acierto cv: 0.82


In [18]:
# Preview the first 10 predictions of the tuned model on the held-out test set.
modelo.predict(X_test)[:10]

array([34.46029809, 28.79196578, 20.77288687, 20.79084666, 20.81192283,
       15.19056171, 14.84496539, 21.29627023, 47.7839033 , 20.76885543])

### Random GridSearching

In [20]:
# Toy two-class dataset for the randomized search (seeded, so it is reproducible).
# Note: this rebinds X and y, which held the Boston data until now.
X, y = make_circles(noise=0.2, factor=0.5, random_state=1)

In [21]:
# Peek at the first 10 samples of X.
X[:10]

array([[-0.38289117, -0.09084004],
       [-0.02096229, -0.47787416],
       [-0.39611596, -1.28942694],
       [-0.61813018, -0.06383715],
       [ 0.70347838, -0.18703837],
       [-0.45970463,  0.69477465],
       [-0.45091682, -0.71570524],
       [-0.45562004, -0.13406016],
       [-0.26513904,  0.40812871],
       [-0.15474648,  0.41406973]])

In [22]:
# Peek at the first 10 labels of y.
y[:10]

array([1, 1, 0, 1, 1, 0, 0, 1, 1, 1])

In [24]:
# Classifier with default hyperparameters; it is the base estimator of the randomized search.
rfc=RFC()

In [27]:
# Candidate values for each hyperparameter explored by the randomized search.
n_estimators = np.linspace(200, 2000, 10).astype(int).tolist()  # 200, 400, ..., 2000
criterion = ["gini", "entropy"]
min_samples_split = [2, 5, 10, 20]
bootstrap = [True, False]

In [28]:
# Search space for the randomized search: hyperparameter name -> candidate values.
# Note: this rebinds `params`, which previously held the regressor grid.
params = dict(
    n_estimators=n_estimators,
    criterion=criterion,
    min_samples_split=min_samples_split,
    bootstrap=bootstrap,
)

In [30]:
# Randomized search: evaluate 100 randomly sampled combinations from `params`,
# each scored with 3-fold cross-validation, using every available core.
rf_random = RandomizedSearchCV(
    rfc,
    params,
    n_iter=100,
    cv=3,
    n_jobs=-1,
)

In [32]:
# Run the randomized search on the full circles dataset (no train/test split is made here).
rf_random.fit(X, y)

RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'bootstrap': [True, False],
                                        'criterion': ['gini', 'entropy'],
                                        'min_samples_split': [2, 5, 10, 20],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]})

In [33]:
# Hyperparameter combination with the best mean cross-validation score.
rf_random.best_params_

{'n_estimators': 400,
 'min_samples_split': 10,
 'criterion': 'entropy',
 'bootstrap': True}

In [35]:
# Mean cross-validated score of the best combination found.
rf_random.best_score_

0.8606654783125371

In [37]:
# rf_random.cv_results_  # uncomment to inspect the scores of every sampled candidate

In [38]:
# Estimator built with the best hyperparameters found by the search.
rf_random.best_estimator_

RandomForestClassifier(criterion='entropy', min_samples_split=10,
                       n_estimators=400)

In [39]:
# refit=True (the default) means the search has already retrained the best
# configuration on all of X, y, so the estimator is ready to use as is.
# Calling .fit(X, y) again would only replace it with a new random forest.
rf_random.best_estimator_

RandomForestClassifier(criterion='entropy', min_samples_split=10,
                       n_estimators=400)

### HyperOpt (GridSearching bayesiano)