# Machine Learning GridSearch Hyperparameters - Regression

## Importing Libs

In [15]:
# Standard libs
import time
import numpy as np
import pandas as pd

# Model Selection libs
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# Machine Learning libs
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import SGDRegressor

In [4]:
df_abt_reg = pd.read_csv('/Users/dellacorte/py-projects/data-science/supervised-learning-regression-reference/databases/house-price.csv', sep=';')
df_abt_reg.head()

Unnamed: 0,zoneamento,tam_terreno,forma_terreno,qualidade_geral,condicao,ano_construcao,qualidade_aquecedor,ar_condicionado,tam_primeiro_andar,tam_segundo_andar,...,qtde_banheiros,qtde_comodos,qtde_lareiras,qtde_carros_garagem,tam_garagem,tam_piscina,qualidade_piscina,mes_venda,ano_venda,preco
0,RL,785.03035,Reg,7,5,2003,Ex,Y,79.524968,79.339162,...,30,8,0,2,50.910844,0.0,NotAv,2,2008,846510.0
1,RL,891.8688,Reg,6,8,1976,Ex,Y,117.243586,0.0,...,2,6,1,2,42.73538,0.0,NotAv,5,2007,734478.4
2,RL,1045.15875,IR1,7,5,2001,Ex,Y,85.47076,80.453998,...,2,6,1,2,56.485024,0.0,NotAv,9,2008,907410.0
3,RL,887.22365,IR1,7,5,1915,Gd,Y,89.279783,70.234668,...,1,7,1,3,59.643726,0.0,NotAv,2,2006,568400.0
4,RL,1324.79678,IR1,8,5,2000,Ex,Y,106.373935,97.826859,...,2,9,1,3,77.666908,0.0,NotAv,12,2008,1015000.0


In [5]:
!pip install feature-engine -U

Collecting feature-engine
  Downloading feature_engine-1.8.3-py2.py3-none-any.whl.metadata (9.9 kB)
Collecting scikit-learn>=1.4.0 (from feature-engine)
  Using cached scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl.metadata (31 kB)
Downloading feature_engine-1.8.3-py2.py3-none-any.whl (378 kB)
Using cached scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl (12.1 MB)
Installing collected packages: scikit-learn, feature-engine
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
  Attempting uninstall: feature-engine
    Found existing installation: feature-engine 1.3.0
    Uninstalling feature-engine-1.3.0:
      Successfully uninstalled feature-engine-1.3.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pdpbox 0.3.0 requires xgb

In [6]:
cat_vars = []
num_vars = ['tam_terreno', 'qualidade_geral', 'ano_construcao', 'tam_primeiro_andar', 'tam_segundo_andar', 'tam_sala_estar',
            'qtde_banheiros',	'qtde_comodos',	'qtde_lareiras',	'qtde_carros_garagem',	'tam_garagem', 'ano_venda']
modeling_vars = cat_vars + num_vars

X = df_abt_reg[modeling_vars].copy()
y = df_abt_reg['preco'].copy()

### LinearRegression

The main hyperparameters that we can use in GridSearch:

* `fit_intercept`: Whether or not to compute the intercept. If set to `False`, the intercept will not be computed.

In [11]:
t1 = time.time()

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
pipeline = Pipeline(steps=[('linear_regression', LinearRegression())])

parametros = {
  'linear_regression__fit_intercept': [True, False],
}

grid_search = GridSearchCV(pipeline, parametros, scoring='neg_median_absolute_error', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X, y)

print()
print('='*100)
print(grid_search.best_params_)
print('\nIt took {} seconds for GridSearch for LinearRegression'.format(time.time() - t1))
print('='*100)
print()

Fitting 5 folds for each of 2 candidates, totalling 10 fits


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core import (



{'linear_regression__fit_intercept': False}

It took 2.0857908725738525 seconds for GridSearch for LinearRegression



### SGDRegressor

The main hyperparameters that we can use in GridSearch:

* `fit_intercept`: Whether or not to compute the intercept. If set to `False`, the intercept will not be computed.

* `learning_rate`: Learning rate, values ​​to test: `['constant', 'optimal', 'adaptive']`.

* `penalty`: Regularization to be used: `['l2', 'l1', None]`

* `early_stopping`: Whether to use early stopping to end training when the validation score is not improving: `[True, False]`

* `n_iter_no_change`: Number of iterations without improvements before ending training: `[5, 30, 50]`

In [12]:
t2 = time.time()

pipeline = Pipeline(steps=[('sdg_reg', SGDRegressor())])

parametros = {
  'sdg_reg__fit_intercept': [True, False],
  'sdg_reg__learning_rate': ['constant', 'optimal', 'adaptive'],
  'sdg_reg__penalty': ['l2', 'l1', None],
  'sdg_reg__early_stopping': [True, False],
  'sdg_reg__n_iter_no_change': [5, 30, 50]
}

grid_search = GridSearchCV(pipeline, parametros, scoring='neg_median_absolute_error', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X, y)

print()
print('='*100)
print(grid_search.best_params_)
print('\nIt took {} seconds for GridSearch for SGDRegressor'.format(time.time() - t2))
print('='*100)
print()

Fitting 5 folds for each of 108 candidates, totalling 540 fits





{'sdg_reg__early_stopping': False, 'sdg_reg__fit_intercept': True, 'sdg_reg__learning_rate': 'adaptive', 'sdg_reg__n_iter_no_change': 30, 'sdg_reg__penalty': 'l2'}

It took 5.811716079711914 seconds for GridSearch for SGDRegressor





### Ridge

The main hyperparameters that we can use in GridSearch:

* `alpha`: Regularization strength. Must be a positive decimal value. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values ​​specify stronger regularization.

* `fit_intercept`: Whether or not to compute the intercept. If set to `False`, the intercept will not be computed.

In [14]:
t3 = time.time()

pipeline = Pipeline(steps=[('ridge_regression', Ridge())])

parametros = {
  'ridge_regression__alpha': [0.01, 0.1, 1.0, 10, 100],
  'ridge_regression__fit_intercept': [True, False],
}

grid_search = GridSearchCV(pipeline, parametros, scoring='neg_median_absolute_error', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X, y)

print()
print('='*100)
print(grid_search.best_params_)
print('\nIt took {} seconds for GridSearch for Ridge'.format(time.time() - t3))
print('='*100)
print()

Fitting 5 folds for each of 10 candidates, totalling 50 fits

{'ridge_regression__alpha': 100, 'ridge_regression__fit_intercept': False}

It took 0.18117880821228027 seconds for GridSearch for Ridge



### Lasso

The main hyperparameters that we can use in GridSearch:

* `alpha`: Constne that multiplies the L1 term. The default is `1.0`. If `alpha=0` is the equivalent of Linear Regression.

* `fit_intercept`: Whether or not to compute the intercept. If set to `False`, the intercept will not be computed.

In [16]:
t4 = time.time()

pipeline = Pipeline(steps=[('lasso', Lasso())])

parametros = {
  'lasso__alpha': [0.01, 0.1, 1.0, 10, 100],
  'lasso__fit_intercept': [True, False],
}

grid_search = GridSearchCV(pipeline, parametros, scoring='neg_median_absolute_error', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X, y)

print()
print('='*100)
print(grid_search.best_params_)
print('\nIt took {} seconds for GridSearch for Lasso'.format(time.time() - t4))
print('='*100)
print()

Fitting 5 folds for each of 10 candidates, totalling 50 fits


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c


{'lasso__alpha': 100, 'lasso__fit_intercept': False}

It took 0.2935349941253662 seconds for GridSearch for Lasso



  model = cd_fast.enet_coordinate_descent(
