# **Regularización**

Recordemos que una regresión lineal es un modelo de la forma
$$
y=\alpha+\beta_1x_1+\beta_2x_2+\dots+\beta_nx_n+\epsilon
$$


donde nuestro objetivo es hallar $\alpha$ y $\beta=(\beta_1,\beta_2, ..., \beta_n)$ que minimicen, sobre las $m$ observaciones del conjunto de datos, la expresión

$$
\sum_{i=1}^{m}\left(y_i-\alpha-\sum_{j=1}^{n}\beta_jx_{ij}\right)^2
$$

En el caso de la regresión polinomial tenemos:

$$
f(x)=\alpha+\beta_0x_0+\beta_1x_1+\beta_2x_0x_1+\beta_3x_0^2+\beta_4x_1^2+\beta_5x_0^3+\dots
$$

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import cross_validate

In [None]:
# Load the pre-processed vehicle dataset, then split it into the CO2 target
# and the three predictor columns that the models in this notebook are fit on.
vehiculos = pd.read_csv('vehiculos_procesado.csv')
target = vehiculos['co2']
train_data = vehiculos[
    ['desplazamiento', 'cilindros', 'consumo']
]

In [None]:
# Preview the first rows of the predictor table.
train_data.head()

Unnamed: 0,desplazamiento,cilindros,consumo
0,2.5,4.0,17
1,4.2,6.0,13
2,2.5,4.0,16
3,4.2,6.0,13
4,3.8,6.0,16


In [None]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet

# Ordinary least squares baseline (no penalty on the coefficients).
# fit() returns the fitted estimator, so construction and fitting are chained.
modelo_ols = LinearRegression().fit(train_data, target)
modelo_ols.coef_

array([ 11.76787991,   1.23791071, -19.80355606])

In [None]:
# IPython help lookup: show the documentation for Lasso.
Lasso?

In [None]:
# Lasso (L1-penalised) regression with penalty strength alpha=0.01.
modelo_lasso = Lasso(alpha=0.01).fit(train_data, target)
modelo_lasso.coef_

array([ 11.75313049,   1.24110849, -19.80552093])

In [None]:
# Ridge (L2-penalised) regression with penalty strength alpha=0.01.
modelo_ridge = Ridge(alpha=0.01).fit(train_data, target)
modelo_ridge.coef_

array([ 11.76786979,   1.23791734, -19.80355624])

In [None]:
# ElasticNet mixes the L1 and L2 penalties; only alpha is set here, so the
# mix between the two is left at the library default.
modelo_elastic = ElasticNet(alpha=0.01).fit(train_data, target)
modelo_elastic.coef_

array([ 11.58528346,   1.35435282, -19.8074592 ])

Para medir la complejidad del modelo, vamos a usar `numpy.linalg.norm`, una función de NumPy que calcula varios tipos de normas.

In [None]:
def norma_l1(coeficientes):
    """Return the order-1 norm of ``coeficientes`` using ``numpy.linalg.norm``.

    For a 1-D coefficient vector this is the sum of the absolute values.
    """
    orden = 1
    return np.linalg.norm(coeficientes, orden)

def norma_l2(coeficientes):
    """Return the order-2 norm of ``coeficientes`` using ``numpy.linalg.norm``.

    For a 1-D coefficient vector this is the Euclidean length.
    """
    orden = 2
    return np.linalg.norm(coeficientes, orden)

# Report the L1 and L2 coefficient norms of the OLS model, one per line.
print(
    norma_l1(modelo_ols.coef_),
    norma_l2(modelo_ols.coef_),
    sep='\n',
)

32.809346680206325
23.069379124496862


In [None]:
def norma_l1_cv(estimador, X=None, y=None):
    """Return the L1 norm of a fitted estimator's coefficients.

    Args:
        estimador: Fitted estimator exposing a ``coef_`` attribute.
        X: Ignored. Accepted so the function can also be called as
            ``scorer(estimator, X, y)``, the form scikit-learn uses for
            callables passed as ``scoring``.
        y: Ignored; accepted for the same reason as ``X``.

    Returns:
        The value of ``norma_l1(estimador.coef_)``.
    """
    return norma_l1(estimador.coef_)

def norma_l2_cv(estimador, X=None, y=None):
    """Return the L2 norm of a fitted estimator's coefficients.

    Args:
        estimador: Fitted estimator exposing a ``coef_`` attribute.
        X: Ignored. Accepted so the function can also be called as
            ``scorer(estimator, X, y)``, the form scikit-learn uses for
            callables passed as ``scoring``.
        y: Ignored; accepted for the same reason as ``X``.

    Returns:
        The value of ``norma_l2(estimador.coef_)``.
    """
    return norma_l2(estimador.coef_)

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# IPython help lookup: show the documentation for PolynomialFeatures.
PolynomialFeatures?

In [None]:
# Build a degree-5 polynomial feature expander and fit it to the predictors.
transformador_polinomial = PolynomialFeatures(degree=5)
transformador_polinomial.fit(train_data)

In [None]:
# Expand the predictors with the fitted polynomial transformer.
var_polinomiales = transformador_polinomial.transform(train_data)

In [None]:
# Dimensions of the expanded feature matrix.
var_polinomiales.shape

(35539, 56)

In [None]:
# One-step variant: fit a degree-5 expander and transform the predictors in
# a single call, then show the dimensions of the result.
variables_polinomiales = PolynomialFeatures(degree=5).fit_transform(train_data)
variables_polinomiales.shape

(35539, 56)

Ahora vamos a evaluar los distintos tipos de regularización.

In [None]:
# Maps each model name to a dict holding its coefficient norms.
RESULTADOS ={}

In [None]:
# Fit OLS on the polynomial features once and reuse the fitted estimator for
# both norms; previously an identical model was refit for each dictionary entry.
ols_polinomial = LinearRegression().fit(variables_polinomiales, target)
RESULTADOS['ols'] = {
    'norm_l1': norma_l1_cv(ols_polinomial),
    'norm_l2': norma_l2_cv(ols_polinomial),
}


In [None]:
# Fit Lasso on the polynomial features once and reuse the fitted estimator for
# both norms; previously an identical model was refit for each dictionary entry.
# NOTE(review): the recorded output shows a solver warning per fit, presumably
# non-convergence within max_iter=500 — confirm, and consider scaling features.
lasso_polinomial = Lasso(alpha=1, tol=0.01, max_iter=500).fit(
    variables_polinomiales, target
)
RESULTADOS['lasso'] = {
    'norm_l1': norma_l1_cv(lasso_polinomial),
    'norm_l2': norma_l2_cv(lasso_polinomial),
}


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [None]:
# Fit Ridge on the polynomial features once and reuse the fitted estimator for
# both norms; previously an identical model was refit for each dictionary entry.
# NOTE(review): the recorded output shows a warning from linalg.solve per fit,
# presumably an ill-conditioned system from unscaled features — confirm.
ridge_polinomial = Ridge(alpha=1, tol=0.01, max_iter=500).fit(
    variables_polinomiales, target
)
RESULTADOS['ridge'] = {
    'norm_l1': norma_l1_cv(ridge_polinomial),
    'norm_l2': norma_l2_cv(ridge_polinomial),
}


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


In [None]:
# Fit ElasticNet on the polynomial features once and reuse the fitted estimator
# for both norms; previously an identical model was refit for each entry.
elastic_polinomial = ElasticNet(alpha=1, tol=0.01, max_iter=500).fit(
    variables_polinomiales, target
)
RESULTADOS['elastic'] = {
    'norm_l1': norma_l1_cv(elastic_polinomial),
    'norm_l2': norma_l2_cv(elastic_polinomial),
}

In [None]:
# Tabulate the collected norms: after transposing there is one row per model
# and one column per norm.
rs = pd.DataFrame(RESULTADOS).transpose()
display(rs)

Unnamed: 0,norm_l1,norm_l2
ols,10853.748047,3922.194758
lasso,43.43314,34.094357
ridge,457.524202,109.550869
elastic,21.002924,8.997096


In [None]:
# The OLS baseline norms are already stored in the 'ols' row of `rs`, so read
# them from there instead of refitting LinearRegression two more times.
l1_ols = rs.loc['ols', 'norm_l1']
l2_ols = rs.loc['ols', 'norm_l2']

# Fraction of the OLS coefficient norm removed by each model (0 for OLS itself).
rs['l1_reduction'] = (l1_ols - rs['norm_l1']) / l1_ols
rs['l2_reduction'] = (l2_ols - rs['norm_l2']) / l2_ols




Unnamed: 0,norm_l1,norm_l2,l1_reduction,l2_reduction
ols,10853.748047,3922.194758,0.0,0.0
lasso,43.43314,34.094357,0.995998,0.991307
ridge,457.524202,109.550869,0.957846,0.972069
elastic,21.002924,8.997096,0.998065,0.997706


10853.748047385083
3922.194758373462


Debido a que la regularización ElasticNet combina las otras dos (Lasso y Ridge), era de esperar que tuviera el mejor desempeño (aquí, la mayor reducción de las normas L1 y L2 de los coeficientes respecto a OLS); por eso nos quedamos con el modelo de ElasticNet.

In [None]:
# Fit the chosen ElasticNet on the polynomial features and show its coefficients.
(
    ElasticNet(alpha=1, tol=0.01, max_iter=500)
    .fit(variables_polinomiales, target)
    .coef_
)

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -1.33310440e+00,
        4.89645007e+00,  6.09439149e+00, -1.31816563e+00,  2.37723778e+00,
       -3.10050434e+00, -6.41934303e-01, -7.84374220e-01, -9.59081564e-02,
       -7.18954629e-02,  7.19478059e-03, -4.57252233e-02, -6.37787774e-02,
        1.22426563e-02,  3.46550752e-02,  4.26955780e-02,  2.66673958e-03,
        1.39812144e-02, -8.09891082e-03,  8.76409316e-03, -1.69690074e-02,
       -2.05177215e-03,  1.54020194e-03, -6.29758134e-03, -2.92855315e-03,
       -6.86058449e-04,  1.42133650e-03, -9.92879254e-04,  1.28704557e-04,
        5.03220762e-04,  6.61024458e-04,  5.80200778e-05,  3.41020839e-03,
        2.27563082e-03,  2.71125911e-03, -1.90350591e-03, -7.82250851e-05,
        6.56665428e-04, -7.34759017e-04, -9.12167373e-04, -2.43120629e-04,
        1.60164558e-04,  3.89147755e-04, -5.45731423e-04, -3.49391006e-04,
        3.45240438e-05,  3.46406797e-05,  2.59823852e-04, -6.35175029e-05,
       -9.14762151e-05,  