In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Lars, ElasticNet, Lasso, Ridge, BayesianRidge
# Render floats in pandas output with thousands separators and two decimals.
pd.set_option("display.float_format", "{:,.2f}".format)

In [6]:
# Accumulator for the model comparison: estimator class name -> mean cross-validated R^2.
dc_scores = dict()
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this call only works on an older scikit-learn release.
boston = load_boston()

In [7]:
# Tabular view of the dataset bundle: its "data" matrix with columns named by "feature_names".
df = pd.DataFrame(boston["data"], columns=boston["feature_names"])

In [8]:
# Attach the response values as an extra column (mutates `df` in place).
df["target"] = boston["target"]

In [9]:
# Sanity check: the 13 feature columns plus the appended "target" column.
df.columns

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'target'],
      dtype='object')

In [10]:
# Name of the response column; every other column of `df` is used as a predictor.
tgt = "target"
ls_pred = [col for col in df.columns if col != tgt]

In [11]:
# Design matrix (predictor columns only) and response vector.
X = df.loc[:, ls_pred]
y = df.loc[:, tgt]

In [14]:
# (rows, columns) of the full frame, response column included.
df.shape

(506, 14)

## Regresión Lineal

In [12]:
# Ordinary least-squares linear regression with scikit-learn's default settings.
linreg = LinearRegression()

In [13]:
# Fit on the full dataset (no hold-out). This fitted model is the one whose
# intercept_ and coef_ are inspected below; the cell output is the estimator repr.
linreg.fit(X, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [15]:
# 4-fold cross-validated R^2 for the linear model, computed on all available cores.
# NOTE(review): an integer `cv` with a regressor means contiguous, unshuffled
# K-fold splits, so the scores depend on the row order of the data — confirm
# this is intended.
ls_res = cross_val_score(linreg, X, y, scoring="r2", cv=4, n_jobs=-1)

In [16]:
# Mean and standard deviation (ddof=0) of the per-fold R^2 scores.
ls_res.mean(), ls_res.std()

(0.11405301290098702, 0.7129565737131489)

In [17]:
# Intercept of the full-data fit.
linreg.intercept_

36.4594883850902

In [18]:
# Coefficients of the full-data fit, in the same order as the columns of X (ls_pred).
linreg.coef_

array([-1.08011358e-01,  4.64204584e-02,  2.05586264e-02,  2.68673382e+00,
       -1.77666112e+01,  3.80986521e+00,  6.92224640e-04, -1.47556685e+00,
        3.06049479e-01, -1.23345939e-02, -9.52747232e-01,  9.31168327e-03,
       -5.24758378e-01])

In [20]:
# Record the mean cross-validated R^2 under the estimator's class name.
# The key is taken from the class itself rather than parsed out of the repr
# text, so it does not depend on how the estimator happens to print.
dc_scores[type(linreg).__name__] = np.mean(ls_res)

In [21]:
# Running summary so far: estimator class name -> mean cross-validated R^2.
dc_scores

{'LinearRegression': 0.11405301290098702}

## Regresión LARS

In [22]:
# Least Angle Regression (LARS) estimator with scikit-learn's default settings.
larsreg = Lars()

In [24]:
# Fit on the full dataset (no hold-out). This fitted model is the one whose
# intercept_ and coef_ are inspected below; the cell output is the estimator repr.
larsreg.fit(X, y)

Lars(copy_X=True, eps=2.220446049250313e-16, fit_intercept=True,
   fit_path=True, n_nonzero_coefs=500, normalize=True, positive=False,
   precompute='auto', verbose=False)

In [25]:
# 4-fold cross-validated R^2 for LARS (overwrites the previous model's `ls_res`).
# NOTE(review): integer `cv` with a regressor means unshuffled, contiguous folds.
ls_res = cross_val_score(larsreg, X, y, scoring="r2", cv=4, n_jobs=-1)

In [26]:
# Mean and standard deviation (ddof=0) of the per-fold R^2 scores for LARS.
ls_res.mean(), ls_res.std()

(0.11388643958845568, 0.7128425802035522)

In [27]:
# Intercept of the full-data LARS fit.
larsreg.intercept_

36.95133114391086

In [28]:
# Coefficients of the full-data LARS fit, in the same order as the columns of X (ls_pred).
larsreg.coef_

array([-1.09921668e-01,  4.77494449e-02,  3.42654195e-02,  2.67396468e+00,
       -1.82501210e+01,  3.80245356e+00,  9.79271355e-04, -1.48628516e+00,
        3.20424027e-01, -1.31267747e-02, -9.60367186e-01,  9.38636165e-03,
       -5.26015958e-01])

In [29]:
# Record the mean cross-validated R^2 under the estimator's class name.
# The key is taken from the class itself rather than parsed out of the repr
# text, so it does not depend on how the estimator happens to print.
dc_scores[type(larsreg).__name__] = np.mean(ls_res)

In [30]:
# Running summary so far: estimator class name -> mean cross-validated R^2.
dc_scores

{'LinearRegression': 0.11405301290098702, 'Lars': 0.11388643958845568}

## Regresión Cresta

In [31]:
# Ridge regression with default settings (the repr shown after fitting reports alpha=1.0).
ridgereg = Ridge()

In [34]:
# Fit on the full dataset (no hold-out). This fitted model is the one whose
# intercept_ and coef_ are inspected below; the cell output is the estimator repr.
ridgereg.fit(X, y)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [37]:
# 4-fold cross-validated R^2 for Ridge (overwrites the previous model's `ls_res`).
# NOTE(review): integer `cv` with a regressor means unshuffled, contiguous folds.
ls_res = cross_val_score(ridgereg, X, y, scoring="r2", cv=4, n_jobs=-1)

In [38]:
# Mean and standard deviation (ddof=0) of the per-fold R^2 scores for Ridge.
ls_res.mean(), ls_res.std()

(0.15964912159959443, 0.6571117136643805)

In [39]:
# Intercept of the full-data Ridge fit.
ridgereg.intercept_

31.597669818274024

In [40]:
# Coefficients of the full-data Ridge fit, in the same order as the columns of X (ls_pred).
ridgereg.coef_

array([-1.04595278e-01,  4.74432243e-02, -8.80467889e-03,  2.55239322e+00,
       -1.07770146e+01,  3.85400020e+00, -5.41453810e-03, -1.37265353e+00,
        2.90141589e-01, -1.29116463e-02, -8.76074394e-01,  9.67327945e-03,
       -5.33343225e-01])

In [41]:
# Record the mean cross-validated R^2 under the estimator's class name.
# The key is taken from the class itself rather than parsed out of the repr
# text, so it does not depend on how the estimator happens to print.
dc_scores[type(ridgereg).__name__] = np.mean(ls_res)

In [42]:
# Running summary so far: estimator class name -> mean cross-validated R^2.
dc_scores

{'LinearRegression': 0.11405301290098702,
 'Lars': 0.11388643958845568,
 'Ridge': 0.15964912159959443}

## Regresión Lasso

In [43]:
# Lasso regression with default settings (the repr shown after fitting reports alpha=1.0).
lassreg = Lasso()

In [44]:
# Fit on the full dataset (no hold-out). This fitted model is the one whose
# intercept_ and coef_ are inspected below; the cell output is the estimator repr.
lassreg.fit(X, y)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [45]:
# 4-fold cross-validated R^2 for Lasso (overwrites the previous model's `ls_res`).
# NOTE(review): integer `cv` with a regressor means unshuffled, contiguous folds.
ls_res = cross_val_score(lassreg, X, y, scoring="r2", cv=4, n_jobs=-1)

In [46]:
# Mean and standard deviation (ddof=0) of the per-fold R^2 scores for Lasso.
ls_res.mean(), ls_res.std()

(0.28536940819936757, 0.3451753067035699)

In [47]:
# Intercept of the full-data Lasso fit.
lassreg.intercept_

41.05693374499339

In [48]:
# Coefficients of the full-data Lasso fit, in the same order as the columns of X
# (ls_pred); in the recorded output three of them are exactly zero.
lassreg.coef_

array([-0.06343729,  0.04916467, -0.        ,  0.        , -0.        ,
        0.9498107 ,  0.02090951, -0.66879   ,  0.26420643, -0.01521159,
       -0.72296636,  0.00824703, -0.76111454])

In [49]:
# Record the mean cross-validated R^2 under the estimator's class name.
# The key is taken from the class itself rather than parsed out of the repr
# text, so it does not depend on how the estimator happens to print.
dc_scores[type(lassreg).__name__] = np.mean(ls_res)

In [50]:
# Running summary so far: estimator class name -> mean cross-validated R^2.
dc_scores

{'LinearRegression': 0.11405301290098702,
 'Lars': 0.11388643958845568,
 'Ridge': 0.15964912159959443,
 'Lasso': 0.28536940819936757}

## Red elástica

In [51]:
# Elastic net with default settings (the repr shown after fitting reports
# alpha=1.0 and l1_ratio=0.5).
elasnet = ElasticNet()

In [52]:
# Fit on the full dataset (no hold-out). This fitted model is the one whose
# intercept_ and coef_ are inspected below; the cell output is the estimator repr.
elasnet.fit(X, y)

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [55]:
# 4-fold cross-validated R^2 for the elastic net (overwrites the previous model's `ls_res`).
# NOTE(review): integer `cv` with a regressor means unshuffled, contiguous folds.
ls_res = cross_val_score(elasnet, X, y, scoring="r2", cv=4, n_jobs=-1)

In [56]:
# Mean and standard deviation (ddof=0) of the per-fold R^2 scores for the elastic net.
ls_res.mean(), ls_res.std()

(0.3224290833531883, 0.32042136520898984)

In [57]:
# Intercept of the full-data elastic-net fit.
elasnet.intercept_

42.22956397215434

In [58]:
# Coefficients of the full-data elastic-net fit, in the same order as the columns
# of X (ls_pred); in the recorded output two of them are exactly zero.
elasnet.coef_

array([-0.08037077,  0.05323951, -0.0126571 ,  0.        , -0.        ,
        0.93393555,  0.0205792 , -0.76204391,  0.30156906, -0.01643916,
       -0.7480458 ,  0.00833878, -0.75842612])

In [59]:
# Record the mean cross-validated R^2 under the estimator's class name.
# The key is taken from the class itself rather than parsed out of the repr
# text, so it does not depend on how the estimator happens to print.
dc_scores[type(elasnet).__name__] = np.mean(ls_res)

In [60]:
# Running summary so far: estimator class name -> mean cross-validated R^2.
dc_scores

{'LinearRegression': 0.11405301290098702,
 'Lars': 0.11388643958845568,
 'Ridge': 0.15964912159959443,
 'Lasso': 0.28536940819936757,
 'ElasticNet': 0.3224290833531883}

## Regresión Bayesiana

In [61]:
# Bayesian ridge regression with scikit-learn's default settings.
bayreg = BayesianRidge()

In [62]:
# Fit on the full dataset (no hold-out). This fitted model is the one whose
# intercept_ and coef_ are inspected below; the cell output is the estimator repr.
bayreg.fit(X, y)

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [63]:
# 4-fold cross-validated R^2 for Bayesian ridge (overwrites the previous model's `ls_res`).
# NOTE(review): integer `cv` with a regressor means unshuffled, contiguous folds.
ls_res = cross_val_score(bayreg, X, y, scoring="r2", cv=4, n_jobs=-1)

In [65]:
# Mean and standard deviation (ddof=0) of the per-fold R^2 scores for Bayesian ridge.
ls_res.mean(), ls_res.std()

(0.21349419349625304, 0.5805043006083164)

In [66]:
# Intercept of the full-data Bayesian ridge fit.
bayreg.intercept_

27.551928678732757

In [67]:
# Coefficients of the full-data Bayesian ridge fit, in the same order as the columns of X (ls_pred).
bayreg.coef_

array([-0.10143602,  0.04973881, -0.04383578,  1.89480596, -2.14177578,
        3.67398399, -0.01061762, -1.24522265,  0.28021623, -0.01406253,
       -0.79726073,  0.01003674, -0.56188247])

In [68]:
# Record the mean cross-validated R^2 under the estimator's class name.
# The key is taken from the class itself rather than parsed out of the repr
# text, so it does not depend on how the estimator happens to print.
dc_scores[type(bayreg).__name__] = np.mean(ls_res)

In [69]:
# Final summary for all six models: estimator class name -> mean cross-validated R^2.
dc_scores

{'LinearRegression': 0.11405301290098702,
 'Lars': 0.11388643958845568,
 'Ridge': 0.15964912159959443,
 'Lasso': 0.28536940819936757,
 'ElasticNet': 0.3224290833531883,
 'BayesianRidge': 0.21349419349625304}

In [70]:
# Gather, for every model fitted on the full data, its coefficient vector and
# its intercept, keyed by the estimator's class name. The name is read from the
# class itself instead of being parsed out of the repr text.
coefs_by_model = {}
intercepts_by_model = {}
for model in [linreg, larsreg, ridgereg, lassreg, elasnet, bayreg]:
    model_name = type(model).__name__
    coefs_by_model[model_name] = model.coef_
    # Wrap the scalar in a list so the intercept frame gets exactly one row.
    intercepts_by_model[model_name] = [model.intercept_]

# Build each frame once, one column per model, instead of growing an empty
# frame column by column: `resul` has one row per predictor, `alfas` a single row.
resul = pd.DataFrame(coefs_by_model)
alfas = pd.DataFrame(intercepts_by_model)

In [71]:
# Index the coefficient rows by predictor name and the single intercept row by
# the label "intercepto", using an index called "features" in both frames.
resul = resul.assign(features=ls_pred).set_index("features")
alfas = alfas.assign(features=["intercepto"]).set_index("features")

In [72]:
# Stack the intercept row on top of the coefficient rows.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` produces the same frame and works on old and new pandas alike.
# NOTE: this rebinds `resul`, so re-running the cell adds another intercept row.
resul = pd.concat([alfas, resul])

In [73]:
# Side-by-side comparison: intercept and per-feature coefficients, one column per model.
resul

Unnamed: 0_level_0,LinearRegression,Lars,Ridge,Lasso,ElasticNet,BayesianRidge
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
intercepto,36.46,36.95,31.6,41.06,42.23,27.55
CRIM,-0.11,-0.11,-0.1,-0.06,-0.08,-0.1
ZN,0.05,0.05,0.05,0.05,0.05,0.05
INDUS,0.02,0.03,-0.01,-0.0,-0.01,-0.04
CHAS,2.69,2.67,2.55,0.0,0.0,1.89
NOX,-17.77,-18.25,-10.78,-0.0,-0.0,-2.14
RM,3.81,3.8,3.85,0.95,0.93,3.67
AGE,0.0,0.0,-0.01,0.02,0.02,-0.01
DIS,-1.48,-1.49,-1.37,-0.67,-0.76,-1.25
RAD,0.31,0.32,0.29,0.26,0.3,0.28
