# Linear Regression vs Ridge vs Lasso

In [1]:
import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from sklearn import datasets

In [4]:
# Load the scikit-learn diabetes dataset as pandas objects:
# X holds the feature columns, y holds the target.
X, y = datasets.load_diabetes(as_frame=True, return_X_y=True)

# Preview the first rows of the feature table.
X.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


In [5]:
# Preview the first five values of the target series.
y.head(n=5)

0    151.0
1     75.0
2    141.0
3    206.0
4    135.0
Name: target, dtype: float64

In [8]:
from sklearn.linear_model import Ridge, Lasso, LinearRegression

# Fit the three linear models on the full dataset.
# Ridge and Lasso both use a penalty strength of alpha=0.2.
linreg = LinearRegression().fit(X, y)
ridge = Ridge(alpha=0.2).fit(X, y)
lasso = Lasso(alpha=0.2).fit(X, y)

# One column per model, one row per feature.
coefs = pd.DataFrame(
    {
        "coef_linreg": linreg.coef_,
        "coef_ridge": ridge.coef_,
        "coef_lasso": lasso.coef_,
    },
    index=X.columns,
)

# Display the coefficients truncated to integers, with every cell shown as 0
# coloured red.
# NOTE: because of the truncation, any value with |value| < 1 is also shown
# as 0 and coloured red, not only coefficients that are exactly zero.
# `astype(int)` and `Styler.apply` replace `DataFrame.applymap` /
# `Styler.applymap`, which are deprecated since pandas 2.1.
(
    coefs
    .astype(int)
    .style.apply(
        lambda col: ['color: red' if v == 0 else 'color: black' for v in col]
    )
)

Unnamed: 0,coef_linreg,coef_ridge,coef_lasso
age,-10,7,0
sex,-239,-182,-75
bmi,519,457,511
bp,324,284,234
s1,-792,-48,0
s2,476,-78,0
s3,101,-189,-170
s4,177,119,0
s5,751,400,450
s6,67,97,0


## Ridge vs Lasso 

Source: <https://www.analyticsvidhya.com/blog/2022/01/different-types-of-regression-models/>

<img src="rl.png" alt="Ridge vs Lasso">

## Interpretation with p-values

In [9]:
import statsmodels.api as sm

# statsmodels does not add an intercept on its own, so a constant column is
# added to the features before fitting ordinary least squares.
design_matrix = sm.add_constant(X)
ols = sm.OLS(y, design_matrix).fit()

# Full regression report (coefficients, standard errors, p-values, ...).
ols.summary()

0,1,2,3
Dep. Variable:,target,R-squared:,0.518
Model:,OLS,Adj. R-squared:,0.507
Method:,Least Squares,F-statistic:,46.27
Date:,"Mon, 19 Dec 2022",Prob (F-statistic):,3.8299999999999998e-62
Time:,13:54:56,Log-Likelihood:,-2386.0
No. Observations:,442,AIC:,4794.0
Df Residuals:,431,BIC:,4839.0
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,152.1335,2.576,59.061,0.000,147.071,157.196
age,-10.0099,59.749,-0.168,0.867,-127.446,107.426
sex,-239.8156,61.222,-3.917,0.000,-360.147,-119.484
bmi,519.8459,66.533,7.813,0.000,389.076,650.616
bp,324.3846,65.422,4.958,0.000,195.799,452.970
s1,-792.1756,416.680,-1.901,0.058,-1611.153,26.802
s2,476.7390,339.030,1.406,0.160,-189.620,1143.098
s3,101.0433,212.531,0.475,0.635,-316.684,518.770
s4,177.0632,161.476,1.097,0.273,-140.315,494.441

0,1,2,3
Omnibus:,1.506,Durbin-Watson:,2.029
Prob(Omnibus):,0.471,Jarque-Bera (JB):,1.404
Skew:,0.017,Prob(JB):,0.496
Kurtosis:,2.726,Cond. No.,227.0


In [37]:
# Coefficient table extended with the OLS p-values, expressed in percent.
# The models `linreg`, `ridge` and `lasso` fitted in the earlier cell are
# reused here: refitting them on the same X, y with the same alpha would only
# reproduce identical coefficients.
coefs = pd.DataFrame(
    {
        "coef_linreg": linreg.coef_,
        "coef_ridge": ridge.coef_,
        "coef_lasso": lasso.coef_,
        # Select the p-values by feature label so the intercept ("const")
        # row is excluded explicitly rather than by position.
        "p-values": ols.pvalues.loc[X.columns] * 100,
    },
    index=X.columns,
)

# Display everything truncated to integers, with every cell shown as 0
# coloured red.
# NOTE: because of the truncation, a red 0 in the "p-values" column means a
# p-value below 1 %, and a red 0 in a coefficient column means |coef| < 1.
# `astype(int)` and `Styler.apply` replace `DataFrame.applymap` /
# `Styler.applymap`, which are deprecated since pandas 2.1.
(
    coefs
    .astype(int)
    .style.apply(
        lambda col: ['color: red' if v == 0 else 'color: black' for v in col]
    )
)

Unnamed: 0,coef_linreg,coef_ridge,coef_lasso,p-values
age,-10,7,0,86
sex,-239,-182,-75,0
bmi,519,457,511,0
bp,324,284,234,0
s1,-792,-48,0,5
s2,476,-78,0,16
s3,101,-189,-170,63
s4,177,119,0,27
s5,751,400,450,0
s6,67,97,0,30
