In [1]:
from sklearn.datasets import load_diabetes
import pandas as pd

In [2]:
# Load scikit-learn's bundled diabetes regression dataset. Later cells read
# `.data`, `.target` and `.feature_names` from the returned object.
data = load_diabetes()

In [3]:
# Feature matrix as a NumPy array (442 samples x 10 features for this dataset).
X = data["data"]

In [4]:
# Regression target, one value per row of the feature matrix.
y = data["target"]

In [10]:
# Quick look at the feature matrix; NumPy abbreviates the repr of large arrays
# and reports the shape at the end.
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]], shape=(442, 10))

In [6]:
from sklearn.model_selection import train_test_split
# Keep 75% of the rows for training and hold out the rest for testing.
# The fixed random_state makes the shuffle (and so the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    random_state=42,
)

In [53]:
from sklearn.linear_model import Lasso
import numpy as np
# Regularisation strengths to sweep: one value per decade, from an almost
# negligible penalty (1e-4) up to a very heavy one (1e4).
alphas = [0.0001,0.001,0.01,0.1,1,10,100,1000,10000]

# Fit one Lasso model per alpha on the training split and record its
# coefficients. The rows are gathered in a plain list and the DataFrame is
# built once at the end, instead of enlarging an initially empty frame one row
# at a time with `.loc[len(df)] = ...`, which copies data on every insert and
# leaves the column dtypes to be worked out incrementally.
lasso_rows = []
for i in alphas:
    L1 = Lasso(alpha=i)
    L1.fit(X_train, y_train)
    # First entry is the alpha used; the remaining entries are the fitted
    # coefficients, in the same order as data.feature_names.
    lasso_rows.append([float(i), *L1.coef_])

# One row per alpha: an 'alphas' column followed by one column per feature.
lasso_values = pd.DataFrame(lasso_rows, columns=['alphas', *data.feature_names])
    

In [54]:
# Show the Lasso coefficient table: one row per alpha, one column per feature.
lasso_values

Unnamed: 0,alphas,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.0001,47.743064,-241.901337,532.069249,381.478601,-912.058886,503.389061,113.86228,268.242908,693.459611,26.346296
1,0.001,47.683401,-241.095267,532.952448,380.720449,-854.088499,459.590525,86.082829,257.001941,672.332806,26.541601
2,0.01,45.311371,-232.014811,540.937002,374.676427,-484.590533,167.27856,-65.743793,212.322577,532.552116,27.375244
3,0.1,0.0,-151.414211,540.862298,337.221319,-85.189476,-0.0,-262.902069,0.0,418.247026,9.925653
4,1.0,0.0,-0.0,398.385831,46.175421,0.0,0.0,-0.0,0.0,238.187309,0.0
5,10.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0
6,100.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0
7,1000.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0
8,10000.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0


In [55]:
from sklearn.linear_model import Ridge
import numpy as np
# Same sweep of regularisation strengths as used for Lasso, so the two
# coefficient tables can be compared row by row.
alphas = [0.0001,0.001,0.01,0.1,1,10,100,1000,10000]

# Fit one Ridge model per alpha on the training split and record its
# coefficients. The rows are gathered in a plain list and the DataFrame is
# built once at the end, instead of enlarging an initially empty frame one row
# at a time with `.loc[len(df)] = ...`, which copies data on every insert and
# leaves the column dtypes to be worked out incrementally.
ridge_rows = []
for i in alphas:
    R1 = Ridge(alpha=i)
    R1.fit(X_train, y_train)
    # First entry is the alpha used; the remaining entries are the fitted
    # coefficients, in the same order as data.feature_names.
    ridge_rows.append([float(i), *R1.coef_])

# One row per alpha: an 'alphas' column followed by one column per feature.
ridge_values = pd.DataFrame(ridge_rows, columns=['alphas', *data.feature_names])
    

In [56]:
# Show the Ridge coefficient table: one row per alpha, one column per feature.
ridge_values

Unnamed: 0,alphas,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.0001,47.82071,-241.915766,532.149458,381.436282,-906.903126,499.204878,111.782965,267.865031,691.352713,26.452445
1,0.001,48.381664,-241.272301,533.490847,380.390465,-815.288463,427.754704,71.002884,255.003655,656.134684,27.504959
2,0.01,50.760628,-236.231352,536.041371,373.825241,-426.93534,127.149873,-100.43191,200.100975,505.469879,33.912978
3,0.1,52.866087,-200.50867,493.959172,339.279834,-118.063006,-80.910795,-215.280522,153.414567,365.682045,62.422629
4,1.0,50.552012,-67.722224,278.301228,197.622638,-6.245836,-26.226726,-151.394331,120.323359,215.854463,101.755774
5,10.0,17.837632,-2.019277,60.239603,45.072168,13.963264,10.035979,-38.747421,39.883162,53.329939,34.865539
6,100.0,2.290874,0.121801,7.033136,5.322279,2.143349,1.717252,-4.690358,5.104204,6.440267,4.3937
7,1000.0,0.235658,0.016582,0.715912,0.542449,0.224378,0.18141,-0.479381,0.52493,0.658125,0.450967
8,10000.0,0.023633,0.001704,0.07172,0.05435,0.022542,0.018241,-0.048044,0.052642,0.065957,0.045216


# Here we can see that increasing alpha drives the Lasso coefficients to exactly zero.
# Increasing alpha also shrinks the Ridge coefficients toward zero, but they never become exactly zero.

# Ridge Regression (L2 penalty, λ∑w_i^2):
# - Uses squared magnitude of coefficients.
# - Encourages small values but never forces exact zero.

# Lasso Regression (L1 penalty, λ∑|w_i|):
# - Uses absolute magnitude of coefficients.
# - Can shrink some coefficients exactly to zero, leading to sparsity.

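# Mathematical Summary (simplified case of orthonormal features; w_ols is the unpenalized
# least-squares coefficient and c > 0 is a constant that depends on how the loss is scaled):
# Lasso (L1): soft-thresholding. A threshold proportional to λ is subtracted from |w_ols|:
#   w = sign(w_ols) · max(|w_ols| − cλ, 0), so any coefficient with |w_ols| ≤ cλ becomes exactly zero.
# Ridge (L2): proportional shrinkage. w_ols is divided by a factor that grows with λ:
#   w = w_ols / (1 + cλ), which only shrinks coefficients but never makes them exactly zero.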

# Lasso's sparsity property:
# - Since Lasso can shrink some coefficients to exactly zero, it performs automatic feature selection.
# - This helps in reducing model complexity and improving interpretability.

# Observation in the diabetes dataset:
# - Despite regularization, the coefficient of BMI increases slightly for small α values
#   (up to about α = 0.01 in both tables above) while most other coefficients decrease in magnitude.
# - This happens because BMI is the most predictive feature: as both Lasso and Ridge shrink the other,
#   correlated coefficients, BMI compensates by absorbing their predictive power.
#   (In the Ridge table, age and s6 also grow at first for the same reason.)
# - However, for **larger α values**, regularization dominates, and BMI’s coefficient starts decreasing normally.
