*    This program runs a LASSO regression
*                  y = a + b_1 x_1 + b_2 x_2 + ... + b_n x_n  + e
*   the objective function is to minimize
*                            (1 / (2 * n_samples)) * ||y - X*B||^2_2 + alpha * ||B||_1
*   where the L_1 norm (the sum of the absolute values) of the coefficients is penalized.
*  Note:  the smaller the alpha, the smaller the penalty.

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

np.random.seed(1234)     # fix the seed so every run reproduces the same draws,
                         # and so the same data can be reused in the Ridge example

n_samples = 100          # number of simulated observations
n = 15                   # number of candidate regressors

x = np.random.randn(n_samples, n)        # regressors drawn from N(0, 1)
b = np.zeros((n, 1))                     # true coefficients, all zero except two

b[2] = 1                                 # the only two truly nonzero coefficients
b[4] = -0.8

# Simulated response y = x b + e, where e is N(0, 1) noise scaled by 0.5.
# The noise is drawn right after x, so the random stream is consumed in the
# same order as before and the simulated data are unchanged.
noise = np.random.randn(n_samples, 1) * 0.5

# Flatten the (n_samples, 1) product into a 1-D vector of length n_samples.
# reshape() is used rather than assigning to y.shape, which NumPy discourages.
y = (np.matmul(x, b) + noise).reshape(-1)

# Traditional OLS regression analysis (the unpenalized benchmark)

reg = LinearRegression()                 # ordinary least squares estimator
reg.fit(x, y)                            # regress y on all n columns of x

print(reg.intercept_)                    # the intercept
print(reg.coef_)                         # the slopes
 

In [None]:
# Fit a single Lasso model at one hand-picked penalty level

from sklearn import linear_model

alpha = 0.5                                          # weight on the L1 penalty

# Build the estimator and fit it on the simulated data in one step
# (fit() returns the estimator itself).
lesso = linear_model.Lasso(alpha=alpha).fit(x, y)

# Intercept on the first line, slope vector on the following line(s).
print(lesso.intercept_, lesso.coef_, sep='\n')

In [None]:
# Fit a Lasso model at each value on a small grid of alphas


m = 3                                  # number of grid points
sequ = np.arange(1, m + 1)             # the integers 1, 2, ..., m
alpha = sequ / m                       # penalty grid 1/m, 2/m, ..., 1

for penalty in alpha:
    # Refit from scratch at this penalty level.
    lesso = linear_model.Lasso(alpha=penalty).fit(x, y)
    print(penalty)                     # the alpha used for this fit
    print(lesso.intercept_)            # the intercept
    print(lesso.coef_)                 # the slopes

In [None]:
# How should alpha be chosen?
#
# Cross-validation is the usual answer
# (see https://scikit-learn.org/stable/modules/cross_validation.html).
# With cv = 5 the data are sliced into five pieces. Each piece in turn is held
# out, the model is fitted on the other four, and the held-out piece is used
# to check how well each candidate alpha predicts. The performance is then
# averaged over the five held-out pieces to pick alpha.

from sklearn.linear_model import LassoCV

xx = np.array(x)          # a separate copy of the regressors
yy = y                    # the response, same object as y

LCV = LassoCV(cv=5)       # 5-fold cross-validated Lasso
LCV.fit(xx, yy)

print('The best choice of alpha', LCV.alpha_, sep='\n')
print('The intercept and slopes ', LCV.intercept_, LCV.coef_, sep='\n')
 
 

In [None]:
# Double check: refit a plain Lasso at the alpha selected by cross-validation,
# so its intercept and slopes can be compared with the LassoCV output.

alpha = LCV.alpha_                                   # the cross-validated penalty

lesso = linear_model.Lasso(alpha=alpha).fit(x, y)    # fit() returns the estimator

# Intercept on the first line, slope vector on the following line(s).
print(lesso.intercept_, lesso.coef_, sep='\n')