*    This program runs a Ridge regression
*                  y = a + b_1 x_1 + b_2 x_2 + ... + b_n x_n  + e
*   The objective function to minimize (as implemented by sklearn's Ridge) is
*                            ||y - X*B||^2_2 + alpha * ||B||^2_2
*   where the penalty term is the squared L2-norm of the coefficients (i.e. the regression is penalized by the L2-norm).

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Fix the global NumPy seed so that every run -- and every model fitted on
# this data further down -- sees exactly the same simulated sample.
np.random.seed(1234)

n = 15                                          # number of regressors
x = np.random.standard_normal(size=(100, n))    # 100 draws of each x_j from N(0,1)

# True coefficients, stored as an (n, 1) column: only two entries are nonzero,
# the remaining regressors have no effect on y.
b = np.zeros((n, 1))
b[[2, 4], 0] = (1.0, -0.8)

# Simulated response: linear signal plus Gaussian noise scaled by 0.5,
# flattened from a (100, 1) column to a 1-D vector of length 100.
y = (x @ b + 0.5 * np.random.standard_normal(size=(100, 1))).reshape(100)

# Traditional OLS regression of y on x, used as the unpenalized benchmark.
reg = LinearRegression().fit(x, y)

print(reg.intercept_, reg.coef_, sep="\n")      # the intercept, then the slopes
 

0.05861953919348764
[-0.04699999  0.00814037  1.04098819 -0.02979184 -0.84455877  0.05846838
  0.11144969 -0.01746125 -0.01601501  0.06301223 -0.11337782 -0.06213142
  0.0475468  -0.02151424  0.01259393]


In [None]:
# A simple Ridge with a given alpha

from sklearn import linear_model

# Penalty weight used for this single Ridge fit.
alpha = 0.5

# Fit the penalized regression of y on x in one step.
ridge = linear_model.Ridge(alpha=alpha).fit(x, y)

print(ridge.intercept_, ridge.coef_, sep="\n")  # the intercept, then the slopes

0.0596356170888418
[-0.04756846  0.00622885  1.03500552 -0.02911105 -0.84005009  0.05834171
  0.11191723 -0.01657402 -0.01557068  0.06401372 -0.11253928 -0.06216672
  0.04727796 -0.02202547  0.01139859]


In [None]:
# A simple Ridge with a vector of alphas

m = 3
sequ = np.arange(1, m + 1)             # the vector (1, 2, 3, ..., m)
alpha = sequ / m                       # alpha = (1/m, 2/m, 3/m, ..., 1)

# Refit the Ridge once per penalty value and report each fit in turn, so the
# effect of a growing alpha on the estimates can be read off the printout.
for penalty in alpha:
    ridge = linear_model.Ridge(alpha=penalty).fit(x, y)
    print(penalty)
    print(ridge.intercept_)            # the intercept
    print(ridge.coef_)                 # the slopes

0.3333333333333333
0.0592983845638167
[-0.04738072  0.00685903  1.0369905  -0.02933667 -0.84154675  0.05838389
  0.11176363 -0.01686657 -0.01571733  0.06368457 -0.11281781 -0.06215581
  0.04736677 -0.02185659  0.01179372]
0.6666666666666666
0.05997140623468994
[-0.04775448  0.00560553  1.03302967 -0.02888673 -0.8385596   0.05829956
  0.11206862 -0.0162846  -0.01542547  0.06433828 -0.11226173 -0.06217679
  0.04718993 -0.02219284  0.01100675]
1.0
0.0606387035277951
[-0.04812141  0.00437913  1.02910509 -0.02844196 -0.83559696  0.05821539
  0.11236487 -0.01571502 -0.01513929  0.06497388 -0.11170956 -0.06219445
  0.04701619 -0.02252313  0.01023275]


In [None]:
#  Which alpha to choose? 

from sklearn.linear_model import RidgeCV

# Separate handles on the data for the cross-validated fit: xx is a fresh
# array built from x, yy is simply another name for y.
xx, yy = np.array(x), y

# 5-fold cross-validation over the candidate penalties. No `alphas` argument
# is given, so the candidates are whatever RidgeCV uses by default.
RCV = RidgeCV(cv=5)
RCV.fit(xx, yy)

# The selected alpha, then the intercept, then the slopes.
print(RCV.alpha_, RCV.intercept_, RCV.coef_, sep="\n")
 

In [None]:
# Double check: refit a plain Ridge at the alpha selected by cross-validation,
# so its printout can be compared against the RidgeCV estimates.
alpha = RCV.alpha_

ridge = linear_model.Ridge(alpha=alpha)
ridge.fit(x, y)

for fitted in (ridge.intercept_, ridge.coef_):  # the intercept, then the slopes
    print(fitted)