In [1]:
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#Code for the adaptive lasso
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

import numpy as np
from scipy.optimize import minimize

In [2]:
#define functions
def maker(N,n_vars,p):
    """Simulate a linear-regression data set for Monte Carlo experiments.

    Each of the n_vars regressors is a vector of N standard-normal draws.
    With probability p a regressor receives a standard-normal coefficient;
    otherwise its true coefficient is 0. A standard-normal intercept and a
    standard-normal error term are then added to the dependent variable.

    Returns a list [x, y, b] of numpy arrays:
        x -- the regressors, one row per variable (n_vars rows of length N)
        y -- the dependent variable (length N)
        b -- the true slopes followed by the intercept (length n_vars + 1)
    """
    regressors = []    #one length-N array per variable
    effects = []    #true coefficients, in the same order as the regressors
    signal = np.zeros(N)    #running sum of effect*regressor

    for _ in range(n_vars):
        column = np.random.normal(loc = 0.0, scale = 1.0, size = N)    #draw the regressor
        regressors.append(column)
        is_active = np.random.uniform(0,1) < p    #does this variable matter?
        #only active variables get a random effect; the rest are exactly 0
        effect = np.random.normal(loc = 0.0, scale = 1.0) if is_active else 0
        effects.append(effect)
        signal = signal + effect*column

    intercept = np.random.normal(loc = 0.0, scale = 1.0)    #random intercept
    effects.append(intercept)    #the intercept is stored last in the effects
    noise = np.random.normal(loc = 0.0, scale = 1.0, size = N)    #error term
    response = intercept + signal + noise
    return [np.array(regressors),np.array(response),np.array(effects)]

In [3]:
def adaptive_lasso(b,X,y,alpha,gamma,Beta_OLS=None):
    """Return the value of the adaptive lasso objective function at b.

    The objective is

        sum((y - X b)**2) + alpha * sum(|b_j| / |Beta_OLS_j|**gamma)

    i.e. the residual sum of squares plus an L1 penalty whose per-coefficient
    weights are the inverse absolute OLS coefficients raised to gamma.

    Parameters
    ----------
    b : array, candidate coefficients (one per column of X)
    X : 2-D array, design matrix with one row per observation
    y : array, dependent variable
    alpha : float, overall penalty strength
    gamma : float, exponent applied to the OLS-based weights
    Beta_OLS : array, optional
        Precomputed OLS coefficients of y on X. They do not depend on b, so a
        caller that evaluates the objective many times can compute them once
        and pass them in. When omitted they are computed here.

    Returns
    -------
    float : the objective value.

    Note: an OLS coefficient that is exactly zero gives an infinite weight
    (for gamma > 0), exactly as in the textbook formula; no guard is applied.
    """
    if Beta_OLS is None:
        #least-squares solve instead of inverting X'X: more accurate and it
        #still returns (minimum-norm) coefficients when X is rank deficient
        Beta_OLS = np.linalg.lstsq(X, y, rcond=None)[0]
    residuals = y - X.dot(b)
    weights = 1.0/(np.abs(Beta_OLS)**gamma)    #adaptive penalty weights
    #np.sum reduces in compiled code; the builtin sum loops element by element
    return np.sum(residuals**2) + alpha*np.sum(weights*np.abs(b))

In [4]:
def Adaptive_Lasso_tuner(X,y,gamma,lo,hi,step,verbose=True):
    """Tune the alpha parameter of the adaptive lasso and fit the final model.

    For every alpha on the grid np.arange(lo, hi, step) the adaptive lasso
    objective is minimized with BFGS and the within-sample squared error of
    the fit is recorded. The tuned alpha is the average of the grid points
    weighted by their share of the total error; the model is then refit at
    that alpha. All fits start from the same random starting values.

    NOTE(review): the weights are proportional to the in-sample error, so grid
    points that fit worse pull alpha toward themselves -- confirm that this is
    the intended tuning rule.

    Parameters
    ----------
    X : 2-D array, design matrix with one row per observation
    y : array, dependent variable
    gamma : float, exponent of the adaptive weights (passed to adaptive_lasso)
    lo, hi, step : bounds and spacing of the alpha grid (hi is excluded)
    verbose : bool, print each grid point as it is fitted (default True)

    Returns
    -------
    list : [coefficients, alpha], the fitted coefficients and the tuned alpha.

    Raises
    ------
    ValueError : if the alpha grid is empty or the total error is zero, in
        which case the error-share weights cannot be formed.
    """
    grid = np.arange(lo,hi,step=step)    #set up the alpha grid
    if grid.size == 0:
        raise ValueError("alpha grid is empty: np.arange(lo, hi, step) produced no points")
    b = np.random.uniform(size = X.shape[1])*0.1    #random starting values, one per column of X
    error = np.empty(grid.size)    #within-sample error at each grid point
    for k, g in enumerate(grid):    #loop over grid points
        if verbose:
            print(g)
        model = minimize(adaptive_lasso, x0 = b, args = (X,y,g,gamma), method = 'BFGS')    #optimize the adaptive lasso
        error[k] = np.sum((y - X.dot(model.x))**2)    #squared error of the fitted coefficients
    total = np.sum(error)
    if total == 0:
        #dividing by zero would silently turn alpha into NaN
        raise ValueError("total within-sample error is zero; cannot weight the alpha grid")
    weights = error/total    #normalize so that the errors act as weights
    alpha = np.sum(weights*grid)    #choose alpha as the weighted average
    model = minimize(adaptive_lasso, x0 = b, args = (X,y,alpha,gamma), method = 'BFGS')    #refit using the tuned alpha
    coefficients = model.x    #extract the coefficients
    return [coefficients,alpha]

In [5]:
#make the data and put it into the correct form
SEED = 0    #fixed seed so that a fresh Restart & Run All reproduces the same draw
np.random.seed(SEED)    #maker and Adaptive_Lasso_tuner draw from NumPy's global RNG
N = 10000    #number of observations
n_var = 20    #number of candidate regressors
n_extra = 4    #not used in this cell
Data = maker(N,n_var,p=0.6)    #make the data: [regressors, dependent variable, true effects]
X = np.c_[Data[0].T,np.ones(N)]    #observations in rows, plus a column of 1s for the constant
y = Data[1]    #pull out the dependent variable


#run the model
model = Adaptive_Lasso_tuner(X,y,1.0,0,10,0.5)    #tune alpha on the grid 0, 0.5, ..., 9.5 with gamma = 1

print(model[0])    #estimated coefficients (intercept last)
print(Data[2])    #true coefficients (intercept last)

0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
8.5
9.0
9.5
[  1.10332199e+00  -5.19537449e-09  -7.85735212e-09   3.88490652e-01
   1.60156997e-02  -5.78358963e-08  -3.19075058e-09  -8.42216498e-09
  -8.53849983e-09  -7.67663257e-09   8.88430233e-01  -1.44626895e-01
  -6.15157001e-09  -1.23576056e+00   2.90256125e-01  -6.40686054e-01
  -1.00308452e+00  -6.64228803e-09  -5.72444102e-09  -2.23235637e-02
  -2.48364859e-01]
[ 1.10539068  0.          0.          0.40649126  0.          0.          0.
  0.          0.          0.          0.88884301 -0.15617119  0.
 -1.23354496  0.29468643 -0.64324459 -1.00241751  0.          0.          0.
 -0.25759094]
