In [1]:
import numpy as np
import pandas as pd

In [206]:
data = pd.read_csv("data/Coding2_Data.csv")

var_names = data.columns
y = data[['Y']].to_numpy()
X = data.drop(['Y'], axis = 1).to_numpy()
print(X.shape, y.shape)

(506, 13) (506, 1)


In [207]:
def one_var_lasso(v, z, penalty):
    n = v.shape[0]
    temp = z.T @ z
    a = (z.T @ v)

    if a < -penalty:
        return (a + (penalty * 506)) / temp
    elif a > penalty:
        return (a - (penalty * 506)) / temp
    else:
        return 0

In [208]:
def MyLasso(X, y, lam_seq, maxit = 100):
    # Input
    # X: n-by-p design matrix without the intercept 
    # y: n-by-1 response vector 
    # lam.seq: sequence of lambda values (arranged from large to small)
    # maxit: number of updates for each lambda 
    
    # Output
    # B: a (p+1)-by-len(lam.seq) coefficient matrix 
    #    with the first row being the intercept sequence 
    n, p = X.shape
    nlam = len(lam_seq)
    B = np.zeros((p+1, nlam))
    
    ##############################
    # YOUR CODE: 
    # (1) newX = Standardizad X; 
    # (2) Record the centers and scales used in (1) 
    ##############################
    newX = np.copy(X)
    means = np.zeros(M)
    sd = np.zeros(M)
    for j in range(M):
        means[j] = np.mean(newX[:, j])
        sd[j] = np.mean(newX[:, j])
        newX[:, j] = (newX[:, j] - means[j]) / sd[j]
    y_mean = np.mean(y)

    # Initilize coef vector b and residual vector r
    b = np.zeros(p)
    r = y
    
    # Triple nested loop
    for m in range(nlam):
        for step in range(maxit):
            for j in range(p):
                X_j = newX[:, j].reshape(-1,1)
                r = r + X_j * b[j]
                b[j] = one_var_lasso(r, X_j, lam_seq[m])
                r = r - X_j * b[j]
        B[1:, m] = b 
        
    ##############################
    # YOUR CODE:
    # Scale back the coefficients;
    # Update the intercepts stored in B[, 0]
    ##############################
    for m in range(nlam):
        B[0, m] = y_mean
        B[1:, m] *= sd
    print(means)
    
    return(B)

In [201]:
log_lam_seq = np.linspace(-1, -8, num = 80)
lam_seq = np.exp(log_lam_seq)
myout = MyLasso(X, y, lam_seq, maxit = 100)

[-0.78043626  1.13636364  2.16019166  0.06916996 -0.61002573  1.83186406
  5.06079038  1.1880324   1.86766099  5.93140464  2.15005374  3.56674032
  3.41767318]


In [203]:
lasso_coef = pd.read_csv("data/Coding2_lasso_coefs.csv").to_numpy()
print(lasso_coef[:, 0])

[3.03451287 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.        ]


In [202]:
for j in range(len(lam_seq)):
    print(myout[:, j])

[ 3.03451287e+00 -1.09017860e-02 -3.06395778e-03 -1.16146848e-01
  5.84916232e-04 -8.83547657e-02  9.31310039e-01  1.64499058e-01
 -2.21326344e-01  1.27820734e-01 -2.91125178e+00 -2.03330020e-01
  5.24083742e-01 -3.18739620e+00]
[ 3.03451287e+00 -1.07319183e-02 -2.65175105e-03 -1.11328184e-01
  5.79937672e-04 -8.97966380e-02  9.72164748e-01  1.64457782e-01
 -2.24479803e-01  1.37203503e-01 -3.26649219e+00 -2.01774731e-01
  5.27616318e-01 -3.17722635e+00]
[ 3.03451287e+00 -1.05762797e-02 -2.27447972e-03 -1.06917059e-01
  5.75380812e-04 -9.11158996e-02  1.00955592e+00  1.64419008e-01
 -2.27365220e-01  1.45789245e-01 -3.59162188e+00 -2.00351452e-01
  5.30850367e-01 -3.16791977e+00]
[ 3.03451287e+00 -1.04338386e-02 -1.92919924e-03 -1.02879976e-01
  5.71210352e-04 -9.23232939e-02  1.04377650e+00  1.64383521e-01
 -2.30005966e-01  1.53646956e-01 -3.88918209e+00 -1.99048861e-01
  5.33810185e-01 -3.15940235e+00]
[ 3.03451287e+00 -1.03034758e-02 -1.61319695e-03 -9.91852194e-02
  5.67393529e-04 -9