In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso, RidgeCV, LassoLarsCV, LassoLarsIC
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

In [2]:
def lasso(X, y):
    model_aic = LassoLarsIC(criterion="aic")
    reg_aic = model_aic.fit(X, y)
    y_pred_aic = reg_aic.predict(X)
    mse_aic = mean_squared_error(y, y_pred_aic)

    model_bic = LassoLarsIC(criterion="bic")
    reg_bic = model_bic.fit(X, y)
    y_pred_bic = reg_bic.predict(X)
    mse_bic = mean_squared_error(y, y_pred_bic)

    loo = LeaveOneOut()
    model_cv = LassoLarsCV(cv=loo)
    reg_cv = model_cv.fit(X, y)
    y_pred_cv = reg_cv.predict(X)
    mse_cv = mean_squared_error(y, y_pred_cv)

    return np.array([mse_aic, mse_bic, mse_cv])


def Adaptive_lasso(X, y, initial_coefs, gamma=1):
    weights = 1 / (np.abs(initial_coefs) + np.finfo(float).eps) ** gamma
    X_weighted = X * weights

    mse_results = lasso(X_weighted, y)
    return mse_results

In [3]:
def ridge(X, y):
    alphas = np.logspace(0, 5, 10)
    aic_values = []
    bic_values = []
    n = len(y)
    p = X.shape[1]
    U, D, VT = np.linalg.svd(X)
    D2 = D ** 2
    for alpha in alphas:
        model = Ridge(alpha=alpha)
        model.fit(X, y)
        df = np.sum(D2 / (D2 + alpha))
        rss = mean_squared_error(y, model.predict(X)) * n
        aic = np.log(rss) + df * 2 / n
        bic = np.log(rss) + df * np.log(n) / n
        aic_values.append(aic)
        bic_values.append(bic)

    best_alpha_aic = alphas[np.argmin(aic_values)]
    best_alpha_bic = alphas[np.argmin(bic_values)]

    mse_aic = mean_squared_error(y, Ridge(alpha=best_alpha_aic).fit(X, y).predict(X))
    mse_bic = mean_squared_error(y, Ridge(alpha=best_alpha_bic).fit(X, y).predict(X))

    loo = LeaveOneOut()
    model_cv = RidgeCV(cv=loo)
    reg_cv = model_cv.fit(X, y)
    y_pred_cv = reg_cv.predict(X)
    mse_cv = mean_squared_error(y, y_pred_cv)

    return np.array([mse_aic, mse_bic, mse_cv])


def Adaptive_ridge(X, y, initial_coefs,gamma=1):
    weights = 1 / (np.abs(initial_coefs) + np.finfo(float).eps) ** gamma
    X_weighted = X * weights
    mse_results = ridge(X_weighted, y)
    return mse_results

In [4]:
def data_set_creation(p, ro, is_sparse):
    n = 100
    r_squared = 0.8
    if is_sparse:
        beta = (2/np.sqrt(n)) * np.less_equal(np.arange(1, p+1), np.sqrt(p))
    else:
        beta = 5/(np.arange(1, p+1) * np.sqrt(n))
        
    Cov = np.array([[rho ** abs(i-j) for j in range(p)] for i in range(p)])
    beta = np.array(beta).reshape(-1, 1)
    sigma = np.sqrt((1 - r_squared) / r_squared * (beta.T @ Cov @ beta).item())
    epsilon = np.random.normal(loc=0, scale=sigma, size=n).reshape(-1, 1)
    
    X = np.random.multivariate_normal(np.zeros(p), Cov, size=n)  
    Y = X @ beta + epsilon
    return X, Y

In [5]:
trials = 100
result = []
indexes = []

for is_sparse in [True, False]:
    for p in [10, 25, 50]:
        for rho in [0, 0.25, 0.5]:
            indexes.append(f"{'Sparse:' if is_sparse else 'Dense:'} p={p} rho={rho}")
            
            accumulator = np.zeros(12)

            for trial in range(trials):
                X, Y = data_set_creation(p, rho, is_sparse)
                ols_model = LinearRegression()
                ols_model.fit(X, Y)
                
                mse_values = np.concatenate([
                    lasso(X, Y),      
                    Adaptive_lasso(X, Y,ols_model.coef_), 
                    ridge(X, Y),     
                    Adaptive_ridge(X, Y,ols_model.coef_) 
                ])
                accumulator += mse_values
            result.append(accumulator / trials)
            
columns = [f"{est} ({tune})" for est in ["Lasso", "Adaptive lasso", "Ridge", "Adaptive ridge"] for tune in ["AIC", "BIC", "LOO-CV"]]

In [6]:
pd.DataFrame(result, index=indexes, columns=columns)

Unnamed: 0,Lasso (AIC),Lasso (BIC),Lasso (LOO-CV),Adaptive lasso (AIC),Adaptive lasso (BIC),Adaptive lasso (LOO-CV),Ridge (AIC),Ridge (BIC),Ridge (LOO-CV),Adaptive ridge (AIC),Adaptive ridge (BIC),Adaptive ridge (LOO-CV)
Sparse: p=10 rho=0,0.02817,0.029509,0.028303,0.027191,0.0274,0.027147,0.027223,0.027389,0.027144,0.027148,0.027179,0.027144
Sparse: p=10 rho=0.25,0.037802,0.03985,0.038054,0.036694,0.037205,0.036565,0.036696,0.037737,0.036559,0.036576,0.036623,0.036559
Sparse: p=10 rho=0.5,0.050216,0.052456,0.050578,0.048467,0.049208,0.048204,0.048779,0.049492,0.048158,0.048203,0.048411,0.048158
Sparse: p=25 rho=0,0.042496,0.048734,0.043021,0.037433,0.03838,0.037132,0.037475,0.040405,0.037112,0.03713,0.03722,0.037112
Sparse: p=25 rho=0.25,0.065442,0.072863,0.066296,0.056546,0.059976,0.056153,0.058311,0.06647,0.056068,0.056143,0.056516,0.056068
Sparse: p=25 rho=0.5,0.09662,0.107695,0.097204,0.084978,0.103768,0.083961,0.085589,0.101864,0.082161,0.08247,0.084101,0.082161
Sparse: p=50 rho=0,0.051746,0.066345,0.052341,0.035462,0.042381,0.034314,0.035392,0.060558,0.033821,0.033855,0.034061,0.03382
Sparse: p=50 rho=0.25,0.086302,0.106284,0.088072,0.060563,0.087214,0.05993,0.058348,0.090219,0.053204,0.053392,0.054207,0.053203
Sparse: p=50 rho=0.5,0.135149,0.164266,0.138635,0.112675,0.159065,0.123069,0.09427,0.142392,0.083408,0.084196,0.10465,0.083405
Dense: p=10 rho=0,0.084494,0.085332,0.084469,0.084219,0.084351,0.084178,0.084407,0.084841,0.084177,0.084185,0.084211,0.084177
