In [2]:
import numpy as np
from numba import njit
from sklearn.linear_model import Lasso
from itertools import product
import pandas as pd
from sklearn.linear_model import LinearRegression
from scipy import linalg

In [3]:
@njit
def soft_threshold_numba(rho, lamda, w):
    """Soft-thresholding operator with threshold ``lamda * w``.

    Values of ``rho`` outside the band ``[-lamda * w, lamda * w]`` are moved
    towards zero by the threshold; values inside the band map to ``0.0``.
    """
    cutoff = lamda * w
    if rho < -cutoff:
        return rho + cutoff
    if rho > cutoff:
        return rho - cutoff
    return 0.0

In [4]:
@njit
def get_lamda_path_numba(X, y):
    """Return a log-spaced, decreasing grid of 100 penalty values.

    The largest value is ``max_j |sum_i X[i, j] * y[i]| / m`` and the smallest
    is that value times ``1e-4``. ``y`` is reshaped to an ``(m, 1)`` column
    before the column-wise products are formed.
    """
    n_obs, _ = X.shape
    ratio_min = 0.0001
    n_lamdas = 100

    y_col = y.reshape((n_obs, 1))
    column_products = np.sum(X * y_col, axis=0)
    lamda_largest = np.max(np.abs(column_products)) / n_obs

    # Equally spaced on the log scale, then mapped back with exp.
    log_grid = np.linspace(
        np.log(lamda_largest), np.log(lamda_largest * ratio_min), np.int64(n_lamdas)
    )
    return np.exp(log_grid)

In [5]:
@njit
def count_non_zero_coeffs(theta_vec):
    """Count the entries of ``theta_vec`` whose absolute value exceeds 1e-4."""
    n_nonzero = 0
    for coefficient in theta_vec:
        n_nonzero += 1 if np.abs(coefficient) > 1e-04 else 0
    return n_nonzero

In [6]:
@njit
def lasso_numba(
    X,
    y,
    lamda_path=None,
    penalty_factors=None,
    theta=None,
    num_iters=100,
    intercept=True,
    thresh=1e-7,
    active_thresh=1e-7,
    warm_start=True,
):
    """Weighted lasso along a penalty path via cyclic coordinate descent.

    ``X`` and ``y`` are standardized internally (column means / standard
    deviations from ``.mean()`` / ``.std()``), the problem is solved on the
    standardized scale for every penalty value, and each solution is also
    mapped back to the original ("natural") scale.

    Parameters
    ----------
    X : 2-D array with one row per observation.
    y : response; it is indexed per observation and subtracted from
        ``X @ theta`` (an ``(m, 1)`` column) below -- presumably an ``(m, 1)``
        array, confirm against callers.
    lamda_path : optional array of penalty values. When None, the grid from
        ``get_lamda_path_numba`` on the standardized data is used. Either way
        the values are multiplied by the number of observations internally.
    penalty_factors : optional per-coefficient penalty weights, indexed like
        ``theta`` (entry 0 belongs to the intercept when ``intercept`` is
        True and is not used in that case). Defaults to all ones.
    theta : optional starting coefficients of shape ``(p, 1)`` (``p`` counts
        the intercept column when present). NOTE: a supplied array is updated
        in place.
    num_iters : maximum number of coordinate-descent passes per penalty value.
    intercept : if True, a leading column of ones is added and its
        coefficient is updated without soft-thresholding.
    thresh : a coordinate counts as converged in a pass when its update
        changes it by less than this.
    active_thresh : coordinates whose updated magnitude is below this are
        dropped from the active set.
    warm_start : if False, ``theta`` is reset to zeros before each penalty
        value; otherwise the previous solution is the starting point.

    Returns
    -------
    (lamdas, thetas, thetas_nat, BIC) : four lists with one entry per penalty
        value -- the penalty value (on the scale of ``lamda_path``), the
        flattened standardized-scale coefficients, the natural-scale
        coefficients, and the BIC-type criterion computed below.
    """

    m, p = X.shape

    x_mean = np.zeros((p,), dtype=np.float64)

    for i in range(p):
        x_mean[i] = X[:, i].mean()

    x_std = np.zeros((p,), dtype=np.float64)

    for i in range(p):
        x_std[i] = X[:, i].std()

    y_mean = np.mean(y)
    y_std = np.std(y)

    X_standardized = (X - x_mean) / x_std
    y_standardized = (y - y_mean) / y_std

    # Natural-scale design with a leading ones column; only used for the
    # residual sum of squares in the BIC computation further down.
    if intercept:
        X_tmp = np.ones((m, p + 1))
        X_tmp[:, 1:] = X
        X = X_tmp

    # The penalty values are scaled by the number of observations m here and
    # divided by m again before being reported.
    if lamda_path is None:
        path = m * get_lamda_path_numba(X=X_standardized, y=y_standardized)
    else:
        path = m * lamda_path

    if intercept:
        X_tmp = np.ones((m, p + 1))
        X_tmp[:, 1:] = X_standardized
        X_standardized = X_tmp

    # From here on p includes the intercept column (if any).
    m, p = X_standardized.shape

    if theta is None:
        theta = np.zeros((p, 1))

    if penalty_factors is None:
        penalty_factors = np.ones((p, 1))

    lamdas = []
    thetas = []
    thetas_nat = []
    BIC = []

    for lamda in path:
        if not warm_start:
            theta = np.zeros((p, 1))
        sec_check_all_converged = False
        active_set = np.arange(p)
        active_set_converged = False

        for _i in range(num_iters):
            # Phase 1: cycle over the current active set until every
            # coordinate in it moves by less than `thresh` in one pass.
            if (active_set.size != 0) and (not active_set_converged):
                active_set_converged_check = np.full((len(active_set),), False)
                active_set_update = np.full((len(active_set),), True)

                for subindex, j in enumerate(active_set):
                    w_j = penalty_factors[j].item()

                    # Predictions are recomputed for every coordinate so they
                    # reflect the coordinates already updated in this pass.
                    y_pred = X_standardized @ theta

                    # rho: inner product of column j with the residual that
                    # excludes coordinate j's own contribution;
                    # z: squared norm of column j.
                    rho = 0.0
                    z = 0.0

                    for obs in range(m):
                        rho += X_standardized[obs, j].item() * (
                            y_standardized[obs].item()
                            - y_pred[obs].item()
                            + theta[j].item() * X_standardized[obs, j].item()
                        )
                        z += np.square(X_standardized[obs, j].item())

                    if intercept:
                        if j == 0:
                            # Intercept coordinate: plain least-squares
                            # update, no soft-thresholding.
                            tmp = rho / z
                            if np.abs(tmp) < active_thresh:
                                active_set_update[subindex] = False
                            if np.abs(theta[j] - tmp) < thresh:
                                active_set_converged_check[subindex] = True
                            theta[j] = tmp
                        else:
                            tmp = (1 / z) * soft_threshold_numba(rho, lamda, w_j)
                            if np.abs(tmp) < active_thresh:
                                active_set_update[subindex] = False
                            if np.abs(theta[j] - tmp) < thresh:
                                active_set_converged_check[subindex] = True
                            theta[j] = tmp

                    else:
                        tmp = (1 / z) * soft_threshold_numba(rho, lamda, w_j)
                        if np.abs(tmp) < active_thresh:
                            active_set_update[subindex] = False
                        if np.abs(theta[j] - tmp) < thresh:
                            active_set_converged_check[subindex] = True
                        theta[j] = tmp

                active_set_converged = np.all(active_set_converged_check)
                active_set = active_set[active_set_update]

            # Phase 2: once the active set is empty or converged, run one pass
            # over all p coordinates. If that pass converges too, stop;
            # otherwise continue with the active set that pass produced.
            elif not sec_check_all_converged:
                active_set = np.arange(p)

                active_set_converged_check = np.full((len(active_set),), False)
                active_set_update = np.full((len(active_set),), True)

                m, p = X_standardized.shape

                for subindex, j in enumerate(active_set):
                    w_j = penalty_factors[j].item()

                    y_pred = X_standardized @ theta
                    rho = 0.0
                    z = 0.0

                    for obs in range(m):
                        rho += X_standardized[obs, j].item() * (
                            y_standardized[obs].item()
                            - y_pred[obs].item()
                            + theta[j].item() * X_standardized[obs, j].item()
                        )
                        z += np.square(X_standardized[obs, j].item())

                    if intercept:
                        if j == 0:
                            tmp = rho / z
                            if np.abs(tmp) < active_thresh:
                                active_set_update[subindex] = False
                            if np.abs(theta[j] - tmp) < thresh:
                                active_set_converged_check[subindex] = True
                            theta[j] = tmp
                        else:
                            tmp = (1 / z) * soft_threshold_numba(rho, lamda, w_j)
                            if np.abs(tmp) < active_thresh:
                                active_set_update[subindex] = False
                            if np.abs(theta[j] - tmp) < thresh:
                                active_set_converged_check[subindex] = True
                            theta[j] = tmp

                    else:
                        tmp = (1 / z) * soft_threshold_numba(rho, lamda, w_j)
                        if np.abs(tmp) < active_thresh:
                            active_set_update[subindex] = False
                        if np.abs(theta[j] - tmp) < thresh:
                            active_set_converged_check[subindex] = True
                        theta[j] = tmp

                active_set_converged = np.all(active_set_converged_check)
                active_set = active_set[active_set_update]

                if active_set_converged:
                    sec_check_all_converged = True
                    break
            else:
                break

        # Map the standardized-scale solution back to the natural scale.
        if not intercept:
            theta_tmp = theta.flatten() / x_std * y_std
        if intercept:
            theta_0 = (
                theta.flatten()[0] - np.sum((x_mean / x_std) * theta.flatten()[1:])
            ) * y_std + y_mean
            theta_betas = theta.flatten()[1:] / x_std * y_std
            theta_tmp = np.ones((p,))
            theta_tmp[1:] = theta_betas
            theta_tmp[0] = theta_0

        # BIC-type criterion on the natural scale: residual sum of squares
        # scaled by m * var(y), plus log(m) / m per coefficient whose
        # magnitude exceeds the cutoff in count_non_zero_coeffs.
        m, p = X.shape
        theta_bic = np.ones((p, 1))
        theta_bic[:, 0] = theta_tmp
        residuals_hat = np.sum(np.square(y - X @ theta_bic))
        df_lamda = count_non_zero_coeffs(theta_vec=theta_bic.flatten())
        BIC_lasso = residuals_hat / (m * y_std ** 2) + np.log(m) / m * df_lamda

        # Report the penalty on the caller's scale (undo the m scaling).
        lamdas.append(lamda / m)
        thetas.append(np.copy(theta).flatten())
        thetas_nat.append(theta_tmp)
        BIC.append(BIC_lasso)

    return lamdas, thetas, thetas_nat, BIC

In [7]:
def adaptive_lasso(
    X,
    y,
    intercept=True,
    lamda_path=None,
    gamma_path=None,
    first_stage="Lasso",
    num_iters=100,
):
    """Fit the adaptive lasso for every exponent in ``gamma_path``.

    A first-stage estimate provides per-coefficient penalty weights
    ``1 / |coef| ** gamma``; ``lasso_numba`` is then run once per ``gamma``
    with those weights as ``penalty_factors``.

    NOTE: this name is defined again in later cells of the notebook; the
    definition executed last is the one in effect.

    Parameters
    ----------
    X : 2-D array of shape ``(m, p)``.
    y : response, passed unchanged to the first-stage and second-stage fits.
    intercept : forwarded to the OLS first stage and to the weighted lasso.
        When True, an unpenalised-weight entry of 1 is prepended for the
        intercept.
    lamda_path : penalty grid forwarded to ``lasso_numba`` (None lets it
        build its own grid).
    gamma_path : iterable of exponents; defaults to
        ``[0.001, 0.01, 0.1, 0.5, 1, 2, 3, 4, 6, 8]``.
    first_stage : ``"OLS"`` (scikit-learn ``LinearRegression``) or
        ``"Lasso"`` (``lasso_numba`` with its default arguments, taking the
        standardized-scale coefficients at the penalty with the smallest BIC).
    num_iters : forwarded to ``lasso_numba``.

    Returns
    -------
    (path_gamma, results, weight_path) : the exponents used, the
        ``lasso_numba`` result tuple per exponent, and the weight vector per
        exponent.

    Raises
    ------
    AssertionError : if ``first_stage`` is neither ``"OLS"`` nor ``"Lasso"``.
    """
    _, p = X.shape

    if gamma_path is None:
        path_gamma = np.array([0.001, 0.01, 0.1, 0.5, 1, 2, 3, 4, 6, 8])
    else:
        path_gamma = gamma_path

    if first_stage == "OLS":
        reg = LinearRegression(fit_intercept=intercept).fit(X, y)
        # Copy: the clipping below must not write into the estimator's coef_.
        coeffs = reg.coef_.T.copy()
    elif first_stage == "Lasso":
        res = lasso_numba(X=X, y=y)

        # res[3] holds the BIC per penalty value; argmin returns the first
        # minimiser, which is what the previous where/amin lookup selected.
        index_lamda_opt = int(np.argmin(res[3]))
        # Drop the intercept entry (index 0) of the standardized solution.
        coeffs = np.delete(res[1][index_lamda_opt], 0).reshape((p, 1))

    else:
        raise AssertionError(
            "This feature has so far only been implemented for OLS and Lasso as its first-stage estimators."
        )

    # Avoid division by zero in the weights for coefficients that are
    # (numerically) zero; they receive a very large penalty weight instead.
    coeffs[np.abs(coeffs) < 1.00e-15] = 1.00e-15

    results = []
    weight_path = []
    for gamma in path_gamma:

        if intercept:
            weights = np.ones((p + 1, 1))
            weights[1:, :] = 1.0 / np.abs(coeffs) ** gamma
        else:
            weights = 1.0 / np.abs(coeffs) ** gamma

        res = lasso_numba(
            X,
            y,
            lamda_path=lamda_path,
            penalty_factors=weights,
            theta=None,
            num_iters=num_iters,
            intercept=intercept,
            thresh=1e-7,
            active_thresh=1e-7,
            warm_start=True,
        )

        weight_path.append(weights)
        results.append(res)

    return path_gamma, results, weight_path

In [8]:
def adaptive_lasso(
    X,
    y,
    intercept=True,
    lamda_path=None,
    gamma_path=None,
    first_stage="Lasso",
    num_iters=100,
    out_as_df = False
):
    """Fit the adaptive lasso for every exponent in ``gamma_path``.

    A first-stage estimate provides per-coefficient penalty weights
    ``1 / |coef| ** gamma``; ``lasso_numba`` is then run once per ``gamma``
    with those weights as ``penalty_factors``.

    Parameters
    ----------
    X : 2-D array of shape ``(m, p)``.
    y : response, passed unchanged to the first-stage and second-stage fits.
    intercept : forwarded to the OLS first stage and to the weighted lasso.
        When True, a weight entry of 1 is prepended for the intercept.
    lamda_path : penalty grid forwarded to ``lasso_numba`` (None lets it
        build its own grid).
    gamma_path : iterable of exponents; defaults to
        ``[0.001, 0.01, 0.1, 0.5, 1, 2, 3, 4, 6, 8]``.
    first_stage : ``"OLS"`` (scikit-learn ``LinearRegression``) or
        ``"Lasso"`` (``lasso_numba`` with its default arguments, taking the
        standardized-scale coefficients at the penalty with the smallest BIC).
    num_iters : forwarded to ``lasso_numba``.
    out_as_df : if True, return a DataFrame instead of the raw tuple.

    Returns
    -------
    If ``out_as_df`` is False: ``(path_gamma, results, weight_path)`` -- the
    exponents used, the ``lasso_numba`` result tuple per exponent, and the
    weight vector per exponent.
    If ``out_as_df`` is True: a DataFrame indexed by ``(gamma, lamda)`` with
    object columns ``theta_std``, ``theta_nat`` and ``gamma_weights``. The
    lamda values of the index are taken from the first exponent's result.

    Raises
    ------
    AssertionError : if ``first_stage`` is neither ``"OLS"`` nor ``"Lasso"``.
    """
    _, p = X.shape

    if gamma_path is None:
        path_gamma = np.array([0.001, 0.01, 0.1, 0.5, 1, 2, 3, 4, 6, 8])
    else:
        path_gamma = gamma_path

    if first_stage == "OLS":
        reg = LinearRegression(fit_intercept=intercept).fit(X, y)
        # Copy: the clipping below must not write into the estimator's coef_.
        coeffs = reg.coef_.T.copy()
    elif first_stage == "Lasso":
        res = lasso_numba(X=X, y=y)

        # res[3] holds the BIC per penalty value; argmin returns the first
        # minimiser, which is what the previous where/amin lookup selected.
        index_lamda_opt = int(np.argmin(res[3]))
        # Drop the intercept entry (index 0) of the standardized solution.
        coeffs = np.delete(res[1][index_lamda_opt], 0).reshape((p, 1))

    else:
        raise AssertionError(
            "This feature has so far only been implemented for OLS and Lasso as its first-stage estimators."
        )

    # Avoid division by zero in the weights for coefficients that are
    # (numerically) zero; they receive a very large penalty weight instead.
    coeffs[np.abs(coeffs) < 1.00e-15] = 1.00e-15

    results = []
    weight_path = []
    for gamma in path_gamma:

        if intercept:
            weights = np.ones((p + 1, 1))
            weights[1:, :] = 1.0 / np.abs(coeffs) ** gamma
        else:
            weights = 1.0 / np.abs(coeffs) ** gamma

        res = lasso_numba(
            X,
            y,
            lamda_path=lamda_path,
            penalty_factors=weights,
            theta=None,
            num_iters=num_iters,
            intercept=intercept,
            thresh=1e-7,
            active_thresh=1e-7,
            warm_start=True,
        )

        weight_path.append(weights)
        results.append(res)

    if not out_as_df:
        return path_gamma, results, weight_path

    # One row per (gamma, lamda) pair; the columns are created as object
    # dtype so each cell can hold a whole coefficient / weight array.
    lamda_p = results[0][0]
    df = pd.DataFrame(list(product(path_gamma, lamda_p)), columns=['gamma', 'lamda'])
    df["theta_std"] = np.nan
    df["theta_nat"] = np.nan
    df["gamma_weights"] = np.nan
    df = df.astype(object)
    df = df.set_index(['gamma', 'lamda'])

    for id_gamma, gamma in enumerate(path_gamma):
        gamma_result = results[id_gamma]
        for idx, lamda in enumerate(gamma_result[0]):
            index = (gamma, lamda)
            df.at[index, "theta_std"] = gamma_result[1][idx]
            df.at[index, "theta_nat"] = gamma_result[2][idx]
            df.at[index, "gamma_weights"] = weight_path[id_gamma]

    return df

In [9]:
def get_lamda_path(X, y, epsilon=0.0001, K=100):
    """Return ``K`` log-spaced penalty values, from largest to smallest.

    The largest value is ``max_j |sum_i X[i, j] * y[i]| / m``; the smallest
    is ``epsilon`` times that. ``y`` is reshaped to an ``(m, 1)`` column
    before the column-wise products are formed.
    """
    n_obs, _ = X.shape
    y_col = y.reshape((n_obs, 1))

    column_products = np.sum(X * y_col, axis=0)
    lamda_largest = np.max(np.abs(column_products)) / n_obs

    # Equally spaced on the log scale, then mapped back with exp.
    log_grid = np.linspace(
        start=np.log(lamda_largest),
        stop=np.log(lamda_largest * epsilon),
        num=np.int64(K),
    )
    return np.exp(log_grid)

In [33]:
# Synthetic regression data: 3000 observations of 45 uniform(0, 1) features.
# Only columns 0, 1, 3, 5, 6, 7 and 8 enter the response; the intercept is 5
# and the noise is standard normal.
np.random.seed(seed=5)
n = 3000
X = np.random.rand(n,45)
# Earlier variant of the response, kept commented out:
#y = np.array(13* X[:,7] + X[:,3] + X[:,5] + X[:,6] +X[:,7] +X[:,8]- 1.5 * X[:,0] - 14.5 * X[:,1] + 5 + np.random.normal(0,1,n), dtype=np.float64).reshape(-1,1)
# NOTE: X[:,7] appears twice below (3* X[:,7] and +X[:,7]), so its effective
# coefficient is 4. y is stored as an (n, 1) column.
y = np.array(3* X[:,7] + X[:,3] + X[:,5] + X[:,6] +X[:,7] + 4* X[:,8]- 1.5 * X[:,0] - 14.5 * X[:,1] + 5 + np.random.normal(0,1,n), dtype=np.float64).reshape(-1,1)

In [44]:
# Fit the adaptive lasso on the first fold for a small (gamma, lamda) grid and
# collect the result as a DataFrame.
# NOTE(review): X_fold_1 / y_fold_1 are not defined by any top-level cell in
# this notebook (they only exist as locals inside cv_adaptive_lasso), so this
# cell relies on leftover kernel state and will fail on a fresh kernel.
res2 = adaptive_lasso(
    X = X_fold_1,
    y = y_fold_1,
    intercept=True,
    lamda_path=np.array([0.001, 0.1]),
    gamma_path=np.array([1, 2, 3]),
    first_stage="OLS",
    num_iters=100,
    out_as_df=True)

In [54]:
ret

(array([[11.51781973],
        [ 6.71835755],
        [ 3.68746959],
        ...,
        [-3.05343886],
        [ 8.37169908],
        [-1.48658643]]),
 1493.2991648428774)

In [13]:
def make_prediction(X, y, theta_nat, intercept=True):
    """Predict with natural-scale coefficients and score against ``y``.

    Parameters
    ----------
    X : 2-D array of shape ``(n, p)``.
    y : observed response, either ``(n,)`` or ``(n, 1)``.
    theta_nat : coefficient vector with ``p`` entries, or ``p + 1`` entries
        (intercept first) when ``intercept`` is True.
    intercept : if True, a leading column of ones is inserted into ``X``.

    Returns
    -------
    (y_hat, mse) : the ``(n, 1)`` predictions and the SUM of squared
        prediction errors. Despite its name, ``mse`` is not divided by ``n``.
    """
    if intercept:
        X = np.insert(X, 0, 1, axis=1)

    # A 1-D y would broadcast against the (n, 1) predictions into an (n, n)
    # matrix and silently inflate the error, so force a column first.
    y = np.asarray(y)
    if y.ndim == 1:
        y = y.reshape((-1, 1))

    y_hat = X @ np.asarray(theta_nat).reshape((X.shape[1], 1))
    mse = np.sum(np.square(y - y_hat))
    return y_hat, mse

In [45]:
# Score the fold-1 fit for (gamma=1, lamda=0.001) on the second fold.
# NOTE(review): X_fold_2 / y_fold_2 are not defined by any top-level cell in
# this notebook, and `ret` is displayed in a cell that appears above this one;
# both depend on out-of-order execution / leftover kernel state.
ret = make_prediction(X_fold_2, y_fold_2, res2.at[(1,0.001), "theta_nat"])

In [14]:
def cv_adaptive_lasso(X, y, intercept=True, first_stage="OLS"):
    """Two-fold cross-validation of the adaptive lasso over a (gamma, lamda) grid.

    The rows are split into two halves with ``np.random.permutation`` (so the
    split depends on the global NumPy random state). The adaptive lasso is
    fitted on each half for every grid point and scored on the other half
    with ``make_prediction``.

    Parameters
    ----------
    X : 2-D array of shape ``(n, p)``.
    y : 2-D response of shape ``(n, 1)`` (it is row-indexed with ``[idx, :]``).
    intercept : whether the fits and the predictions include an intercept.
    first_stage : first-stage estimator name forwarded to ``adaptive_lasso``.

    Returns
    -------
    (cv_overview, params_opt) : a DataFrame indexed by ``(gamma, lamda)`` with
        columns ``mse_1``, ``mse_2`` (the scores returned by
        ``make_prediction``) and ``mean_mse``; and the ``(gamma, lamda)``
        index tuple with the smallest ``mean_mse``.
    """
    n, p = X.shape

    # The lamda grid is derived from the full data, standardized once.
    X_standardized = (X - X.mean(axis=0)) / X.std(axis=0)
    y_standardized = (y - np.mean(y)) / np.std(y)

    indices = np.random.permutation(n)
    half = n // 2
    fold_1_idx, fold_2_idx = indices[:half], indices[half:]
    X_fold_1, X_fold_2 = X[fold_1_idx,:], X[fold_2_idx,:]
    y_fold_1, y_fold_2 = y[fold_1_idx,:], y[fold_2_idx,:]

    gamma_path = np.array([0.001, 0.01, 0.1, 0.5, 1, 2, 3, 4, 6, 8, 10])
    lamda_path = get_lamda_path(X=X_standardized, y=y_standardized)

    # The fits must use the same `intercept` setting as the predictions
    # below; otherwise theta_nat has one entry more than X has columns when
    # intercept=False.
    trained_on_fold_1 = adaptive_lasso(
        X = X_fold_1,
        y = y_fold_1,
        intercept=intercept,
        lamda_path=lamda_path,
        gamma_path=gamma_path,
        first_stage=first_stage,
        num_iters=100,
        out_as_df=True)

    trained_on_fold_2 = adaptive_lasso(
        X = X_fold_2,
        y = y_fold_2,
        intercept=intercept,
        lamda_path=lamda_path,
        gamma_path=gamma_path,
        first_stage=first_stage,
        num_iters=100,
        out_as_df=True)

    trained_on_fold_1["mse_1"] = np.nan
    trained_on_fold_2["mse_2"] = np.nan

    grid = product(
        trained_on_fold_1.index.get_level_values('gamma').unique(),
        trained_on_fold_1.index.get_level_values('lamda').unique(),
    )
    for gamma, lamda in grid:
        index = (gamma, lamda)
        # Each model is scored on the fold it was NOT trained on.
        _, mse_1 = make_prediction(X=X_fold_2,
                                   y=y_fold_2,
                                   theta_nat=trained_on_fold_1.at[index, "theta_nat"],
                                   intercept=intercept)

        _, mse_2 = make_prediction(X=X_fold_1,
                                   y=y_fold_1,
                                   theta_nat=trained_on_fold_2.at[index, "theta_nat"],
                                   intercept=intercept)

        trained_on_fold_1.at[index, "mse_1"] = mse_1
        trained_on_fold_2.at[index, "mse_2"] = mse_2

    cv_overview = trained_on_fold_1.merge(trained_on_fold_2, how='left', on=["gamma", "lamda"])[['mse_1','mse_2']]
    cv_overview['mean_mse'] = cv_overview.mean(axis=1)

    # .name of the selected row is its (gamma, lamda) index tuple.
    params_opt = cv_overview.iloc[cv_overview['mean_mse'].argmin(),].name

    return cv_overview, params_opt


In [45]:
cv_results, params_opt = cv_adaptive_lasso(X, y)

In [46]:
cv_results

Unnamed: 0_level_0,Unnamed: 1_level_0,mse_1,mse_2,mean_mse
gamma,lamda,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.001,0.901418,33721.212523,31152.473731,32436.843127
0.001,0.821338,29262.041339,26778.135797,28020.088568
0.001,0.748373,25450.900941,23158.504993,24304.702967
0.001,0.681890,22275.846171,20164.377359,21220.111765
0.001,0.621312,19629.856214,17688.580372,18659.218293
...,...,...,...,...
10.000,0.000131,1574.756664,1554.159949,1564.458306
10.000,0.000119,1574.711370,1554.106361,1564.408866
10.000,0.000109,1574.670327,1554.057803,1564.364065
10.000,0.000099,1574.633117,1554.013782,1564.323449


In [70]:
def get_conf_intervals(lamda, weights, theta_std, theta_nat, X, X_std, intercept, y, y_std):
    """Approximate covariance and 95% intervals for the selected coefficients.

    For the coefficients that are non-zero on the natural scale, the
    covariance is computed with the sandwich form

        sigma^2 * (X'X + lamda * S)^-1  X'X  (X'X + lamda * S)^-1,

    where ``S = diag(w_j / |theta_j|)`` and ``X`` is restricted to the active
    columns. This is done once on the natural scale and once on the
    standardized scale.

    NOTE(review): ``lamda`` is used exactly as passed; whether it needs
    rescaling to match the fitting objective should be confirmed by the
    caller.

    Parameters
    ----------
    lamda : penalty value used in the sandwich formula.
    weights : penalty weights; a leading intercept entry is dropped when
        ``intercept`` is True. Flattened internally.
    theta_std, theta_nat : 1-D coefficient vectors on the standardized and
        natural scale (intercept first when ``intercept`` is True).
    X, X_std : ``(n, p)`` designs on the natural and standardized scale,
        without an intercept column.
    intercept : whether the coefficient vectors contain a leading intercept.
    y, y_std : ``(n, 1)`` responses on the natural and standardized scale.

    Returns
    -------
    dict with keys ``beta_hat_nat_cov_mat``, ``beta_hat_std_cov_mat``,
    ``conf_intervals_nat``, ``conf_intervals_std`` (each an array with columns
    lower bound, upper bound) and ``active_set`` (boolean mask over the ``p``
    features).
    """
    n, p = X.shape

    if intercept:
        design_nat = np.insert(X, 0, 1, axis=1)
        design_std = np.insert(X_std, 0, 1, axis=1)
    else:
        design_nat = X
        design_std = X_std

    # Residual variance estimates: mean squared residual of each fit.
    sigma_hat_nat = 1 / n * np.sum(np.square(y - design_nat @ theta_nat.reshape((len(theta_nat), 1))))
    sigma_hat_std = 1 / n * np.sum(np.square(y_std - design_std @ theta_std.reshape((len(theta_std), 1))))

    if intercept:
        # Intervals are only computed for the slope coefficients.
        theta_std = np.delete(arr=theta_std, obj=0)
        theta_nat = np.delete(arr=theta_nat, obj=0)
        weights = np.delete(arr=weights, obj=0, axis=0)

    weights = weights.flatten()

    # Active set: coefficients not within 1e-6 of zero on the natural scale.
    active_set = np.invert(np.isclose(np.zeros(p), theta_nat, atol=1e-06))

    X_active = X[:, active_set]
    X_std_active = X_std[:, active_set]
    theta_nat_active = theta_nat[active_set]
    theta_std_active = theta_std[active_set]
    weights_active = weights[active_set]

    # Diagonal penalty matrices. The absolute value keeps the entries
    # positive for negative coefficients, and each scale uses its own
    # coefficients.
    sigma_beta_std = np.diag(weights_active / np.abs(theta_std_active))
    sigma_beta_nat = np.diag(weights_active / np.abs(theta_nat_active))

    gram_nat = X_active.T @ X_active
    gram_std = X_std_active.T @ X_std_active

    # The penalty enters as a diagonal matrix, not as a row vector broadcast
    # over every row of the Gram matrix.
    main_mat_nat_inverse = linalg.inv(gram_nat + lamda * sigma_beta_nat)
    main_mat_std_inverse = linalg.inv(gram_std + lamda * sigma_beta_std)

    beta_hat_nat_cov_mat = sigma_hat_nat * (main_mat_nat_inverse @ gram_nat @ main_mat_nat_inverse)
    beta_hat_std_cov_mat = sigma_hat_std * (main_mat_std_inverse @ gram_std @ main_mat_std_inverse)

    # Normal-approximation 95% intervals: estimate +/- 1.96 standard errors.
    half_width_nat = 1.96 * np.sqrt(np.diag(beta_hat_nat_cov_mat))
    half_width_std = 1.96 * np.sqrt(np.diag(beta_hat_std_cov_mat))

    conf_intervals_nat = np.column_stack((theta_nat_active - half_width_nat, theta_nat_active + half_width_nat))
    conf_intervals_std = np.column_stack((theta_std_active - half_width_std, theta_std_active + half_width_std))

    return {"beta_hat_nat_cov_mat": beta_hat_nat_cov_mat,
            "beta_hat_std_cov_mat": beta_hat_std_cov_mat,
            "conf_intervals_nat": conf_intervals_nat,
            "conf_intervals_std": conf_intervals_std,
            "active_set": active_set
           }

In [93]:
def adaptive_lasso_cv_estimates(X, y):
    """Cross-validate, refit on all data, and return confidence-interval output.

    Selects ``(gamma, lamda)`` with ``cv_adaptive_lasso`` (OLS first stage,
    with intercept), refits ``adaptive_lasso`` on the full data at that single
    grid point, prints the natural-scale coefficients, and returns the
    dictionary produced by ``get_conf_intervals``.

    NOTE: this name is defined again in a later cell of the notebook.
    """
    n, p = X.shape

    # Standardized copies of the data, needed by get_conf_intervals.
    X_standardized = (X - X.mean(axis=0)) / X.std(axis=0)
    y_standardized = (y - np.mean(y)) / np.std(y)

    cv_results, params_opt = cv_adaptive_lasso(X=X, y=y, intercept=True, first_stage="OLS")
    gamma_opt, lamda_opt = params_opt[0], params_opt[1]

    train_opt_ada_lasso = adaptive_lasso(
        X=X,
        y=y,
        intercept=True,
        lamda_path=np.array([lamda_opt]),
        gamma_path=np.array([gamma_opt]),
        first_stage="OLS",
        num_iters=100,
        out_as_df=True,
    )

    # Single-point grid, so the fit is the first (only) row of the frame.
    best_fit = train_opt_ada_lasso.iloc[0]
    print(best_fit["theta_nat"])

    return get_conf_intervals(
        lamda=lamda_opt,
        weights=best_fit["gamma_weights"],
        theta_std=best_fit["theta_std"],
        theta_nat=best_fit["theta_nat"],
        X=X,
        X_std=X_standardized,
        intercept=True,
        y=y,
        y_std=y_standardized,
    )

In [96]:
def adaptive_lasso_cv_estimates(X, y):
    """Cross-validate, refit on all data, and return support plus intervals.

    Selects ``(gamma, lamda)`` with ``cv_adaptive_lasso`` (OLS first stage,
    with intercept), refits ``adaptive_lasso`` on the full data at that single
    grid point, and passes the fit to ``get_conf_intervals``.

    Returns a dict with ``selected_support`` (the ``active_set`` mask),
    ``conf_intervals_nat`` and ``conf_intervals_std``.
    """
    n, p = X.shape

    # Standardized copies of the data, needed by get_conf_intervals.
    X_standardized = (X - X.mean(axis=0)) / X.std(axis=0)
    y_standardized = (y - np.mean(y)) / np.std(y)

    cv_results, params_opt = cv_adaptive_lasso(X=X, y=y, intercept=True, first_stage="OLS")
    gamma_opt, lamda_opt = params_opt[0], params_opt[1]

    train_opt_ada_lasso = adaptive_lasso(
        X=X,
        y=y,
        intercept=True,
        lamda_path=np.array([lamda_opt]),
        gamma_path=np.array([gamma_opt]),
        first_stage="OLS",
        num_iters=100,
        out_as_df=True,
    )

    # Single-point grid, so the fit is the first (only) row of the frame.
    best_fit = train_opt_ada_lasso.iloc[0]

    ada_lasso_opt_res = get_conf_intervals(
        lamda=lamda_opt,
        weights=best_fit["gamma_weights"],
        theta_std=best_fit["theta_std"],
        theta_nat=best_fit["theta_nat"],
        X=X,
        X_std=X_standardized,
        intercept=True,
        y=y,
        y_std=y_standardized,
    )

    return {
        "selected_support": ada_lasso_opt_res["active_set"],
        "conf_intervals_nat": ada_lasso_opt_res["conf_intervals_nat"],
        "conf_intervals_std": ada_lasso_opt_res["conf_intervals_std"],
    }

In [97]:
res2 = adaptive_lasso_cv_estimates(X=X, y=y)

In [101]:
res2["conf_intervals_nat"]

array([[ -1.74954833,  -1.51815879],
       [-14.71077791, -14.4784551 ],
       [  0.8605191 ,   1.0919528 ],
       [  0.80188343,   1.03752253],
       [  0.91275523,   1.14429924],
       [  3.90662793,   4.1437661 ],
       [  3.89514546,   4.13005715]])

In [77]:
res.iloc[0]["theta_nat"]

array([  5.1891341 ,  -1.62169143, -14.594551  ,   0.        ,
         0.95429769,   0.        ,   0.89849782,   1.00953853,
         4.01968161,   4.00693591,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
        -0.02610034,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ])

In [124]:
from sklearn.linear_model import LassoCV

In [134]:
reg = LassoCV(cv=5, random_state=0, fit_intercept=True).fit(X, y.flatten())

In [144]:
t = np.full((4,), False, dtype=bool)

In [140]:
truth = np.array([False, False, False, True])

In [145]:
# Flag positions that are True in the hard-coded mask but False in `truth`
# (i.e. entries of the mask that `truth` does not contain).
# NOTE: relies on `t` and `truth` from the two preceding cells.
for idx, val in enumerate(np.array([True, False, True, True])):
    if val:
        if not truth[idx]:
            t[idx] = True

In [166]:
beta_true = np.array([[1],[4.1],[1]])
beta_true

array([[1. ],
       [4.1],
       [1. ]])

In [177]:
# Simulation design: every combination of simulation run, covariance
# structure (identity or not), number of features p, and link function.
# NOTE: this rebinds the global `n` (and `p`, `index`, `df`) used by
# other cells.
n = 1000
number_simulations = 3
simulation_id = np.arange(number_simulations)
identity = [True, False]
p = [100, 500, 950]
link_functions = ["linear_link", "polynomial_link", "sine_link"]
# `index` is first the lazy cartesian product ...
index = product(simulation_id, identity, p, link_functions)

# ... and is then replaced by the MultiIndex built from it.
index = pd.MultiIndex.from_tuples(
    index,
    names=("simulation_id", "identity_cov_matrix", "p", "link_function"),
)

# Empty results table, one row per design point; the metric columns start
# out unfilled.
df = pd.DataFrame(columns=["share_of_truth_uncovered", 
                           "ratio_total_select_coeffs_true_coeffs",
                           "false_pos_share_true_support",
                           "false_pos_share_right_selection",
                           "linear_effect_coverage"], 
                            index=index)

In [178]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,share_of_truth_uncovered,ratio_total_select_coeffs_true_coeffs,false_pos_share_true_support,false_pos_share_right_selection,linear_effect_coverage
simulation_id,identity_cov_matrix,p,link_function,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,True,100,linear_link,,,,,
0,True,100,polynomial_link,,,,,
0,True,100,sine_link,,,,,
0,True,500,linear_link,,,,,
0,True,500,polynomial_link,,,,,
0,True,500,sine_link,,,,,
0,True,950,linear_link,,,,,
0,True,950,polynomial_link,,,,,
0,True,950,sine_link,,,,,
0,False,100,linear_link,,,,,


In [153]:
(beta_true > conf_int[:,0]) & (beta_true < conf_int[:,1])

array([[ True, False, False],
       [False, False, False],
       [False, False,  True]])

In [168]:
# Row-wise coverage check: the interval bounds are reshaped to column vectors
# so they line up with the (3, 1) `beta_true` instead of broadcasting to a
# 3x3 matrix. The result is True where lower < beta_true < upper.
# NOTE(review): `conf_int` is not defined in any visible cell; presumably an
# array with columns (lower, upper) -- confirm.
a = np.greater(beta_true,conf_int[:,0].reshape((conf_int.shape[0], 1))) & np.less(beta_true,conf_int[:,1].reshape((conf_int.shape[0], 1)))

In [169]:
a.flatten()

array([ True,  True, False])

In [136]:
res1 = adaptive_lasso(
    X = X,
    y = y,
    intercept=True,
    lamda_path=np.array([0.001, 0.1]),
    gamma_path=np.array([1, 2, 3]),
    first_stage="OLS",
    num_iters=100,
    out_as_df=True)


In [27]:
# Scratch version of the (gamma, lamda)-indexed result table.
# NOTE(review): `res` is not defined by any visible cell; the indexing below
# assumes it is the (path_gamma, results, weight_path) tuple returned by
# adaptive_lasso for gamma in [1, 2, 3] -- confirm.
gamma_grid = np.array([1, 2, 3])

df = pd.DataFrame(list(product(gamma_grid, np.array([0.001, 0.1]))), columns=['gamma', 'lamda'])
df["theta_std"] = np.nan
df["theta_nat"] = np.nan
df["gamma_weights"] = np.nan
# Object dtype so each cell can hold a whole array.
df = df.astype(object)
df = df.set_index(['gamma', 'lamda'])

for id_gamma, gamma in enumerate(gamma_grid):
    # Use this gamma's own result. Previously every row was filled from
    # res[1][0], and the lamda lookup used `gamma - 1` as a position, which
    # only works for the grid 1, 2, 3.
    gamma_result = res[1][id_gamma]
    for idx, lamda in enumerate(gamma_result[0]):
        index = (gamma, lamda)
        df.at[index, "theta_std"] = gamma_result[1][idx]
        df.at[index, "theta_nat"] = gamma_result[2][idx]
        df.at[index, "gamma_weights"] = res[2][id_gamma]

In [36]:
res[1][0][0]

[0.001, 0.1]

In [57]:
res2 = lasso_numba(X = X, y = y, lamda_path = np.array([0.01]))

In [58]:
res2

([0.01],
 [array([-3.90915078e-15, -8.28823934e-02, -9.04682129e-01,  0.00000000e+00,
          5.25352557e-02,  0.00000000e+00,  5.17358623e-02,  5.23128027e-02,
          2.42044262e-01,  2.42875760e-01,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00])],
 [array([  5.25080801,  -1.32743865, -14.35309693,   0.        ,
           0.84270478,   0.        ,   0.82766577,   0.82898034,
           3.8467732 ,   3.83412371,   0.        ,   0.        ,
           0.        ,   0.        ,   0.        ,   0.        ])],
 [0.022065843564756927])

In [26]:
1/ n * np.sum(np.square(y_std - X_std @ res2[1][0].reshape((16,1))))

0.20501979168180165

In [14]:
X.shape

(3000, 15)

In [85]:
def sk_learn_lasso(X, y, intercept=True, lamda_path=None):
    """Fit scikit-learn's ``Lasso`` on standardized data along a penalty path.

    ``X`` and ``y`` are standardized with their means and standard
    deviations. When ``lamda_path`` is None the grid from
    ``get_lamda_path_numba`` on the standardized data is used.

    Returns ``(lamdas, coeffs)``: the penalty values and, per value, the
    fitted coefficients (with the fitted intercept inserted at position 0
    when ``intercept`` is True).
    """
    m, p = X.shape

    X_scaled = (X - X.mean(axis=0)) / X.std(axis=0)
    y_scaled = (y - np.mean(y)) / np.std(y)

    path = get_lamda_path_numba(X=X_scaled, y=y_scaled) if lamda_path is None else lamda_path

    # scikit-learn is given a 1-D target.
    y_flat = y_scaled.flatten()

    lamdas, coeffs = [], []
    for lamda in path:
        model = Lasso(alpha= lamda, fit_intercept = intercept)
        model.fit(X_scaled, y_flat)

        coef = (
            np.insert(arr=model.coef_, obj=0, values=model.intercept_)
            if intercept
            else model.coef_
        )

        lamdas.append(lamda)
        coeffs.append(np.copy(coef))

    return lamdas, coeffs

In [86]:
def soft_threshold(rho, lamda, w):
    """Soft threshold function used for normalized data and lasso regression.

    Values of ``rho`` outside the band ``[-lamda * w, lamda * w]`` are moved
    towards zero by ``lamda * w``; values inside the band give ``0.0``.
    """
    if rho < -lamda * w:
        return rho + lamda * w
    elif rho > lamda * w:
        return rho - lamda * w
    else:
        # Float zero, so every branch returns the same type (as in
        # soft_threshold_numba).
        return 0.0

In [17]:
from scipy import linalg

In [76]:
def get_conf_intervals(lamda, weights, theta_std, theta_nat, X, X_std, intercept, y, y_std):
    """Approximate covariance and 95% interval bounds for selected coefficients.

    For the coefficients that are non-zero on the natural scale, the
    covariance is computed with the sandwich form

        sigma^2 * (X'X + lamda * S)^-1  X'X  (X'X + lamda * S)^-1,

    where ``S = diag(w_j / |theta_j|)`` and ``X`` is restricted to the active
    columns. This is done once on the natural scale and once on the
    standardized scale.

    NOTE(review): ``lamda`` is used exactly as passed; whether it needs
    rescaling to match the fitting objective should be confirmed by the
    caller.

    Parameters
    ----------
    lamda : penalty value used in the sandwich formula.
    weights : penalty weights; a leading intercept entry is dropped when
        ``intercept`` is True. Flattened internally.
    theta_std, theta_nat : 1-D coefficient vectors on the standardized and
        natural scale (intercept first when ``intercept`` is True).
    X, X_std : ``(n, p)`` designs on the natural and standardized scale,
        without an intercept column.
    intercept : whether the coefficient vectors contain a leading intercept.
    y, y_std : ``(n, 1)`` responses on the natural and standardized scale.

    Returns
    -------
    dict with the two covariance matrices (``beta_hat_nat_cov_mat``,
    ``beta_hat_std_cov_mat``) and the upper / lower interval bounds on each
    scale (``conf_intervals_{nat,std}_{upper,lower}_bound``).
    """
    n, p = X.shape

    if intercept:
        design_nat = np.insert(X, 0, 1, axis=1)
        design_std = np.insert(X_std, 0, 1, axis=1)
    else:
        design_nat = X
        design_std = X_std

    # Residual variance estimates: mean squared residual of each fit.
    sigma_hat_nat = 1 / n * np.sum(np.square(y - design_nat @ theta_nat.reshape((len(theta_nat), 1))))
    sigma_hat_std = 1 / n * np.sum(np.square(y_std - design_std @ theta_std.reshape((len(theta_std), 1))))

    if intercept:
        # Intervals are only computed for the slope coefficients.
        theta_std = np.delete(arr=theta_std, obj=0)
        theta_nat = np.delete(arr=theta_nat, obj=0)
        weights = np.delete(arr=weights, obj=0, axis=0)

    weights = weights.flatten()

    # Active set: coefficients not within 1e-6 of zero on the natural scale.
    active_set = np.invert(np.isclose(np.zeros(p), theta_nat, atol=1e-06))

    X_active = X[:, active_set]
    X_std_active = X_std[:, active_set]
    theta_nat_active = theta_nat[active_set]
    theta_std_active = theta_std[active_set]
    weights_active = weights[active_set]

    # Diagonal penalty matrices. The absolute value keeps the entries
    # positive for negative coefficients, and each scale uses its own
    # coefficients.
    sigma_beta_std = np.diag(weights_active / np.abs(theta_std_active))
    sigma_beta_nat = np.diag(weights_active / np.abs(theta_nat_active))

    gram_nat = X_active.T @ X_active
    gram_std = X_std_active.T @ X_std_active

    # The penalty enters as a diagonal matrix, not as a row vector broadcast
    # over every row of the Gram matrix.
    main_mat_nat_inverse = linalg.inv(gram_nat + lamda * sigma_beta_nat)
    main_mat_std_inverse = linalg.inv(gram_std + lamda * sigma_beta_std)

    beta_hat_nat_cov_mat = sigma_hat_nat * (main_mat_nat_inverse @ gram_nat @ main_mat_nat_inverse)
    beta_hat_std_cov_mat = sigma_hat_std * (main_mat_std_inverse @ gram_std @ main_mat_std_inverse)

    # Normal-approximation 95% intervals: estimate +/- 1.96 standard errors.
    half_width_nat = 1.96 * np.sqrt(np.diag(beta_hat_nat_cov_mat))
    half_width_std = 1.96 * np.sqrt(np.diag(beta_hat_std_cov_mat))

    return {"beta_hat_nat_cov_mat": beta_hat_nat_cov_mat,
            "beta_hat_std_cov_mat": beta_hat_std_cov_mat,
            "conf_intervals_nat_upper_bound": theta_nat_active + half_width_nat,
            "conf_intervals_nat_lower_bound": theta_nat_active - half_width_nat,
            "conf_intervals_std_upper_bound": theta_std_active + half_width_std,
            "conf_intervals_std_lower_bound": theta_std_active - half_width_std
           }

In [84]:
result = get_conf_intervals(lamda = 0.001, 
                            weights= res[2][0], 
                            theta_std = res[1][0][1][0], 
                            theta_nat = res[1][0][2][0], 
                            X = X, 
                            X_std = X_std, 
                            intercept = True, 
                            y = y, 
                            y_std = y_std)

""" def generate_moderate_correlation(n):

    np.random.seed(seed=8)
    eigvals = np.random.uniform(low=0.01, high=1.0, size=n)

    mean_val = np.mean(eigvals)
    half = int(n/2)
    threefourth = int(half + half / 2)

    eigvals_normalized = eigvals / mean_val
    sort = np.sort(eigvals_normalized)

    gain = 5/6 * np.sum(sort[0:(half)])

    sort[0:half] = sort[0:(half)] / 6
    sort[half:n] = sort[(half):n] + gain / half

    gain2 = 7/8 * np.sum(sort[half:(threefourth)])

    sort[half:threefourth] = sort[half:(threefourth)] / 8
    sort[threefourth:n] = sort[(threefourth):n] + gain2 / (n - threefourth)

    correlation_matrix = random_correlation.rvs(sort)

    cov_matrix = corr2cov(correlation_matrix, np.random.uniform(low=0.3, high=2.0, size=n))
    return cov_matrix """

In [65]:
res2 = lasso_numba(X = X,
            y = y,
            lamda_path = np.array([0.01]),
            intercept = True)

In [93]:
%%time
res = lasso_numba(X = X, y = y, lamda_path = np.array([100, 80, 40,20 ,10,6, 5, 3, 2, 1, 0.5 , 0.01]))

CPU times: user 1.64 s, sys: 280 ms, total: 1.92 s
Wall time: 1.58 s


In [39]:
def adaptive_lasso(X, y, intercept=True, lamda_path=None, gamma_path=None, first_stage="Lasso", num_iters=100):
    """Run a weighted Lasso for every exponent in ``gamma_path``.

    A first-stage coefficient estimate is converted into per-feature penalty
    factors ``1 / |coef| ** gamma``; these are passed to ``lasso_numba`` as
    ``penalty_factors``. When ``intercept`` is true, a leading factor of 1 is
    prepended for the intercept position.

    Parameters
    ----------
    X : ndarray of shape (m, p)
        Design matrix.
    y : ndarray
        Response; forwarded unchanged to the first-stage estimator and to
        ``lasso_numba``.
    intercept : bool, default True
        Forwarded to ``LinearRegression(fit_intercept=...)`` (OLS first stage)
        and to the second-stage ``lasso_numba`` call.
    lamda_path : ndarray or None
        Forwarded to the second-stage ``lasso_numba`` call.
    gamma_path : iterable of float or None
        Exponents to try; ``None`` selects a built-in grid of ten values.
    first_stage : {"Lasso", "OLS"}
        Estimator that supplies the initial coefficients.
    num_iters : int
        Forwarded to the second-stage ``lasso_numba`` call.

    Returns
    -------
    (path_gamma, results)
        ``path_gamma`` is the gamma grid that was used and ``results`` is a
        list holding the value returned by ``lasso_numba`` for each gamma.

    Raises
    ------
    AssertionError
        If ``first_stage`` is neither ``"OLS"`` nor ``"Lasso"``.
    """
    m, p = X.shape

    if gamma_path is None:
        path_gamma = np.array([0.001, 0.01, 0.1, 0.5, 1, 2, 3, 4, 6, 8])
    else:
        path_gamma = gamma_path

    if first_stage == "OLS":
        reg = LinearRegression(fit_intercept=intercept).fit(X, y)
        # coef_ is (p,) for a 1-D y and (1, p) for a column-vector y; force a
        # (p, 1) copy so the weight assignment below works in both cases and
        # the fitted estimator is not modified by the clamping step.
        coeffs = np.array(reg.coef_, dtype=np.float64).reshape((p, 1))
    elif first_stage == "Lasso":
        # NOTE(review): the first stage always runs lasso_numba with its own
        # defaults (including its intercept default), independent of the
        # `intercept`, `lamda_path` and `num_iters` arguments — confirm intended.
        res = lasso_numba(X=X, y=y)

        # Pick the path position whose res[3] entry is smallest and drop the
        # leading (intercept) entry of the matching coefficient vector.
        index_lamda_opt = np.where(res[3] == np.amin(res[3]))[0][0]
        coeffs = np.delete(res[1][index_lamda_opt], 0).reshape((p, 1))
    else:
        # AssertionError is kept so existing callers that catch it keep working.
        raise AssertionError(
            "first_stage must be either 'OLS' or 'Lasso', got {!r}.".format(first_stage)
        )

    # Floor tiny coefficients so the reciprocal weights stay finite.
    coeffs[np.abs(coeffs) < 1.00e-15] = 1.00e-15
    abs_coeffs = np.abs(coeffs)

    results = []
    for gamma in path_gamma:
        feature_weights = 1.0 / abs_coeffs ** gamma

        if intercept:
            # The intercept keeps a penalty factor of 1.
            weights = np.ones((p + 1, 1))
            weights[1:, :] = feature_weights
        else:
            weights = feature_weights

        results.append(
            lasso_numba(
                X,
                y,
                lamda_path=lamda_path,
                penalty_factors=weights,
                theta=None,
                num_iters=num_iters,
                intercept=intercept,
                thresh=1e-7,
                active_thresh=1e-7,
                warm_start=True,
            )
        )

    return path_gamma, results

In [22]:
from sklearn.linear_model import LinearRegression

In [54]:
%%time
real = adaptive_lasso(X = X, y= y, gamma_path = np.array([1]), lamda_path = np.array([0.0001]), first_stage = "Lasso")
#real = adaptive_lasso(X = X, y= y)

CPU times: user 1.49 s, sys: 4.3 s, total: 5.78 s
Wall time: 217 ms


In [11]:
from sklearn.ensemble import RandomForestRegressor

from boruta import BorutaPy
from sklearn.feature_selection import SelectFromModel

In [5]:
# Boruta feature selection driven by a random-forest regressor; the last line
# displays the resulting boolean support mask.
rf = RandomForestRegressor(
    n_estimators=500,
    max_depth=10,
    n_jobs=-1,
    random_state=0,
)
feat_selector = BorutaPy(rf, n_estimators="auto", verbose=0, random_state=1)
feat_selector.fit(X, y.flatten())
feat_selector.support_

array([ True,  True, False,  True, False,  True,  True,  True,  True,
       False, False, False, False, False, False])

In [10]:
# Hard-coded boolean column mask (15 entries); display the selected columns of X.
mask = np.array(
    [True, True, False, True, False, True, True, True, True]
    + [False] * 6
)
X[:, mask]

array([[0.417022  , 0.72032449, 0.30233257, ..., 0.18626021, 0.34556073,
        0.39676747],
       [0.67046751, 0.4173048 , 0.14038694, ..., 0.96826158, 0.31342418,
        0.69232262],
       [0.09834683, 0.42110763, 0.53316528, ..., 0.68650093, 0.83462567,
        0.01828828],
       ...,
       [0.65103314, 0.04507603, 0.55380678, ..., 0.54389884, 0.27882502,
        0.28164784],
       [0.16869127, 0.68823469, 0.13224319, ..., 0.40385241, 0.97077539,
        0.32399876],
       [0.56875752, 0.69153019, 0.64573532, ..., 0.88211309, 0.96919384,
        0.88446664]])

In [15]:
# Fit a random forest and keep at most 7 features by importance;
# threshold=-np.inf leaves max_features as the only selection criterion.
regr = RandomForestRegressor(
    n_estimators=500,
    max_depth=10,
    n_jobs=-1,
    random_state=0,
).fit(X, y.flatten())
model = SelectFromModel(regr, prefit=True, threshold=-np.inf, max_features=7)
model.get_support()

array([ True,  True, False,  True, False,  True,  True,  True,  True,
       False, False, False, False, False, False])

In [19]:
from sklearn.feature_selection import VarianceThreshold

In [21]:
# Variance filter with threshold 0.95 * (1 - 0.95); X_new holds the columns that pass.
sel = VarianceThreshold(threshold=0.95 * (1 - 0.95))
X_new = sel.fit_transform(X)

In [7]:
from sklearn.feature_selection import SelectKBest, f_regression

In [10]:
from sklearn.pipeline import Pipeline 
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

In [11]:
def univariate_feature_selection(X_fold, y_fold, cv=10):
    """Select features with ``SelectKBest(f_regression)``, tuning ``k`` by cross-validation.

    A pipeline (standard scaling -> SelectKBest -> linear regression) is
    grid-searched over ``k = 1, ..., p``. The best ``k`` is then used to fit a
    fresh ``SelectKBest`` on the unscaled ``X_fold`` and its support is returned.

    Parameters
    ----------
    X_fold : ndarray of shape (n, p)
        Feature matrix.
    y_fold : array-like
        Response; flattened to 1-D before fitting.
    cv : int, default 10
        Passed to ``GridSearchCV`` as its ``cv`` argument.

    Returns
    -------
    ndarray of bool, shape (p,)
        Support mask of the selected features.
    """
    n, p = X_fold.shape
    y_flat = np.ravel(y_fold)

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        # k=5 is only a placeholder; the grid search overrides it.
        ("selector", SelectKBest(f_regression, k=5)),
        ("final_regression", LinearRegression()),
    ])

    # np.arange excludes `stop`, so use p + 1 to let the search also consider
    # keeping all p features (and to avoid an empty grid when p == 1).
    search_space = [{"selector__k": np.arange(start=1, stop=p + 1)}]

    grid_res = GridSearchCV(pipe, search_space, cv=cv, verbose=0).fit(X_fold, y_flat)
    k_opt = grid_res.best_params_["selector__k"]

    return SelectKBest(f_regression, k=k_opt).fit(X_fold, y_flat).get_support()

In [12]:
# Support mask chosen by the cross-validated univariate selector on the full data.
supp = univariate_feature_selection(X_fold=X, y_fold=y.flatten())

In [3]:
import statsmodels.api as sm

In [33]:
# Display the dimensions of X_supp.
# NOTE(review): `X_supp` is not assigned in any visible cell — presumably
# X[:, supp]; confirm and define it explicitly so Run All works.
X_supp.shape

(3000, 12)

In [34]:
# Add a constant column to the design matrix for the intercept.
X_supp = sm.add_constant(X_supp)

In [37]:
# OLS on X_supp; display the coefficient confidence intervals with row 0 removed.
# NOTE(review): this rebinds the notebook-level name `res`, which other cells
# index like Lasso output — consider a dedicated name for the OLS fit.
mod = sm.OLS(endog=y.flatten(), exog=X_supp)
res = mod.fit()
np.delete(res.conf_int(), obj=0, axis=0)

array([[ -1.59239564,  -1.34470709],
       [-14.56968885, -14.32142735],
       [  1.01239042,   1.25960022],
       [  0.81287281,   1.05699538],
       [  1.00283391,   1.24788654],
       [  3.90975224,   4.15413198],
       [  3.74371786,   3.99135159],
       [ -0.12526953,   0.12323143],
       [ -0.20978486,   0.03916869],
       [ -0.1621137 ,   0.08343817],
       [ -0.0656445 ,   0.17723314],
       [ -0.02137892,   0.22289364]])

In [60]:
def _penalised_sandwich(X_active, theta_active, weights_active, lamda, sigma_hat, z_value=1.96):
    """Sandwich covariance and symmetric normal intervals for the active coefficients.

    Computes ``sigma_hat * A @ G @ A`` with ``G = X'X`` and
    ``A = inv(G + lamda * diag(w_j / |theta_j|))`` and returns
    ``(cov, lower, upper)`` where the bounds are ``theta -/+ z_value * sqrt(diag(cov))``.
    """
    if theta_active.size == 0:
        # Nothing selected: return empty results instead of inverting a 0 x 0 matrix.
        return np.zeros((0, 0)), np.zeros(0), np.zeros(0)

    gram = X_active.T @ X_active
    # The penalty term is a *diagonal matrix*. Adding the 1-D vector directly
    # would broadcast it over every row of the Gram matrix. The absolute value
    # keeps the ridge-like term positive for negative coefficients.
    penalty = np.diag(weights_active / np.abs(theta_active))
    bread = linalg.inv(gram + lamda * penalty)

    cov = sigma_hat * (bread @ gram @ bread)
    half_width = z_value * np.sqrt(np.diag(cov))
    return cov, theta_active - half_width, theta_active + half_width


def get_conf_intervals(lamda, weights, theta_std, theta_nat, X, X_std, intercept, y, y_std):
    """Approximate covariance and 1.96-sigma intervals for non-zero (adaptive) Lasso coefficients.

    The same computation is done twice: once with the natural-scale inputs
    (``X``, ``y``, ``theta_nat``) and once with the standardized ones
    (``X_std``, ``y_std``, ``theta_std``).

    Parameters
    ----------
    lamda : float or length-1 sequence
        Penalty level multiplying the diagonal penalty matrix.
        NOTE(review): must be on the same scale as ``X'X`` — confirm against
        the objective used by the fitting routine.
    weights : array-like
        Penalty factors, one per coefficient (including the intercept entry
        when ``intercept`` is true).
    theta_std, theta_nat : array-like
        Coefficient vectors; the first entry is the intercept when
        ``intercept`` is true.
    X, X_std : ndarray of shape (n, p)
        Design matrices without an intercept column.
    intercept : bool
        Whether the coefficient vectors and weights carry a leading intercept entry.
    y, y_std : array-like with n entries
        Responses matching ``X`` and ``X_std``.

    Returns
    -------
    dict
        ``beta_hat_nat_cov_mat`` / ``beta_hat_std_cov_mat``: covariance matrices
        for the active coefficients; ``conf_intervals_nat`` / ``conf_intervals_std``:
        arrays with columns (lower, upper). The four ``*_upper_bound`` /
        ``*_lower_bound`` vectors are included as well for cells that read them.
        The active set is determined from ``theta_nat`` (|theta| above ~1e-6).
    """
    n, p = X.shape

    # Normalise inputs to flat float arrays so the residuals cannot silently
    # broadcast to (n, n) and a one-element `lamda` list acts as a scalar.
    lamda = np.squeeze(np.asarray(lamda, dtype=np.float64))
    weights = np.asarray(weights, dtype=np.float64).ravel()
    theta_std = np.asarray(theta_std, dtype=np.float64).ravel()
    theta_nat = np.asarray(theta_nat, dtype=np.float64).ravel()

    if intercept:
        design_nat = np.insert(X, 0, 1, axis=1)
        design_std = np.insert(X_std, 0, 1, axis=1)
    else:
        design_nat, design_std = X, X_std

    # Residual variance estimates (mean squared residual, divisor n).
    sigma_hat_nat = np.sum(np.square(np.ravel(y) - design_nat @ theta_nat)) / n
    sigma_hat_std = np.sum(np.square(np.ravel(y_std) - design_std @ theta_std)) / n

    if intercept:
        # The intercept is excluded from the covariance computation.
        weights, theta_std, theta_nat = weights[1:], theta_std[1:], theta_nat[1:]

    active_set = np.invert(np.isclose(np.zeros(p), theta_nat, atol=1e-06))
    weights_active = weights[active_set]

    beta_hat_nat_cov_mat, nat_lower, nat_upper = _penalised_sandwich(
        X[:, active_set], theta_nat[active_set], weights_active, lamda, sigma_hat_nat
    )
    beta_hat_std_cov_mat, std_lower, std_upper = _penalised_sandwich(
        X_std[:, active_set], theta_std[active_set], weights_active, lamda, sigma_hat_std
    )

    return {"beta_hat_nat_cov_mat": beta_hat_nat_cov_mat,
            "beta_hat_std_cov_mat": beta_hat_std_cov_mat,
            "conf_intervals_nat": np.column_stack((nat_lower, nat_upper)),
            "conf_intervals_std": np.column_stack((std_lower, std_upper)),
            "conf_intervals_nat_upper_bound": nat_upper,
            "conf_intervals_nat_lower_bound": nat_lower,
            "conf_intervals_std_upper_bound": std_upper,
            "conf_intervals_std_lower_bound": std_lower
           }

In [61]:
# Confidence intervals at lamda = 0.001 (passed as a one-element list), with an intercept.
res_conf = get_conf_intervals(
    lamda=[0.001],
    weights=res[2][0],
    theta_std=res[1][0][1][0],
    theta_nat=res[1][0][2][0],
    X=X,
    X_std=X_std,
    intercept=True,
    y=y,
    y_std=y_std,
)

In [59]:
# Show the natural-scale intervals as (upper, lower) columns.
# The bounds are read from the combined "conf_intervals_nat" array, whose
# columns are (lower, upper), and the column order is reversed. This avoids a
# KeyError from looking up separate *_upper_bound / *_lower_bound entries.
res_conf["conf_intervals_nat"][:, ::-1]

array([[ -1.34112874,  -1.5686733 ],
       [-14.33003271, -14.55628889],
       [  1.23201101,   1.00246603],
       [  1.03396625,   0.80937016],
       [  1.21558522,   0.98857741],
       [  4.13859844,   3.91018024],
       [  3.97906129,   3.75105304]])

In [62]:
# Display the natural-scale confidence intervals; columns are (lower, upper).
res_conf["conf_intervals_nat"]

array([[ -1.5686733 ,  -1.34112874],
       [-14.55628889, -14.33003271],
       [  1.00246603,   1.23201101],
       [  0.80937016,   1.03396625],
       [  0.98857741,   1.21558522],
       [  3.91018024,   4.13859844],
       [  3.75105304,   3.97906129]])