In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from group_elnet import GroupElnet
import pygrpglmnet as gl 
from pygrpglmnet.group_elnet import *
import numpy as np
import cvxpy as cp

In [3]:
def run_group_elnet(
    X, y, groups, group_sizes, lmda,
):
    """Fit a ``GroupElnet`` model at a single ``lmda`` and return its coefficients.

    Parameters
    ----------
    X : object exposing a 2-tuple ``shape``
        Design matrix handed to ``GroupElnet.fit``.
    y : response handed to ``GroupElnet.fit``.
    groups : sequence
        Only its length is used here (the number of groups).
    group_sizes : sequence of int
        Repeat count for each group index when building per-feature labels.
    lmda : float
        Regularization level; ``group_reg`` is set to ``2 * lmda``.

    Returns
    -------
    The first column of the fitted model's ``coef_`` (``coef_[:, 0]``).

    Notes
    -----
    Sets the class attribute ``GroupElnet.LOG_LOSSES = True`` as a side effect.

    NOTE(review): the ``2 * lmda`` scaling and ``subsampling_scheme=10`` should
    be confirmed against GroupElnet's objective definition -- the objective
    comparison recorded at the end of this notebook shows this solver landing
    far from the other two.
    """
    # Unpacking enforces that X is two-dimensional; the sizes are not needed.
    _n_rows, _n_cols = X.shape

    # Per-feature label: group index i repeated group_sizes[i] times.
    feature_labels = np.arange(len(groups)).repeat(group_sizes)

    GroupElnet.LOG_LOSSES = True

    settings = dict(
        groups=feature_labels,
        group_reg=lmda * 2,
        l1_reg=0,
        frobenius_lipschitz=True,
        scale_reg="group_size",
        subsampling_scheme=10,
        supress_warning=True,
        fit_intercept=False,
        n_iter=10000,
        tol=1e-7,
    )
    model = GroupElnet(**settings)
    model.fit(X, y)

    coefficients = model.coef_
    return coefficients[:, 0]


In [4]:
def run_cvxpy(X, y, groups, group_sizes, alpha, penalty, lmda):
    """Solve the group elastic-net problem with cvxpy as a reference solution.

    Minimizes over ``beta``::

        0.5 * ||y - X @ beta||_2^2
          + lmda * sum_i penalty[i] * ( alpha * ||beta_i||_2
                                        + (1 - alpha) / 2 * ||beta_i||_2^2 )

    where ``beta_i = beta[groups[i] : groups[i] + group_sizes[i]]``.

    Parameters
    ----------
    X : 2-D array-like; its second dimension sets the length of ``beta``.
    y : response vector.
    groups : sequence of int
        Start index of each group within ``beta``.
    group_sizes : sequence of int
        Number of coefficients in each group.
    alpha : float
        Weight on the group-norm term; ``(1 - alpha) / 2`` weights the
        squared-norm term.
    penalty : sequence of float
        Per-group penalty factor.
    lmda : float
        Overall regularization strength.

    Returns
    -------
    The optimal ``beta`` as reported by cvxpy (``beta.value``).

    Raises
    ------
    RuntimeError
        If the solver leaves ``beta.value`` unset, so a failed solve cannot
        silently propagate ``None`` into later comparisons.
    """
    _, p = X.shape
    beta = cp.Variable(p)

    regul = 0
    for i, begin in enumerate(groups):
        # Slice the group's coefficients once and reuse for both penalty terms.
        block = beta[begin:begin + group_sizes[i]]
        regul += penalty[i] * (
            alpha * cp.norm2(block) + (1 - alpha) / 2 * cp.sum_squares(block)
        )

    objective = cp.Minimize(0.5 * cp.sum_squares(y - X @ beta) + lmda * regul)
    prob = cp.Problem(objective)
    prob.solve()

    if beta.value is None:
        raise RuntimeError(
            f"cvxpy did not return a solution (status: {prob.status})"
        )
    return beta.value


In [38]:
def run_ours(
    X, y, groups, group_sizes, alpha, penalty,
    user_lmdas=None,
    max_n_lambdas = 100,
    n_lambdas_iter = 5,
    use_strong_rule = True,
    do_early_exit = True,
    verbose_diagnostic = False,
    delta_strong_size = 5,
    max_strong_size = None,
    max_n_cds = 100000,
    thr = 1e-7,
    newton_tol = 1e-8,
    newton_max_iters = 100000,
    min_ratio = 1e-2,
    n_threads = 16,
):
    """Call ``gl.group_basil`` with notebook-friendly defaults.

    All arguments are forwarded positionally, in signature order, to
    ``gl.group_basil``; this wrapper only fills in two defaults.

    Parameters
    ----------
    X : object exposing a 2-tuple ``shape``
        Design matrix; its second dimension ``p`` is the fallback for
        ``max_strong_size``.
    y, groups, group_sizes, alpha, penalty :
        Forwarded unchanged.
    user_lmdas : sequence or None, optional
        Forwarded unchanged when given. ``None`` (the default) is replaced by
        a new empty list on every call.
    max_strong_size : int or None, optional
        ``None`` (the default) is replaced by ``p``.
    max_n_lambdas, n_lambdas_iter, use_strong_rule, do_early_exit,
    verbose_diagnostic, delta_strong_size, max_n_cds, thr, newton_tol,
    newton_max_iters, min_ratio, n_threads :
        Solver settings forwarded unchanged.

    Returns
    -------
    Whatever ``gl.group_basil`` returns.
    """
    _, p = X.shape

    # Use a None sentinel instead of a `[]` default: a literal list default is
    # created once and shared by every call, so any mutation of it would leak
    # into later calls.
    if user_lmdas is None:
        user_lmdas = []

    if max_strong_size is None:
        max_strong_size = p

    return gl.group_basil(
        X, y, groups, group_sizes, alpha, penalty, user_lmdas,
        max_n_lambdas,
        n_lambdas_iter,
        use_strong_rule,
        do_early_exit,
        verbose_diagnostic,
        delta_strong_size,
        max_strong_size,
        max_n_cds,
        thr,
        newton_tol,
        newton_max_iters,
        min_ratio,
        n_threads,
    )

In [39]:
# Problem dimensions for the comparison: n rows, p features, and p/100 groups.
n = 100
p = 100000
n_groups = int(p/100)
seed = 0

# Seed NumPy's global RNG before generating data so the cell is repeatable.
np.random.seed(seed)
# generate_group_elnet_data is not defined in this notebook; presumably it comes
# from the star import of pygrpglmnet.group_elnet -- TODO confirm.
# The unpacking relies on the returned mapping yielding its values in the order
# X, beta, y, groups, group_sizes.
X, beta, y, groups, group_sizes = generate_group_elnet_data(
    n, p, n_groups, rho=0, svd_transform=True
).values()

In [40]:
# alpha = 1.0 zeroes the (1 - alpha) / 2 squared-norm term in run_cvxpy's
# objective, leaving only the group-norm penalty.
alpha = 1.0
# Per-group penalty factor: square root of each group's size.
penalty = np.sqrt(group_sizes)

In [41]:
# First pass with no user-supplied lambdas; keep the 'lmdas' entry of the
# result as an array so later cells can pick values from it.
ours_first_out = run_ours(X, y, groups, group_sizes, alpha, penalty)
lmdas = np.array(ours_first_out['lmdas'])
lmdas

array([67.24599854, 64.18956412, 61.27204936, 58.48714016, 55.82880937,
       53.29130382, 50.86913182, 48.55705127, 46.35005835, 44.24337666,
       42.23244691, 40.31291702, 38.48063272, 36.73162858, 35.06211937,
       33.46849193, 31.94729732, 30.49524335, 29.10918748, 27.78612998,
       26.52320749, 25.31768678, 24.16695884, 23.06853326, 22.02003282,
       21.01918835, 20.0638338 , 19.15190158, 18.2814181 , 17.45049943,
       16.6573473 , 15.90024516, 15.17755448, 14.4877112 , 13.82922237,
       13.20066287, 12.60067237, 12.02795236, 11.48126336, 10.95942222,
       10.46129957,  9.98581736,  9.53194655,  9.09870487,  8.68515469,
        8.290401  ,  7.91358948,  7.55390463,  7.210568  ,  6.88283656,
        6.57000102,  6.27138433,  5.98634023,  5.71425183,  5.45453027,
        5.20661345,  4.96996483,  4.74407226,  4.52844685,  4.32262195,
        4.12615212,  3.93861213,  3.75959613,  3.58871669,  3.42560397,
        3.26990499,  3.12128276,  2.97941564,  2.8439966 ,  2.71

In [46]:
# lmda_end is an exclusive slice bound about 80% of the way through lmdas;
# lmda is the last value inside that slice (index lmda_end - 1).
lmda_end = int(len(lmdas) * 0.8) + 1
lmda = lmdas[lmda_end-1] 
lmda

1.6274394175294986

In [47]:
# Coefficients from the GroupElnet solver at the selected lmda.
group_elnet_beta = run_group_elnet(X, y, groups, group_sizes, lmda)

In [48]:
# Coefficients from the cvxpy formulation at the same lmda.
cvxpy_beta = run_cvxpy(X, y, groups, group_sizes, alpha, penalty, lmda)



In [49]:
# Re-run group_basil on the explicit lambda list up to and including lmda
# (lmdas[:lmda_end] ends at index lmda_end - 1).
ours_out = run_ours(X, y, groups, group_sizes, alpha, penalty, user_lmdas=lmdas[:lmda_end])

In [52]:
# Densify 'betas' and keep its last column.
# Presumably columns follow the order of user_lmdas, making the last column the
# solution at lmda -- TODO confirm against group_basil's output layout.
ours_beta = np.array(ours_out['betas'].todense())[:, -1]

In [53]:
# Evaluate the same objective at the same lmda for each solver's coefficients,
# in the order (GroupElnet, cvxpy, group_basil).
# NOTE(review): in the recorded output the first value is far larger than the
# other two, so run_group_elnet does not appear to be solving the same problem
# at this lmda -- check its group_reg scaling and subsampling settings.
(
    gl.group_elnet_objective(X, y, groups, group_sizes, alpha, penalty, lmda, group_elnet_beta), 
    gl.group_elnet_objective(X, y, groups, group_sizes, alpha, penalty, lmda, cvxpy_beta),
    gl.group_elnet_objective(X, y, groups, group_sizes, alpha, penalty, lmda, ours_beta),
)

(64827.17973922805, 4473.984773094584, 4473.977597707031)