In [50]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [79]:
from group_lasso import GroupLasso
import pyglstudy as gl 
from pyglstudy.group_lasso import *
import numpy as np
import cvxpy as cp

In [363]:
def run_group_lasso(
    X, y, groups, group_sizes, lmda,
):
    n, _ = X.shape
    expanded_groups = np.repeat(np.arange(len(groups)), group_sizes)
    GroupLasso.LOG_LOSSES = True
    GroupLasso_out = GroupLasso(
        groups=expanded_groups,
        group_reg=lmda * 2,
        l1_reg=0,
        frobenius_lipschitz=True,
        scale_reg="group_size",
        subsampling_scheme=10,
        supress_warning=True,
        fit_intercept=False,
        n_iter=10000,
        tol=1e-7,
    )
    GroupLasso_out.fit(X, y)
    return GroupLasso_out.coef_[:, 0]


In [364]:
def run_cvxpy(X, y, groups, group_sizes, alpha, penalty, lmda):
    _, p = X.shape
    beta = cp.Variable(p)
    regul = 0
    for i in range(len(groups)):
        gb = groups[i]
        ge = gb + group_sizes[i]
        bi_norm2 = cp.norm2(beta[gb:ge])
        regul += penalty[i] * (alpha * bi_norm2 + (1-alpha) / 2 * cp.sum_squares(beta[gb:ge]))
    objective = cp.Minimize(0.5 * cp.sum_squares(y - X @ beta) + lmda * regul)
    prob = cp.Problem(objective)
    result = prob.solve()
    return beta.value 


In [365]:
def run_ours(
    X, y, groups, group_sizes, alpha, penalty, lmda=None,
    max_n_lambdas = 100,
    n_lambdas_iter = 5,
    use_strong_rule = True,
    do_early_exit = True,
    verbose_diagnostic = False,
    delta_strong_size = 1,
    max_strong_size = None,
    max_n_cds = 100000,
    thr = 1e-8,
    newton_tol = 1e-8,
    newton_max_iters = 100000,
    min_ratio = 1e-2,
    n_threads = 16,
):
    _, p = X.shape

    if max_strong_size is None:
        max_strong_size = p

    if lmda is None:
        user_lmdas = []
    else:
        user_lmdas = [lmda] 
    return gl.group_basil(
        X, y, groups, group_sizes, alpha, penalty, user_lmdas,
        max_n_lambdas,
        n_lambdas_iter,
        use_strong_rule,
        do_early_exit,
        verbose_diagnostic,
        delta_strong_size,
        max_strong_size,
        max_n_cds,
        thr,
        newton_tol,
        newton_max_iters,
        min_ratio,
        n_threads,
    )

In [366]:
n = 100
p = 100
n_groups = int(p/20)
seed = 0

np.random.seed(seed)
X, beta, y, groups, group_sizes = generate_group_lasso_data(
    n, p, n_groups, rho=0, svd_transform=True
).values()

In [367]:
alpha = 1.0
penalty = np.sqrt(group_sizes)

In [368]:
lmdas = np.array(run_ours(X, y, groups, group_sizes, alpha, penalty)['lmdas'])
lmdas

array([1.59877172, 1.52610508, 1.45674125, 1.39053011, 1.32732837,
       1.26699925, 1.20941218, 1.15444253, 1.10197133, 1.05188503,
       1.00407524, 0.95843847, 0.91487596, 0.87329344, 0.8336009 ,
       0.79571245, 0.75954609, 0.72502355, 0.69207011, 0.66061446,
       0.63058851, 0.60192729, 0.57456877, 0.54845373, 0.52352566,
       0.49973061, 0.47701708, 0.45533592, 0.4346402 , 0.41488513,
       0.39602796, 0.37802788, 0.36084593, 0.34444493, 0.32878937,
       0.31384539, 0.29958063, 0.28596423, 0.27296671, 0.26055995,
       0.2487171 , 0.23741253, 0.22662176, 0.21632145, 0.20648931,
       0.19710405, 0.18814537, 0.17959387, 0.17143105, 0.16363925,
       0.15620159, 0.14910199, 0.14232507, 0.13585618, 0.1296813 ,
       0.12378709, 0.11816077, 0.11279018, 0.1076637 , 0.10277022,
       0.09809915, 0.09364039, 0.08938429, 0.08532164, 0.08144364,
       0.0777419 , 0.07420841, 0.07083552, 0.06761594, 0.06454269,
       0.06160913, 0.0588089 , 0.05613594, 0.05358448, 0.05114

In [369]:
lmda = lmdas[len(lmdas) // 2] 
lmda

0.15620158914670107

In [370]:
group_lasso_beta = run_group_lasso(X, y, groups, group_sizes, lmda)
group_lasso_beta

array([-0., -0., -0.,  0., -0.,  0.,  0.,  0., -0.,  0., -0., -0., -0.,
        0.,  0., -0.,  0.,  0.,  0.,  0.,  0., -0.,  0., -0., -0., -0.,
        0.,  0., -0., -0.,  0., -0.,  0., -0.,  0., -0.,  0.,  0., -0.,
       -0., -0.,  0., -0., -0.,  0., -0.,  0.,  0.,  0.,  0.,  0., -0.,
       -0.,  0., -0., -0., -0., -0.,  0.,  0., -0.,  0., -0., -0.,  0.,
        0., -0., -0.,  0., -0.,  0., -0., -0., -0.,  0.,  0., -0., -0.,
       -0.,  0.,  0.,  0.,  0.,  0., -0.,  0., -0., -0.,  0., -0.,  0.,
       -0., -0.,  0.,  0., -0., -0.,  0.,  0.,  0.])

In [371]:
cvxpy_beta = run_cvxpy(X, y, groups, group_sizes, alpha, penalty, lmda)

In [372]:
ours_out = run_ours(X, y, groups, group_sizes, alpha, penalty, lmda)
ours_beta = np.array(ours_out['betas'].todense())[:, 0]

In [373]:
(
    gl.objective_data(X, y, groups, group_sizes, alpha, penalty, lmda, group_lasso_beta), 
    gl.objective_data(X, y, groups, group_sizes, alpha, penalty, lmda, cvxpy_beta),
    gl.objective_data(X, y, groups, group_sizes, alpha, penalty, lmda, ours_beta),
)

(41.5489517529887, 11.222484598649165, 11.222484731834104)