In [5]:
import adelie as ad
import numpy as np

from adelie.diagnostic import coefficient, predict
from adelie.solver import grpnet

In [6]:
# --- Simulation settings ---------------------------------------------------
n = 10000       # number of samples
p = 100         # number of features
n_h1 = p // 2   # number of features with signal
rho = 0.3       # equi-correlation
seed = 0        # random seed

# Legacy global RNG: the order of the draws below (W, Z, coefficients, noise)
# determines the generated data, so it must not be rearranged.
np.random.seed(seed)
W = np.random.normal(loc=0, scale=1, size=n)        # one shared factor per sample
Z = np.random.normal(loc=0, scale=1, size=(n, p))   # independent per-feature noise

# Mixing the shared factor with independent noise gives every pair of columns
# correlation rho while keeping unit variance per column.
shared_scale = np.sqrt(rho)
indep_scale = np.sqrt(1-rho)
X = shared_scale * W[:, None] + indep_scale * Z

# Only the first n_h1 columns carry signal; the noise is scaled by sqrt(n_h1).
signal_coefs = np.random.normal(loc=0, scale=1, size=n_h1)
noise = np.random.normal(loc=0, scale=1, size=n)
y = X[:, :n_h1] @ signal_coefs + np.sqrt(n_h1) * noise

# Convert to column-major (Fortran) layout only after y has been computed.
X = np.asfortranarray(X)

In [7]:
# Cross-validated fit of a Gaussian model (with intercept) on the simulated data.
# The same `seed` is reused further down to reproduce the fold assignment by hand.
gaussian_glm = ad.glm.gaussian(y)
cv_res = ad.cv_grpnet(
    X=X,
    glm=gaussian_glm,
    min_ratio=1e-3,
    seed=seed,
    intercept=True,
)

# Shape of the pre-validated predictions stored on the CV result.
cv_res.preval_preds.shape

100%|██████████| 100/100 [00:00:00<00:00:00, 850.54it/s] [dev:44.0%]
100%|██████████| 100/100 [00:00:00<00:00:00, 959.19it/s] [dev:44.6%]
100%|██████████| 101/101 [00:00:00<00:00:00, 996.62it/s] [dev:44.3%]
100%|██████████| 100/100 [00:00:00<00:00:00, 974.54it/s] [dev:44.8%]
100%|██████████| 101/101 [00:00:00<00:00:00, 929.90it/s] [dev:45.1%]


(10000,)

In [8]:
# Rebuild the pre-validated predictions by hand so they can be compared with
# cv_res.preval_preds. Starting from NaN means any sample that never receives a
# prediction makes the final np.allclose check return False.
manual_preval_preds = np.full(n, np.nan)

# same folds as what cv_grpnet used: re-seed the global RNG and draw the same
# random permutation of the sample indices.
np.random.seed(seed)
order = np.random.choice(n, n, replace=False)
n_folds = cv_res.losses.shape[0]
# Folds are contiguous slices of `order`; the first `remaining` folds get one
# extra sample each.
fold_size, remaining = divmod(n, n_folds)

best_idx = cv_res.best_idx
lmdas = cv_res.lmdas

for fold in range(n_folds):
    # Every earlier fold contributes fold_size rows, plus one extra row for each
    # earlier fold that belongs to the first `remaining` (larger) folds.
    begin = fold * fold_size + min(fold, remaining)
    size = fold_size + 1 if fold < remaining else fold_size
    test_idx = order[begin:begin + size]
    # np.setdiff1d returns the remaining (training) indices in sorted order.
    train_idx = np.setdiff1d(order, test_idx)

    # Refit on the training rows only, along the regularization path taken from
    # the CV result.
    # NOTE(review): ddev_tol=0 presumably disables the deviance-based early exit
    # so the whole path is fit -- confirm against the grpnet documentation.
    state = grpnet(
        X=X[train_idx],
        glm=ad.glm.gaussian(y[train_idx]),
        intercept=True,
        ddev_tol=0,
        progress_bar=False,
        lmda_path=lmdas,
    )

    # Sanity check: the solver's recorded lambdas match the requested path.
    assert np.allclose(state.lmdas,state.lmda_path)

    # Coefficients and intercept at the CV-selected regularization value.
    beta_best, intercept_best = coefficient(
        lmda=lmdas[best_idx],
        betas=state.betas,
        intercepts=state.intercepts,
        lmdas=state.lmda_path,
    )
    # Predict only the held-out rows and store them at their original positions.
    manual_preval_preds[test_idx] = predict(
        X=X[test_idx],
        betas=beta_best,
        intercepts=intercept_best,
    )

# Displays True when the manual reconstruction agrees with cv_grpnet's output.
np.allclose(cv_res.preval_preds, manual_preval_preds)

True