In [1]:
import sys
sys.path.append("../tests/utils")

In [2]:
from utils import *
from gl0learn import synthetic

In [3]:
import random
from typing import Callable, Tuple

import numpy as np
import pytest
from gl0learn import fit, synthetic
from gl0learn.metrics import nonzeros
from hypothesis import given, settings, HealthCheck, assume, note
from hypothesis.strategies import just, booleans, floats, integers, random_module



@pytest.mark.parametrize('algorithm', ['CD', "CDPSI"])
@given(p=integers(3, 10),
       module=random_module(),
       lXs=random_penalty_values(penalty_strategies=random_penalty(l0=just(True), l1=booleans(), l2=booleans()),
                                 values_strategies={"l0": floats(0.01, 10),
                                                    "l1": floats(0.01, 10),
                                                    "l2": floats(0.01, 10)}))
@settings(max_examples=1000)
def test_super_active_set(algorithm, p, module, lXs):
    # note("p", p)
    # note("module", module)
    # note("lXs", lXs)
    theta_truth = overlap_covariance_matrix(p=p, seed=module.seed, decay=.8)
    x = sample_from_cov(n=30*p**2, cov=theta_truth)

    _, _, _, _, Y, _ = synthetic.preprocess(x, assume_centered=False, cholesky=True)

    test_result = fit(Y, **lXs, initial_active_set=np.inf, super_active_set=0., max_active_set_size=p**2)

    print('----->', test_result.active_set_size[-1])
    assume(test_result.active_set_size[-1] > 0)

    possible_active_set = np.where(np.abs(np.triu(test_result.theta, k=1)) > 0)

    possible_active_set = np.asarray(possible_active_set).T
    if possible_active_set.shape[0] > 1:
        idx = np.sort(np.random.choice(np.arange(test_result.active_set_size[-1]),
                                       size=np.random.randint(1, test_result.active_set_size[-1]),
                                       replace=False))
    else:
        idx = [0]

    initial_super_active_set = possible_active_set[idx, :]

    lXs['l0'] = 0

    theta_init = np.diag(np.diag(test_result.theta))
    for row, col in initial_super_active_set:
        theta_init[row, col] = theta_init[col, row] = test_result.theta[row, col]

    results = fit(Y, **lXs,
                  theta_init=theta_init,
                  initial_active_set=initial_super_active_set,
                  super_active_set=initial_super_active_set,
                  max_active_set_size=p**2)

    cd_indices = top_n_triu_indicies(results.theta, 1)

    np.testing.assert_array_equal(np.asarray(cd_indices).T, initial_super_active_set)

In [4]:
test_super_active_set(algorithm='CD')

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -3.66105
fit loop1
current_iter: 2 cur_objective = -3.66105
fit loop2
current_iter: 3 cur_objective = -4.76637
fit loop3
current_iter: 4 cur_objective = -4.80189
fit loop4
current_iter: 5 cur_objective = -4.80621
fit loop5
current_iter: 6 cur_objective = -4.80686
fit loop6
current_iter: 7 cur_objective = -4.80696
fit loop7
current_iter: 8 cur_objective = -4.80698
fit loop8
current_iter: 9 cur_objective = -4.80698
-----> 3
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -4.49653
fit loop1
current_iter: 2 cur_objective = -4.49653
larger_indices: 
          8
        23

smaller_indices: 
          8
        23

larger_indices: 
          8
        23

smaller_indices: 
          8
        23
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -3.66105
fit loop1
current_iter: 2 cur_objective = -3.66105
fit loop2
current_i



gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -3.66105
fit loop1
current_iter: 2 cur_objective = -3.66105
fit loop2
current_iter: 3 cur_objective = -4.76637
fit loop3
current_iter: 4 cur_objective = -4.80189
fit loop4
current_iter: 5 cur_objective = -4.80621
fit loop5
current_iter: 6 cur_objective = -4.80686
fit loop6
current_iter: 7 cur_objective = -4.80696
fit loop7
current_iter: 8 cur_objective = -4.80698
fit loop8
current_iter: 9 cur_objective = -4.80698
-----> 3
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -4.49653
fit loop1
current_iter: 2 cur_objective = -4.49653

larger_indices: 
          8
        23

smaller_indices: 
          8
        23

larger_indices: 
          8
        23

smaller_indices: 
          8
        23
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -3.66105
fit loop1
current_iter: 2 cur_objective = -3.66105
fit loop2
current_



MultipleFailures: Hypothesis found 2 distinct failures.

In [10]:
class RandomSeeder:
    def __init__(self, seed):
        self.seed = seed

    def __repr__(self):
        return f"RandomSeeder({self.seed!r})"

algorithm = 'CD'
p = 5
module = RandomSeeder(0)
lXs = {'l0': 0.01}

In [16]:
theta_truth = overlap_covariance_matrix(p=p, seed=module.seed, decay=.8)
x = sample_from_cov(n=30*p**2, cov=theta_truth, seed=module.seed)

_, _, _, _, Y, _ = synthetic.preprocess(x, assume_centered=False, cholesky=True)

test_result = fit(Y, **lXs, initial_active_set=np.inf, super_active_set=0., max_active_set_size=p**2)

print('----->', test_result.active_set_size[-1])
assume(test_result.active_set_size[-1] > 0)

possible_active_set = np.where(np.abs(np.triu(test_result.theta, k=1)) > 0)

possible_active_set = np.asarray(possible_active_set).T

active_set_size = test_result.active_set_size[-1]
if possible_active_set.shape[0] > 1:
    num_selected = np.random.randint(1, active_set_size)
    idx = np.sort(np.random.choice(np.arange(active_set_size),
                                   size=num_selected,
                                   replace=False))
else:
    num_selected = 1
    idx = [0]

initial_super_active_set = possible_active_set[idx, :]

lXs['l0'] = 0

theta_init = np.diag(np.diag(test_result.theta))
for row, col in initial_super_active_set:
    theta_init[row, col] = theta_init[col, row] = test_result.theta[row, col]

results = fit(Y, **lXs,
              theta_init=theta_init,
              initial_active_set=initial_super_active_set,
              super_active_set=initial_super_active_set,
              max_active_set_size=p**2)

cd_indices = top_n_triu_indicies(results.theta, num_selected)

np.testing.assert_array_equal(np.asarray(cd_indices).T, initial_super_active_set)

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -2.21557
fit loop1
current_iter: 2 cur_objective = -2.21557
fit loop2
current_iter: 3 cur_objective = -3.09564
fit loop3
current_iter: 4 cur_objective = -3.12897
fit loop4
current_iter: 5 cur_objective = -3.13278
fit loop5
current_iter: 6 cur_objective = -3.13336
fit loop6
current_iter: 7 cur_objective = -3.13346
fit loop7
current_iter: 8 cur_objective = -3.13347
fit loop8
current_iter: 9 cur_objective = -3.13347
-----> 10

larger_indices: 
          2
         4
         9
        19
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -3.11508
fit loop1
current_iter: 2 cur_objective = -3.11508
fit loop2
current_iter: 3 cur_objective = -3.11508

smaller_indices: 
          2
         4
         9
        19

larger_indices: 
          2
         4
         9
        19

smaller_indices: 
          2
         4
         9
        19




In [17]:
idx = np.sort(np.random.choice(np.arange(1),
                                   size=np.random.randint(1, 1),
                                   replace=False))

ValueError: low >= high

In [8]:
np.round(test_result.theta, 3)

array([[ 6.3  , -0.234, -0.409, -0.203,  2.557, -0.13 ],
       [-0.234,  5.758,  2.827,  0.209, -0.198,  0.135],
       [-0.409,  2.827,  5.839,  0.247, -0.229, -0.146],
       [-0.203,  0.209,  0.247,  6.24 , -0.119,  2.004],
       [ 2.557, -0.198, -0.229, -0.119,  6.294, -0.361],
       [-0.13 ,  0.135, -0.146,  2.004, -0.361,  5.966]])

In [9]:
np.round(results.theta, 3)

array([[ 5.24 ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ],
       [ 0.   ,  4.382,  0.   ,  0.   ,  0.   ,  0.   ],
       [ 0.   ,  0.   ,  4.422,  0.   , -0.027,  0.   ],
       [ 0.   ,  0.   ,  0.   ,  5.548,  0.   ,  0.   ],
       [ 0.   ,  0.   , -0.027,  0.   ,  5.238,  0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  5.289]])

In [13]:
initial_super_active_set

array([[0, 2],
       [0, 3],
       [0, 4],
       [1, 2],
       [1, 3],
       [1, 4],
       [2, 3],
       [2, 4],
       [3, 4]])

In [12]:
num_selected

1

In [29]:
initial_super_active_set[:, 0]*6 + initial_super_active_set[:, 1]

array([ 2,  3,  4,  5,  8, 10, 11, 15, 17, 22, 23])

In [33]:
check_is_coordinate_subset(initial_super_active_set, initial_super_active_set[:, :])

False

In [13]:
top_n_triu_indicies(results.theta, 1)

(array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
        3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5]),
 array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 5, 0, 1, 2, 3, 4,
        5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]))

In [16]:
t = np.copy(results.theta)

In [17]:
t = np.triu(t, k=1)

In [21]:
np.sort(np.abs(t).flatten())[::-1]

array([0.02653737, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        ])

In [15]:
top_n_triu_indicies

<function utils.top_n_triu_indicies(x, n)>

In [22]:
initial_super_active_set

array([[0, 2],
       [0, 3],
       [0, 4],
       [0, 5],
       [1, 2],
       [1, 4],
       [1, 5],
       [2, 3],
       [2, 5],
       [3, 4],
       [3, 5]])

In [23]:
initial_super_active_set

array([[0, 2],
       [0, 3],
       [0, 4],
       [0, 5],
       [1, 2],
       [1, 4],
       [1, 5],
       [2, 3],
       [2, 5],
       [3, 4],
       [3, 5]])

In [16]:
cd_indices

(array([1, 3]), array([2, 5]))