In [1]:
import numpy as np
from gl0learn import fit, opt, synthetic

In [2]:
def _sample_data(n: int = 1000, seed: int = 0):
    """


    Example Data!

    >>>from tabulate import tabulate
    ...import numpy as np
    ...coords = np.array([str(t).replace('(','').replace(')','') for t in zip(*np.nonzero(np.ones([5,5])))]).reshape(5,5)
    ...table = tabulate(coords, tablefmt="fancy_grid")
    ...print(table)
    ╒══════╤══════╤══════╤══════╤══════╕
    │ 0, 0 │ 0, 1 │ 0, 2 │ 0, 3 │ 0, 4 │
    ├──────┼──────┼──────┼──────┼──────┤
    │ 1, 0 │ 1, 1 │ 1, 2 │ 1, 3 │ 1, 4 │
    ├──────┼──────┼──────┼──────┼──────┤
    │ 2, 0 │ 2, 1 │ 2, 2 │ 2, 3 │ 2, 4 │
    ├──────┼──────┼──────┼──────┼──────┤
    │ 3, 0 │ 3, 1 │ 3, 2 │ 3, 3 │ 3, 4 │
    ├──────┼──────┼──────┼──────┼──────┤
    │ 4, 0 │ 4, 1 │ 4, 2 │ 4, 3 │ 4, 4 │
    ╘══════╧══════╧══════╧══════╧══════╛

    Suppose:
        Coordinates (0,1) and (1,2) are the initial support
        Coordinates (0,2) and (1,3) are also in the active set
        Coordinates (0,3) and (1,4) are also in the super active set

    Supplying `theta_truth` as a upper triangular diagonally dominate matrix, we can set which of `theta_hat` should be learned first.

    This allows us to check if fit is behaving as expected!
    """
    N = 5
    mu = np.zeros(N)

    theta_truth_tril = (1/8)*np.asarray([[8, 0, 0, 0, 1],
                                         [0, 8, 4, 2, 3],
                                         [0, 0, 8, 6, 5],
                                         [0, 0, 0, 8, 7],
                                         [0, 0, 0, 0, 8]])

    theta_truth = (theta_truth_tril + theta_truth_tril.T)/2

    rng = np.random.default_rng(seed)
    x = rng.multivariate_normal(mu, cov=np.linalg.inv(theta_truth), size=n)

    return theta_truth, x

In [3]:
theta_truth, x = _sample_data()

In [4]:
theta_truth

array([[1.    , 0.    , 0.    , 0.    , 0.0625],
       [0.    , 1.    , 0.25  , 0.125 , 0.1875],
       [0.    , 0.25  , 1.    , 0.375 , 0.3125],
       [0.    , 0.125 , 0.375 , 1.    , 0.4375],
       [0.0625, 0.1875, 0.3125, 0.4375, 1.    ]])

In [5]:
# Only the fifth value returned by `synthetic.preprocess` is kept (as `Y`); the rest are discarded.
_, _, _,_,Y,_ = synthetic.preprocess(x, assume_centered = False, cholesky=True)
default_theta_init = np.eye(5)
# M is the largest absolute off-diagonal entry of theta_truth:
# multiplying by (1 - identity) zeroes the diagonal before taking the max.
M = np.max(np.abs(theta_truth*(1-default_theta_init)))

# Fit with MOSEK to find the optimal solution!

In [6]:
fit_mosek = opt.MIO_mosek(Y, l0=0.03, l2=0.1, M=M)

Since MOSEK is a Mixed Integer Optimization tool, it often returns values that are very close to integers, but not exactly integers.

In [7]:
fit_mosek.theta_hat

array([[ 9.78421211e-01, -1.61876031e-07,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [-1.61876031e-07,  1.06405858e+00,  2.64101050e-01,
         1.17383104e-01,  1.47151643e-01],
       [ 0.00000000e+00,  2.64101050e-01,  9.59100877e-01,
         2.97619745e-01,  2.86025672e-01],
       [ 0.00000000e+00,  1.17383104e-01,  2.97619745e-01,
         9.37329207e-01,  4.37499979e-01],
       [ 0.00000000e+00,  1.47151643e-01,  2.86025672e-01,
         4.37499979e-01,  1.01872561e+00]])

In [8]:
fit_mosek.theta_hat[np.abs(fit_mosek.theta_hat) < 1e-6] = 0

In [9]:
fit_mosek.theta_hat

array([[0.97842121, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 1.06405858, 0.26410105, 0.1173831 , 0.14715164],
       [0.        , 0.26410105, 0.95910088, 0.29761974, 0.28602567],
       [0.        , 0.1173831 , 0.29761974, 0.93732921, 0.43749998],
       [0.        , 0.14715164, 0.28602567, 0.43749998, 1.01872561]])

In [93]:
np.matrix.argpartition(fit_mosek.theta_hat, 3, axis=None)

array([15, 16,  1,  9, 10, 20, 11, 21,  8,  7, 12, 23,  5, 13, 14,  4,  3,
       17, 18, 19,  2, 22,  0,  6, 24])

In [174]:
def top_n_triu_indicies(x, n):
    """
    Return the indices of the `n` largest entries strictly above the diagonal of `x`.

    Only strictly-upper-triangular entries are candidates, so diagonal and
    lower-triangular positions are never returned, even when the upper
    triangle holds zeros or negative values.

    Parameters
    ----------
    x : 2-D array-like. It is not modified.
    n : number of entries to select. Values <= 0 select nothing; values larger
        than the number of strictly-upper entries select all of them.

    Returns
    -------
    (rows, cols) : tuple of two integer arrays, in the same row-major order that
        `np.where` would produce. Exactly `min(n, number of candidates)` indices
        are returned; ties are broken in favour of the earlier row-major position.
    """
    x = np.asarray(x)
    rows, cols = np.triu_indices(x.shape[0], k=1, m=x.shape[1])

    n = min(int(n), rows.size)
    if n <= 0:
        return rows[:0], cols[:0]

    # Rank only the candidate values; a stable sort on the negated values keeps
    # the earliest position first among ties.
    candidates = x[rows, cols].astype(float)
    ranked = np.argsort(-candidates, kind="stable")[:n]

    # Re-sort the selected positions so the output is in row-major order.
    keep = np.sort(ranked)
    return rows[keep], cols[keep]

In [175]:
# NOTE(review): `test` is not assigned anywhere in the visible notebook, so this cell
# fails on a fresh kernel; presumably a leftover scratch cell. Confirm, then define `test` or remove it.
test[np.tril_indices(5, k=0)] = 0

In [176]:
fit_mosek.theta_hat

array([[0.97842551, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.98844736, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.90512582, 0.28451871, 0.26571506],
       [0.        , 0.        , 0.28451871, 0.9402875 , 0.43749997],
       [0.        , 0.        , 0.26571506, 0.43749997, 1.01442219]])

In [180]:
np.testing.assert_array_equal

<function numpy.testing._private.utils.assert_array_equal(x, y, err_msg='', verbose=True)>

In [178]:
top_n_triu_indicies(fit_mosek.theta_hat, 3)

[[0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.28451871 0.26571506]
 [0.         0.         0.         0.         0.43749997]
 [0.         0.         0.         0.         0.        ]]
[0.43749997 0.28451871 0.26571506 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.        ]


TypeError: unhashable type: 'numpy.ndarray'

# Finding a similar solution with gL0Learn

In [10]:
# TODO: Why are we seeing this slight bias on CD l0?
fit_cd = fit(Y, l0=0.0285, l2=0.1, scale_x=False, max_active_set_size=10, initial_active_set=0., super_active_set=0.)
fit_cd.theta

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.16347
fit loop1
current_iter: 2 cur_objective = 5.14856
fit loop2
current_iter: 3 cur_objective = 5.14692
fit loop3
current_iter: 4 cur_objective = 5.14662
fit loop4
current_iter: 5 cur_objective = 5.14367
fit loop5
current_iter: 6 cur_objective = 5.14326
fit loop6
current_iter: 7 cur_objective = 5.14319
fit loop7
current_iter: 8 cur_objective = 5.14317
fit loop8
current_iter: 9 cur_objective = 5.14317


array([[ 0.97842588, -0.        ,  0.        , -0.        ,  0.        ],
       [-0.        ,  1.0644279 ,  0.26460968,  0.11852766,  0.14812423],
       [ 0.        ,  0.26460968,  0.96123445,  0.30073056,  0.28921988],
       [-0.        ,  0.11852766,  0.30073056,  0.94812956,  0.45365061],
       [ 0.        ,  0.14812423,  0.28921988,  0.45365061,  1.03049072]])

In this case, we see very similar solutions between MOSEK and gL0Learn. However, with certain parameters this is not always the case.
For example, when we are looking for a much sparser solution, we start seeing peculiarities with gL0Learn's CD iteration order.

Using `scipy`'s bisect method, we can quickly find solutions with close to a specified number of non-zeros!

In [11]:
from scipy.optimize import bisect

In [12]:
def make_bisect_paritial_func(desired_nnz: int, Y: np.ndarray, fixed_l2: float, M, tol=1e-6, verbose: bool = True):
    """
    Build a function of `l0` whose root is an `l0` giving `desired_nnz` non-zeros under MOSEK.

    The returned callable runs `opt.MIO_mosek(Y, l0=l0, l2=fixed_l2, M=M)`, zeroes
    every entry of the solution whose magnitude is below `tol`, clears the
    diagonal, and returns `desired_nnz - nnz`. Here `nnz` is half the count of the
    remaining non-zero entries, so a symmetric off-diagonal pair counts once.

    The solver's `theta_hat` array is modified in place. When `verbose` is true
    the non-zero count, `l0` and cost are printed on every evaluation.
    """

    def inner_bisect(l0):
        solution = opt.MIO_mosek(Y, l0=l0, l2=fixed_l2, M=M)
        theta_hat = solution.theta_hat

        # Snap solver noise to exact zeros, then ignore the diagonal entirely.
        negligible = np.abs(theta_hat) < tol
        theta_hat[negligible] = 0
        np.fill_diagonal(theta_hat, 0)

        nnz = np.count_nonzero(theta_hat) // 2
        cost = desired_nnz - nnz

        if verbose:
            report = (
                f"MOSEK found solution with {nnz} non-zeros with parameters:",
                f"\t l0 = {l0})",
                f"\t cost = {cost}",
            )
            for line in report:
                print(line)

        return cost

    return inner_bisect



In [13]:
fixed_l2 = 0.01
f = make_bisect_paritial_func(3, Y, fixed_l2=fixed_l2, M=M, tol=1e-6)
opt_l0 = bisect(f, a=0, b=10)

MOSEK found solution with 10 non-zeros with parameters:
	 l0 = 0.0)
	 cost = -7
MOSEK found solution with 0 non-zeros with parameters:
	 l0 = 10.0)
	 cost = 3
MOSEK found solution with 0 non-zeros with parameters:
	 l0 = 5.0)
	 cost = 3
MOSEK found solution with 0 non-zeros with parameters:
	 l0 = 2.5)
	 cost = 3
MOSEK found solution with 0 non-zeros with parameters:
	 l0 = 1.25)
	 cost = 3
MOSEK found solution with 0 non-zeros with parameters:
	 l0 = 0.625)
	 cost = 3
MOSEK found solution with 1 non-zeros with parameters:
	 l0 = 0.3125)
	 cost = 2
MOSEK found solution with 1 non-zeros with parameters:
	 l0 = 0.15625)
	 cost = 2
MOSEK found solution with 4 non-zeros with parameters:
	 l0 = 0.078125)
	 cost = -1
MOSEK found solution with 4 non-zeros with parameters:
	 l0 = 0.1171875)
	 cost = -1
MOSEK found solution with 3 non-zeros with parameters:
	 l0 = 0.13671875)
	 cost = 0


In [14]:
fit_mosek = opt.MIO_mosek(Y, l0=opt_l0, l2=fixed_l2, M=M)
fit_mosek.theta_hat[np.abs(fit_mosek.theta_hat) < 1e-6] = 0
fit_mosek.theta_hat

array([[0.97842551, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.98844736, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.90512582, 0.28451871, 0.26571506],
       [0.        , 0.        , 0.28451871, 0.9402875 , 0.43749997],
       [0.        , 0.        , 0.26571506, 0.43749997, 1.01442219]])

gL0Learn again matches!

Since `max_active_set_size` is large enough to allow CD to iterate and change values!

In [15]:
# TODO: Why are we seeing this slight bias on CD l0?
fit_cd = fit(Y, l0=opt_l0, l2=fixed_l2, scale_x=False, max_active_set_size=10, initial_active_set=0., super_active_set=0.)
fit_cd.theta

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.58232
fit loop1
current_iter: 2 cur_objective = 5.57984
fit loop2
current_iter: 3 cur_objective = 5.57961
fit loop3
current_iter: 4 cur_objective = 5.57959
fit loop4
current_iter: 5 cur_objective = 5.57959


array([[ 0.97842588, -0.        ,  0.        , -0.        ,  0.        ],
       [-0.        ,  0.98844773,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.80047801,  0.        ,  0.        ],
       [-0.        ,  0.        ,  0.        ,  0.8677137 ,  0.37966987],
       [ 0.        ,  0.        ,  0.        ,  0.37966987,  0.95505299]])

However, if we limit `gL0Learn`'s CD algorithm to find just 2 NNZs (which MOSEK is unable to do!), we find that the solution is surprisingly different! In particular, look at the value of theta (3, 4): it is very different from the previously learned theta (3, 4).

In [16]:
# TODO: Why are we seeing this slight bias on CD l0?
fit_cd = fit(Y, l0=opt_l0, l2=fixed_l2, scale_x=False, max_active_set_size=3, initial_active_set=np.inf, super_active_set=0.)
fit_cd.theta

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82604
fit loop1
current_iter: 2 cur_objective = 5.82604
fit loop2
current_iter: 3 cur_objective = 5.58894
fit loop3
current_iter: 4 cur_objective = 5.5803
fit loop4
current_iter: 5 cur_objective = 5.57965
fit loop5
current_iter: 6 cur_objective = 5.57959
fit loop6
current_iter: 7 cur_objective = 5.57959


array([[0.97842588, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.98844773, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.80047801, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.86720233, 0.37891446],
       [0.        , 0.        , 0.        , 0.37891446, 0.95449015]])

In [17]:
fit_cd.active_set_size

[0, 0, 1, 1, 1, 1, 1]

# Why do we only see 1 NNZ?

Since `max_active_set_size` is set to 3, the CD algorithm iterates and brings in (3,4) first, which it should, as (3,4) is the largest item in theta_star.
However, no other item is now worth adding, because (3,4) was first optimized in isolation.

If we instead start with the l0 penalty set to 0, to allow CD to learn over a continuous loss function, it still doesn't find the correct solution! This is because of the order in which gL0Learn iterates: row by row, left to right, downwards!

In [18]:
# TODO: Why are we seeing this slight bias on CD l0?
fit_cd = fit(Y, l0=0, l2=0.01, scale_x=False, max_active_set_size=3, initial_active_set=np.inf, super_active_set=0., algorithm='CD')
fit_cd.theta

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82604
fit loop1
current_iter: 2 cur_objective = 5.82604
this->params.max_active_set_size = 3
this->active_set.size() = 0
n_to_keep = 3
fit loop2
current_iter: 3 cur_objective = 5.20801
fit loop3
current_iter: 4 cur_objective = 5.18829
fit loop4
current_iter: 5 cur_objective = 5.18635
fit loop5
current_iter: 6 cur_objective = 5.18615
fit loop6
current_iter: 7 cur_objective = 5.18613
fit loop7
current_iter: 8 cur_objective = 5.18613
this->params.max_active_set_size = 3
this->active_set.size() = 3
n_to_keep = 0


array([[0.97842588, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 1.05102699, 0.23079159, 0.        , 0.        ],
       [0.        , 0.23079159, 0.90862251, 0.22450104, 0.        ],
       [0.        , 0.        , 0.22450104, 0.90546316, 0.40966985],
       [0.        , 0.        , 0.        , 0.40966985, 0.97783669]])

In [19]:
# TODO: Why are we seeing this slight bias on CD l0?
fit_cd = fit(Y, l0=0.15, l2=0.01, scale_x=False, max_active_set_size=5, initial_active_set=np.inf, super_active_set=0., algorithm='CDPSI')
fit_cd.theta

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fitpsi called 
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82604
fit loop1
current_iter: 2 cur_objective = 5.82604
fit loop2
current_iter: 3 cur_objective = 5.60222
fit loop3
current_iter: 4 cur_objective = 5.59358
fit loop4
current_iter: 5 cur_objective = 5.59293
fit loop5
current_iter: 6 cur_objective = 5.59287
fit loop6
current_iter: 7 cur_objective = 5.59287
Pre psi cost: 5.59287 
PSI iter: 0 
PSI iter: 0 Swapping row: 0
psi_row_fit row =  0 
selected super_active_set start =  {0, 1} 
selected super_active_set end =  {1, 2} 
PSI iter: 0 Swapping row: 1
psi_row_fit row =  1 
selected super_active_set start =  {1, 2} 
selected super_active_set end =  {2, 3} 
PSI iter: 0 Swapping row: 2
psi_row_fit row =  2 
selected super_active_set start =  {2, 3} 
selected super_active_set end =  {3, 4} 
PSI iter: 0 Swapping row: 3
psi_row_fit row =  3 
selected super_active_set start =  {3, 4} 
selected super_active_set end =  {103824001895872, 13} 


array([[0.97842588, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.98844773, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.80047801, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.86720233, 0.37891446],
       [0.        , 0.        , 0.        , 0.37891446, 0.95449015]])

In [20]:
# If we specify the starting initial_active_set, then it works!

In [51]:
def make_bisect_func(desired_nnz: int, Y: np.ndarray, verbose: bool = True, **kwargs):
    """
    Build a function of `l0` whose root is an `l0` giving `desired_nnz` non-zeros under gL0Learn.

    The returned callable runs `fit(Y, l0=l0, **kwargs)`, clears the diagonal of
    the fitted `theta`, and returns `desired_nnz - nnz`. Here `nnz` is half the
    count of the remaining non-zero entries, so a symmetric off-diagonal pair
    counts once.

    The fitted `theta` array is modified in place. When `verbose` is true the
    non-zero count, `l0` and cost are printed on every evaluation.
    """

    def inner_bisect(l0):
        theta_hat = fit(Y, l0=l0, **kwargs).theta
        np.fill_diagonal(theta_hat, 0)  # only off-diagonal support is counted

        nnz = np.count_nonzero(theta_hat) // 2
        cost = desired_nnz - nnz

        if verbose:
            report = (
                f"gl0Learn found solution with {nnz} non-zeros with parameters:",
                f"\t l0 = {l0})",
                f"\t cost = {cost}",
            )
            for line in report:
                print(line)

        return cost

    return inner_bisect

In [234]:
fixed_l2 = 0.01
# Pass `fixed_l2` rather than repeating the literal, so the bisection and the later
# `fit(..., l2=fixed_l2, ...)` call cannot drift apart if the value is edited.
f = make_bisect_func(2, Y, l2=fixed_l2, scale_x=False, max_active_set_size=10, initial_active_set=np.inf, super_active_set=0., algorithm='CD')
# full_output=True makes bisect return (root, result object); `r` is inspected in the next cell.
opt_l0, r = bisect(f, a=0, b=10, full_output=True)

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82604
fit loop1
current_iter: 2 cur_objective = 5.82604
fit loop2
current_iter: 3 cur_objective = 5.02138
fit loop3
current_iter: 4 cur_objective = 4.9325
fit loop4
current_iter: 5 cur_objective = 4.91523
fit loop5
current_iter: 6 cur_objective = 4.91158
fit loop6
current_iter: 7 cur_objective = 4.91073
fit loop7
current_iter: 8 cur_objective = 4.91051
fit loop8
current_iter: 9 cur_objective = 4.91045
fit loop9
current_iter: 10 cur_objective = 4.91044
fit loop10
current_iter: 11 cur_objective = 4.91044
gl0Learn found solution with 10 non-zeros with parameters:
	 l0 = 0.0)
	 cost = -8
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82604
fit loop1
current_iter: 2 cur_objective = 5.82604
gl0Learn found solution with 0 non-zeros with parameters:
	 l0 = 10.0)
	 cost = 2
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective

In [235]:
r

      converged: True
           flag: 'converged'
 function_calls: 45
     iterations: 43
           root: 0.13244885148765206

In [186]:
# TODO: Why are we seeing this slight bias on CD l0?
fit_cd = fit(Y, l0=opt_l0, l2=fixed_l2, scale_x=False, max_active_set_size=10, initial_active_set=np.inf, super_active_set=0., algorithm='CDPSI')
fit_cd.theta

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fitpsi called 
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82604
fit loop1
current_iter: 2 cur_objective = 5.82604
fit loop2
current_iter: 3 cur_objective = 5.07144
fit loop3
current_iter: 4 cur_objective = 4.9977
fit loop4
current_iter: 5 cur_objective = 4.98545
fit loop5
current_iter: 6 cur_objective = 4.9831
fit loop6
current_iter: 7 cur_objective = 4.9826
fit loop7
current_iter: 8 cur_objective = 4.98251
fit loop8
current_iter: 9 cur_objective = 4.98249
fit loop9
current_iter: 10 cur_objective = 4.98248
fit loop10
current_iter: 11 cur_objective = 4.9506
fit loop11
current_iter: 12 cur_objective = 4.95022
fit loop12
current_iter: 13 cur_objective = 4.95016
fit loop13
current_iter: 14 cur_objective = 4.95014
fit loop14
current_iter: 15 cur_objective = 4.95014
Pre psi cost: 4.95014 
PSI iter: 0 
PSI iter: 0 Swapping row: 0
psi_row_fit row =  0 
selected super_active_set start =  {0, 1} 
selected super_active_set end =  {1, 2} 
zero_indic

array([[ 0.98346772,  0.        ,  0.        , -0.        ,  0.06323082],
       [ 0.        ,  1.07533387,  0.28315539,  0.13221527,  0.16325069],
       [ 0.        ,  0.28315539,  0.98365381,  0.32496826,  0.31540576],
       [-0.        ,  0.13221527,  0.32496826,  0.97484845,  0.48427529],
       [ 0.06323082,  0.16325069,  0.31540576,  0.48427529,  1.06499503]])

In [225]:
theta_truth, x = _sample_data()

from scipy.optimize import bisect

fixed_l2 = 0.01
_, _, _, _, Y, _ = synthetic.preprocess(x, assume_centered=False, cholesky=True)
f = make_bisect_func(8, Y, l2=fixed_l2, scale_x=False, max_active_set_size=10, initial_active_set=np.inf,
                     super_active_set=0.)
opt_l0 = bisect(f, a=0, b=10)

results = fit(Y, l0=opt_l0, l2=fixed_l2, scale_x=False, max_active_set_size=10, initial_active_set=np.inf,
              super_active_set=0.)
theta = results.theta

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82604
fit loop1
current_iter: 2 cur_objective = 5.82604
fit loop2
current_iter: 3 cur_objective = 5.03236
fit loop3
current_iter: 4 cur_objective = 4.93107
fit loop4
current_iter: 5 cur_objective = 4.91545
fit loop5
current_iter: 6 cur_objective = 4.9115
fit loop6
current_iter: 7 cur_objective = 4.91068
fit loop7
current_iter: 8 cur_objective = 4.9105
fit loop8
current_iter: 9 cur_objective = 4.91045
fit loop9
current_iter: 10 cur_objective = 4.91044
fit loop10
current_iter: 11 cur_objective = 4.91044
gl0Learn found solution with 10 non-zeros with parameters:
	 l0 = 0.0)
	 cost = -2
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82604
fit loop1
current_iter: 2 cur_objective = 5.82604
gl0Learn found solution with 0 non-zeros with parameters:
	 l0 = 10.0)
	 cost = 8
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective 

In [226]:
theta

array([[ 0.98708951,  0.        ,  0.        , -0.04409201,  0.05227623],
       [ 0.        ,  1.07535383,  0.28317347,  0.13260801,  0.16326327],
       [ 0.        ,  0.28317347,  0.98345263,  0.32467686,  0.31504568],
       [-0.04409201,  0.13260801,  0.32467686,  0.97761037,  0.48311292],
       [ 0.05227623,  0.16326327,  0.31504568,  0.48311292,  1.06277992]])

In [227]:
theta_truth

array([[1.    , 0.    , 0.    , 0.    , 0.0625],
       [0.    , 1.    , 0.25  , 0.125 , 0.1875],
       [0.    , 0.25  , 1.    , 0.375 , 0.3125],
       [0.    , 0.125 , 0.375 , 1.    , 0.4375],
       [0.0625, 0.1875, 0.3125, 0.4375, 1.    ]])

In [224]:
theta[np.tril_indices(5, k=-1)]

array([ 0.        ,  0.        ,  0.28316425, -0.        ,  0.13230798,
        0.32488579,  0.06323236,  0.16328137,  0.31544008,  0.48438003])

# What if we rearrange theta so that the CD algorithms hit the proper values first? Does it find the right values?

In [22]:
theta_truth_tril = (1/8)*np.asarray([[8, 0, 0, 0, 1],
                                     [0, 8, 4, 2, 3],
                                     [0, 0, 8, 6, 5],
                                     [0, 0, 0, 8, 7],
                                     [0, 0, 0, 0, 8]])


In [23]:
theta_truth_tril[::-1,::-1].T

array([[1.   , 0.875, 0.625, 0.375, 0.125],
       [0.   , 1.   , 0.75 , 0.25 , 0.   ],
       [0.   , 0.   , 1.   , 0.5  , 0.   ],
       [0.   , 0.   , 0.   , 1.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 1.   ]])

In [24]:
N = 5
mu2 = np.zeros(N)

theta_truth_tril = (1/8)*np.asarray([[8, 0, 0, 0, 1],
                                     [0, 8, 4, 2, 3],
                                     [0, 0, 8, 6, 5],
                                     [0, 0, 0, 8, 7],
                                     [0, 0, 0, 0, 8]])

theta_truth_tril = theta_truth_tril[::-1,::-1].T

theta_truth2 = (theta_truth_tril + theta_truth_tril.T)/2

rng = np.random.default_rng(1)
x2 = rng.multivariate_normal(mu2, cov=np.linalg.inv(theta_truth2), size=1000)

In [25]:
# Only the fifth value returned by `synthetic.preprocess` is kept (as `Y2`).
_, _, _,_,Y2,_ = synthetic.preprocess(x2, assume_centered = False, cholesky=True)
default_theta_init2 = np.eye(N)
# Largest absolute off-diagonal entry of the *second* ground truth. This previously read
# `theta_truth` / `default_theta_init` from the first experiment (copy-paste slip).
M2 = np.max(np.abs(theta_truth2*(1-default_theta_init2)))

In [49]:
# TODO: Why are we seeing this slight bias on CD l0?
fit_cd = fit(Y2, l0=0, l2=fixed_l2, scale_x=False, max_active_set_size=3, initial_active_set=np.inf, super_active_set=0., algorithm="CDPSI")
fit_cd.theta

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fitpsi called 
fit 1
fit loop0
current_iter: 1 cur_objective = 5.82135
fit loop1
current_iter: 2 cur_objective = 5.82135
this->params.max_active_set_size = 3
this->active_set.size() = 0
n_to_keep = 3
fit loop2
current_iter: 3 cur_objective = 5.3114
fit loop3
current_iter: 4 cur_objective = 5.29768
fit loop4
current_iter: 5 cur_objective = 5.29669
fit loop5
current_iter: 6 cur_objective = 5.29661
fit loop6
current_iter: 7 cur_objective = 5.2966
fit loop7
current_iter: 8 cur_objective = 5.2966
this->params.max_active_set_size = 3
this->active_set.size() = 3
n_to_keep = 0
Pre psi cost: 5.2966 
PSI iter: 0 
PSI iter: 0 Swapping row: 0
psi_row_fit row =  0 
selected super_active_set start =  {0, 1} 
selected super_active_set end =  {1, 2} 
zero_indices =          2
        3
        4
 
non_zero_indices =          1
 
Non Zero Index Loop: (0, 1) 
No swap for (0, 1) 
PSI iter: 0 Swapping row: 1
psi_row_fit row =  1 
selected super_active_set start = 

array([[0.88394604, 0.33252268, 0.        , 0.        , 0.        ],
       [0.33252268, 0.90349329, 0.28146035, 0.        , 0.        ],
       [0.        , 0.28146035, 0.98315176, 0.16799348, 0.        ],
       [0.        , 0.        , 0.16799348, 1.00246288, 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.94224434]])

In [50]:
theta_truth2

array([[1.    , 0.4375, 0.3125, 0.1875, 0.0625],
       [0.4375, 1.    , 0.375 , 0.125 , 0.    ],
       [0.3125, 0.375 , 1.    , 0.25  , 0.    ],
       [0.1875, 0.125 , 0.25  , 1.    , 0.    ],
       [0.0625, 0.    , 0.    , 0.    , 1.    ]])

In [27]:
results = fit(x, l0=0, scale_x=True, max_active_set_size=2, initial_active_set=np.inf, super_active_set=0.)

theta_truth_copy = np.copy(theta_truth)
np.fill_diagonal(theta_truth_copy, 0)
i0, j0 = np.unravel_index(np.argmax(theta_truth_copy), theta_truth_copy.shape)

gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = 5.83348
fit loop1
current_iter: 2 cur_objective = 5.83348
this->params.max_active_set_size = 2
this->active_set.size() = 0
n_to_keep = 2
fit loop2
current_iter: 3 cur_objective = 5.34914
fit loop3
current_iter: 4 cur_objective = 5.34021
fit loop4
current_iter: 5 cur_objective = 5.33955
fit loop5
current_iter: 6 cur_objective = 5.3395
fit loop6
current_iter: 7 cur_objective = 5.33949
this->params.max_active_set_size = 2
this->active_set.size() = 2
n_to_keep = 0


In [28]:
results.theta

array([[0.97754908, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 1.04489167, 0.21993293, 0.        , 0.        ],
       [0.        , 0.21993293, 0.84492731, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.86428477, 0.37798077],
       [0.        , 0.        , 0.        , 0.37798077, 0.95401447]])

In [29]:
theta_truth

array([[1.    , 0.    , 0.    , 0.    , 0.0625],
       [0.    , 1.    , 0.25  , 0.125 , 0.1875],
       [0.    , 0.25  , 1.    , 0.375 , 0.3125],
       [0.    , 0.125 , 0.375 , 1.    , 0.4375],
       [0.0625, 0.1875, 0.3125, 0.4375, 1.    ]])

In [30]:
# NOTE(review): only `i0`/`j0` are assigned in the visible notebook; `i1`/`j1` are never
# defined, so this cell raises NameError. Define the second coordinate pair before comparing.
results.theta[i0, j0] > results.theta[i1, j1]

NameError: name 'i1' is not defined

In [None]:
results.theta[i1, j1]

In [None]:
np.mean(theta_truth_copy)

In [None]:
results = fit(x, l0=0, scale_x=True, max_active_set_size=1, initial_active_set=np.inf, super_active_set=0.)

In [None]:
results.active_set_size

In [None]:
results.theta

In [None]:
theta_init = np.diag(np.diag(theta_truth))
theta_init[0, 1] = theta_init[1, 0] = theta_truth[0, 1]
theta_init[1, 2] = theta_init[2, 1] = theta_truth[1, 2]

initial_active_set = np.asarray([[0, 1], [0, 2], [1, 2], [1, 3]])
initial_super_active_set = np.asarray([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [1, 4]])

results = fit(x, l0=0.01, scale_x=True, theta_init=theta_init, initial_active_set=initial_active_set,
              super_active_set=initial_super_active_set, max_active_set_size=5)

In [None]:
theta_truth

In [None]:
results.theta

In [None]:
def insert_values_by_coords(x, coords, values):
    """
    Return a copy of `x` with `values` written at the positions listed in `coords`.

    `coords` is indexed as `coords[:, 0]` (rows) and `coords[:, 1]` (columns), so it
    must be a 2-D array with one (row, col) pair per row. `values` may be a scalar
    or anything that broadcasts against the selected positions. `x` itself is left
    untouched.
    """
    row_idx = coords[:, 0]
    col_idx = coords[:, 1]

    filled = np.copy(x)
    filled[row_idx, col_idx] = values
    return filled

In [None]:
spy_theta = np.eye(5)
spy_theta = insert_values_by_coords(spy_theta, initial_super_active_set, 1)

In [None]:
spy_theta

In [None]:
results.theta

In [None]:
theta_truth

In [None]:
_, _, _,_,Y,_ = synthetic.preprocess(x, assume_centered = False, cholesky=True)
M = np.max(np.abs(theta_truth*(1-np.eye(5))))

In [None]:
x

In [None]:
fit_mosek = opt.MIO_mosek(Y, l0=0.2, l2=0, M=M)

In [None]:
fit_mosek.theta_hat[np.abs(fit_mosek.theta_hat) < 1e-6] = 0

In [None]:
fit_mosek.theta_hat

In [None]:
from scipy.optimize import minimize_scalar, bisect

In [None]:
def make_func(target_non_zeros, Y, l2, M, tol=1e-6):
    """
    Build a function of `l0` whose root is an `l0` giving `target_non_zeros` non-zeros under MOSEK.

    Parameters
    ----------
    target_non_zeros : desired number of non-zero off-diagonal pairs.
    Y : data matrix forwarded to `opt.MIO_mosek`.
    l2 : L2 penalty forwarded to `opt.MIO_mosek`. Previously this argument was
        accepted but ignored (the solver was always called with `l2=0`).
    M : bound forwarded to `opt.MIO_mosek`.
    tol : entries with absolute value below `tol` are treated as zero. Previously
        this argument was ignored in favour of a hard-coded `1e-6`, which is
        still the default.

    Returns
    -------
    A callable `f(l0)` that returns `target_non_zeros - num_non_zero` and prints
    `l0`, the non-zero count and the squared cost on each evaluation. The solver's
    `theta_hat` array is modified in place.
    """
    def f(l0):
        fit_mosek = opt.MIO_mosek(Y, l0=l0, l2=l2, M=M)
        theta_hat = fit_mosek.theta_hat
        theta_hat[np.abs(theta_hat) < tol] = 0
        np.fill_diagonal(theta_hat, 0)

        # Half the non-zero count, so a symmetric off-diagonal pair counts once.
        num_non_zero = np.count_nonzero(theta_hat)//2
        cost = target_non_zeros - num_non_zero
        print(f"l0 = {l0}, num_non_zero= {num_non_zero}, cost={cost**2}")
        return cost

    return f



In [None]:
f = make_func(2, Y, l2=0.01, M=M, tol=1e-6)

In [None]:
bisect(f, a=0, b=10)

In [None]:
fit_mosek = opt.MIO_mosek(Y, l0=0.15, l2=0.01, M=M)

In [None]:
fit_mosek.theta_hat[np.abs(fit_mosek.theta_hat) < 1e-6] = 0
fit_mosek.theta_hat

In [None]:
results = fit(Y, l0=0, l2=0.01, scale_x=False, max_active_set_size=3, initial_active_set=np.inf, super_active_set=0.)

In [None]:
results.active_set_size

In [None]:
results.theta

In [None]:
fit_mosek.theta_hat

In [None]:
from gl0learn.metrics import nonzeros

def pseudo_likelihood_loss_(y, theta, l0=0, l1=0, l2=0,  abs_tol: float = 1e-6):
    """
    Penalised pseudo-likelihood loss of a precision-matrix estimate `theta`.

    The data-fit term is `sum_i( -log(theta_ii) + ||y @ theta[:, i]||^2 / theta_ii )`.
    The norm must be squared: with an unsquared norm the ratio does not change
    when a column of `theta` is rescaled, so the `-log(theta_ii)` term would make
    the loss unbounded below.

    Penalties are applied to the strictly-upper-triangular entries of `theta`:
    `l0/2` times the sum of `nonzeros(theta_upper, abs_tol=abs_tol)`, `l1/2` times
    the sum of absolute values, and `l2/2` times the sum of squares.

    Parameters
    ----------
    y : data matrix with as many columns as `theta` has rows.
    theta : square matrix with a strictly positive diagonal.
    l0, l1, l2 : penalty weights.
    abs_tol : forwarded to `nonzeros`.
        NOTE(review): `nonzeros` is presumably an elementwise non-zero indicator
        (it is summed here); confirm against `gl0learn.metrics`.

    Returns
    -------
    The scalar loss.
    """
    theta_diag = np.diag(theta)
    residuals = y @ theta

    # Squared column norms of the residuals, scaled by the matching diagonal entry.
    loss = (-np.log(theta_diag) + np.square(residuals).sum(axis=0) / theta_diag).sum()

    upper_triu = np.triu_indices(theta.shape[0], k=1)
    theta_upper = theta[upper_triu]

    loss += l0/2 * nonzeros(theta_upper, abs_tol=abs_tol).sum() + l1/2 * np.abs(theta_upper).sum() + l2/2*np.square(theta_upper).sum()
    return loss

In [None]:
pseudo_likelihood_loss_(Y, fit_mosek.theta_hat, l0=0.15, l2=0.01,)

In [None]:
pseudo_likelihood_loss_(Y, results.theta, l0=0.15, l2=0.01,)

In [None]:
np.log(np.e)