In [1]:
import sys
sys.path.append("tests/utils")

In [2]:
import random
from typing import Callable, Tuple
from copy import deepcopy

import numpy as np
import pytest
from gl0learn import fit, synthetic
from gl0learn.metrics import nonzeros
from gl0learn.opt import MIO_mosek
from hypothesis import given, settings, HealthCheck, assume, note
from hypothesis.strategies import just, booleans, floats, integers, random_module

from utils import (
    _sample_data,
    _sample_data2,
    sample_from_cov,
    overlap_covariance_matrix,
    is_scipy_installed,
    is_mosek_installed,
    make_bisect_func,
    random_penalty,
    random_penalty_values,
    top_n_triu_indicies_by_abs_value,
)

In [3]:
# NOTE(review): bare attribute access -- this evaluates to the function object
# without calling it, so no random seed is actually set by this cell.
np.random.seed

<function RandomState.seed>

In [60]:
def test_cd_vs_mosek_high_data(p, module, overlaps, lXs):
    """
    Compare the support found by `fit` with the support found by `MIO_mosek`.

    A matrix is generated with `overlap_covariance_matrix`, `30 * p**2` samples
    are drawn from it with `sample_from_cov`, the samples are run through
    `synthetic.preprocess`, and both solvers are run on the preprocessed data.
    The test passes when both solutions have the same non-zero pattern.

    Parameters
    ----------
    p: int
        Dimension of the generated matrix.
    module:
        Any object exposing a `seed` attribute; the seed is forwarded to
        `overlap_covariance_matrix`.
    overlaps: int
        Forwarded as `max_overlaps`; also sets `decay = 1 - exp(overlaps - 6)`.
    lXs: dict
        Keyword arguments forwarded unchanged to both `MIO_mosek` and `fit`.

    Raises
    ------
    AssertionError
        If the two non-zero patterns differ.
    """
    int_tol = 1e-4
    n_obs = 30 * p**2

    theta_truth = overlap_covariance_matrix(
        p=p,
        seed=module.seed,
        max_overlaps=overlaps,
        decay=1 - np.exp(overlaps - 6),
    )

    # Only continue when every eigenvalue of the generated matrix is strictly positive.
    eigenvalues = np.linalg.eigvalsh(theta_truth)
    assume(all(eigenvalues > 0))

    x = sample_from_cov(n=n_obs, cov=theta_truth)

    # Debug output: empirical covariance of the raw samples.
    print(np.round(np.cov(x.T), 2))

    # NOTE(review): the fifth returned value is taken as the data handed to the
    # solvers -- confirm against `synthetic.preprocess`.
    preprocessed = synthetic.preprocess(x, assume_centered=False, cholesky=True)
    _, _, _, _, y, _ = preprocessed

    # Debug output: empirical covariance of the preprocessed data.
    print(np.round(np.cov(y.T), 2))

    # Largest absolute off-diagonal entry of theta_truth, passed to MIO_mosek as `m`.
    off_diagonal = theta_truth * (1 - np.eye(p))
    m = np.max(np.abs(off_diagonal))

    MIO_results = MIO_mosek(y=y, m=m, **lXs, int_tol=int_tol)
    cd_results = fit(
        y,
        **lXs,
        theta_init=None,
        active_set=0.0,
        super_active_set=0.0,
        max_active_set_ratio=1.0,
    )

    print(theta_truth)
    print(np.round(MIO_results.theta_hat, 2))
    print(np.round(cd_results.theta, 2))

    # MIO entries count as non-zero above int_tol; fit entries when exactly non-zero.
    mio_support = np.abs(MIO_results.theta_hat) > int_tol
    cd_support = np.abs(cd_results.theta) > 0
    np.testing.assert_array_equal(mio_support, cd_support)


In [61]:
# IPython help lookup: displays the signature and docstring of np.cov.
np.cov?

[0;31mSignature:[0m
[0mnp[0m[0;34m.[0m[0mcov[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mm[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0my[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrowvar[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbias[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mddof[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfweights[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maweights[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdtype[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Estimate a covariance matrix, given data and weights.

Covariance indicates the level to which two variables vary together.
If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`,

In [62]:
class RandomSeeder:
    """
    Minimal container that exposes a single `seed` attribute.

    Parameters
    ----------
    seed: int
        Value stored unchanged on the instance as `self.seed`.
    """

    def __init__(self, seed: int):
        # Plain attribute storage; no random number generator is touched here.
        self.seed = seed

In [63]:
# Fixed inputs for a single manual run of test_cd_vs_mosek_high_data.
p = 6
overlaps = 2
lXs = dict(l0=0.01, l2=0.01)
module = RandomSeeder(0)

In [64]:
# TODO: This seems to be operating on the covariance of x, not y. How is this happening?

test_cd_vs_mosek_high_data(p, module, overlaps, lXs)

[[ 4.15 -3.68  3.17  3.36 -3.39 -3.31]
 [-3.68  4.81 -3.31 -4.23  3.21  3.82]
 [ 3.17 -3.31  4.75  3.63 -3.61 -4.15]
 [ 3.36 -4.23  3.63  5.32 -3.23 -4.45]
 [-3.39  3.21 -3.61 -3.23  4.16  3.42]
 [-3.31  3.82 -4.15 -4.45  3.42  5.27]]
[[ 0.69 -0.7   0.45  0.52 -0.6  -0.57]
 [-0.7   0.95 -0.61 -0.8   0.62  0.74]
 [ 0.45 -0.61  0.72  0.53 -0.62 -0.71]
 [ 0.52 -0.8   0.53  0.89 -0.55 -0.78]
 [-0.6   0.62 -0.62 -0.55  0.78  0.63]
 [-0.57  0.74 -0.71 -0.78  0.63  0.99]]
gL0LearnFit 1
gL0LearnFit 2
gL0LearnFit 2
fit 1
fit loop0
current_iter: 1 cur_objective = -0.911444
fit loop1
current_iter: 2 cur_objective = -2.43987
fit loop2
current_iter: 3 cur_objective = -3.13604
fit loop3
current_iter: 4 cur_objective = -3.57924
fit loop4
current_iter: 5 cur_objective = -3.84942
fit loop5
current_iter: 6 cur_objective = -4.03328
fit loop6
current_iter: 7 cur_objective = -4.14192
fit loop7
current_iter: 8 cur_objective = -4.21246
fit loop8
current_iter: 9 cur_objective = -4.2724
fit loop9
current_iter:

AssertionError: 
Arrays are not equal

Mismatched elements: 2 / 36 (5.56%)
 x: array([[ True,  True, False, False,  True, False],
       [ True,  True, False,  True, False, False],
       [False, False,  True, False,  True,  True],...
 y: array([[ True,  True, False, False,  True, False],
       [ True,  True, False,  True, False,  True],
       [False, False,  True, False,  True,  True],...

In [22]:
# Scratch array: the integers 0..9 arranged as 5 rows by 2 columns.
x = np.arange(10).reshape(5, 2)

In [21]:
# Selects rows 1 and 5 by integer-list indexing; raises IndexError when axis 0
# has only 5 rows, because the largest valid row index is then 4.
x[[1, 5],:]

IndexError: index 5 is out of bounds for axis 0 with size 5

In [None]:
# Number of coordinates strictly above the diagonal of a p-by-p matrix.
p*(p-1)//2

In [7]:
# Bare reference: displays the repr (and signature) of np.triu without calling it.
np.triu

<function numpy.triu(m, k=0)>

In [11]:
def top_n_triu_indicies_by_abs_value(x, n):
    """
    Locate the `n` largest-magnitude entries strictly above the diagonal of `x`.

    Parameters
    ----------
    x: (p, p) array_like
        Square matrix. Only entries strictly above the main diagonal are considered.
    n: int
        Number of indices to return.
        If n is greater than p*(p-1)//2, the number of upper triangular coordinates, an error is raised.
        If there are only k non-zero values, such that k < n, only k indices are returned.

    Returns
    -------
    (rows, cols): tuple of two integer arrays
        Coordinates of the selected entries, in the format returned by `np.where`
        (row-major order, not sorted by magnitude).

    Raises
    ------
    ValueError
        If `n` is not positive, `x` is not a square 2-D matrix, `n` exceeds the
        number of upper triangular coordinates, or every upper triangular entry is 0.
    NotImplementedError
        If two non-zero upper triangular entries share the same absolute value.
    """
    if n <= 0:
        raise ValueError(f"Cannot request {n} non-zero items")

    x = np.asarray(x)
    if x.ndim != 2 or x.shape[0] != x.shape[1]:
        raise ValueError(f"x is not a square matrix")
    p = x.shape[0]

    if n > p*(p-1)//2:
        raise ValueError(f"n is to large for a {p} by {p} matrix")

    triu_x = np.abs(np.triu(x, k=1))

    # Magnitudes of the non-zero entries strictly above the diagonal (1-D).
    non_zero_triu_x = triu_x[np.nonzero(triu_x)]
    nnz = non_zero_triu_x.size

    if nnz == 0:
        raise ValueError("All triu values of x are 0.")

    # Ties would make "the top n" ambiguous and the threshold test below could
    # select more than n coordinates, so they are rejected outright.
    if np.unique(non_zero_triu_x).size != nnz:
        raise NotImplementedError("Not implemented for arrays with duplicate values")

    # Never ask for more entries than there are non-zero values.
    n_selected = min(n, nnz)

    # The n_selected-th largest magnitude acts as the cut-off. Because the non-zero
    # magnitudes are unique, exactly n_selected entries are >= this threshold, and
    # the threshold is strictly positive so zeros are never selected.
    threshold = np.sort(non_zero_triu_x)[::-1][n_selected - 1]
    return np.where(triu_x >= threshold)

In [12]:
# 5x5 identity matrix with a single non-zero off-diagonal entry, -1 at (0, 4).
theta = np.eye(5)
theta[0, 4] = -1
print(theta)

# Request the single largest-magnitude upper-triangular entry.
top_n_triu_indicies_by_abs_value(theta, 1)

[[ 1.  0.  0.  0. -1.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]]


(array([0]), array([4]))

In [19]:
# Bare reference: displays the repr (and signature) of np.nonzero without calling it.
np.nonzero

<function numpy.nonzero(a)>

In [84]:
# 3x3 identity matrix with one non-zero upper-triangular entry, 0.5 at (1, 2).
theta_truth = np.eye(3)
theta_truth[1, 2] = 0.5
print(theta_truth)

[[1.  0.  0. ]
 [0.  1.  0.5]
 [0.  0.  1. ]]


In [12]:
# IPython help lookup: displays the signature and docstring of np.unique.
np.unique?

[0;31mSignature:[0m
[0mnp[0m[0;34m.[0m[0munique[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mar[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mreturn_index[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mreturn_inverse[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mreturn_counts[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Find the unique elements of an array.

Returns the sorted unique elements of an array. There are three optional
outputs in addition to the unique elements:

* the indices of the input array that give the unique values
* the indices of the unique array that reconstruct the input array
* the number of times each unique value comes up in the input array

Parameters
----------
ar : array_like
    Input array. Unless `axis` is specified, this will be flatt

In [85]:
# Request 3 indices from a matrix that has only one non-zero upper-triangular entry.
top_n_triu_indicies_by_abs_value(theta_truth, 3)

3
[0.5 0.  0.  0.  0.  0.  0.  0.  0. ]
0


(array([1]), array([2]))