In [84]:
from sys import stdout
from time import perf_counter
from collections import namedtuple

import numpy as np


def diag_index(i):
    return (i + 1) * (i + 2) // 2 - 1


In [114]:
diag_index(4), diag_index(5)

(14, 20)

In [85]:
from gl0learn import fit, synthetic
from gl0learn.utils import check_make_valid_coordinate_matrix
from gl0learn.metrics import false_positives, prediction_error
import numpy as np
import time

In [362]:
n = 40
p = 10
model = "AR1"
rng = 1
rho = 0.5
normalize="covariance"
X, Sigma_truth, Theta_truth = synthetic.generate_synthetic(n,p,model,normalize, rng=rng, rho=rho)

In [363]:
_, _, _,_,Y,_ = synthetic.preprocess(X, assume_centered = False, cholesky=False)

In [364]:
l0=0.2
l2=0.5
M = np.max(np.abs(Theta_truth*(1-np.eye(p))))
int_tol = 1e-4
mio_gap = 1e-4
maxtime=30

In [365]:
n, p = Y.shape
num_coeffs = p * (p + 1) // 2
num_l0 =  p * (p - 1) // 2
try:
    import mosek.fusion as msk
    import mosek
except ModuleNotFoundError:
    raise Exception(
        f"`mosek` is not installed. Refer ot installation documentation about how to install `mosek`"
    )

model = msk.Model()
model.acceptedSolutionStatus(msk.AccSolutionStatus.Feasible)

theta_tril = model.variable("theta_tril", num_coeffs, msk.Domain.unbounded())

s = model.variable("s", num_coeffs, msk.Domain.greaterThan(0))
z = model.variable("z", num_coeffs, msk.Domain.integral(msk.Domain.inRange(0, 1)))
t  = model.variable("t", p, msk.Domain.greaterThan(0))
lg = model.variable("lg", p, msk.Domain.unbounded())
residuals = model.variable("residuals", [min(n, p), p], msk.Domain.unbounded())

theta = theta_tril.fromTril(p)
if n <= p:
    expr = msk.Expr.mul(msk.Matrix.dense(Y), theta)
else:
    C = np.linalg.cholesky(Y.T @ Y)
    expr = msk.Expr.mul(msk.Matrix.dense(C.T), theta)
model.constraint(msk.Expr.sub(residuals, expr), msk.Domain.equalsTo(0))

for i in range(p):
    model.constraint(
        msk.Expr.vstack(
            theta_tril.index(diag_index(i)),
            msk.Expr.mul(0.5, t.index(i)),
            residuals.slice([0, i], [min(n, p), i + 1]).reshape(min(n, p)), # This grabs columns of residuals!
        ),
        msk.Domain.inRotatedQCone(),
    )
    model.constraint(
        msk.Expr.vstack(
            theta_tril.index(diag_index(i)), msk.Expr.constTerm(1), lg.index(i)
        ),
        msk.Domain.inPExpCone(),
    )

z_expr = msk.Expr.constTerm(0)
s_expr = msk.Expr.constTerm(0)
for i in range(1, p):
    theta_tmp = theta_tril.slice(diag_index(i - 1) + 1, diag_index(i))
    z_tmp = z.slice(diag_index(i - 1) + 1, diag_index(i))
    s_tmp = s.slice(diag_index(i - 1) + 1, diag_index(i))
    expr = msk.Expr.mul(z_tmp, M)
    model.constraint(msk.Expr.sub(expr, theta_tmp), msk.Domain.greaterThan(0))
    model.constraint(msk.Expr.add(theta_tmp, expr), msk.Domain.greaterThan(0))
    expr = msk.Expr.hstack(msk.Expr.mul(0.5, s_tmp), z_tmp, theta_tmp)
    model.constraint(expr, msk.Domain.inRotatedQCone())
    z_expr = msk.Expr.add(z_expr, msk.Expr.sum(z_tmp))
    s_expr = msk.Expr.add(s_expr, msk.Expr.sum(s_tmp))

z_expr = msk.Expr.mul(l0, z_expr)
s_expr = msk.Expr.mul(l2, s_expr)
t_expr = msk.Expr.sum(msk.Expr.sub(t, lg))

model.residual(msk.ObjectiveSense.Minimize, msk.Expr.add([t_expr, z_expr, s_expr]))

model.setSolverParam("log", 0)
model.setSolverParam("mioTolAbsRelaxInt", int_tol)
model.setSolverParam("mioTolAbsGap", mio_gap)
model.setSolverParam("mioTolRelGap", mio_gap)
model.setSolverParam("mioRelGapConst", 1)

if maxtime is not None:
    model.setSolverParam("mioMaxTime", maxtime)
model.setLogHandler(stdout)


# if initial_theta_tril is not None:
#     theta_tril.setLevel(initial_theta_tril)
#     z.setLevel(np.asarray(initial_theta_tril > int_tol, dtype=float))

model.solve()

status = model.getProblemStatus()

lower_bound = model.getSolverDoubleInfo("mioObjBound")
upper_bound = model.getSolverDoubleInfo("mioObjInt")
gap = (upper_bound - lower_bound) / max(1, abs(upper_bound))

In [366]:
z.level()

array([0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0.])

In [367]:
np.round(s.level(), decimals=1)

array([0. , 0. , 0. , 0. , 0.2, 0. , 0. , 0. , 0.2, 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0.2, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.2, 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0.3, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0.1, 0. ])

In [369]:
t.level()

array([1.00000365, 0.91623328, 0.83573794, 0.88892605, 1.00000236,
       0.86234282, 0.78800168, 0.76727179, 0.82111819, 0.91037464])

In [370]:
residual_level = residuals.level().reshape(10, 10)

In [385]:
np.linalg.norm(Y@theta.level().reshape(p, p)[0, :])**2/theta.level().reshape(p, p)[0, 0]

1.0000036477019787

In [380]:
np.linalg.norm(Y@theta.level().reshape(p, p), axis=0)/np.diag(theta.level().reshape(p, p))

array([0.88458705, 0.79201575, 0.81456896, 0.8356928 , 0.82383743,
       0.71937452, 0.75867759, 0.69874218, 0.8378979 , 0.83100237])

In [374]:
theta.level().reshape(p, p)

array([[ 1.27796932e+00,  0.00000000e+00, -5.95608468e-09,
         2.08874777e-08, -4.68026209e-09, -1.20221225e-08,
        -4.00598684e-09, -4.63787435e-11,  0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00,  1.46062398e+00, -3.89264587e-01,
        -1.79572924e-09, -1.14038858e-09,  7.64697708e-10,
         0.00000000e+00,  1.18631642e-08,  0.00000000e+00,
         6.00072135e-09],
       [-5.95608468e-09, -3.89264587e-01,  1.25954706e+00,
        -4.55616586e-01,  0.00000000e+00,  0.00000000e+00,
        -4.91910835e-09, -9.77635950e-09,  1.14020447e-09,
        -3.06171383e-09],
       [ 2.08874777e-08, -1.79572924e-09, -4.55616586e-01,
         1.27283560e+00,  0.00000000e+00,  2.50276610e-09,
         1.39801698e-08, -2.21407844e-08,  0.00000000e+00,
        -1.29421099e-08],
       [-4.68026209e-09, -1.14038858e-09,  0.00000000e+00,
         0.00000000e+00,  1.47339091e+00, -6.34265120e-09,
        -1.04235083e-08, -5.26010466e-09,  0.00000000e+00,
         0.

In [230]:
i = 2
residuals.slice([0, i], [min(n, p), i + 1]).level()

array([ 9.13798462e-02,  1.02686293e-01,  9.48132643e-01, -3.67152563e-01,
       -3.28531822e-10, -4.13955326e-09, -7.51527503e-09, -8.31132151e-09,
        3.73244883e-10, -2.14411917e-09])

In [204]:
theta_tril.index(diag_index(0)).level()

array([1.27796932])

In [188]:
theta_level = theta.level().reshape(p,p)

In [168]:
theta_level[np.tril_indices(10, k=0)][non_zero_values]

NameError: name 'non_zero_values' is not defined

In [141]:
diag_index(0), diag_index(1)

(0, 2)

In [333]:
C = np.linalg.cholesky(Y.T @ Y)
#expr = msk.Expr.mul(msk.Matrix.dense(C.T), theta)

In [340]:
np.linalg.norm(C.T@theta_level, axis=0)

array([1.13047511, 1.15683719, 1.02598795, 1.06369954, 1.21383458,
       1.19873972, 1.03865158, 1.09807568, 0.97997405, 1.09551389])

In [339]:
t.level()

array([1.00000365, 0.91623328, 0.83573794, 0.88892605, 1.00000236,
       0.86234282, 0.78800168, 0.76727179, 0.82111819, 0.91037464])

In [179]:
s_values = theta_level[np.tril_indices(p, k=0)]**2
s_values

array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 3.54749447e-17,
       1.51526919e-01, 0.00000000e+00, 4.36286723e-16, 3.22464351e-18,
       2.07586474e-01, 0.00000000e+00, 2.19048532e-17, 1.30048610e-18,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.44531431e-16,
       5.84762584e-19, 0.00000000e+00, 6.26383817e-18, 4.02292242e-17,
       0.00000000e+00, 1.60479306e-17, 0.00000000e+00, 2.41976270e-17,
       1.95445148e-16, 1.08649526e-16, 2.36528320e-01, 0.00000000e+00,
       2.15098785e-21, 1.40734664e-16, 9.55772050e-17, 4.90214333e-16,
       2.76687010e-17, 1.26187306e-16, 2.02835157e-01, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 1.30006623e-18, 0.00000000e+00,
       0.00000000e+00, 5.44528629e-17, 0.00000000e+00, 2.91376974e-01,
       0.00000000e+00, 0.00000000e+00, 3.60086567e-17, 9.37409160e-18,
       1.67498209e-16, 0.00000000e+00, 4.10731330e-16, 0.00000000e+00,
       0.00000000e+00, 1.20168779e-01, 0.00000000e+00])

In [183]:
np.min(s.level() - s_values)

0.0

In [170]:
theta_level[np.tril_indices(10, k=0)]

array([ 1.27796932e+00,  0.00000000e+00,  1.46062398e+00, -5.95608468e-09,
       -3.89264587e-01,  1.25954706e+00,  2.08874777e-08, -1.79572924e-09,
       -4.55616586e-01,  1.27283560e+00, -4.68026209e-09, -1.14038858e-09,
        0.00000000e+00,  0.00000000e+00,  1.47339091e+00, -1.20221225e-08,
        7.64697708e-10,  0.00000000e+00,  2.50276610e-09, -6.34265120e-09,
        1.66636388e+00, -4.00598684e-09,  0.00000000e+00, -4.91910835e-09,
        1.39801698e-08, -1.04235083e-08, -4.86341773e-01,  1.36902895e+00,
       -4.63787435e-11,  1.18631642e-08, -9.77635950e-09, -2.21407844e-08,
       -5.26010466e-09, -1.12333123e-08, -4.50372242e-01,  1.57150337e+00,
        0.00000000e+00,  0.00000000e+00,  1.14020447e-09,  0.00000000e+00,
        0.00000000e+00,  7.37921831e-09,  0.00000000e+00, -5.39793455e-01,
        1.16956260e+00,  0.00000000e+00,  6.00072135e-09, -3.06171383e-09,
       -1.29421099e-08,  0.00000000e+00,  2.02665076e-08,  0.00000000e+00,
        0.00000000e+00, -

In [135]:
def mosek_levels_from_theta(theta, Y, int_tol: float = 1e-4):
    """
    
    Parameters
    ----------
    theta : (p, p) symmetric matrix
        The lower triangular will be selected automatically
        
    Returns
    -------
    theta: (p, p) array
        returns `theta` as passed
    theta_tril: (p*(p+1)//2, ) array
        Lower triangular section of theta including the main diagonal
    z_values: (p*(p+1)//2, ) array
        Indicator matrix of lower triangular section of theta including the main diagonal where:
            Any non zero item of the matrix is located 
                AND
            Is not located on the main diagonal. 
    s_values: (p*(p+1)//2, ) array
        Derived matrix of triangular section of theta including the main diagonal where the value is:
            theta[i, j]**2 if i != j else 0!
    t_values: (p, ) array
        t_values[i] <- 1/theta[i,i]||Ytheta[:, i]||^2

    lg: (p, ) array
        natural log of the main diagonal
    residuals:
    """
    n, p = Y.shape
    
    assert n > p
    
    assert theta.shape == (p, p), "Initial Theta must be passed as a (p by p matrix)!"
    np.testing.asset_approx_equal(theta, theta.T)
    
    tril_indicies = np.tril_indices(p, k=0) # Used to select the lower triangular values including the main diagonal
    
    theta_level = theta[tril_indicies]
    # Since mosek keeps main diagonal in the l0 and l2 variables. 
    # We create a copy and set diagonal to zero to make l0, and l2 calculations easier!
    theta_level_copy = np.copy(theta_level) 
    
    np.fill_diagonal(theta_level_copy, 0)
    
    non_zero_values = np.abs(theta_level) > int_tol
    
    theta_tril = theta[tril_indicies]
    theta = theta
    z_values = np.asarray(non_zero_values, dtype=float)[tril_indicies]
    s_values = theta_level_copy[tril_indicies]**2
    
    t_values = np.linalg.norm(Y@theta)**2/np.diag(theta)
    
    lg_values = np.log(np.diag(theta))
    
    YtY = np.linalg.cholesky(Y.T @ Y)
    residuals = YtY@theta
    
    return theta, theta_tril, z_values, s_values, lg_values, residuals
    
    

SyntaxError: EOF while scanning triple-quoted string literal (2153480393.py, line 1)

In [199]:
t.level()

array([1.00000365, 0.91623328, 0.83573794, 0.88892605, 1.00000236,
       0.86234282, 0.78800168, 0.76727179, 0.82111819, 0.91037464])

In [191]:
np.diag(theta_level)

array([1.27796932, 1.46062398, 1.25954706, 1.2728356 , 1.47339091,
       1.66636388, 1.36902895, 1.57150337, 1.1695626 , 1.31830417])

In [28]:
theta_tril = results.theta_hat[np.tril_indices(p)]

In [37]:
results2 = MIO_mosek(Y, l0, l2, M, initial_theta_tril=theta_tril)

In [38]:
results2.elapsed

4.783151411999938

In [360]:
np.linalg.norm(Y@theta_level, axis=0)/np.diag(theta_level) 

array([0.88458705, 0.79201575, 0.81456896, 0.8356928 , 0.82383743,
       0.71937452, 0.75867759, 0.69874218, 0.8378979 , 0.83100237])

In [361]:
t.level()

array([1.00000365, 0.91623328, 0.83573794, 0.88892605, 1.00000236,
       0.86234282, 0.78800168, 0.76727179, 0.82111819, 0.91037464])

In [281]:
lg.level()

array([0.24527235, 0.37886373, 0.23075218, 0.24124717, 0.38756649,
       0.51064394, 0.31410169, 0.45203272, 0.15662984, 0.27634619])

In [323]:
np.log(np.diag(theta_level))

array([0.24527235, 0.37886372, 0.23075218, 0.24124717, 0.38756649,
       0.51064394, 0.31410169, 0.45203272, 0.15662984, 0.27634619])

In [341]:
np.sqrt(4*(np.diag(np.linalg.cholesky(theta_level))**2 - np.square(residual_level).sum(axis=1)))

  np.sqrt(4*(np.diag(np.linalg.cholesky(theta_level))**2 - np.square(residual_level).sum(axis=1)))


array([       nan, 0.98964934,        nan,        nan, 0.6016303 ,
       1.32112065, 0.77238792, 1.04301566, 1.03103298, 1.06242524])

In [310]:
theta_level[1, 1]

1.4606239756368669

In [313]:
sum(np.square(residual_level[0, :]))

1.6036136499595322

In [289]:
residual_level.shape

(10, 10)

In [318]:
t.level()

array([1.00000365, 0.91623328, 0.83573794, 0.88892605, 1.00000236,
       0.86234282, 0.78800168, 0.76727179, 0.82111819, 0.91037464])

In [327]:
np.diag(theta_level)

array([1.27796932, 1.46062398, 1.25954706, 1.2728356 , 1.47339091,
       1.66636388, 1.36902895, 1.57150337, 1.1695626 , 1.31830417])

In [408]:

possible_active_set = np.where(np.abs(np.triu(theta_level, k=1)) > 0)

possible_active_set = np.asarray(possible_active_set).T
if possible_active_set.shape[0] > 1:
    idx = np.sort(np.random.choice(np.arange(31),
                                       size=np.random.randint(1, 31),
                                       replace=False))
else:
    idx = 0

initial_super_active_set = possible_active_set[[0], :]

In [409]:
possible_active_set.size

62

In [410]:
idx

array([ 0, 21, 25, 29])

In [411]:
for row in initial_super_active_set:
    print(row)

[0 2]


In [412]:
initial_super_active_set

array([[0, 2]])