## Debugging the `LabelModel` with deps + higher-order cliques

In [1]:
%matplotlib inline

import numpy as np
import torch
import matplotlib.pyplot as plt
import mpmath

from synthetic.generate import SingleTaskTreeDepsGenerator
from metal.label_model import LabelModel
from metal.label_model.utils import (
    compute_mu,
    compute_covariance,
    compute_inv_covariance,
    print_matrix,
    visualize_matrix
)

### Generating a dataset

In [2]:
# Synthetic dataset: seed NumPy's global RNG before building the generator
np.random.seed(0)

# Generator settings. N and M are passed positionally, K as `k`
# (presumably N data points, M labeling sources, K classes -- TODO confirm)
N, M, K = 10000, 10, 2
EDGE_PROB = 0.5

data = SingleTaskTreeDepsGenerator(N, M, k=K, edge_prob=EDGE_PROB)

### Training the `LabelModel`

Note that:
* The `train` method also assembles supporting data structures, such as the junction tree for the dependencies.
* The `higher_order_cliques` kwarg controls whether higher-order cliques are included.

In [3]:
# Build the label model from the generator's `k` and `p`
# (`p` is handed over as the class balance)
lm = LabelModel(
    k=data.k,
    class_balance=data.p,
)

In [4]:
# Settings for this debugging run, gathered in one place so they are easy to tune.
# The dependency edges come from the generator; both clique options are enabled.
train_kwargs = dict(
    deps=data.E,
    all_unary_cliques=True,
    higher_order_cliques=True,
    n_epochs=50000,
    print_every=5000,
    lr=0.0001,
    l2=0,
    O_inv_prec=1024,  # presumably the precision used when inverting O -- TODO confirm
)
lm.train(data.L, **train_kwargs)

# Test against the true parameter values:
# `mu_est` is what the model learned, `mu` is computed directly from the
# augmented label matrix together with data.Y and data.p
mu_est = lm.mu.detach().numpy()
mu = compute_mu(
    lm._get_augmented_label_matrix(data.L.todense()),
    data.Y,
    K,
    data.p,
)
print(f"Average absolute error: {np.mean(np.abs(mu_est - mu))}")

Computing O^{-1}...
Estimating Z...
[Epoch 0] Loss: 3605678.000000
[Epoch 5000] Loss: 412.065643
[Epoch 10000] Loss: 394.892120
[Epoch 15000] Loss: 376.943146
[Epoch 20000] Loss: 362.795959
[Epoch 25000] Loss: 356.325897
[Epoch 30000] Loss: 353.102997
[Epoch 35000] Loss: 347.530212
[Epoch 40000] Loss: 317.787506
[Epoch 45000] Loss: 314.971313
[Epoch 49999] Loss: 314.335632
Estimating \mu...
[Epoch 0] Loss: 260.110046
[Epoch 5000] Loss: 0.033500
[Epoch 10000] Loss: 0.007396
[Epoch 15000] Loss: 0.001761
[Epoch 20000] Loss: 0.000432
[Epoch 25000] Loss: 0.000108
[Epoch 30000] Loss: 0.000028
[Epoch 35000] Loss: 0.000007
[Epoch 40000] Loss: 0.000002
[Epoch 45000] Loss: 0.000001
[Epoch 49999] Loss: 0.000001
Average absolute error: 0.04663337137793971


### Trying to solve with `scipy.optimize.minimize`

In [None]:
from scipy.optimize import minimize

# NumPy versions of the two matrices the scipy objective below reads
O_inv, mask = lm.O_inv.numpy(), lm.mask.numpy()

# Random starting point: a flat vector with lm.d * lm.k entries, which the
# objective reshapes into a matrix with data.k columns
z0 = np.random.randn(lm.d * lm.k)

def objective_fn(z):
    """Squared Frobenius norm of the masked matrix ``O_inv + Z Z^T``.

    Args:
        z: flat parameter vector; it is reshaped to a matrix with
            ``data.k`` columns before use.

    Returns:
        ``||(O_inv + Z Z^T) * mask||**2`` where ``*`` is elementwise and
        ``O_inv`` / ``mask`` are the notebook-level arrays.
    """
    Z_mat = z.reshape(-1, data.k)
    masked_sum = (O_inv + Z_mat @ Z_mat.T) * mask
    frob = np.linalg.norm(masked_sum)
    return frob**2

def gradient_fn(z):
    """Gradient of ``objective_fn`` with respect to the flat vector ``z``.

    Write ``A = O_inv + Z Z^T`` and ``f(Z) = ||A * mask||_F**2`` (``*`` is
    elementwise). Then ``df/dA = 2 * A * mask**2`` and, because ``Z`` enters
    ``A`` through both factors of ``Z Z^T``, ``df/dZ = 2 * (G + G^T) @ Z``
    with ``G = A * mask * mask``.

    The earlier version returned ``(A * mask) @ Z``. For a symmetric 0/1 mask
    and symmetric ``O_inv`` that is the true gradient divided by 4, so the
    ``jac`` given to ``scipy.optimize.minimize`` did not match the objective.

    Args:
        z: flat parameter vector; reshaped to a matrix with ``data.k`` columns.

    Returns:
        1-D array with the same number of entries as ``z``.
    """
    Z = z.reshape(-1, data.k)
    X = (O_inv + Z @ Z.T) * mask
    # X already carries one factor of the mask; the derivative needs mask**2.
    G = X * mask
    # G + G^T accounts for Z appearing on both sides of Z Z^T.
    return np.ravel(2 * (G + G.T) @ Z)

# Run BFGS from z0 using the analytic gradient, then fold the flat solution
# back into a matrix with data.k columns
res = minimize(
    objective_fn,
    z0,
    jac=gradient_fn,
    method='BFGS',
)
Z = res.x.reshape(-1, data.k)

# Final objective value (displayed as the cell output)
res.fun

In [None]:
# Remaining LabelModel matrices as NumPy arrays
O, P = lm.O.numpy(), lm.P.numpy()
I_k = np.eye(data.k)

# Q = O Z (I_k + Z^T O Z)^{-1} Z^T O, built from the Z found in the previous cell.
# (If O_inv really is the inverse of O, the Woodbury identity gives
#  Q = O - (O_inv + Z Z^T)^{-1} -- NOTE(review): confirm.)
Q = (
    O @ Z
    @ np.linalg.inv(I_k + Z.T @ O @ Z)
    @ Z.T @ O
)

# Random flat starting point for the second optimization
mu0 = np.random.randn(lm.d * lm.k)

def objective_fn_2(mu):
    """Objective for recovering the parameter matrix from ``Q``, ``P`` and ``O``.

    Sums two squared norms:

    * ``||Q - M P M^T||**2``
    * ``||rowsum(M P) - diag(O)||**2``

    Args:
        mu: flat parameter vector; reshaped to a matrix ``M`` with
            ``data.k`` columns.

    Returns:
        The sum of the two terms as a float.
    """
    mu_mat = mu.reshape(-1, data.k)
    mu_P = mu_mat @ P
    quad_term = np.linalg.norm(Q - mu_P @ mu_mat.T) ** 2
    diag_term = np.linalg.norm(np.sum(mu_P, axis=1) - np.diag(O)) ** 2
    return quad_term + diag_term

# BFGS without an analytic gradient (scipy falls back to numerical differences)
res_2 = minimize(
    objective_fn_2,
    mu0,
    method='BFGS',
)

# NOTE: this rebinds `M`, which the dataset cell defined as an integer
M = res_2.x.reshape(-1, data.k)

# Final objective value (displayed as the cell output)
res_2.fun

In [None]:
# Test against the true parameter values: compare the scipy-based estimate `M`
# with `mu`, the reference matrix computed via `compute_mu` in the training cell
print(f"Average absolute error: {np.mean(np.abs(M - mu))}")

## Visualizing the inverse covariance matrix

In [None]:
# Inverse covariance computed from the augmented label matrix together with
# data.Y, data.k and data.p; only the magnitudes of its entries are plotted
aug_L = lm._get_augmented_label_matrix(data.L.todense())
J = compute_inv_covariance(aug_L, data.Y, data.k, data.p)
visualize_matrix(np.abs(J))

In [None]:
# The mask that the LabelModel applies elementwise, drawn at a smaller figure size
visualize_matrix(
    lm.mask.numpy(),
    fig_size=[5, 5],
)

In [None]:
# Re-read the arrays from the model. This rebinds `Z` to the LabelModel's own
# learned `lm.Z`, replacing the scipy solution assigned earlier in the notebook.
O_inv, mask = lm.O_inv.numpy(), lm.mask.numpy()
Z = lm.Z.detach().numpy()

# Magnitudes of the masked entries of O_inv + Z Z^T -- the same matrix whose
# squared norm `objective_fn` returns
masked_residual = (O_inv + Z @ Z.T) * mask
visualize_matrix(np.abs(masked_residual))

### Looking at the internal 'bookkeeping' of cliques...

In [None]:
# Display the clique bookkeeping structure stored on the model as `c_data`
lm.c_data

In [None]:
# Dependency edge weights: pair every edge (u, v) in data.E with the value
# stored for it in data.theta
[
    ((u, v), data.theta[(u, v)])
    for u, v in data.E
]