In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../") # go to parent dir

import numpy as np
import torch
from torch import nn, optim
import matplotlib.pyplot as plt

from itertools import product

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Step 1: Generate the true class balance to be recovered

In [2]:
K = 3

# Generate the true class balance to be recovered
p_Y = np.random.random(K)
p_Y /= p_Y.sum()
p_Y

array([0.33627531, 0.06528622, 0.59843847])

### Step 2: Generate the true conditional probability tables (CPTs) for the LFs

Separate simple process here to keep simple (later merge this with the SPA generator).
Generate in terms of the _conditional accuracies_ (which is equivalent to the recall...):
$$
\alpha_{i,y',y} = P(\lambda_i = y' | Y = y)
$$

Note that this table should be normalized such that:
$$
\sum_{y'} \alpha_{i,y',y} = 1
$$

In [3]:
M = 10
alphas = []
for i in range(M):
    a = np.random.random((K,K))
    alphas.append( a @ np.diag(1 / a.sum(axis=0)) )
alpha = np.array(alphas)

assert np.all(np.abs(alpha.sum(axis=1) - 1) < 1e-5)
alpha[0]

array([[0.28359862, 0.23194285, 0.39931097],
       [0.36570757, 0.39197016, 0.31187131],
       [0.35069381, 0.376087  , 0.28881771]])

### Aside: Different tensor product approaches in Python

#### (1) Brute force

In [4]:
%%time
O_1 = np.zeros((M,M,M,K,K,K))
for i, j, k, y1, y2, y3 in product(range(M), range(M), range(M), range(K), range(K), range(K)):
    for y in range(K):
        O_1[i,j,k,y1,y2,y3] += alpha[i, y1, y] * alpha[j, y2, y] * alpha[k, y3, y]

CPU times: user 129 ms, sys: 817 µs, total: 130 ms
Wall time: 130 ms


#### (3) `np.einsum`

In [5]:
%time O_3 = np.einsum('aby,cdy,efy->acebdf', alpha, alpha, alpha)

CPU times: user 1.44 ms, sys: 496 µs, total: 1.94 ms
Wall time: 1.3 ms


In [6]:
np.mean(np.abs(O_1 - O_3))

4.096404928229565e-18

Now, trying a bilinear form:

In [7]:
%%time
Op_1 = np.zeros((M,M,M,K,K,K))
for i, j, k, y1, y2, y3 in product(range(M), range(M), range(M), range(K), range(K), range(K)):
    for y in range(K):
        Op_1[i,j,k,y1,y2,y3] += p_Y[y] * alpha[i, y1, y] * alpha[j, y2, y] * alpha[k, y3, y]

CPU times: user 495 ms, sys: 4.54 ms, total: 500 ms
Wall time: 136 ms


In [8]:
%time Op_3 = np.einsum('aby,cdy,efy,y->acebdf', alpha, alpha, alpha, p_Y)

CPU times: user 622 µs, sys: 118 µs, total: 740 µs
Wall time: 530 µs


In [9]:
np.mean(np.abs(Op_1 - Op_3))

2.1384522560651964e-18

### Step 3: Generate the _three-way_ overlaps tensor $O$ of conditionally-independent LFs

Now we can directly generate $O$.
By our conditional independence assumption, we have:
$$
P(\lambda_i = y', \lambda_j = y'' | Y = y) = \alpha_{i,y',y} \alpha_{j,y'',y}
$$

Thus we have:
$$
O_{i,j,y',y''} = \sum_y P(Y=y) \alpha_{i,y',y} \alpha_{j,y'',y}
$$

In [10]:
# Compute mask
mask = torch.ones((M,M,M,K,K,K)).byte()
for i, j, k in product(range(M), repeat=3):
    if len(set((i,j,k))) < 3:
        mask[i,j,k,:,:,:] = 0

In [11]:
%%time
O = np.einsum('aby,cdy,efy,y->acebdf', alpha, alpha, alpha, p_Y)
O = torch.from_numpy(O).float()
O[1-mask] = 0

CPU times: user 12.1 ms, sys: 980 µs, total: 13.1 ms
Wall time: 3.12 ms


In [38]:
# Compute pairwise labeling rates
mask_2 = torch.ones((M,M,K,K)).byte()
for i in range(M):
    mask_2[i,i,:,:] = 0

O_2 = np.einsum('aby,cdy,y->acbd', alpha, alpha, p_Y)
O_2 = torch.from_numpy(O_2).float()
O_2[1-mask_2] = 0

In [39]:
# Compute observed labeling rates
O_l = torch.from_numpy(np.einsum('aby,y->ab', alpha, p_Y)).float()

### Step 4: Try to recover $\alpha$ given $P(Y=y)$

In [58]:
def get_loss(A, P, O, O_l, O_2):
    
    # Main constraint: match empirical three-way overlaps matrix (entries O_ijk for i != j != k)
    loss_1 = torch.norm((O - torch.einsum('aby,cdy,efy,y->acebdf', [A,A,A,P]))[mask])**2
    
    # Col-wise stochastic: \sum_y' P(\lf=y'|Y=y) = 1.0
    loss_2 = torch.norm(torch.sum(A, 1) - 1)**2
    
    # Row-wise constraint: match observed labeling rates P(\lf=y') = \sum_y P(Y=y) P(\lf=y'|Y=y)
    loss_3 = torch.norm(O_l - torch.einsum('aby,y->ab', [A,P]))**2
    
    # Pairwise observed: match empirical pairwise overlaps matrix (entries O_ij for i != j)
    # loss_4 = torch.norm((O_2 - torch.einsum('aby,cdy,y->acbd', [A,A,P]))[mask_2])**2
    
    return loss_1 + loss_2 + loss_3 # + loss_4

def train_model(A, P, O, O_l, O_2, n_epochs=10, lr=0.01, momentum=0, print_every=1):
    optimizer = optim.SGD([A], lr=lr, momentum=momentum)
    
    for epoch in range(n_epochs):
        epoch_loss = 0.0
        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass to calculate outputs
        loss = get_loss(A, P, O, O_l, O_2)

        # Backward pass to calculate gradients
        loss.backward()

        # Perform optimizer step
        optimizer.step()

        # Keep running sum of losses
        epoch_loss += loss.detach()

        # Report progress
        if epoch % print_every == 0:
            msg = f"[E:{epoch}]\tLoss: {epoch_loss:.8f}"
            print(msg)

def train_model_lbfgs(A, P, O, O_l, O_2, n_epochs=10, lr=1, print_every=1):
    optimizer = optim.LBFGS([A], lr=lr)
    
    for epoch in range(n_epochs):        
        def closure():
            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass to calculate outputs
            loss = get_loss(A, P, O, O_l, O_2)

            # Backward pass to calculate gradients
            loss.backward()

            # Report progress
            if epoch % print_every == 0:
                msg = f"[E:{epoch}]\tLoss: {loss.detach():.8f}"
                print(msg)
            
            return loss

        # Perform optimizer step
        optimizer.step(closure)

In [63]:
A = nn.Parameter(torch.from_numpy(np.random.rand(M, K, K)).float()).float()
P = torch.from_numpy(p_Y).float()

# train_model(A, P, O, O_l, O_2, n_epochs=10000, lr=0.005, momentum=0.9, print_every=1000)
train_model_lbfgs(A, P, O, O_l, O_2, n_epochs=10, print_every=1)

print(f"Param estimation error: {np.mean(np.abs(A.detach().numpy() - alpha))}")

[E:0]	Loss: 177.65818787
[E:0]	Loss: 144.57876587
[E:0]	Loss: 58.89773178
[E:0]	Loss: 35.49965668
[E:0]	Loss: 18.02413940
[E:0]	Loss: 4.31744003
[E:0]	Loss: 2.35179520
[E:0]	Loss: 1.00241983
[E:0]	Loss: 0.57635510
[E:0]	Loss: 0.44058844
[E:0]	Loss: 0.41010287
[E:0]	Loss: 0.24305074
[E:0]	Loss: 0.17068177
[E:0]	Loss: 0.09264879
[E:0]	Loss: 0.07045437
[E:0]	Loss: 0.06329033
[E:0]	Loss: 0.05925115
[E:0]	Loss: 0.05787576
[E:0]	Loss: 0.05537866
[E:0]	Loss: 0.05238256
[E:1]	Loss: 0.04945649
[E:1]	Loss: 0.04760237
[E:1]	Loss: 0.04589147
[E:1]	Loss: 0.04376334
[E:1]	Loss: 0.03950287
[E:1]	Loss: 0.03583579
[E:1]	Loss: 0.03326323
[E:1]	Loss: 0.02985516
[E:1]	Loss: 0.02592331
[E:1]	Loss: 0.02189494
[E:1]	Loss: 0.01907339
[E:1]	Loss: 0.01803201
[E:1]	Loss: 0.01729955
[E:1]	Loss: 0.01655176
[E:1]	Loss: 0.01559204
[E:1]	Loss: 0.01457048
[E:1]	Loss: 0.01313580
[E:1]	Loss: 0.01159024
[E:1]	Loss: 0.00931841
[E:1]	Loss: 0.00672343
[E:2]	Loss: 0.00397075
[E:2]	Loss: 0.00212902
[E:2]	Loss: 0.00148414
[E:2

In [64]:
A[0]

tensor([[0.2836, 0.2320, 0.3993],
        [0.3657, 0.3920, 0.3119],
        [0.3507, 0.3761, 0.2888]], grad_fn=<SelectBackward>)

In [65]:
alpha[0]

array([[0.28359862, 0.23194285, 0.39931097],
       [0.36570757, 0.39197016, 0.31187131],
       [0.35069381, 0.376087  , 0.28881771]])

In [36]:
A[0].sum(1)

tensor([0.9199, 1.0672, 1.0129], grad_fn=<SumBackward1>)

In [37]:
alpha[0].sum(1)

array([0.91485244, 1.06954904, 1.01559852])