In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np

In [2]:
# Standard-library modules.
import sys
import unittest

# Numerical / tensor libraries.
import numpy as np
import torch

# Add "../metal" to the module search path before the `metal` imports below.
# NOTE(review): the path is relative, so this presumably assumes the notebook
# is started from a directory that has "../metal" next to it — confirm.
sys.path.append("../metal")
from metal.label_model.label_model import LabelModel
from metal.label_model.baselines import (
    RandomVoter,
    MajorityClassVoter,
    MajorityLabelVoter,
)

# Same search-path extension for the synthetic data generator import.
sys.path.append("../synthetics")
from synthetics.generate import generate_single_task_unipolar

In [3]:
# np.random.seed(9)

# Generate unipolar L for single task.
# N and M are the two positional size arguments handed to the generator; the
# call returns the label matrix L, the labels Y and a metadata dict that the
# cells below index by key.
N = 10000
M = 40
L, Y, metadata = generate_single_task_unipolar(
    N,
    M,
    class_balance=[0.5, 0.5],
    alpha_range=[0.4, 0.8],
    beta_range=[0.5],
)

In [88]:
import numpy as np
from scipy.sparse import csr_matrix

L = np.array([[1, 0, 1], [1, 3, 2], [0, 0, 0], [0, 0, 2], [0, 1, 2]])
L

array([[1, 0, 1],
       [1, 3, 2],
       [0, 0, 0],
       [0, 0, 2],
       [0, 1, 2]])

In [90]:
np.ravel((L != 0).sum(axis=0) / L.shape[0])

array([0.4, 0.4, 0.8])

In [12]:
# Fit a LabelModel on L; metadata['cond_probs'] is passed to train() as `accs`.
model = LabelModel()
model.train(
    L,
    accs=metadata['cond_probs'],
    verbose=True,
    lr=0.001,
)
# score = model.score(L, Y, verbose=False)
# accs_score = model.get_accs_score(metadata['accs'])

Reaffirming lr=0.001
[Epoch 0] Loss: 2385.671861	Accs mean sq. error = 0.775648308370996
[Epoch 10] Loss: 148.934256	Accs mean sq. error = 0.1421062857639292
[Epoch 20] Loss: 117.509378	Accs mean sq. error = 0.23200102833350406
[Epoch 30] Loss: 23.789258	Accs mean sq. error = 0.15049886330726042
[Epoch 40] Loss: 12.003277	Accs mean sq. error = 0.18818972900319095
[Epoch 50] Loss: 9.056403	Accs mean sq. error = 0.16124507278655825
[Epoch 60] Loss: 7.929413	Accs mean sq. error = 0.12425781484892799
[Epoch 70] Loss: 5.293252	Accs mean sq. error = 0.10088248223955072
[Epoch 80] Loss: 3.658852	Accs mean sq. error = 0.07791547725051463
[Epoch 90] Loss: 2.188789	Accs mean sq. error = 0.0572695920964544
[Epoch 99] Loss: 1.453586	Accs mean sq. error = 0.04528077323913161


In [14]:
# model.predict_proba(L)
# Display the value returned by accs() on the model trained in the previous cell.
model.accs()

array([ 0.12555656,  0.27525154,  0.27115615,  0.20970066,  0.00907444,
        0.19064112,  0.21353412,  0.19161847,  0.19845596, -0.02677639,
        0.21557667,  0.28395487,  0.31725789,  0.23323984,  0.67333791,
        0.12907169, -0.06355467,  0.25294338,  0.39361005,  0.17715314,
        0.24550579,  0.2131668 ,  0.06105205,  0.35771958,  0.16968615,
        0.2580307 , -0.08519086,  0.49239095,  0.00263071,  0.29859959,
        0.35364489,  0.20489656,  0.08809125,  0.06139106,  0.31097437,
        0.33278009, -0.17117194,  0.2467482 ,  0.16584052,  0.23143859])

In [None]:
# RandomVoter baseline: seed NumPy's global RNG, train on L, then print the
# score obtained against Y.
np.random.seed(1)
model = RandomVoter()
model.train(L)
print(model.score(L, Y, verbose=False))

In [None]:
# MajorityClassVoter baseline: train with the class balance, then print the
# score obtained against Y.
np.random.seed(1)
# Take the class balance from the generator's metadata in one place instead of
# reading it and then overwriting it with a hard-coded [0.5, 0.5]; the literal
# would silently go stale if the generator's class_balance argument changed.
# np.asarray keeps the value an ndarray, as the previous literal was.
# NOTE(review): presumably metadata['class_balance'] echoes the class_balance
# passed to the generator — confirm.
balance = np.asarray(metadata['class_balance'])
model = MajorityClassVoter()
model.train(L, balance)
score = model.score(L, Y, verbose=False)
print(score)

In [None]:
# MajorityLabelVoter baseline: train on L and print the score against Y,
# asking score() to break ties with 'abstain'.
model = MajorityLabelVoter()
model.train(L)
print(model.score(L, Y, verbose=False, break_ties='abstain'))

### Old synthetic generator

In [None]:
# import scipy
# import torch
# from synthetics.generate import generate_single_task_unipolar_lfs

# L, _, accs, Y, _ = generate_single_task_unipolar_lfs(100000, 10, beta=0.5, 
#     class_balance=[0.6, 0.4])
# L = scipy.sparse.csr_matrix(L, dtype=np.int8)
# Y = torch.tensor(Y, dtype=torch.short)

### New synthetic generator

In [None]:
from synthetics.generate import generate_single_task_unipolar

# Regenerate L, Y and metadata with a fixed generator seed.
# n and m are passed positionally, k by keyword.
n, m, k = 1000, 20, 2
L, Y, metadata = generate_single_task_unipolar(
    n,
    m,
    k=k,
    acc=[0.6, 0.9],
    rec=[0.1, 0.2],
    class_balance=[0.7, 0.3],
    lf_balance=None,
    seed=1,
)

In [None]:
from metal.metrics import accuracy_score, coverage_score

# accs = metadata['accs']
# emp_accs = []
# for j in range(m):
#     emp_accs.append(accuracy_score(Y, L[:,j], ignore_in_pred=[0]))
# for a, b in zip(accs, emp_accs):
#     print(f"{a:.4f}, {b:.4f}, {abs(a - b):.8f}")

# recs = metadata['recs']
# polarities = metadata['polarities']
# emp_recs = []
# for j in range(m):
#     other_labels = [x for x in range(k + 1) if x != polarities[j]]
#     emp_recs.append(coverage_score(Y, L[:,j], ignore_in_gold=other_labels))
#     for a, b in zip(recs, emp_recs):
#         print(f"{a:.4f}, {b:.4f}, {abs(a - b):.8f}")


In [None]:
from metal.label_model.baselines import (
    RandomVoter, 
    MajorityLabelVoter, 
    MajorityClassVoter,
)

In [None]:
# Create the RandomVoter baseline and train it on L; `rv` is scored in the next cell.
rv = RandomVoter()
rv.train(L)
# rv.predict(L)

In [None]:
# Score `rv` against Y 100 times and print the mean of the collected scores.
scores = [rv.score(L, Y, verbose=False) for _ in range(100)]
print(np.mean(scores))

In [None]:
from collections import Counter

# Empirical class balance: count each label value in Y, order the counts by
# label value, and normalise by the total.
label_counts = Counter(Y.numpy())
class_counts = np.array([label_counts[label] for label in sorted(label_counts)])
balances = [class_counts / class_counts.sum()]

# Train the MajorityClassVoter with that balance (passed as a one-element list).
mc = MajorityClassVoter()
mc.train(L, balances)
# mc.predict(L)

In [None]:
# Score `mc` against Y 100 times and print the mean of the collected scores.
scores = [mc.score(L, Y, verbose=False) for _ in range(100)]
print(np.mean(scores))

In [None]:
# Create the MajorityLabelVoter baseline and train it on L; `mv` is scored in the next cell.
mv = MajorityLabelVoter()
mv.train(L)
# mv.predict(L)

In [None]:
# Score `mv` against Y 100 times and print the mean of the collected scores.
scores = [mv.score(L, Y, verbose=False) for _ in range(100)]
print(np.mean(scores))