In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np

### Generate synthetic data

In [2]:
from synthetics import generate_single_task_unipolar, gaussian_bags_of_words, vocab1k

# Settings forwarded to generate_single_task_unipolar() below.
N = 1000
M = 30
K = 2
NUM_SPLITS = 3  # the loop below appends one entry per split to each list

Ds = [] # data
Xs = [] # features
Ls = [] # noisy labels
Ys = [] # true labels
for _ in range(NUM_SPLITS):
    # NOTE(review): every iteration passes the same seed=1. If the seed fully
    # determines the generator's output, all splits share identical L and Y
    # (so later evaluation on Ls[1]/Ys[1] is not on held-out data) — confirm
    # whether this is intended before changing it.
    L, Y, _ = generate_single_task_unipolar(
        N, M, k=K, acc=[0.6, 0.9], rec=[0.1, 0.2], 
        class_balance=[0.3, 0.7], lf_balance=None, seed=1)
    
    # Features and raw items are derived from the true labels Y.
    X, D = gaussian_bags_of_words(Y, vocab1k)
    
    Ls.append(L)
    Ys.append(Y)
    Ds.append(D)
    Xs.append(X)

### Apply LabelModel

In [3]:
from metal.label_model import LabelModel

# Fit the label model on the first split's noisy labels, then evaluate it
# against the true labels of the second split.
lm = LabelModel(seed=2)
lm.train(Ls[0], n_epochs=50)
# score() reports an evaluation metric (it prints "Accuracy: ..."), not a
# vector of predicted labels, so keep it under a name that says so.
# Predicted labels are obtained separately via lm.predict().
lm_accuracy = lm.score(Ls[1], Ys[1])

Overwriting seed=None to seed=2
Overwriting n_epochs=100 to n_epochs=50
[Epoch 0] Loss: 0.180101
[Epoch 10] Loss: 0.157589
[Epoch 20] Loss: 0.119086
[Epoch 30] Loss: 0.086237
[Epoch 40] Loss: 0.070783
[Epoch 49] Loss: 0.067910
Finished Training
Accuracy: 0.717


In [4]:
# Predicted labels for the second split; the metric, confusion-matrix and
# error-analysis cells below all read Y_p.
Y_p = lm.predict(Ls[1])

### Random Search

In [5]:
# Hyperparameter search space. Each entry uses one of three forms:
#   * a bare value            -> held constant for every candidate
#   * a list                  -> a set of discrete choices
#   * a {'range', 'scale'}    -> an interval plus the scale to interpolate on
search_space = dict(
    n_epochs=25,
    lr=[0.01, 0.1],
    l2=dict(range=[0.0001, 1], scale='log'),
)

In [6]:
from metal.tuner import ModelTuner

# Search over search_space with a ModelTuner wrapping the LabelModel class.
# Candidates are trained with train_args (the first split's noisy labels) and
# compared on the second split (Ls[1], Ys[1]) using the 'f1' metric;
# max_search=3 matches the three configurations reported in the output.
tuner = ModelTuner(LabelModel)
init_args = []  # presumably positional args for the LabelModel constructor (none here) — TODO confirm
train_args = [Ls[0]]  # presumably positional args for the model's train() call — TODO confirm
model, best_config = tuner.search(init_args, train_args, Ls[1], Ys[1], 
                                  search_space, max_search=3, metric='f1')

[1] Testing {'lr': 0.1, 'l2': 0.018559937546941248}
[Epoch 0] Loss: 0.180101
[Epoch 10] Loss: 0.159935
[Epoch 20] Loss: 0.136423
[Epoch 24] Loss: 0.130878
Finished Training
F1: 0.755
[2] Testing {'lr': 0.01, 'l2': 0.005158094689171092}
[Epoch 0] Loss: 0.180101
[Epoch 10] Loss: 0.177784
[Epoch 20] Loss: 0.173423
[Epoch 24] Loss: 0.171480
Finished Training
F1: 0.657
[3] Testing {'lr': 0.1, 'l2': 0.005158094689171092}
[Epoch 0] Loss: 0.180101
[Epoch 10] Loss: 0.158265
[Epoch 20] Loss: 0.124516
[Epoch 24] Loss: 0.112977
Finished Training
F1: 0.748
[SUMMARY]
Best model: [1]
Best config: {'n_epochs': 25, 'lr': 0.1, 'l2': 0.005158094689171092, 'verbose': False}
Best score: 0.7546583850931676


### Metrics

Calculate metrics in one of two ways.

1. Use metric_score() and pass the metric name
2. Call the specific metric's function (e.g., accuracy_score())

In [7]:
from metal.metrics import metric_score, accuracy_score

# The same metric computed two ways: selected by name through metric_score(),
# and by calling the metric's own function directly. Both take the true labels
# first and the predictions second.
metric_score(Ys[1], Y_p, 'accuracy')
accuracy_score(Ys[1], Y_p)

0.712

0.712

Built-in metrics include:

In [8]:
# Each call selects a metric by name; metric-specific options (here beta for
# 'fbeta') are given as extra keyword arguments.
# NOTE(review): the recorded output shows one value per line, which suggests the
# notebook was run with every top-level expression displayed; under IPython's
# default setting only the last expression of a cell is shown — confirm.
metric_score(Ys[1], Y_p, 'accuracy')
metric_score(Ys[1], Y_p, 'coverage')
metric_score(Ys[1], Y_p, 'precision')
metric_score(Ys[1], Y_p, 'recall')
metric_score(Ys[1], Y_p, 'f1')
metric_score(Ys[1], Y_p, 'fbeta', beta=1.0)

0.712

1.0

0.510989010989011

0.93

0.6595744680851063

0.6595744680851063

### Confusion matrix

In [9]:
from metal.analysis import confusion_matrix
# Note the argument order: predictions first, true labels second
# (the reverse of metric_score above).
confusion_matrix(Y_p, Ys[1])

array([[279, 267],
       [ 21, 433]])

In [10]:
# normalize=True: as the output shows, entries are reported as fractions
# rather than raw counts.
confusion_matrix(Y_p, Ys[1], normalize=True)

array([[0.279, 0.267],
       [0.021, 0.433]])

In [11]:
# pretty=True prints a labelled table (rows l=..., columns y=...); the result
# is assigned to `mat` so only the printed table appears in the output.
mat = confusion_matrix(Y_p, Ys[1], pretty=True)

        y=1    y=2   
 l=1    279    267   
 l=2    21     433   


### Error analysis

In [12]:
from metal.analysis import error_buckets

# Bucket the second split's raw items (Ds[1]) by predicted vs. true label.
buckets = error_buckets(Y_p, Ys[1], Ds[1])
# presumably keyed by (predicted label, true label), so [1,1] holds items
# predicted as 1 whose true label is also 1 — TODO confirm key order
tp = buckets[1,1]

In [13]:
# Show only the first three items of the bucket to keep the output short.
tp[:3]

['him fight mr within hope economy visit parent piece problem discuss camera join factor day should night personal music week sell catch manager base them chance job later away legal period left treatment successful control maintain if fail throw',
 'professional operation arm response or organization city affect agency protect case simple defense shoulder such process patient property on large gun somebody side employee story their well court',
 'animal sell think inside similar manage than these might ten or lose machine in hear land customer audience nearly attack go college capital weight social lead answer recent wall us individual response serious he team']