# Minimalist Tutorial

Generate synthetic data

In [1]:
%%time
from synthetics.generate import SingleTaskTreeDepsGenerator
from synthetics.generate import gaussian_bags_of_words
from synthetics.words1k import vocab1k as vocab
import torch

# N sizes the feature matrix and the splits below; N, M and K are all handed to
# the generator (presumably: number of examples, number of labeling sources,
# number of classes -- confirm against SingleTaskTreeDepsGenerator).
N = 10000
M = 10
K = 2

data = SingleTaskTreeDepsGenerator(N, M, k=K)
L = data.L
Y = data.Y

# Create Gaussian bags of words with different vocab distribution by class
bags, sentences = gaussian_bags_of_words(Y, vocab, sigma=1, bag_size=[25, 50])

# Turn every bag into a row of counts: each entry of a bag is used as a column
# index into X, and that column is incremented once per occurrence.
X = torch.zeros(N, len(vocab), dtype=torch.float)
for i, bag in enumerate(bags):
    for word in bag:
        X[i, word] += 1

# Make train/dev/test splits (80% / 10% / 10%).
# The cut points are derived from N with integer arithmetic so the splits stay
# proportional if N is changed; for N = 10000 they are exactly 8000 and 9000.
train_end = N * 8 // 10
dev_end = N * 9 // 10
Xs = [X[:train_end], X[train_end:dev_end], X[dev_end:]]
Ys = [Y[:train_end], Y[train_end:dev_end], Y[dev_end:]]

CPU times: user 17 s, sys: 412 ms, total: 17.4 s
Wall time: 18 s


Train and score label model

In [2]:
%%time
from metal.label_model import LabelModel

# Build the label model from the generator's own settings (m, k, p and its
# dependency structure E), fit it on L with the loss reported every 10 epochs,
# then score its predictions on L against Y.
label_model = LabelModel(
    data.m,
    data.k,
    p=data.p,
    deps=data.E,
)
label_model.train(L, n_epochs=100, print_every=10)
score = label_model.score(L, Y, verbose=False)
print(f"Label Model Accuracy={score}")

Computing O...
Estimating \mu...
[Epoch 0] Loss: 9.721893
[Epoch 10] Loss: 2.027859
[Epoch 20] Loss: 0.385520
[Epoch 30] Loss: 0.382989
[Epoch 40] Loss: 0.057346
[Epoch 50] Loss: 0.027102
[Epoch 60] Loss: 0.013278
[Epoch 70] Loss: 0.003172
[Epoch 80] Loss: 0.002982
[Epoch 90] Loss: 0.001566
[Epoch 99] Loss: 0.001319
Label Model Accuracy=0.9671
CPU times: user 420 ms, sys: 53.1 ms, total: 473 ms
Wall time: 235 ms


Train and score end model

In [3]:
%%time
from metal.end_model import EndModel

# The first entry of layer_output_dims is the width of the input features (the
# printed architecture shows it as in_features of the first Linear layer).
# Read it from the training matrix instead of hard-coding 1000 so the model
# still matches X if the vocabulary size changes.
input_dim = Xs[0].shape[1]

em = EndModel(
    seed=1,
    verbose=True,
    layer_output_dims=[input_dim, 10],
    batchnorm=False,
    dropout=0.0,
)

# Train on the train split, passing the dev split alongside it, then report the
# score on the held-out test split.
em.train(Xs[0], Ys[0], Xs[1], Ys[1], verbose=False, n_epochs=10)
score = em.score(Xs[2], Ys[2], verbose=False)
print(f"End Model Accuracy: {score}")


Network architecture:
Sequential(
  (0): Sequential(
    (0): IdentityModule()
  )
  (1): Sequential(
    (0): Linear(in_features=1000, out_features=10, bias=True)
  )
  (2): Linear(in_features=10, out_features=2, bias=True)
)

End Model Accuracy: 0.998
CPU times: user 16.3 s, sys: 5.5 s, total: 21.8 s
Wall time: 7.32 s
