# Labeling function and model analysis

Imports

In [None]:
from fonduer.supervision import Labeler
from fonduer.features import Featurizer
from fonduer.candidates.models import Candidate

from snorkel.labeling import LFAnalysis
from snorkel.labeling.model import LabelModel

from pipeline.utils import get_session, load_candidates
from definitions.candidates import NameFullAbbr, NameFullTask
from pipeline.lfs.dummy_lfs import dummy_lfs


Get session object

In [None]:
# Obtain the session for the "test_collection" database through the project
# helper; every later cell talks to the database through this object.
session = get_session(
    db_name="test_collection",
)

Define candidates and Labeler object

In [None]:
# Candidate classes analysed in this notebook. The order matters: the label
# matrices are later unpacked positionally as (NameFullTask, NameFullAbbr).
candidates = [
    NameFullTask,
    NameFullAbbr,
]

# Fonduer labeler bound to the session and to the candidate classes above.
labeler = Labeler(session, candidates)

Load candidates and labels

In [None]:
# Load the candidates of split 0 for every candidate class.
# NOTE(review): split 0 is presumably the training split -- confirm against
# pipeline.utils.load_candidates.
train_cands = load_candidates(session, 0, candidates)

# One label matrix per candidate class, unpacked in the same order as
# `candidates` (assumes get_label_matrices preserves that order).
(
    L_train_NFT,  # NFT --> NameFullTask
    L_train_NFA,  # NFA --> NameFullAbbr
) = labeler.get_label_matrices(train_cands)

## LF analysis

NameFull + Abbreviation

In [None]:
# Summary statistics of the labeling functions on the NameFullAbbr label matrix.
# The LFs are passed sorted by name -- presumably to line up with the column
# order of the label matrix; verify against the labeler's key ordering.
nfa_analysis = LFAnalysis(
    L=L_train_NFA,
    lfs=sorted(dummy_lfs, key=lambda fn: fn.name),
)
nfa_analysis.lf_summary()

NameFull + Task

In [None]:
# Summary statistics of the labeling functions on the NameFullTask label matrix.
# The LFs are passed sorted by name -- presumably to line up with the column
# order of the label matrix; verify against the labeler's key ordering.
nft_analysis = LFAnalysis(
    L=L_train_NFT,
    lfs=sorted(dummy_lfs, key=lambda fn: fn.name),
)
nft_analysis.lf_summary()

## Model analysis

List of models

| Model name | Candidate | Description | n_epochs |
| ---------- | --------- | ----------- | -------- |
| label_model_NFA_v1 | NameFullAbbr | label model with random label functions | 500 |
| label_model_NFT_v1 | NameFullTask | label model with random label functions | 500 |

In [None]:
from pathlib import Path


def _load_or_fit_label_model(L_train, model_path, n_epochs=500, log_freq=100):
    """Return a binary ``LabelModel`` restored from ``model_path`` or freshly fit.

    If ``model_path`` exists, the saved model is loaded from it. Otherwise a
    new model is fit on ``L_train`` and saved to ``model_path``; the parent
    directory is created first if it is missing.

    Args:
        L_train: Label matrix to fit on when no saved model exists.
        model_path: ``pathlib.Path`` of the saved model file.
        n_epochs: Number of epochs passed to ``LabelModel.fit``.
        log_freq: Logging frequency passed to ``LabelModel.fit``.

    Returns:
        The loaded or newly fit ``LabelModel``.
    """
    model = LabelModel(cardinality=2)
    if model_path.is_file():
        # NOTE: Snorkel restores the model by unpickling this file, so only
        # load model files that come from a trusted source.
        model.load(source=str(model_path))
        print(f"Loaded model from {model_path}")
    else:
        # Create the target directory before the (slow) fit so that a missing
        # "models/" folder cannot make save() fail after training has finished.
        model_path.parent.mkdir(parents=True, exist_ok=True)
        model.fit(L_train, n_epochs=n_epochs, log_freq=log_freq)
        model.save(str(model_path))
        print(f"Fit and saved model to {model_path}")
    return model


# Each model is handled on its own: an existing saved model is never refit and
# overwritten just because the other model's file is missing.
gen_model_NFT = _load_or_fit_label_model(
    L_train_NFT, Path("models/label_model_NFT_v1.pkl")
)
gen_model_NFA = _load_or_fit_label_model(
    L_train_NFA, Path("models/label_model_NFA_v1.pkl")
)

### Generating train marginals

In [None]:
# Class probabilities predicted by each label model on its own training label
# matrix (NFA first, then NFT).
train_marginals_NFA, train_marginals_NFT = (
    gen_model_NFA.predict_proba(L_train_NFA),
    gen_model_NFT.predict_proba(L_train_NFT),
)

In [None]:
import matplotlib.pyplot as plt

# Histograms of the label-model marginals on a 2x2 grid: one row per candidate
# class (NFA on top, NFT below), P(TRUE) on the left and P(FALSE) on the right.
#
# predict_proba returns one column per class, indexed by the integer class
# label. Under the Snorkel/Fonduer binary convention (FALSE = 0, TRUE = 1) the
# TRUE probability is column 1 and the FALSE probability is column 0; the
# earlier version plotted them under the opposite titles.
# NOTE(review): assumes the labeling functions emit FALSE = 0 / TRUE = 1 --
# confirm against the label constants used in pipeline.lfs.
bins = 20
class_columns = (("TRUE", 1), ("FALSE", 0))  # (title label, predict_proba column)
marginals_by_candidate = (
    ("NFA", train_marginals_NFA),
    ("NFT", train_marginals_NFT),
)

# figsize is (width, height) in inches.
fig, ax = plt.subplots(2, 2, figsize=(10, 6))

for row, (cand_name, marginals) in enumerate(marginals_by_candidate):
    for col, (label_name, label_idx) in enumerate(class_columns):
        ax[row, col].hist(marginals[:, label_idx], bins=bins)
        ax[row, col].set_title(f"{cand_name}({label_name})")

# set_tight_layout expects a bool or a dict of padding options; the string
# "w_pad" passed previously only worked because it is truthy. Request the
# default tight layout explicitly instead.
fig.tight_layout()
plt.show()