The purpose of this notebook is to find candidates for explanations that do not yet have one attached (the attached candidate is what consistency checking uses). For each explanation missing a candidate, loop through the candidates until you find one that its parsed function matches. Confirm that the parse is the intended one, then move on.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Notebook-level settings. Only the domain is set here; later cells also read
# the optional keys 'debug', 'db_name', 'postgres' and 'db_port' if present.
config = dict(domain='spouse')

In [3]:
# Assemble the database URL from the notebook config and export it.
# NOTE: $SNORKELDB must be set before any snorkel imports
import os

# Database name: explicit 'db_name' wins, otherwise derive it from the domain
# (with a '_debug' suffix when the debug flag is set).
default_db_name = 'babble_' + config['domain']
if config.get('debug', False):
    default_db_name += '_debug'
DB_NAME = config.get('db_name', default_db_name)

# Backend: postgres only when explicitly requested, sqlite otherwise.
# The sqlite database name gets a '.db' suffix appended.
DB_TYPE = 'postgres' if config.get('postgres') else 'sqlite'
if DB_TYPE == 'sqlite':
    DB_NAME += '.db'

# Address part of the URL is empty unless a port was configured.
if 'db_port' in config:
    DB_ADDR = "localhost:{0}".format(config['db_port'])
else:
    DB_ADDR = ""

os.environ['SNORKELDB'] = '{0}://{1}/{2}'.format(DB_TYPE, DB_ADDR, DB_NAME)
print("$SNORKELDB = {0}".format(os.environ['SNORKELDB']))

$SNORKELDB = sqlite:///babble_spouse.db


In [5]:
# snorkel imports for this cell, grouped together; they must run only after
# $SNORKELDB has been exported by the earlier cell.
from snorkel import SnorkelSession
from snorkel.contrib.pipelines import merge_configs, get_local_pipeline
from snorkel.models import candidate_subclass

session = SnorkelSession()

# Resolve config conflicts (nb_config > local_config > global_config)
config = merge_configs(config)

# Candidate class is defined by the name and entity types in the merged config.
candidate_class = candidate_subclass(
    config['candidate_name'],
    config['candidate_entities'],
)

# get_local_pipeline returns a callable for the configured domain; calling it
# with the session, candidate class and config yields the pipeline object.
pipeline = get_local_pipeline(config['domain'])
pipe = pipeline(session, candidate_class, config)

Overwriting domain=None to domain=spouse
Overwriting print_freq=1 to print_freq=5
Overwriting LF_acc_prior_weight_default=1.0 to LF_acc_prior_weight_default=0.5
Overwriting decay=0.95 to decay=0.99
Overwriting init_class_prior=0 to init_class_prior=-1.15
Overwriting reg_param=0.1 to reg_param=0.5
Overwriting babbler_candidate_split=1 to babbler_candidate_split=[0, 1, 2]
Overwriting disc_model_class=lstm to disc_model_class=logreg
Using SpousePipeline object.


In [6]:
# Load every stored candidate of the pipeline's candidate class into a list;
# no split filter is applied to the query.
candidate_query = session.query(pipe.candidate_class)
candidates = candidate_query.all()

In [7]:
# Sanity check: number of candidates loaded (shown as the cell's output).
len(candidates)

27688

In [8]:
# Load the example explanations and user lists for the configured domain
# instead of always pulling the protein ones.
# NOTE(review): the module path mirrors the previously hardcoded import
# (experiments.babble.<domain>.<domain>_examples) -- confirm that every domain
# follows this layout.
import importlib

examples_module_name = 'experiments.babble.{0}.{0}_examples'.format(config['domain'])
try:
    examples_module = importlib.import_module(examples_module_name)
except ImportError:
    # Keep the notebook runnable by falling back to the original hardcoded
    # module, but say so: its lists belong to the protein domain.
    print("WARNING: could not import {0}; falling back to protein examples".format(
        examples_module_name))
    from experiments.babble.protein import protein_examples as examples_module

# Keep the original names available for any cell that calls them directly.
get_explanations = examples_module.get_explanations
get_user_lists = examples_module.get_user_lists

explanations = get_explanations()
user_lists = get_user_lists()

In [9]:
from snorkel.contrib.babble import Explanation

# A single hand-written explanation replaces whatever `explanations` held
# before. It is created without a candidate, which is the case the search
# cell near the end of the notebook handles.
husband_explanation = Explanation(
    name='LF_between_before',
    label=True,
    condition="""'husband' is in the sentence""",
    candidate=None,
)
explanations = [husband_explanation]

In [10]:
from snorkel.contrib.babble.utils import link_explanation_candidates
# Presumably attaches candidate objects to explanations that reference one.
# The explanation defined above was built with candidate=None, and the
# recorded output reports that linking was skipped.
explanations = link_explanation_candidates(explanations, candidates)

Building list of target candidate ids...
Collected 0 unique target candidate ids from 1 explanations.
No candidate hashes were provided. Skipping linking.


In [11]:
# for e in explanations:
#     print(e.condition)
#     if e.candidate:
#         print(e.candidate[0].get_span(), e.candidate[1].get_span())
#         print(e.candidate.get_parent())
#         print("")

In [12]:
from snorkel.contrib.babble import Babbler

# Build the babbler in text mode for the pipeline's candidate class, using the
# user lists loaded earlier.
babbler = Babbler(
    session,
    mode='text',
    candidate_class=pipe.candidate_class,
    user_lists=user_lists,
)

# Parse the explanations and apply the resulting labeling functions to split 1.
babbler.apply(explanations, split=1)

Created grammar with 597 rules
Flushing all parses from previous explanation set.
1 explanation(s) out of 1 were parseable.
1 parse(s) generated from 1 explanation(s).
1 parse(s) remain (0 parse(s) removed by DuplicateSemanticsFilter).
Note: 1 LFs did not have candidates and therefore could not be filtered.
1 parse(s) remain (0 parse(s) removed by ConsistencyFilter).
### Applying labeling functions to split 1

### Done in 5.5s.

1 parse(s) remain (0 parse(s) removed by UniformSignatureFilter: (0 None, 0 All)).
1 parse(s) remain (0 parse(s) removed by DuplicateSignatureFilter).
Added 1 parse(s) from 1 explanations to set. (Total # parses = 1)


In [13]:
# Fetch the babbler's parses with translate=False; the next cell prints each
# parse and translates its semantics explicitly.
parses = babbler.get_parses(translate=False)

In [14]:
# Show each parse three ways: the source explanation, the raw semantics, and
# the grammar's translation of those semantics. A blank line separates parses.
for parse in parses:
    print(parse.explanation)
    semantics = parse.semantics
    print(semantics)
    translated = babbler.semparser.grammar.translate(semantics)
    print(translated)
    print("")

Explanation("LF_between_before: True, 'husband' is in the sentence")
('.root', ('.label', ('.bool', True), ('.call', ('.in', ('.extract_text', ('.sentence',))), ('.string', u'husband'))))
return 1 if 'husband'.in(text(the sentence)) else 0



In [None]:
from collections import defaultdict

# For every parse whose explanation has no candidate attached, scan the
# candidates and print the first one the parse's function accepts: its two
# spans, the parent's text, and its stable id.
# NOTE(review): explanation_map is created but never filled in by this cell;
# it is kept so any other cell that references it still finds it defined.
explanation_map = defaultdict(list)
for parse in parses:
    # Explanations that already carry a candidate need no search.
    if parse.explanation.candidate is not None:
        continue
    print(parse.explanation)
    print("")
    for c in candidates:
        if parse.function(c):
            print((c[0].get_span(), c[1].get_span()))
            print("")
            print(c.get_parent().text)
            print("")
            print(c.get_stable_id())
            break
    else:
        # for/else: reached only when the loop finished without a break, i.e.
        # no candidate matched. Report it instead of staying silent.
        print("No matching candidate found among {0} candidates.".format(len(candidates)))

In [None]:
# TRAIN = 0
# candidates = session.query(pipe.candidate_class).filter(
#     pipe.candidate_class.split == TRAIN)
# for c in candidates:
#     sentence = c.get_parent().text
#     if 'develop' in sentence and 'following' in sentence:
#         print(sentence)
#         print("")
#         print(c)
#         print(c.get_stable_id())
#         print("\n")