In [1]:
# Reload imported modules before each cell runs, so edits to local helper
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Experiment settings consumed by the cells below.
#   'domain' -- used to build the default database name.
#   'split'  -- the candidate split that gets loaded.
# Optional keys read by the DB setup cell: 'debug', 'db_name', 'postgres',
# 'db_port'.
config = dict(domain='spouse', split=1)

In [3]:
# Assemble the database URL from `config` and export it as $SNORKELDB.
# NOTE: $SNORKELDB must be set before any snorkel imports
import os

# Default name is 'babble_<domain>', with a '_debug' suffix when the 'debug'
# flag is set; an explicit 'db_name' entry overrides it.
default_db_name = (
    'babble_'
    + config['domain']
    + ('_debug' if config.get('debug', False) else '')
)
DB_NAME = config.get('db_name', default_db_name)

# Postgres is used only when explicitly enabled; otherwise fall back to a
# sqlite file, which gets a '.db' extension.
DB_TYPE = 'postgres' if config.get('postgres') else 'sqlite'
if DB_TYPE == 'sqlite':
    DB_NAME += '.db'

# The host part stays empty unless a port was configured.
DB_ADDR = ""
if 'db_port' in config:
    DB_ADDR = "localhost:{0}".format(config['db_port'])

os.environ['SNORKELDB'] = '{0}://{1}/{2}'.format(DB_TYPE, DB_ADDR, DB_NAME)
print("$SNORKELDB = {0}".format(os.environ['SNORKELDB']))

$SNORKELDB = sqlite:///babble_spouse.db


In [4]:
# Open the Snorkel session used by the queries and viewer below. This import
# has to come after the cell that sets $SNORKELDB (see the NOTE there).
from snorkel import SnorkelSession
session = SnorkelSession()

In [5]:
from snorkel.models import candidate_subclass

# Candidate type queried below; the list presumably names its two argument
# fields (person1, person2) -- confirm against snorkel.models.
Spouse = candidate_subclass('Spouse', ['person1', 'person2'])

In [6]:
# Fetch every Spouse candidate belonging to the configured split, ordered by
# id so the list order is stable between runs.
candidates = (
    session.query(Spouse)
    .filter(Spouse.split == config['split'])
    .order_by(Spouse.id)
    .all()
)
print("Candidates: {}".format(len(candidates)))

Candidates: 2796


### Write Input File

In [7]:
# from snorkel.annotations import load_gold_labels

# L_gold = load_gold_labels(session, annotator_name='gold', split=0)
# L_gold = load_gold_labels(session, annotator_name='gold', split=1)
# L_gold

In [8]:
# labels = [L_gold[L_gold.get_row_index(c),0] for c in candidates]
# assert(len(labels) == len(candidates))

In [None]:
# from gradturk_processing import GradTurkHelper
# helper = GradTurkHelper(candidates, labels, 
#                         pct_positive=0.5, 
#                         num_hits=75, candidates_per_hit=5)

In [None]:
# import os
# index_path = (os.environ['SNORKELHOME'] + 
#               '/experiments/babble/spouse/data/gradturk_candidate_index.csv')
# helper.write_candidate_index(fpath=index_path)

In [None]:
# html_path = (os.environ['SNORKELHOME'] + 
#               '/experiments/babble/spouse/data/gradturk_candidate_html.html')
# helper.write_candidate_html(fpath=html_path)

### Read Output File

In [57]:
from gradturk_processing import GradTurkPoster

postprocessor = GradTurkPoster()

# All three files live under $SNORKELHOME (`os` was imported in the DB setup
# cell).
# GradTurk responses file (presumably the raw annotator output -- confirm).
output_csv_path = (
    os.environ['SNORKELHOME']
    + '/experiments/babble/spouse/data/gradturk_responses.csv'
)
# Candidate index file (same path the disabled write_candidate_index cell
# above targets).
candidate_index_path = (
    os.environ['SNORKELHOME']
    + '/experiments/babble/spouse/data/gradturk_candidate_index.csv'
)
# Third path handed to postprocess(); presumably where it writes its
# results -- confirm against GradTurkPoster.
output_path = (
    os.environ['SNORKELHOME']
    + '/experiments/babble/spouse/data/gradturk_explanations.csv'
)

# Link the collected explanations to the loaded candidates.
explanations = postprocessor.postprocess(
    output_csv_path,
    candidate_index_path,
    output_path,
    candidates,
)

Skipping snorkel user Braden.
Skipping snorkel user Jared.
Skipping snorkel user Jared.
Skipping snorkel user Jared.
Skipping snorkel user Jared.
Building list of target candidate ids...
Collected 143 unique target candidate ids from 148 explanations.
Gathering desired candidates...
Found 143/143 desired candidates
Linking explanations to candidates...
Linked 148/148 explanations


In [58]:
# Peek at the first ten explanations returned by postprocess().
explanations[:10]

[Explanation("True, The word "and" appears between person1 and person2"),
 Explanation("False, The word "Chief" appears before person2"),
 Explanation("False, person1 is the same as person2"),
 Explanation("False, The word "apartment" appears immediately after person2"),
 Explanation("True, "wedding" occurs immediately after person1 and person2"),
 Explanation("True, "husband" occurs before person2"),
 Explanation("True, "previously married" occurs between person1 and person2"),
 Explanation("True, "fiance" occurs between person1 and person2"),
 Explanation("False, "said" occurs between person1 and person2 and "investigation" occurs after person2"),
 Explanation("False, person2 is an empty string.")]

In [59]:
# Iterator over the explanations; the next cell pulls one item from it each
# time it is run, so re-running that cell steps through the list.
exp_iterator = iter(explanations)

In [67]:
from snorkel.viewer import SentenceNgramViewer

# Advance to the next explanation each time this cell is run. The builtin
# next() works on both Python 2.6+ and Python 3, whereas the iterator's
# .next() method exists only on Python 2. Raises StopIteration once every
# explanation has been viewed; re-run the iter() cell above to start over.
exp = next(exp_iterator)
print(exp.condition)

# Show the candidate this explanation is attached to. `sv` stays the last
# expression in the cell so the notebook displays the viewer.
sv = SentenceNgramViewer([exp.candidate], session, n_per_page=3, height=150)
sv

"fiance" occurs between person1 and person2


<IPython.core.display.Javascript object>

### Write Explanations File

In [69]:
import os
from snorkel.contrib.babble import ExplanationIO

# Destination file for the explanations, located under $SNORKELHOME.
fpath = os.environ['SNORKELHOME'] + '/experiments/babble/spouse/data/grad_explanations.tsv'
# Alternative destination, currently disabled:
# fpath = os.environ['SNORKELHOME'] + '/experiments/babble/spouse/data/mturk_explanations_all.tsv'

# `expio` and `fpath` are reused by the read-back cell that follows.
expio = ExplanationIO()
expio.write(explanations, fpath)

Wrote 148 explanations to /Users/bradenjh/repo...use/data/grad_explanations.tsv


### Read Explanations File

In [None]:
from pprint import pprint

# Read the explanations back from `fpath` using the `expio` object created in
# the write cell. NOTE: this rebinds `explanations`, replacing the list that
# postprocess() produced earlier in the notebook.
explanations = expio.read(fpath)
pprint(explanations[:10])