In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Notebook-level configuration. It is merged with the other config sources
# later on via merge_configs() (precedence noted there: nb_config first).
config = dict(
    # NOTE: the protein domain is still hard-coded further down as well
    # (see the `experiments.babble.protein` import).
    domain='protein',
    # Optional overrides, currently disabled:
    #   db_name='babble_spouse_demo',
    #   babbler_candidate_split=1,
    #   babbler_label_split=1,
)

In [3]:
# Assemble the database connection string and publish it as $SNORKELDB.
# NOTE: $SNORKELDB must be set before any snorkel imports
import os

# Default name is derived from the domain, with a '_debug' suffix in debug mode;
# an explicit 'db_name' entry in the config wins over the default.
default_db_name = 'babble_' + config['domain'] + ('_debug' if config.get('debug', False) else '')
DB_NAME = config.get('db_name', default_db_name)

# Postgres only when the config asks for it (truthy 'postgres'); otherwise a
# local SQLite file, whose name gets a '.db' extension.
DB_TYPE = 'postgres' if config.get('postgres') else 'sqlite'
if DB_TYPE == 'sqlite':
    DB_NAME = DB_NAME + '.db'

# The address part stays empty unless a port is configured.
if 'db_port' in config:
    DB_ADDR = "localhost:{0}".format(config['db_port'])
else:
    DB_ADDR = ""

os.environ['SNORKELDB'] = '{0}://{1}/{2}'.format(DB_TYPE, DB_ADDR, DB_NAME)
print("$SNORKELDB = {0}".format(os.environ['SNORKELDB']))

$SNORKELDB = sqlite:///babble_protein.db


In [4]:
from snorkel import SnorkelSession
from snorkel.contrib.pipelines import merge_configs, get_local_pipeline
from snorkel.models import candidate_subclass

# Open the database session ($SNORKELDB was exported in the previous cell).
session = SnorkelSession()

# Resolve config conflicts (nb_config > local_config > global_config)
config = merge_configs(config)

# Candidate class for this task, built from the name and entity types that
# the merged config provides.
candidate_class = candidate_subclass(
    config['candidate_name'],
    config['candidate_entities'],
)

Overwriting domain=None to domain=protein
Overwriting babbler_candidate_split=1 to babbler_candidate_split=[0, 1, 2]
Overwriting traditional_split=0 to traditional_split=1


In [5]:
from experiments.babble.protein.protein_examples import get_user_lists

In [6]:
# Load the named word lists for this domain; they are passed to the
# SemanticParser below through its `user_lists` argument.
user_lists = get_user_lists()
# Display the available list names (e.g. 'prep', 'negative') that the
# explanation semantics refer to via '.user_list'.
user_lists.keys()

['influence',
 'negexp',
 'nucleic_acids',
 'residue',
 'int_ind',
 'positive',
 'interact',
 'signexp',
 'negative',
 'between',
 'interaction_indicators',
 'prep',
 'coimmunopr',
 'mutations',
 'uncertain',
 'prep2',
 'substrate',
 'bindmid',
 'phosphory',
 'work',
 'neg_ind',
 'known_targets']

In [7]:
from snorkel.contrib.babble import SemanticParser

# Parser that turns natural-language explanations into labeling functions for
# `candidate_class`, with the domain word lists made available to it.
semparser = SemanticParser(
    mode='text',
    candidate_class=candidate_class,
    user_lists=user_lists,
    beam_width=10,
)

Created grammar with 599 rules


In [8]:
from snorkel.contrib.babble import Explanation

# Explanations to debug. Each one pairs a natural-language `condition` with the
# stable id of the candidate it was written for and the hand-written gold
# `semantics` tree that a correct parse is expected to match.
explanations = [
    Explanation(
        name="LF_by_with",
        label=True,
        condition="a prep word is between them and no negative words are between them and the number of words between them is smaller than 10",
        candidate="10946297::span:893:900~~10946297::span:920:923",
        # Gold semantics: and(any-prep, and(none-negative, count-lt-10)).
        # The previous one-line form closed '.none' one parenthesis too late,
        # which nested the '.call' clause inside '.none' and left the inner
        # '.and' with a single operand; the tree below matches the condition.
        semantics=(
            '.root',
            ('.label',
                ('.bool', True),
                ('.and',
                    # "a prep word is between them"
                    ('.any', ('.map',
                        ('.in', ('.extract_text', ('.between', ('.list', ('.arg', ('.int', 1)), ('.arg', ('.int', 2)))))),
                        ('.user_list', ('.string', u'prep')))),
                    ('.and',
                        # "no negative words are between them"
                        ('.none', ('.map',
                            ('.in', ('.extract_text', ('.between', ('.list', ('.arg', ('.int', 1)), ('.arg', ('.int', 2)))))),
                            ('.user_list', ('.string', u'negative')))),
                        # "the number of words between them is smaller than 10"
                        ('.call',
                            ('.lt', ('.int', 10)),
                            ('.count', ('.between', ('.list', ('.arg', ('.int', 1)), ('.arg', ('.int', 2))))))))),
        ),
    ),
]

In [9]:
from snorkel.contrib.babble import link_explanation_candidates

# Load every candidate of this class from the database and report how many
# there are (7615 in the recorded run).
candidates = session.query(candidate_class).all()
print(len(candidates))
# Link each explanation to its candidate from the loaded set.
# NOTE(review): `explanations` is rebound to the function's result, so
# re-running this cell feeds already-linked explanations back in; re-run the
# cell that defines `explanations` first if a clean re-link is needed.
explanations = link_explanation_candidates(explanations, candidates)

7615
Building list of target candidate ids...
Collected 1 unique target candidate ids from 1 explanations.
Gathering desired candidates...
Found 1/1 desired candidates
Linking explanations to candidates...
Linked 1/1 explanations


In [10]:
# Parse the linked explanations; return_parses=True is passed so the parse
# objects (with .semantics, .function and .explanation, used below) come back.
parses = semparser.parse(explanations, return_parses=True)
# One explanation can produce several candidate parses (10 in the recorded run).
print(len(parses))

10


In [11]:
# Show the candidate the first parse's explanation is linked to: its two
# spans, then the text of the parent context they come from.
c = parses[0].explanation.candidate
print(c[0].get_span(), c[1].get_span())
print(c.get_parent().text)
print("")

# Run each parse's labeling function on its own explanation's candidate and
# report whether it fires, followed by the readable translation and the raw
# semantics tree of that parse.
for parse in parses:
    print("PASS!" if parse.function(parse.explanation.candidate) else "FAIL!")
    print(semparser.grammar.translate(parse.semantics))
    print(parse.semantics)
    print("")

(u'cytokine', u'JNK1')
These findings demonstrate that the negative regulation of Th2 cytokine production by the JNK1 signaling pathway is essential for generating Th1-polarized immunity against intracellular pathogens, such as Leishmania major.

FAIL!
return 1 if (any([s.in(text(between([X,Y]))) for s in user_list('prep')]) and ('negative'.in(text(between([X,Y]))) and count(between([X,Y])).(< 10))) else 0
('.root', ('.label', ('.bool', True), ('.and', ('.any', ('.map', ('.in', ('.extract_text', ('.between', ('.list', ('.arg', ('.int', 1)), ('.arg', ('.int', 2)))))), ('.user_list', ('.string', u'prep')))), ('.and', ('.call', ('.in', ('.extract_text', ('.between', ('.list', ('.arg', ('.int', 1)), ('.arg', ('.int', 2)))))), ('.string', u'negative')), ('.call', ('.lt', ('.int', 10)), ('.count', ('.between', ('.list', ('.arg', ('.int', 1)), ('.arg', ('.int', 2))))))))))

FAIL!
return 1 if (any([s.in(text(between([X,Y]))) for s in user_list('prep')]) and ('negative'.(.eq(u) for at least one

In [12]:
# Debugging aid (disabled): uncomment the next line to call the grammar's print_chart().
# semparser.grammar.print_chart()