In [1]:
# Notebook setup: reload edited modules automatically before each cell runs
# (autoreload mode 2) and render matplotlib figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

from snorkel import SnorkelSession

# Shared Snorkel session; every query in the cells below goes through it.
session = SnorkelSession()

In [2]:
import os

# Sub-directory of results/ into which this run's output files are written.
result_path = 'results/tafamidis97/'

# Create the directory up front so the export step does not depend on it
# already existing (harmless if it does, or if the export helper creates it).
os.makedirs(result_path, exist_ok=True)

In [3]:
from snorkel.models import Document, Sentence

# Report how many documents and sentences the session currently holds.
document_total = session.query(Document).count()
print("Documents:", document_total)

sentence_query = session.query(Sentence)
sentence_total = sentence_query.count()
print("Sentences:", sentence_total)

# Materialise every sentence; later cells use this list for candidate
# extraction and for the TSV export.
print("Loading all sentences from db...")
all_sents = sentence_query.all()
print("Loading complete!")


loaded_sentence_count = len(all_sents)
print('Amount of sentences: {}'.format(loaded_sentence_count))

Documents: 97
Sentences: 1038
Loading all sentences from db...
Loading complete!
Amount of sentences: 1038


In [4]:
from snorkel.models import Candidate, candidate_subclass
from snorkel.candidates import PretaggedCandidateExtractor

# Binary candidate type pairing a 'chemical' argument with a 'gene' argument,
# plus an extractor that builds such candidates from the 'Chemical' and
# 'Gene' tags.
ChemicalGeneInteraction = candidate_subclass('ChemicalGeneInteraction', ['chemical', 'gene'])
candidate_extractor = PretaggedCandidateExtractor(ChemicalGeneInteraction, ['Chemical', 'Gene'])

# Only one split is used here: all sentences go into split 0.
sentence_splits = [all_sents]
for k, sents in enumerate(sentence_splits):
    # clear=True is passed so a re-run replaces candidates rather than adding to them.
    candidate_extractor.apply(sents, split=k, clear=True)
    candidates_in_split = session.query(ChemicalGeneInteraction).filter(ChemicalGeneInteraction.split == k)
    print("Number of candidates:", candidates_in_split.count())

  0%|          | 0/1038 [00:00<?, ?it/s]

Clearing existing...
Running UDF...


100%|██████████| 1038/1038 [00:01<00:00, 972.35it/s]

Number of candidates: 258





In [5]:
from snorkel.learning.pytorch import LSTM

# Load the saved LSTM model for the chemical-gene relation.
LSTM_THREADS = 10
LSTM_MODEL_NAME = 'chemical_gene_interaction.lstm'

lstm = LSTM(n_threads=LSTM_THREADS)
lstm.load(LSTM_MODEL_NAME)

[LSTM] Loaded model <chemical_gene_interaction.lstm>


In [6]:
# Fetch every candidate of split 0, ordered by id so the list order is stable.
print("Loading all candidates from db...")
split_zero_query = (
    session.query(ChemicalGeneInteraction)
    .filter(ChemicalGeneInteraction.split == 0)
    .order_by(ChemicalGeneInteraction.id)
)
all_cands = split_zero_query.all()
candidate_total = len(all_cands)
print("{} candidates load from db!".format(candidate_total))

Loading all candidates from db...
258 candidates load from db!


In [7]:
# Score all loaded candidates with the LSTM and store the resulting marginals
# via the session (the run below reports "Saved 258 marginals").
# %time prints the CPU and wall time of this single call.
print("Applying LSTM to candidates...")
%time lstm.save_marginals(session, all_cands)
print("LSTM applied!")

Applying LSTM to candidates...
Saved 258 marginals
CPU times: user 2.54 s, sys: 112 ms, total: 2.66 s
Wall time: 845 ms
LSTM applied!




In [8]:
from ksnorkel import KSUtils

header_str = '{}\t{}\t{}\t{}\t{}\t{}\t{}'.format('document_id', 'sentence_id', 'cand_id','chemical_cid', 'chemical_span', 'gene_cid', 'gene_span')
%time KSUtils.save_binary_relation_as_tsv(result_path + "chemical_gene_interaction.tsv", session, all_cands, all_sents, header_str, 'chemical_cid', 'gene_cid')

Storing candidate labels into result file: results/tafamidis97/chemical_gene_interaction.tsv
Amount of candidates: 258
Load mariginals from db...
Marginals loaded!
Building sentence to document map...
Map built!
Saved 29 positive predicitions for binary relation!
CPU times: user 3.25 s, sys: 16 ms, total: 3.26 s
Wall time: 1.12 s
