Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 50 lines (46 sloc) 1.6 KB
#!/usr/bin/env python
from deepdive import *
import ddlib
@tsj_extractor
@returns(lambda
p1_id = "text",
p2_id = "text",
feature = "text",
:[])
def extract(
p1_id = "text",
p2_id = "text",
p1_begin_index = "int",
p1_end_index = "int",
p2_begin_index = "int",
p2_end_index = "int",
doc_id = "text",
sent_index = "int",
tokens = "text[]",
lemmas = "text[]",
pos_tags = "text[]",
ner_tags = "text[]",
dep_types = "text[]",
dep_parents = "int[]",
):
"""
Uses DDLIB to generate features for the spouse relation.
"""
# Create a DDLIB sentence object, which is just a list of DDLIB Word objects
sent = []
for i,t in enumerate(tokens):
sent.append(ddlib.Word(
begin_char_offset=None,
end_char_offset=None,
word=t,
lemma=lemmas[i],
pos=pos_tags[i],
ner=ner_tags[i],
dep_par=dep_parents[i] - 1, # Note that as stored from CoreNLP 0 is ROOT, but for DDLIB -1 is ROOT
dep_label=dep_types[i]))
# Create DDLIB Spans for the two person mentions
p1_span = ddlib.Span(begin_word_id=p1_begin_index, length=(p1_end_index-p1_begin_index+1))
p2_span = ddlib.Span(begin_word_id=p2_begin_index, length=(p2_end_index-p2_begin_index+1))
# Generate the generic features using DDLIB
for feature in ddlib.get_generic_features_relation(sent, p1_span, p2_span):
yield [p1_id, p2_id, feature]