Skip to content

Commit

Permalink
implement StubRelationExtractor that just uses an edge_generator — MU…
Browse files Browse the repository at this point in the history
…CH SIMPLER 😀🎉 — Rostlab/LocText#6
  • Loading branch information
juanmirocks committed Nov 14, 2016
1 parent 1779761 commit 8a2e857
Showing 1 changed file with 33 additions and 14 deletions.
47 changes: 33 additions & 14 deletions nalaf/learning/taggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from nalaf.structures.data import Relation
from nalaf.preprocessing.spliters import NLTKSplitter
from nalaf.preprocessing.tokenizers import NLTK_TOKENIZER
from nalaf.preprocessing.edges import SentenceDistanceEdgeGenerator


class Annotator(object):
Expand Down Expand Up @@ -120,13 +121,20 @@ def annotate(self, dataset):
pass


class StubSameSentenceRelationExtractor(RelationExtractor):
# TODO reuse distance-sentence edge generator _and then_ make all those edges a relation
class StubRelationExtractor(RelationExtractor):
"""
Stub RelationExtractor to mark as a true relationship all edges generated by the given `edge_generator`.
See `StubSameSentenceRelationExtractor`, which is mere sugar code to use the edge generator that generates
all edges (and therefore relationships) between all pairs of class-chosen entities contained in the same sentence.
"""

def __init__(self, edge_generator):
super().__init__(edge_generator.entity1_class, edge_generator.entity2_class, edge_generator.relation_type)

def __init__(self, entity1_class, entity2_class, relation_type):
super().__init__(entity1_class, entity2_class, relation_type)
self.sentence_splitter = NLTKSplitter()
self.tokenizer = NLTK_TOKENIZER
self.edge_generator = edge_generator


def tag(self, dataset):
Expand All @@ -136,20 +144,31 @@ def tag(self, dataset):


def annotate(self, dataset):
from itertools import product

self.sentence_splitter.split(dataset)
self.tokenizer.tokenize(dataset)
self.edge_generator.generate(dataset)

for document in dataset:
for part in document:
for ann_1, ann_2 in product(
(a for a in part.annotations if a.class_id == self.entity1_class),
(a for a in part.annotations if a.class_id == self.entity2_class)):
for edge in dataset.edges():
edge.target = +1

if part.get_sentence_index_for_annotation(ann_1) == part.get_sentence_index_for_annotation(ann_2):
rel = Relation(self.relation_type, ann_1, ann_2)
part.predicted_relations.append(rel)
dataset.form_predicted_relations()


class StubSameSentenceRelationExtractor(StubRelationExtractor):
    """
    Convenience `StubRelationExtractor` preconfigured with a `SentenceDistanceEdgeGenerator`.

    The edge generator is built from the given entity classes and relation type with `distance=0`,
    which (per the `StubRelationExtractor` docstring) pairs the class-chosen entities contained
    in the same sentence. All remaining behavior is delegated to the parent class.
    """

    def __init__(self, entity1_class, entity2_class, relation_type):
        """
        Build the sentence-distance edge generator and hand it over to the parent extractor.

        :param entity1_class: class id of the first entity of the relation
        :param entity2_class: class id of the second entity of the relation
        :param relation_type: type assigned to the relations
        """
        super().__init__(
            SentenceDistanceEdgeGenerator(entity1_class, entity2_class, relation_type, distance=0)
        )


    def tag(self, dataset):
        """
        Deprecated alias of `annotate`: emits a `DeprecationWarning` and then delegates to it.
        """
        from warnings import warn

        warn('Use the method: annotate', DeprecationWarning)
        return self.annotate(dataset)


    def annotate(self, dataset):
        """
        Annotate the dataset by delegating entirely to the parent implementation.
        """
        super().annotate(dataset)

class StubSameDocumentPartRelationExtractor(RelationExtractor):
Expand Down

0 comments on commit 8a2e857

Please sign in to comment.