implement StubRelationExtractor that just uses an edge_generator — MUCH SIMPLER 😀🎉 — Rostlab/LocText#6
Rostlab · Nov 14, 2016 · 8a2e857 · 8a2e857
1 parent 1779761
commit 8a2e857
Showing 1 changed file with 33 additions and 14 deletions.
diff --git a/nalaf/learning/taggers.py b/nalaf/learning/taggers.py
@@ -2,6 +2,7 @@
 from nalaf.structures.data import Relation
 from nalaf.preprocessing.spliters import NLTKSplitter
 from nalaf.preprocessing.tokenizers import NLTK_TOKENIZER
+from nalaf.preprocessing.edges import SentenceDistanceEdgeGenerator
 
 
 class Annotator(object):
@@ -120,13 +121,20 @@ def annotate(self, dataset):
         pass
 
 
-class StubSameSentenceRelationExtractor(RelationExtractor):
-    # TODO reuse distance-sentence edge generator _and then_ make all those edges a relation
+class StubRelationExtractor(RelationExtractor):
+    """
+    Stub RelationExtractor to mark as a true relationship all edges generated by the given `edge_generator`.
+
+    See `StubSameSentenceRelationExtractor`, which is mere sugar code to use the edge generator that generates
+    all edges (and therefore relationships) between all pairs of class-chosen entities contained in the same sentence.
+    """
+
+    def __init__(self, edge_generator):
+        super().__init__(edge_generator.entity1_class, edge_generator.entity2_class, edge_generator.relation_type)
 
-    def __init__(self, entity1_class, entity2_class, relation_type):
-        super().__init__(entity1_class, entity2_class, relation_type)
         self.sentence_splitter = NLTKSplitter()
         self.tokenizer = NLTK_TOKENIZER
+        self.edge_generator = edge_generator
 
 
     def tag(self, dataset):
@@ -136,20 +144,31 @@ def tag(self, dataset):
 
 
     def annotate(self, dataset):
-        from itertools import product
-
         self.sentence_splitter.split(dataset)
         self.tokenizer.tokenize(dataset)
+        self.edge_generator.generate(dataset)
 
-        for document in dataset:
-            for part in document:
-                for ann_1, ann_2 in product(
-                    (a for a in part.annotations if a.class_id == self.entity1_class),
-                    (a for a in part.annotations if a.class_id == self.entity2_class)):
+        for edge in dataset.edges():
+            edge.target = +1
 
-                    if part.get_sentence_index_for_annotation(ann_1) == part.get_sentence_index_for_annotation(ann_2):
-                        rel = Relation(self.relation_type, ann_1, ann_2)
-                        part.predicted_relations.append(rel)
+        dataset.form_predicted_relations()
+
+
+class StubSameSentenceRelationExtractor(StubRelationExtractor):
+
+    def __init__(self, entity1_class, entity2_class, relation_type):
+        edge_generator = SentenceDistanceEdgeGenerator(entity1_class, entity2_class, relation_type, distance=0)
+        super().__init__(edge_generator)
+
+
+    def tag(self, dataset):
+        import warnings
+        warnings.warn('Use the method: annotate', DeprecationWarning)
+        return self.annotate(dataset)
+
+
+    def annotate(self, dataset):
+        super().annotate(dataset)
 
 
 class StubSameDocumentPartRelationExtractor(RelationExtractor):