In [1]:
import os
print(os.getcwd())
# Location of the DomiKnowS root, as an absolute or relative path.
# Set the DOMIKNOWS_ROOT environment variable to override it without editing
# this cell; otherwise change the fallback literal below.
# A relative path is resolved against the working directory printed above.
root = os.environ.get('DOMIKNOWS_ROOT') or '/home/hfaghihi/Framework/DomiKnowS'

/home/hfaghihi/Framework/DomiKnowS/examples/ACE05


In [2]:
import sys
# Make the DomiKnowS packages importable. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
if root not in sys.path:
    sys.path.append(root)

In [3]:
from typing import Any
from regr.sensor.pytorch.sensors import ReaderSensor, ConstantSensor, FunctionalSensor, FunctionalReaderSensor, TorchEdgeSensor


class MultiLevelReaderSensor(ConstantSensor):
    """Reader sensor that extracts values from nested reader items by key path.

    ``keyword`` is either a single key path or a tuple of key paths. A path
    containing ``"."`` (e.g. ``"spans.*.mentions.*.extent.start"``) is split
    on the dots and followed one segment at a time; every ``"*"`` segment fans
    out over all keys of the mapping(s) reached so far, and the collected
    values form one flat list. A path without ``"."`` is used as a plain key.
    """

    def __init__(self, *pres, keyword=None, edges=None, label=False, device='auto'):
        """Create the sensor without data; ``fill_data`` populates it later.

        Args:
            *pres, edges, label, device: forwarded unchanged to
                ``ConstantSensor.__init__`` (together with ``data=None``).
            keyword: a key path string, or a tuple of key path strings.
        """
        super().__init__(*pres, data=None, edges=edges, label=label, device=device)
        self.keyword = keyword

    def fill_data(self, data_item):
        """Read this sensor's keyword(s) from ``data_item`` into ``self.data``.

        For a tuple keyword, ``self.data`` becomes a tuple with one entry per
        key path; otherwise it is the single fetched value.

        Raises:
            KeyError: if a requested key is missing from ``data_item``.
        """
        try:
            if isinstance(self.keyword, tuple):
                # Materialize eagerly: a lazy generator would defer the lookup
                # (so a KeyError would escape this try block) and would never
                # satisfy the isinstance(..., tuple) check in forward().
                self.data = tuple(self.fetch_key(data_item, keyword) for keyword in self.keyword)
            else:
                self.data = self.fetch_key(data_item, self.keyword)
        except KeyError as e:
            raise KeyError("The key you requested from the reader doesn't exist: %s" % str(e)) from e

    def fetch_key(self, data_item, key):
        """Resolve one key path against ``data_item``.

        Args:
            data_item: the (possibly nested) item produced by the reader.
            key: a plain key, or a dot-separated path whose ``"*"`` segments
                expand over every key of the mapping(s) at that level.

        Returns:
            ``data_item[key]`` for a plain key. For a dotted path, the value
            reached by the path, or a flat list of values once at least one
            ``"*"`` segment has been expanded.
        """
        if "." not in key:
            return data_item[key]

        items = data_item
        expanded = False  # becomes True once a "*" has turned `items` into a list
        for part in key.split("."):
            if part == "*":
                if expanded:
                    # Expand every mapping in the list and flatten the result.
                    items = [item[name] for item in items for name in item.keys()]
                else:
                    items = [items[name] for name in items.keys()]
                    expanded = True
            elif expanded:
                items = [item[part] for item in items]
            else:
                items = items[part]
        return items

    def forward(self, *_) -> Any:
        """Pass the stored data through ``ConstantSensor.forward``.

        With a tuple keyword and tuple data, each element is forwarded
        separately and the results are returned as a tuple; otherwise the
        stored data is forwarded as a single value.
        """
        # Bind the parent method here: zero-argument super() does not work
        # inside a generator expression, which has its own function scope.
        parent_forward = super().forward
        if isinstance(self.keyword, tuple) and isinstance(self.data, tuple):
            return tuple(parent_forward(data) for data in self.data)
        return parent_forward(self.data)

Log file for dataNode is in: /home/hfaghihi/Framework/DomiKnowS/examples/ACE05/datanode.log


In [4]:
import torch

from regr.program import POIProgram
from regr.sensor.pytorch.sensors import ReaderSensor, ConstantSensor, FunctionalSensor, FunctionalReaderSensor, TorchEdgeSensor
from regr.sensor.pytorch.learners import ModuleLearner
from regr.sensor.pytorch.query_sensor import CandidateSensor, CandidateRelationSensor

from sensors.tokenizers import TokenizerEdgeSensor
from models import Tokenizer, BERT, SpanClassifier, cartesian_concat, token_to_span_candidate, span_candidate_emb, span_label, span_emb, find_is_a


def model(graph):
    """Attach sensors and learners to concepts of ``graph`` and build a program.

    Calls ``graph.detach()``, looks up the 'linguistic' and 'ACE05' sub-graphs
    and their concepts, assigns sensors/learners to concept properties, and
    returns ``POIProgram(graph, poi=(token, span_candidate, span,))``.

    Args:
        graph: graph object supporting ``detach()`` and lookup by name;
            it must contain the 'linguistic' and 'ACE05' entries used below.

    Returns:
        The ``POIProgram`` built over ``graph``.
    """
    # presumably clears sensors attached by a previous call — TODO confirm
    graph.detach()

    ling_graph = graph['linguistic']
    ace05_graph = graph['ACE05']
    # NOTE(review): the three ACE05 sub-graphs below are looked up but not used
    # anywhere else in this function.
    entities_graph = ace05_graph['Entities']
    relations_graph = ace05_graph['Relations']
    events_graph = ace05_graph['Events']

    document = ling_graph['document']
    token = ling_graph['token']
    span_candidate = ling_graph['span_candidate']
    span_annotation = ling_graph['span_annotation']
    span = ling_graph['span']
    # First relation returned by relate_to() for each concept pair.
    document_contains_token = document.relate_to(token)[0]
    # NOTE(review): span_contains_token and span_is_span_candidate are not used
    # later in this function.
    span_contains_token = span.relate_to(token)[0]
    span_is_span_candidate = span.relate_to(span_candidate)[0]

    # Document text comes from the reader under the 'text' key; the edge sensor
    # reads document 'index' and writes ('index', 'ids', 'offset') via `to=`.
    document['index'] = ReaderSensor(keyword='text')
    document_contains_token['forward'] = TokenizerEdgeSensor('index', mode='forward', to=('index', 'ids', 'offset'), tokenizer=Tokenizer())
    token['emb'] = ModuleLearner('ids', module=BERT())

    # NOTE(review): 'extent' is assigned twice in a row. Whether the second
    # assignment replaces the first sensor or is added alongside it depends on
    # the concept's item-assignment semantics, which are not visible here —
    # confirm. The constant list looks like placeholder data — verify.
    span_annotation['extent'] = MultiLevelReaderSensor(keyword="spans.*.mentions.*.extent.start")
    span_annotation['extent'] = ConstantSensor(data=["a", "b", "c", "d"])

    program = POIProgram(graph, poi=(token, span_candidate, span,))

    return program


In [7]:
from ace05.reader import Reader, DictReader
import config

# One sensor per field: start and end offsets of every mention head.
sensor = MultiLevelReaderSensor(keyword="spans.*.mentions.*.head.start")
sensor1 = MultiLevelReaderSensor(keyword="spans.*.mentions.*.head.end")
traint_reader = DictReader(config.path, list_path=config.list_path, type='train', status=config.status)
# Fetch the first item once and share it: both sensors are then guaranteed to
# read the same item, and the reader is not iterated a second time.
first_item = next(iter(traint_reader))
sensor.fill_data(first_item)
sensor1.fill_data(first_item)
print(sensor.data)
print(sensor1.data)

[652, 836, 660, 110, 782, 870, 910, 122, 178, 266, 315, 417, 429, 608, 682, 146, 162, 727, 769, 791, 215, 295, 632, 81, 228, 323, 339, 407, 501, 40, 89]
[658, 856, 680, 113, 785, 872, 913, 124, 188, 268, 318, 420, 431, 610, 684, 157, 171, 731, 774, 795, 225, 302, 637, 86, 229, 328, 349, 411, 509, 59, 108]
