In [1]:
# add parent dir to sys path for import of modules
import json
import os
import sys
# Locate the project root: walk upwards from the current working directory
# until a directory containing README.md is found.
# os.getcwd() is used because it returns a str; str(os.getcwdb()) yields the
# repr of a bytes object ("b'/path'"), which is not a usable path.
start_dir = os.getcwd()
parent_dir = start_dir
while not os.path.exists(os.path.join(parent_dir, "README.md")):
    next_dir = os.path.abspath(os.path.join(parent_dir, os.pardir))
    if next_dir == parent_dir:
        # Reached the filesystem root without finding README.md: stop instead
        # of looping forever and fall back to the starting directory.
        parent_dir = start_dir
        break
    parent_dir = next_dir
sys.path.insert(0, parent_dir)

In [36]:
import logging
import itertools

from petreader.labels import *

from PetReader import pet_reader


# Logger registered under the name 'Keywords Same Gateway Filtered Approach'.
# NOTE(review): the cells visible here report via print() and do not reference this logger.
logger = logging.getLogger('Keywords Same Gateway Filtered Approach')

In [22]:
# relation type labels stored in the generated relation tuples
DF = "directly_following"
EXCLUSIVE = "exclusive"
CONCURRENT = "concurrent"

# dictionary keys for the two endpoints of a transformed relation
SOURCE, TARGET = "source", "target"

In [90]:
def transform_relations(relations):
    """
    convert raw relation records into dicts with one entity tuple per endpoint
    each tuple is (sentence id, head token id, entity, entity type), read from the record
    via the SOURCE_* / TARGET_* label keys and stored under the SOURCE / TARGET key
    """
    transformed = []
    for relation in relations:
        source_entity = (relation[SOURCE_SENTENCE_ID], relation[SOURCE_HEAD_TOKEN_ID],
                         relation[SOURCE_ENTITY], relation[SOURCE_ENTITY_TYPE])
        target_entity = (relation[TARGET_SENTENCE_ID], relation[TARGET_HEAD_TOKEN_ID],
                         relation[TARGET_ENTITY], relation[TARGET_ENTITY_TYPE])
        transformed.append({SOURCE: source_entity, TARGET: target_entity})
    return transformed

def get_linked_activities(gateway, flow_relations):
    """
    collect the activities that directly follow the given gateway
    i.e. the targets of all flow relations that start at the gateway and whose
    entity type (tuple index 3) is ACTIVITY, in flow_relations order
    """
    activities = []
    for flow in flow_relations:
        if flow[SOURCE] != gateway:
            continue
        successor = flow[TARGET]
        if successor[3] == ACTIVITY:
            activities.append(successor)
    return activities

def get_linked_activities_via_condition(gateway, flow_relations):
    """
    collect the activities reached from the given gateway through a condition specification
    (gateway -> condition specification -> activity)

    for every condition specification that follows the gateway, the first activity that
    follows this condition specification (in flow_relations order) is returned

    condition specifications without a following activity are skipped and reported as a
    warning; an unconditional [0] lookup would raise an IndexError for them

    flow_relations is iterated repeatedly and therefore has to be a re-iterable sequence
    """
    results = []
    for r in flow_relations:
        if r[SOURCE] != gateway or r[TARGET][3] != CONDITION_SPECIFICATION:
            continue
        condition = r[TARGET]
        # first activity directly following the condition specification, None if there is none
        activity = next((r2[TARGET] for r2 in flow_relations
                         if r2[SOURCE] == condition and r2[TARGET][3] == ACTIVITY), None)
        if activity is None:
            logging.getLogger(__name__).warning(
                "condition specification %s of gateway %s has no following activity", condition, gateway)
            continue
        results.append(activity)
    return results

def get_sg_gateways(gateway, sg_relations):
    """
    search for gateways that are related to the given gateway via a same gateway relation
    search is conducted recursively to support multi branch gateways (>2 branches)

    returns the related gateways in depth first discovery order; the given gateway itself
    is never part of the result and every gateway is reported at most once, so cyclic
    same gateway relations cannot lead to endless recursion
    """
    # gateways are tuples that contain lists and are therefore not hashable -> list, not set
    visited = [gateway]
    results = []

    def _collect(current):
        # follow every same gateway relation that starts at the current gateway
        for sg in sg_relations:
            if sg[SOURCE] == current and sg[TARGET] not in visited:
                visited.append(sg[TARGET])
                results.append(sg[TARGET])
                _collect(sg[TARGET])

    _collect(gateway)
    return results


def data_generation(only_documents=('doc-3.2',), skipped_documents=('doc-1.1',)):
    """
    derive activity relations (directly following / exclusive / concurrent) from the flow
    and same gateway relations of the documents provided by pet_reader

    intermediate results are printed while the relations are processed

    :param only_documents: collection of document names to process; None processes every
                           document that is not skipped (the default restricts the run to 'doc-3.2')
    :param skipped_documents: collection of document names that are never processed;
                              None skips nothing
    :return: list of tuples (doc_name, source, target, relation type, comment) without
             duplicates, sorted by sentence id and head token id of the source
    """
    # data format ? -> (doc_name, (a1), (a2), type, comment)
    # split/merge points are represented as directly follow relations
    relations = []

    for doc_name in pet_reader.document_names:

        # document selection
        if skipped_documents is not None and doc_name in skipped_documents:
            continue
        if only_documents is not None and doc_name not in only_documents:
            continue

        # 1) Search for exclusive relations using XOR gateways
        doc_relations = pet_reader.relations_dataset.GetRelations(pet_reader.get_document_number(doc_name))
        flow_relations = transform_relations(doc_relations[FLOW])
        same_gateway_relations = transform_relations(doc_relations[SAME_GATEWAY])

        print(" FLOW RELATIONS ".center(100, '-'))

        for i, f in enumerate(flow_relations):
            print("\n")
            print(i, f[SOURCE], f[TARGET])

            # a) DIRECTLY FOLLOWING RELATIONS
            if f[SOURCE][3] == f[TARGET][3] == ACTIVITY:
                relations.append((doc_name, f[SOURCE], f[TARGET], DF, "normal df"))

            # b) GATEWAY RELATIONS
            if f[SOURCE][3] == ACTIVITY and f[TARGET][3] in [XOR_GATEWAY, AND_GATEWAY]:
                gateway = f[TARGET]
                # relation type between activities of different branches of this gateway
                branch_type = EXCLUSIVE if gateway[3] == XOR_GATEWAY else CONCURRENT

                # extract activities to which the gateway refers

                # - 1) in case of direct activity link without condition and same gateway
                # cases: exclusive 'or' gateways || parallel gateways
                linked_activities = get_linked_activities(gateway, flow_relations)
                # add relations of activities before to gateway activities via DF
                for a in linked_activities:
                    relations.append((doc_name, f[SOURCE], a, DF, "g -> a"))
                # add relations between gateway activities
                for a1, a2 in itertools.combinations(linked_activities, 2):
                    relations.append((doc_name, a1, a2, branch_type, "branches"))

                # - 2) in case of indirect link via condition specification and same gateway relations
                whole_gateway_activities = []
                condition_spec_linked = get_linked_activities_via_condition(gateway, flow_relations)
                print(gateway, condition_spec_linked)
                for a in condition_spec_linked:
                    if a:
                        relations.append((doc_name, f[SOURCE], a, DF, "g -> cond -> a"))
                        whole_gateway_activities.append(a)
                    else:
                        print("&&&&&&&& ERROR")

                # detect same gateways and repeat procedure for them
                sg_gateways = get_sg_gateways(gateway, same_gateway_relations)
                for sg_gateway in sg_gateways:
                    print("same gateway", sg_gateway)
                    # directly linked
                    sg_linked_activities = get_linked_activities(sg_gateway, flow_relations)
                    print(sg_linked_activities)
                    for a in sg_linked_activities:
                        relations.append((doc_name, f[SOURCE], a, DF, "g -> sg -> a"))
                        whole_gateway_activities.append(a)
                    # linked via condition
                    sg_gateway_condition_spec_linked = get_linked_activities_via_condition(sg_gateway, flow_relations)
                    for a in sg_gateway_condition_spec_linked:
                        relations.append((doc_name, f[SOURCE], a, DF, "g -> sg -> cond -> a"))
                        whole_gateway_activities.append(a)

                # create relations between activities of different branches
                # NOTE(review): activities linked directly to the gateway (case 1) are not part of
                # whole_gateway_activities and are thus not combined with same gateway branches - confirm
                for a1, a2 in itertools.combinations(whole_gateway_activities, 2):
                    relations.append((doc_name, a1, a2, branch_type, "branches"))

                # TODO: cases where gateway is at the start of the document
                # TODO: one branch gateways

        print(" SAME GATEWAY RELATIONS ".center(100, '-'))
        for sg in same_gateway_relations:
            print(sg[SOURCE][2], sg[TARGET][2])

    # filter duplicates & sort
    # (the tuples contain lists and are not hashable, hence the list based membership test)
    relations_final = []
    for r in relations:
        if r not in relations_final:
            relations_final.append(r)
    # stable sort by (sentence id, head token id) of the source entity
    relations_final.sort(key=lambda r: (r[1][0], r[1][1]))

    return relations_final
            


    
# run the extraction and print one resulting relation tuple per line below a banner
activity_relations = data_generation()
print(" RESULTS ".center(100, '-'))
for relation in activity_relations:
    print(relation)

------------------------------------------ FLOW RELATIONS ------------------------------------------


0 (0, 14, ['checked'], 'Activity') (1, 0, ['If'], 'XOR Gateway')
(1, 0, ['If'], 'XOR Gateway') [(1, 9, ['initiated'], 'Activity')]
same gateway (1, 11, ['otherwise'], 'XOR Gateway')
[(1, 17, ['tracked'], 'Activity')]


1 (1, 0, ['If'], 'XOR Gateway') (1, 1, ['some', 'files', 'are', 'missing'], 'Condition Specification')


2 (1, 1, ['some', 'files', 'are', 'missing'], 'Condition Specification') (1, 9, ['initiated'], 'Activity')


3 (1, 9, ['initiated'], 'Activity') (2, 15, ['meantime'], 'AND Gateway')
(2, 15, ['meantime'], 'AND Gateway') []


4 (1, 11, ['otherwise'], 'XOR Gateway') (1, 17, ['tracked'], 'Activity')


5 (1, 17, ['tracked'], 'Activity') (2, 15, ['meantime'], 'AND Gateway')
(2, 15, ['meantime'], 'AND Gateway') []


6 (2, 9, ['handed'], 'Activity') (3, 6, ['conducted'], 'Activity')


7 (2, 15, ['meantime'], 'AND Gateway') (2, 9, ['handed'], 'Activity')


8 (2, 15, ['meantim