In [7]:
# add parent dir to sys path for import of modules
import os
import sys
parentdir = os.path.abspath(os.path.join(os.path.abspath(''), os.pardir))
sys.path.insert(0, parentdir) 

In [9]:
from PetReader import pet_reader
from petreader import labels
from petreader.labels import *
from utils import ROOT_DIR

## Single Doc

In [5]:
def print_doc_details(doc_name):
    print(" TEXT ".center(100, '-'))
    for i, s in enumerate(pet_reader.get_doc_sentences(doc_name)):
        print(i, ' '.join(s))
    print()


    print(" ACTIVITIES ".center(100, '-'))
    for i, a in enumerate(pet_reader.token_dataset.GetActivities(doc_name)):
        print(i, a)
    print()

    print(" XOR GATEWAYS ".center(100, '-'))
    for i, gs in enumerate(pet_reader.token_dataset.GetXORGateways(doc_name)):
        if gs: 
            print(i, gs)
    print()

    print(" AND GATEWAYS ".center(100, '-'))
    for i, gs in enumerate(pet_reader.token_dataset.GetANDGateways(doc_name)):
        if gs:
            print(i, gs)
    print()

    
    doc_relations = pet_reader.relations_dataset.GetRelations(pet_reader.get_document_number(doc_name))
    doc_flow_relations, doc_same_gateway_relations = doc_relations[labels.FLOW], doc_relations[labels.SAME_GATEWAY]
    print(" SAME GATEWAY RELATION ".center(100, '-'))
    for i, same_gateway_relation in enumerate(doc_same_gateway_relations):
        for key, value in same_gateway_relation.items():
            print(f"{key}: {value}")
        print()
    print(" FLOW RELATION ".center(100, '-'))
    for i, flow_relation in enumerate(doc_flow_relations):
        for key, value in flow_relation.items():
            print(f"{key}: {value}")
        print()

In [17]:
print_doc_details('doc-9.5')

----------------------------------------------- TEXT -----------------------------------------------
0 After the Expense Report is received , a new account must be created if the employee does not already have one .
1 The report is then reviewed for automatic approval .
2 Amounts under $200 are automatically approved , whereas amounts equal to or over $200 require approval of the supervisor .
3 In case of rejection , the employee must receive a rejection notice by email .
4 Otherwise , the reimbursement goes to the employees direct deposit bank account .
5 If the request is not completed in 7 days , then the employee must receive an approval in progress email .
6 If the request is not finished within 30 days , then the process is stopped and the employee receives an email cancellation notice and must re-submit the expense report .

-------------------------------------------- ACTIVITIES --------------------------------------------
0 [['received'], ['created']]
1 [['reviewed']]
2 [['app

## Sentences around keyword

In [35]:
for doc_name in pet_reader.document_names:
    sentences = pet_reader.get_doc_sentences(doc_name)
    
    for i, gs in enumerate(pet_reader.token_dataset.GetANDGateways(doc_name)):
        if gs:
            print(doc_name, i, gs)
    
    
#     for i, s in enumerate(sentences):
#         if 'meantime' in [t.lower() for t in s]:
#             print(f" {doc_name} ".center(100, '-'))
#             print(' '.join(sentences[max(i - 1, 0)]))
#             print(i, ' '.join(s))
#             print(' '.join(sentences[min(i + 1, len(sentences) - 1)]))
            
        

doc-2.2 25 [['At', 'the', 'same', 'time']]
doc-1.1 9 [['In', 'the', 'meantime']]
doc-1.3 6 [['While']]
doc-3.2 2 [['meantime']]
doc-1.4 7 [['While']]
doc-3.5 8 [['Meantime']]
doc-2.1 34 [['two', 'concurrent', 'activities', 'are', 'triggered']]
doc-1.2 3 [['whereas']]


## Contradictory Gateways

In [13]:
same_gateway_relations = []
same_gateway_relations_doc_names = []
for doc_name in pet_reader.document_names:
    doc_relations = pet_reader.relations_dataset.GetRelations(pet_reader.get_document_number(doc_name))
    doc_same_gateway_relations = doc_relations[labels.SAME_GATEWAY]
    print(doc_name, len(doc_same_gateway_relations))
    same_gateway_relations.extend(doc_same_gateway_relations)
    same_gateway_relations_doc_names.extend([doc_name for i in range(len(doc_same_gateway_relations))])

doc-9.5 2
doc-9.1 2
doc-3.8 1
doc-10.4 0
doc-1.4 0
doc-10.1 0
doc-3.6 2
doc-10.12 0
doc-8.3 1
doc-3.1 0
doc-1.1 1
doc-10.7 0
doc-10.14 3
doc-5.3 2
doc-7.1 1
doc-10.13 0
doc-10.6 0
doc-3.2 1
doc-6.1 2
doc-1.3 0
doc-4.1 0
doc-3.5 1
doc-6.2 0
doc-8.1 0
doc-9.3 0
doc-2.1 6
doc-10.5 0
doc-10.3 0
doc-5.4 2
doc-10.10 0
doc-10.9 0
doc-5.2 1
doc-9.4 0
doc-10.11 0
doc-2.2 3
doc-8.2 0
doc-6.3 0
doc-9.2 2
doc-3.7 0
doc-1.2 2
doc-5.1 2
doc-10.8 0
doc-10.2 2
doc-3.3 1
doc-6.4 2


In [43]:
# GOLD PAIRS
contradictory_gateways = [(' '.join([t.lower() for t in sg[labels.SOURCE_ENTITY]]), 
                           ' '.join([t.lower() for t in sg[labels.TARGET_ENTITY]])) for sg in same_gateway_relations]
print(len(contradictory_gateways))
contradictory_gateways = sorted(list(set(contradictory_gateways)), key=lambda pair: pair[0])
print(len(contradictory_gateways))   

for a, b in contradictory_gateways:
    print(a, " | ", b)

42
14
either  |  or
for the case  |  for the case
if  |  otherwise
if  |  if
in case  |  in case
in case  |  otherwise
in case  |  if
in case of  |  otherwise
in case of  |  in case of
in the case of  |  if
otherwise  |  if
should  |  if
sometimes  |  sometimes
under certain circumstances  |  otherwise


In [49]:
 with open(os.path.join(ROOT_DIR, 'data/keywords/contradictory_gateways_gold_new.txt'), 'w') as file:
        for pair in contradictory_gateways:
            file.write("%s\n" % f"{''.join(pair[0])};{''.join(pair[1])}")
        
        

In [39]:
print(len(same_gateway_relations))
for i, same_gateway_relation in enumerate(same_gateway_relations):
        for key, value in same_gateway_relation.items():
            print(f"{key}: {value}")
        print()

42
source-head-sentence-ID: 1
source-head-word-ID: 0
source-entity-type: XOR Gateway
source-entity: ['If']
target-head-sentence-ID: 2
target-head-word-ID: 0
target-entity-type: XOR Gateway
target-entity: ['If']

source-head-sentence-ID: 2
source-head-word-ID: 0
source-entity-type: XOR Gateway
source-entity: ['If']
target-head-sentence-ID: 3
target-head-word-ID: 0
target-entity-type: XOR Gateway
target-entity: ['If']

source-head-sentence-ID: 11
source-head-word-ID: 0
source-entity-type: XOR Gateway
source-entity: ['In', 'case', 'of']
target-head-sentence-ID: 13
target-head-word-ID: 0
target-entity-type: XOR Gateway
target-entity: ['In', 'case', 'of']

source-head-sentence-ID: 17
source-head-word-ID: 0
source-entity-type: XOR Gateway
source-entity: ['In', 'the', 'case', 'of']
target-head-sentence-ID: 19
target-head-word-ID: 0
target-entity-type: XOR Gateway
target-entity: ['If']

source-head-sentence-ID: 20
source-head-word-ID: 19
source-entity-type: XOR Gateway
source-entity: ['either'

In [15]:
# HOW OFTEN > 2 BRANCHES
print(len(len(same_gateway_relations)))
involved_gateways = {}
for sg, doc_name in zip(same_gateway_relations, same_gateway_relations_doc_names):
    g1 = f"{doc_name}-{sg[SOURCE_SENTENCE_ID]}-{sg[SOURCE_HEAD_TOKEN_ID]}-{sg[SOURCE_ENTITY]}"
    g2 = f"{doc_name}-{sg[TARGET_SENTENCE_ID]}-{sg[TARGET_HEAD_TOKEN_ID]}-{sg[TARGET_ENTITY]}"
    if g1 in involved_gateways:
        involved_gateways[g1] += 1
    else:
        involved_gateways[g1] = 1
    if g2 in involved_gateways:
        involved_gateways[g2] += 1
    else:
        involved_gateways[g2] = 1
            
involved_gateways = dict(sorted(involved_gateways.items(), key=lambda item: item[1], reverse=True))
for g, number in involved_gateways.items():
    print(g, number)

doc-9.5-4-0-['Otherwise'] 2
doc-9.1-2-0-['If'] 2
doc-10.14-1-0-['If'] 2
doc-10.14-2-0-['If'] 2
doc-5.3-4-0-['If'] 2
doc-6.1-9-2-['if'] 2
doc-2.1-7-0-['In', 'case'] 2
doc-2.1-22-0-['For', 'the', 'case'] 2
doc-9.2-2-0-['If'] 2
doc-5.1-3-0-['If'] 2
doc-10.2-8-0-['If'] 2
doc-6.4-2-9-['sometimes'] 2
doc-9.5-3-0-['In', 'case', 'of'] 1
doc-9.5-5-0-['If'] 1
doc-9.1-1-0-['If'] 1
doc-9.1-3-0-['If'] 1
doc-3.8-4-0-['If'] 1
doc-3.8-5-0-['Otherwise'] 1
doc-3.6-1-0-['If'] 1
doc-3.6-2-0-['Otherwise'] 1
doc-3.6-5-0-['If'] 1
doc-3.6-6-0-['Otherwise'] 1
doc-8.3-3-0-['Under', 'certain', 'circumstances'] 1
doc-8.3-3-11-['otherwise'] 1
doc-1.1-6-0-['If'] 1
doc-1.1-7-0-['If'] 1
doc-10.14-0-0-['If'] 1
doc-10.14-3-0-['If'] 1
doc-5.3-3-0-['If'] 1
doc-5.3-5-0-['If'] 1
doc-7.1-4-0-['If'] 1
doc-7.1-6-0-['Otherwise'] 1
doc-3.2-1-0-['If'] 1
doc-3.2-1-11-['otherwise'] 1
doc-6.1-6-0-['Should'] 1
doc-6.1-10-22-['if'] 1
doc-3.5-5-0-['If'] 1
doc-3.5-6-0-['Otherwise'] 1
doc-2.1-6-0-['In', 'case'] 1
doc-2.1-7-26-['otherwis

In [25]:
same_xor_gateway_threshold = 1

# DISTANCES BETWEEN RELATED GATEWAYS
sentence_distances_between = []
for sg in same_gateway_relations:
    print(abs(sg[SOURCE_SENTENCE_ID] - sg[TARGET_SENTENCE_ID]), abs(sg[SOURCE_SENTENCE_ID] - sg[TARGET_SENTENCE_ID]) > same_xor_gateway_threshold,
          sg[SOURCE_SENTENCE_ID], sg[TARGET_SENTENCE_ID])
    sentence_distances_between.append(abs(sg[SOURCE_SENTENCE_ID] - sg[TARGET_SENTENCE_ID]))
    

import statistics
print(statistics.mean(sentence_distances_between))
print(statistics.median(sentence_distances_between))
print(sentence_distances_between)

1 False 3 4
1 False 4 5
1 False 1 2
1 False 2 3
1 False 4 5
1 False 1 2
1 False 5 6
0 False 3 3
1 False 6 7
1 False 0 1
1 False 1 2
1 False 2 3
1 False 3 4
1 False 4 5
2 True 4 6
0 False 1 1
3 True 6 9
1 False 9 10
1 False 5 6
1 False 6 7
0 False 7 7
1 False 12 13
1 False 15 16
1 False 21 22
3 True 22 25
1 False 6 7
1 False 9 10
1 False 3 4
2 True 11 13
2 True 17 19
0 False 20 20
1 False 1 2
2 True 2 4
0 False 1 1
0 False 5 5
1 False 2 3
1 False 3 4
1 False 7 8
1 False 8 9
1 False 3 4
0 False 2 2
0 False 2 2
1
1.0
[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 1, 0, 1, 1, 1, 3, 1, 1, 1, 2, 2, 0, 1, 2, 0, 0, 1, 1, 1, 1, 1, 0, 0]
