# Key Word Approach
variables with prefix ``doc_`` contain data from the dataset
variables with prefix ``o_`` contain data from own computations

In [1]:
import itertools

from petreader.RelationsExtraction import RelationsExtraction
from petreader.TokenClassification import TokenClassification
from petreader import labels

In [2]:
relations_dataset = RelationsExtraction()
token_dataset = TokenClassification()

Reusing dataset pet (C:\Users\janek\.cache\huggingface\datasets\patriziobellan___pet\relations-extraction\1.0.1\38434e2af57af533c400c8975f37e43c08bb77739085a3c026a862b2efb668d2)




 _______ _     _ _______       _____  _______ _______      ______  _______ _______ _______ _______ _______ _______
    |    |_____| |______      |_____] |______    |         |     \ |_____|    |    |_____| |______ |______    |   
    |    |     | |______      |       |______    |         |_____/ |     |    |    |     | ______| |______    |   
                                                                                                                  
Discover more at: [https://pdi.fbk.eu/pet-dataset/]



  0%|          | 0/1 [00:00<?, ?it/s]

Reusing dataset pet (C:\Users\janek\.cache\huggingface\datasets\patriziobellan___pet\token-classification\1.0.1\38434e2af57af533c400c8975f37e43c08bb77739085a3c026a862b2efb668d2)




 _______ _     _ _______       _____  _______ _______      ______  _______ _______ _______ _______ _______ _______
    |    |_____| |______      |_____] |______    |         |     \ |_____|    |    |_____| |______ |______    |   
    |    |     | |______      |       |______    |         |_____/ |     |    |    |     | ______| |______    |   
                                                                                                                  
Discover more at: [https://pdi.fbk.eu/pet-dataset/]



  0%|          | 0/1 [00:00<?, ?it/s]

## 1 Prepare Document Data

### 1.1 Read Example Doc

In [3]:
doc_id = 0
doc_name = token_dataset.GetDocumentName(doc_id)
print(f"  {doc_name}  ".center(50, '*'))
doc_text = relations_dataset.GetDocument(doc_id)
print(doc_text)
doc_activities = token_dataset.GetDocumentActivities(doc_name)

print(" activities and NER labels (per sentences) ".center(50, '*'))
print(doc_activities)
doc_sentence_ner_labels = relations_dataset.GetSentencesWithIdsAndNerTagLabels(doc_id)
print(doc_sentence_ner_labels[:4])
doc_relations = relations_dataset.GetRelations(doc_id)
doc_flow_relations, doc_same_gateway_relations = doc_relations[labels.FLOW], doc_relations[labels.SAME_GATEWAY]

print(" same gateway relations ".center(50, '*'))
for same_gateway_relation in doc_same_gateway_relations:
    for key, value in same_gateway_relation.items():
        print(f"{key}: {value}")
    print()

*******************  doc-1.1  ********************
A small company manufactures customized bicycles . Whenever the sales department receives an order , a new process instance is created . A member of the sales department can then reject or accept the order for a customized bike . In the former case , the process instance is finished . In the latter case , the storehouse and the engineering department are informed . The storehouse immediately processes the part list of the order and checks the required quantity of each part . If the part is available in-house , it is reserved . If it is not available , it is back-ordered . This procedure is repeated for each item on the part list . In the meantime , the engineering department prepares everything for the assembling of the ordered bicycle . If the storehouse has successfully reserved or back-ordered every item of the part list and the preparation activity has finished , the engineering department assembles the bicycle . Afterwards , the s

### 1.2 Preprocess sentences

In [4]:
num_sentences = len(doc_activities) # activities is 2 dim list (one per sentence)
print(num_sentences)
doc_sentences_raw = [sentence.strip() for sentence in doc_text.split(".") if sentence.strip() != ""]
for s in doc_sentences_raw:
    print(s)
assert num_sentences == len(doc_sentences_raw)  # check if number of extracted sentences == from dataset 

12
A small company manufactures customized bicycles
Whenever the sales department receives an order , a new process instance is created
A member of the sales department can then reject or accept the order for a customized bike
In the former case , the process instance is finished
In the latter case , the storehouse and the engineering department are informed
The storehouse immediately processes the part list of the order and checks the required quantity of each part
If the part is available in-house , it is reserved
If it is not available , it is back-ordered
This procedure is repeated for each item on the part list
In the meantime , the engineering department prepares everything for the assembling of the ordered bicycle
If the storehouse has successfully reserved or back-ordered every item of the part list and the preparation activity has finished , the engineering department assembles the bicycle
Afterwards , the sales department ships the bicycle to the customer and finishes the pro

### 1.3 Filter Gateways Tokens

In [5]:
def filter_ner_labels(sentence_ner_labels, target_label):
    return [[token for token in s_list if target_label in token[2]]
                        for s_list in sentence_ner_labels]

doc_xor_gateway = filter_ner_labels(doc_sentence_ner_labels, labels.XOR_GATEWAY)
doc_and_gateway = filter_ner_labels(doc_sentence_ner_labels, labels.AND_GATEWAY)
print(doc_xor_gateway)
print(doc_and_gateway)

[[], [], [('or', 9, 'B-XOR Gateway')], [], [], [], [('If', 0, 'B-XOR Gateway')], [('If', 0, 'B-XOR Gateway')], [], [], [('If', 0, 'B-XOR Gateway')], []]
[[], [], [], [], [], [], [], [], [], [('In', 0, 'B-AND Gateway'), ('the', 1, 'I-AND Gateway'), ('meantime', 2, 'I-AND Gateway')], [], []]


### 1.4 Filter Sequence Flows

In [6]:
def filter_flow_relations(flow_relations, entity_type_list):
    """
    filter list of flow relations (single dictionaries) for source or target entity type = given entity tyoe
    """
    return [flow_relation for flow_relation in flow_relations if flow_relation[labels.SOURCE_ENTITY_TYPE] in entity_type_list
                                                               or flow_relation[labels.TARGET_ENTITY_TYPE] in entity_type_list]

doc_flow_relations_xor = filter_flow_relations(doc_flow_relations, [labels.XOR_GATEWAY])
doc_flow_relations_and = filter_flow_relations(doc_flow_relations, [labels.AND_GATEWAY])
doc_flow_relations_gateways = filter_flow_relations(doc_flow_relations, [labels.XOR_GATEWAY, labels.AND_GATEWAY, labels.CONDITION_SPECIFICATION])
print(f"Flow relations involving XOR gateways {len(doc_flow_relations_xor)}; "\
      f"AND gateways {len(doc_flow_relations_and)}; overall gateways {len(doc_flow_relations_gateways)}; overall {len(doc_flow_relations)}")

Flow relations involving XOR gateways 10; AND gateways 3; overall gateways 16; overall 19


### 1.5 Enrich activities with token index

In [7]:
doc_activity_tokens = []
for i, (tokens, activities) in enumerate(zip(doc_sentence_ner_labels, doc_activities)):
    sentence_activity_tokens = []
    # note: activity is a list because it could consist of more words (neglect here)
    for activity in activities:
        activity_token_triple = [token_triple for token_triple in tokens if token_triple[0] == activity[0]][0]
        sentence_activity_tokens.append((activity, activity_token_triple[1]))
    doc_activity_tokens.append(sentence_activity_tokens)

## 2 Extract Gateways

### 2.1 Key Word List
#### A) take words from all existing gateways in PET dataset as gold list for detection

In [8]:
def get_gateway_key_words(dataset_gateway_list):
    flattened = list(itertools.chain(*dataset_gateway_list))
    phrases = [" ".join(g).lower() for g in flattened]  # join phrases together if multiple words
    unique = list(set(phrases))
    unique.sort()
    return unique

xor_key_words_gold = get_gateway_key_words(token_dataset.GetXORGateways())
and_key_words_gold = get_gateway_key_words(token_dataset.GetANDGateways())

print(f"XOR gold ({len(xor_key_words_gold)})", xor_key_words_gold)
print(f"AND gold ({len(and_key_words_gold)})", and_key_words_gold)

XOR gold (15) ['either', 'for', 'for each patient for which', 'for the case', 'if', 'in case', 'in case of', 'in the case of', 'it can also happen that', 'or', 'otherwise', 'should', 'sometimes', 'under certain circumstances', 'whereas']
AND gold (6) ['at the same time', 'in the meantime', 'meantime', 'two concurrent activities are triggered', 'whereas', 'while']


#### B) Curated List from Literature

In [9]:
# Ferreira et al. 2017
with open('data/keywords/literature_xor.txt') as f:
    xor_key_words_literature = f.read().splitlines()
    xor_key_words_literature.sort()

with open('data/keywords/literature_and.txt') as f:
    and_key_words_literature = f.read().splitlines()
    and_key_words_literature.sort()

print(f"XOR literature ({len(xor_key_words_literature)})", xor_key_words_literature)
print(f"AND literature ({len(and_key_words_literature)})", and_key_words_literature)

XOR literature (14) ['either', 'if', 'if not', 'in case', 'in case of', 'only', 'only if', 'or', 'otherwise', 'till', 'unless', 'until', 'when', 'whether']
AND literature (11) ['at the same time', 'concurrently', 'in addition to', 'in parallel', 'in parallel with this', 'in the meantime', 'meantime', 'meanwhile', 'simultaneously', 'whereas', 'while']


In [10]:
for s in doc_sentences_raw:
    print(s)

A small company manufactures customized bicycles
Whenever the sales department receives an order , a new process instance is created
A member of the sales department can then reject or accept the order for a customized bike
In the former case , the process instance is finished
In the latter case , the storehouse and the engineering department are informed
The storehouse immediately processes the part list of the order and checks the required quantity of each part
If the part is available in-house , it is reserved
If it is not available , it is back-ordered
This procedure is repeated for each item on the part list
In the meantime , the engineering department prepares everything for the assembling of the ordered bicycle
If the storehouse has successfully reserved or back-ordered every item of the part list and the preparation activity has finished , the engineering department assembles the bicycle
Afterwards , the sales department ships the bicycle to the customer and finishes the proces

### 2.2 Extraction Algorithm

In [11]:
def extract_gateways(sentence_list, key_words, target_gateway_label):
    """
    extracts gateways in a key-word-based manner given a document structured in a list of sentences
    if two phrases would match to a token (e.g. 'in the meantime' and 'meantime'), the longer phrase is extracted
    target_gateway_label: str, must be 'XOR Gateway' or 'AND Gateway'
    
    result list is two dimensional -> list of tuples (word, position in sentence, tag) for each sentence
    this produces the same structure as sentences and their NER labels are annotated in PET dataset
    """
    gateways = []
    # sort key words descending by length of words in phrase
    key_words.sort(key=lambda key_word_phrase: len(key_word_phrase.split(" ")), reverse=True)

    # 1) extract gateways
    for s_idx, sentence in enumerate(sentence_list):
        # print(f" SENTENCE {s_idx} ".center(50, '-'))
        # print(sentence_list[s_idx])
        sentence_gateways = []
        sentence_to_search = f" {sentence.lower()} "  # lowercase and wrap with spaces for search of key words
        tokens = sentence.split(" ")
        tokens_lower = sentence.lower().split(" ")
        tokens_already_matched_with_key_phrase = []

        # iterate over key phrases
        for key_phrase in key_words:
            key_phrase_to_search = f" {key_phrase} "

            # if key phrase is in sentence, search index and extract
            if key_phrase_to_search in sentence_to_search:
                key_phrase_tokens = key_phrase.split(" ")
                
                # check key phrase for every token
                for t_idx, token in enumerate(tokens_lower):
                    candidate = True
                    # iterate over key phrase tokens in case of multiple world phrase
                    for key_phrase_token_idx, key_phrase_token in enumerate(key_phrase_tokens):
                        # check if token is not part of key phrase or token is already matched with another phrase
                        # if yes, stop processing candidate
                        if not tokens_lower[t_idx + key_phrase_token_idx] == key_phrase_token or \
                            t_idx + key_phrase_token_idx in tokens_already_matched_with_key_phrase:
                            candidate = False
                            break
                    
                    # add tokens to result only if all tokens are matched and not already part of a longer phrase
                    if candidate:
                        for i, key_phrase_token in enumerate(key_phrase_tokens):
                            prefix = "B" if i == 0 else "I"
                            # append tuples with extract information as in PET and process information about gateways to filter later
                            sentence_gateways.append((tokens[t_idx + i], t_idx + i, f"{prefix}-{labels.XOR_GATEWAY}"))
                            tokens_already_matched_with_key_phrase.append(t_idx + i)

        sentence_gateways.sort(key=lambda gateway_triple: gateway_triple[1])
        gateways.append(sentence_gateways)

    return gateways

In [12]:
# available key word lists: xor_key_words_gold, and_key_words_gold, xor_key_words_literature, and_key_words_literature
o_xor_gateways = extract_gateways(doc_sentences_raw, xor_key_words_gold, labels.XOR_GATEWAY)
o_and_gateways = extract_gateways(doc_sentences_raw, and_key_words_gold, labels.AND_GATEWAY)

print("XOR GATEWAYS")
for idx, sentence_gateways in enumerate(o_xor_gateways):
    print(idx, sentence_gateways)
print("\nAND GATEWAYS")
for idx, sentence_gateways in enumerate(o_and_gateways):
    print(idx, sentence_gateways)

XOR GATEWAYS
0 []
1 []
2 [('or', 9, 'B-XOR Gateway'), ('for', 13, 'B-XOR Gateway')]
3 []
4 []
5 []
6 [('If', 0, 'B-XOR Gateway')]
7 [('If', 0, 'B-XOR Gateway')]
8 [('for', 4, 'B-XOR Gateway')]
9 [('for', 9, 'B-XOR Gateway')]
10 [('If', 0, 'B-XOR Gateway'), ('or', 6, 'B-XOR Gateway')]
11 []

AND GATEWAYS
0 []
1 []
2 []
3 []
4 []
5 []
6 []
7 []
8 []
9 [('In', 0, 'B-XOR Gateway'), ('the', 1, 'I-XOR Gateway'), ('meantime', 2, 'I-XOR Gateway')]
10 []
11 []


## 3 Extract Control Flows

### 3.1 Helper Methods

In [13]:
def get_flow_relation_representation(sentence_idx, token_idx, entity_type, entity, source=True):
    if source:
        return {
            labels.SOURCE_SENTENCE_ID: sentence_idx,
            labels.SOURCE_HEAD_TOKEN_ID: token_idx,
            labels.SOURCE_ENTITY_TYPE: entity_type,
            labels.SOURCE_ENTITY: entity
        }
    else:
        return {
            labels.TARGET_SENTENCE_ID: sentence_idx,
            labels.TARGET_HEAD_TOKEN_ID: token_idx,
            labels.TARGET_ENTITY_TYPE: entity_type,
            labels.TARGET_ENTITY: entity
        }
        

def merge_source_target_dicts(source_dict, target_dict):
    return {**source_dict, **target_dict}

### 3.2 Involving AND gateways

In [14]:
# 1) METHODS FOR EXTRACTING THE PREVIOUS (INCL. SECOND PREVIOUS) AND NEXT ACTIVITY

def get_previous_activity(sentence_idx, token_idx, doc_activity_tokens, skip_first=False, one_already_found=False):
    """
    search recursive for the second last previous activity from a start point defined by sentence_idx and token_idx
    sentence_idx: sentence index where to start the search
    token_idx: token index where to stat the search
    doc_activity_tokens: list of activity lists (describes whole document)
    skip_first: True if searching for the second previous activity, False (default) when searching for the previous activity
    one_already_found: flag if one activity was already found and skipped for return in course of search for the second previous
    
    returns: triple of (sentence idx, token_idx, token)
    """
    # search for activities left to the token in target sentence if token is given else in the whole
    if token_idx is not None:
        previous_activities_sentence = [a_t for a_t in doc_activity_tokens[sentence_idx] if a_t[1] < token_idx]
    else:
        previous_activities_sentence = [a_t for a_t in doc_activity_tokens[sentence_idx]]
    
    if previous_activities_sentence:
        # return when just searching the first last activity OR when one was already found before
        previous_activity = previous_activities_sentence[-1]
        # A) base case: activity found
        if not skip_first or one_already_found:
            return (sentence_idx, previous_activity[1], previous_activity[0])
        # B) recursive case: continue search for second previous activity at index of previous activity
        else:
            return get_previous_activity(sentence_idx, previous_activity[1], doc_activity_tokens, one_already_found=True)
    # B) recursive case: continue search for previous activity in previous sentence
    else:
        next_sentence_idx = sentence_idx - 1
        # no sentences any more to search
        if next_sentence_idx == -1:
            return None
        # otherwise search recursively the previous sentence
        else:
            return get_previous_activity(next_sentence_idx, None, doc_activity_tokens, 
                                         skip_first=skip_first, one_already_found=one_already_found)

def get_following_activity(sentence_idx, token_idx, doc_activity_tokens):
    """
    search recursive for the next following activity from a start point defined by sentence_idx and token_idx
    sentence_idx: sentence index where to start the search
    token_idx: token index where to stat the search
    doc_activity_tokens: list of activity lists (describes whole document)
    
    returns: triple of (sentence idx, token_idx, token)
    """
    # search for activities right to the token in target sentence if token is given else in the whole
    if token_idx is not None:
        following_activities_sentence = [a_t for a_t in doc_activity_tokens[sentence_idx] if a_t[1] > token_idx]
    else:
        following_activities_sentence = [a_t for a_t in doc_activity_tokens[sentence_idx]]
    
    # if activities were found, take the last one
    if following_activities_sentence:
        a_t = following_activities_sentence[-1]
        return (sentence_idx, a_t[1], a_t[0])
    else:
        next_sentence_idx = sentence_idx + 1
        # no sentences any more to search
        if next_sentence_idx == len(doc_activity_tokens):
            return None
        # otherwise search recursively the following sentence
        else:
            return get_following_activity(next_sentence_idx, None, doc_activity_tokens)

        
# 2) EXTRACT RELATIONS
def extract_and_flow_relations(sentences, doc_activity_tokens, own_gateways):
    """
    extract flow relations for already found AND gateways following the logic:
    + for every gateway, to extract parallel branches, add relation to next activity after and before, because
      thats the pattern how AND key phrases are usually used (oriented by rules of Ferreira et al. 2017)
    + for each case, check over borders if not found in same sentence
    + to extract the flow relation that points to the gateway merge point, take the second before
    + Assumption: only one parallel gateway per sentence

    sentences: list of sentences (used only for debugging)
    doc_activity_tokens: list of activity tokens (word, idx) for each sentence
    own_gateways: list of own extracted gateway for each sentence
    
    return: list of flow relations in source/target dict representation
    """
    relations = []
    
    for s_idx, (sentence, activity_tokens, gateways) in enumerate(zip(sentences, doc_activity_tokens, own_gateways)):
        if gateways:
            # assume only one gateway
            gateway_lead_token = gateways[0]
            gateway_entity = [g[0] for g in gateways]

            # 1) Find related activities (previous and following are concurrent activities; second previous the one before the gateway)
            previous_activity = get_previous_activity(s_idx, gateway_lead_token[1], doc_activity_tokens)
            second_previous_activity = get_previous_activity(s_idx, gateway_lead_token[1], doc_activity_tokens, skip_first=True)
            following_activity = get_following_activity(s_idx, gateway_lead_token[1], doc_activity_tokens)

            # 2) Get representations for flow object dictionaries
            gateway_source_rep = get_flow_relation_representation(s_idx, gateway_lead_token[1], labels.AND_GATEWAY, 
                                                                  entity=gateway_entity, source=True)
            gateway_target_rep = get_flow_relation_representation(s_idx, gateway_lead_token[1], labels.AND_GATEWAY, 
                                                                  entity=gateway_entity, source=False)

            previous_activity_target_rep = get_flow_relation_representation(previous_activity[0], previous_activity[1], 
                                                                            labels.ACTIVITY, previous_activity[2], source=False)
            second_previous_activity_target_rep = get_flow_relation_representation(second_previous_activity[0], second_previous_activity[1], 
                                                                                   labels.ACTIVITY, second_previous_activity[2], source=True)
            following_activity_target_rep = get_flow_relation_representation(following_activity[0], following_activity[1], 
                                                                             labels.ACTIVITY, following_activity[2], source=False)

            # 3) Create relations (second previous -> gateway; gateway -> previous; gateway -> following)
            relations.append(merge_source_target_dicts(second_previous_activity_target_rep, gateway_target_rep))
            relations.append(merge_source_target_dicts(gateway_source_rep, previous_activity_target_rep))
            relations.append(merge_source_target_dicts(gateway_source_rep, following_activity_target_rep))
    
    return relations

o_flow_relations_and = extract_and_flow_relations(doc_sentences_raw, doc_activity_tokens, o_and_gateways)

In [15]:
for i, flow_relation in enumerate(o_flow_relations_and):
    for key, value in flow_relation.items():
        print(f"{key}: {value}")
    print()

source-head-sentence-ID: 6
source-head-word-ID: 9
source-entity-type: Activity
source-entity: ['reserved']
target-head-sentence-ID: 9
target-head-word-ID: 0
target-entity-type: AND Gateway
target-entity: ['In', 'the', 'meantime']

source-head-sentence-ID: 9
source-head-word-ID: 0
source-entity-type: AND Gateway
source-entity: ['In', 'the', 'meantime']
target-head-sentence-ID: 7
target-head-word-ID: 8
target-entity-type: Activity
target-entity: ['back-ordered']

source-head-sentence-ID: 9
source-head-word-ID: 0
source-entity-type: AND Gateway
source-entity: ['In', 'the', 'meantime']
target-head-sentence-ID: 9
target-head-word-ID: 7
target-entity-type: Activity
target-entity: ['prepares']



### 3.2 Involving XOR gateways

#### Input B): Extracted Gateways:

In [16]:
for i, sentence_gateways in enumerate(o_xor_gateways):
    print(i, sentence_gateways)

0 []
1 []
2 [('or', 9, 'B-XOR Gateway'), ('for', 13, 'B-XOR Gateway')]
3 []
4 []
5 []
6 [('If', 0, 'B-XOR Gateway')]
7 [('If', 0, 'B-XOR Gateway')]
8 [('for', 4, 'B-XOR Gateway')]
9 [('for', 9, 'B-XOR Gateway')]
10 [('If', 0, 'B-XOR Gateway'), ('or', 6, 'B-XOR Gateway')]
11 []


#### Input A): Activities:

In [17]:
for i, sentence_activities in enumerate(doc_activities):
    print(i, sentence_activities)

0 []
1 [['receives']]
2 [['reject'], ['accept']]
3 []
4 [['informed']]
5 [['processes'], ['checks']]
6 [['reserved']]
7 [['back-ordered']]
8 []
9 [['prepares']]
10 [['assembles']]
11 [['ships']]


#### Gold Data: Flow Relations that involve AND Gateways

In [18]:
print(len(doc_flow_relations))
for i, flow_relation in enumerate(doc_flow_relations):
    for key, value in flow_relation.items():
        print(f"{key}: {value}")
    print()

19
source-head-sentence-ID: 1
source-head-word-ID: 4
source-entity-type: Activity
source-entity: ['receives']
target-head-sentence-ID: 2
target-head-word-ID: 9
target-entity-type: XOR Gateway
target-entity: ['or']

source-head-sentence-ID: 2
source-head-word-ID: 9
source-entity-type: XOR Gateway
source-entity: ['or']
target-head-sentence-ID: 2
target-head-word-ID: 8
target-entity-type: Activity
target-entity: ['reject']

source-head-sentence-ID: 2
source-head-word-ID: 9
source-entity-type: XOR Gateway
source-entity: ['or']
target-head-sentence-ID: 2
target-head-word-ID: 10
target-entity-type: Activity
target-entity: ['accept']

source-head-sentence-ID: 2
source-head-word-ID: 10
source-entity-type: Activity
source-entity: ['accept']
target-head-sentence-ID: 4
target-head-word-ID: 12
target-entity-type: Activity
target-entity: ['informed']

source-head-sentence-ID: 4
source-head-word-ID: 12
source-entity-type: Activity
source-entity: ['informed']
target-head-sentence-ID: 9
target-head-wo

In [19]:
for i, flow_relation in enumerate(doc_flow_relations_xor):
    for key, value in flow_relation.items():
        print(f"{key}: {value}")
    print()

source-head-sentence-ID: 1
source-head-word-ID: 4
source-entity-type: Activity
source-entity: ['receives']
target-head-sentence-ID: 2
target-head-word-ID: 9
target-entity-type: XOR Gateway
target-entity: ['or']

source-head-sentence-ID: 2
source-head-word-ID: 9
source-entity-type: XOR Gateway
source-entity: ['or']
target-head-sentence-ID: 2
target-head-word-ID: 8
target-entity-type: Activity
target-entity: ['reject']

source-head-sentence-ID: 2
source-head-word-ID: 9
source-entity-type: XOR Gateway
source-entity: ['or']
target-head-sentence-ID: 2
target-head-word-ID: 10
target-entity-type: Activity
target-entity: ['accept']

source-head-sentence-ID: 5
source-head-word-ID: 11
source-entity-type: Activity
source-entity: ['checks']
target-head-sentence-ID: 6
target-head-word-ID: 0
target-entity-type: XOR Gateway
target-entity: ['If']

source-head-sentence-ID: 6
source-head-word-ID: 0
source-entity-type: XOR Gateway
source-entity: ['If']
target-head-sentence-ID: 6
target-head-word-ID: 1
ta

In [20]:
for i, s in enumerate(doc_sentences_raw):
    print(i, s)

0 A small company manufactures customized bicycles
1 Whenever the sales department receives an order , a new process instance is created
2 A member of the sales department can then reject or accept the order for a customized bike
3 In the former case , the process instance is finished
4 In the latter case , the storehouse and the engineering department are informed
5 The storehouse immediately processes the part list of the order and checks the required quantity of each part
6 If the part is available in-house , it is reserved
7 If it is not available , it is back-ordered
8 This procedure is repeated for each item on the part list
9 In the meantime , the engineering department prepares everything for the assembling of the ordered bicycle
10 If the storehouse has successfully reserved or back-ordered every item of the part list and the preparation activity has finished , the engineering department assembles the bicycle
11 Afterwards , the sales department ships the bicycle to the custom

## 3.3 Involving Remaining Gold Activities

In [22]:
# doc_activities = token_dataset.GetDocumentActivities(doc_name)
for sentence_activities in doc_activities:
    print(sentence_activities)

# flatten activities to a list of document activities
activities_flattened = [activitiy for sentence_activities in doc_activities for activitiy in sentence_activities]
print(activities_flattened)

[]
[['receives']]
[['reject'], ['accept']]
[]
[['informed']]
[['processes'], ['checks']]
[['reserved']]
[['back-ordered']]
[]
[['prepares']]
[['assembles']]
[['ships']]
[['receives'], ['reject'], ['accept'], ['informed'], ['processes'], ['checks'], ['reserved'], ['back-ordered'], ['prepares'], ['assembles'], ['ships']]


In [37]:
def create_activity_flows(doc_activity_tokens):
    activities_flattened = [(i, activitiy) for i, sentence_activities in enumerate(doc_activity_tokens) 
                            for activitiy in sentence_activities]
    flow_relations = []
    for i in range(len(activities_flattened) - 1):
        s_idx_1, a1 = activities_flattened[i]
        s_idx_2, a2 = activities_flattened[i+1]
        if True:
            flow_relations.append({labels.SOURCE_ENTITY: a1[0], labels.TARGET_ENTITY: a2[0]})
        else:
            a1 = get_flow_relation_representation(s_idx_1, a1[1], labels.ACTIVITY, a1[0], source=True)
            a2 = get_flow_relation_representation(s_idx_2, a2[1], labels.ACTIVITY, a2[0], source=False)
            flow_relations.append(merge_source_target_dicts(a1, a2))
    return flow_relations


gold_activity_flows = create_activity_flows(doc_activity_tokens)
for i, flow_relation in enumerate(gold_activity_flows):
    for key, value in flow_relation.items():
        print(f"{i} | {key}: {value}")
    print()

0 | source-entity: ['receives']
0 | target-entity: ['reject']

1 | source-entity: ['reject']
1 | target-entity: ['accept']

2 | source-entity: ['accept']
2 | target-entity: ['informed']

3 | source-entity: ['informed']
3 | target-entity: ['processes']

4 | source-entity: ['processes']
4 | target-entity: ['checks']

5 | source-entity: ['checks']
5 | target-entity: ['reserved']

6 | source-entity: ['reserved']
6 | target-entity: ['back-ordered']

7 | source-entity: ['back-ordered']
7 | target-entity: ['prepares']

8 | source-entity: ['prepares']
8 | target-entity: ['assembles']

9 | source-entity: ['assembles']
9 | target-entity: ['ships']



## 4 Evaluate Extraction