In [29]:
from parso.python.tree import Flow
%matplotlib inline
import matplotlib.pyplot as plt

from pathlib import Path

from promg.modules.db_management import DBManagement
from tabulate import tabulate
import yaml

from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

from IPython.core.interactiveshell import InteractiveShell

# Show the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd

# Widen pandas' text display so wide frames are not wrapped at the default width.
pd.set_option('display.width', 2000)

### Define the project that you want to analyze

In [30]:
# Folder name of the case study to analyse; its config.yaml is read from this folder.
# Alternative value left by the author: "bpic17".
case_study = "bpic14"
# Sample-data switch; not read by the cells visible here -- TODO confirm where it is consumed.
use_sample = False

In [31]:
# Load the raw YAML configuration of the selected case study so that the database
# credentials the notebook expects can be shown to the user.
conf_path = Path(case_study, 'config.yaml')
# A context manager closes the file handle as soon as the YAML has been parsed.
with open(conf_path) as conf_file:
    config = yaml.safe_load(conf_file)

# NOTE(review): the password is printed in clear text and ends up in the saved notebook
# output -- clear the output before sharing unless it is a throwaway default.
print("These are the credentials that I expect to be set for the database.")
# Single quotes inside the replacement fields keep these f-strings valid on Python < 3.12.
print(f"db_name: {config['db_name']}")
print(f"user: {config['user']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

These are the credentials that I expect to be set for the database.
db_name: neo4j
user: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


### Prepare so we can use PromG to execute queries

In [32]:
# Build PromG's Configuration from the same config file. Note that this rebinds `config`,
# which until here held the plain dict loaded with yaml.safe_load.
config = Configuration.init_conf_with_config_file(conf_path)
# Set up the database connection that the query cells below execute against.
db_connection = DatabaseConnection.set_up_connection(config=config)

# Start Analysis
Make sure the data has been imported using import_data.ipynb, so that it follows the schema defined in the paper.
This analysis builds further on the process_discovery notebook.

In [33]:
# Materialize the time between two directly-following events: for every
# (e1:Event)-[df:DF]->(e2:Event) the query computes duration.between(e1.timestamp, e2.timestamp)
# and stores it on the relationship as df.duration. apoc.periodic.iterate applies the SET
# in batches of $batch_size.
# NOTE(review): $batch_size is not passed explicitly here -- presumably exec_query supplies it; confirm.
materialize_time = '''
CALL apoc.periodic.iterate('
    MATCH (e1:Event) - [df:DF] -> (e2:Event)
    RETURN df, duration.between(e1.timestamp, e2.timestamp) AS d',
    'SET df.duration = d',
    {batchSize:$batch_size, parallel: true})
'''

# Bare last expression: the notebook displays the summary returned by the call.
db_connection.exec_query(materialize_time)

[{'batches': 243,
  'total': 2424541,
  'timeTaken': 10,
  'committedOperations': 2424541,
  'failedOperations': 0,
  'failedBatches': 0,
  'retries': 0,
  'errorMessages': {},
  'batch': {'total': 243, 'errors': {}, 'committed': 243, 'failed': 0},
  'operations': {'total': 2424541,
   'errors': {},
   'committed': 2424541,
   'failed': 0},
  'wasTerminated': False,
  'failedParams': {},
  'updateStatistics': {'relationshipsDeleted': 0,
   'relationshipsCreated': 0,
   'nodesDeleted': 0,
   'nodesCreated': 0,
   'labelsRemoved': 0,
   'labelsAdded': 0,
   'propertiesSet': 2424541}}]

In [34]:
# Add performance measures to DFG on eventType level

# For every DF relationship between two EventType nodes, aggregate the `duration` of the
# event-level DF relationships it represents (same objectType, events that are instances of
# the two event types) and store the average, minimum and maximum on that relationship.
#
# The update uses `+=` so the three duration properties are merged into the relationship.
# A plain `=` would replace all of its properties, including the `objectType` that the
# MATCH in this very query relies on, so re-running the cell would no longer find anything.
# NOTE(review): $batch_size is not passed explicitly here -- presumably exec_query supplies it; confirm.
df_performance_str = '''\
    CALL apoc.periodic.iterate('
        // find all consecutive event types for specific object types
        MATCH (e1:Event) - [e2e:DF] -> (e2:Event)
        MATCH (e1) - [:INSTANCE_OF] -> (et1:EventType)
        MATCH (e2) - [:INSTANCE_OF] -> (et2:EventType)
        MATCH (et1) - [e2e_type:DF {objectType: e2e.objectType}] -> (et2)
        WITH e2e_type, // count for each oType, how often we have observed DF between events that are an instance of et1 and et2
            avg(e2e.duration) as avg_duration, min(e2e.duration) as min_duration, max(e2e.duration) as max_duration 
        RETURN avg_duration, min_duration, max_duration, e2e_type
    ','
    SET e2e_type += {duration_avg: avg_duration, duration_min: min_duration, duration_max: max_duration}
    ', 
    {batchSize:$batch_size, parallel: true})
'''

results = db_connection.exec_query(df_performance_str)
# headers="keys" uses the keys of the returned records as column headers.
print(tabulate(results, headers="keys"))


--  ------  --  ------  -  -  -  --  ---------------------------------------------------------  -----------------------------------------------------------------  -----  --  -----------------------------------------------------------------------------------------------------------------------------------------------------------
13  122067  34  122067  0  0  0  {}  {'total': 13, 'errors': {}, 'committed': 13, 'failed': 0}  {'total': 122067, 'errors': {}, 'committed': 122067, 'failed': 0}  False  {}  {'relationshipsDeleted': 0, 'relationshipsCreated': 0, 'nodesDeleted': 0, 'nodesCreated': 0, 'labelsRemoved': 0, 'labelsAdded': 0, 'propertiesSet': 610335}
--  ------  --  ------  -  -  -  --  ---------------------------------------------------------  -----------------------------------------------------------------  -----  --  -----------------------------------------------------------------------------------------------------------------------------------------------------------


In [35]:
# Sojourn time for an event and the responsible delaying object.
# For each event `e` whose EventType has no `agg` property, the incoming DF relationships are
# sorted by ascending duration and the first one is reported as the delaying DF; the ten
# largest sojourn times are returned.
# NOTE(review): `objects` is a grouping key of the second WITH, so collect(df) only merges DF
# relationships whose object lists are identical -- confirm this grouping is intended.
sojourn_time = '''
    MATCH (:Event) -[df:DF]-> (e:Event) -[]-> (o) - [:INSTANCE_OF] -> (ot:ObjectType {objectType: df.objectType})
    MATCH (e) - [:INSTANCE_OF] -> (et:EventType WHERE et.agg is null)    
    WITH et, df, e, collect(o.id) AS objects ORDER BY df.duration ASC
    WITH et, e, objects, collect(df)[0] AS delayingDF
    RETURN e.id, et.eventType AS eventType, objects, delayingDF.id AS delayingObject, delayingDF.duration AS sojournTime ORDER BY sojournTime DESC LIMIT 10
'''

results = db_connection.exec_query(sojourn_time)
# headers="keys" uses the keys of the returned records as column headers, so the
# printed table is readable on its own.
print(tabulate(results, headers="keys"))

----------------------  -------------  -------------  ---------  ----------------
14729_Detail_Change_re  Requested End  ['C00008503']  C00008503  P13Y
16966_Detail_Change_re  Requested End  ['C00009941']  C00009941  P9Y11M27DT14H18M
17120_Detail_Change_re  Requested End  ['C00010031']  C00010031  P9Y11M26DT13H48M
29494_Detail_Change_re  Requested End  ['C00017968']  C00017968  P8Y2M5DT3H29M
29624_Detail_Change_re  Requested End  ['C00018098']  C00018098  P8Y2M5DT2H30M
29456_Detail_Change_re  Requested End  ['C00017930']  C00017930  P7Y7M16DT14H9M
29457_Detail_Change_re  Requested End  ['C00017931']  C00017931  P7Y7M16DT14H9M
29489_Detail_Change_re  Requested End  ['C00017963']  C00017963  P7Y7M16DT14H8M
29551_Detail_Change_re  Requested End  ['C00018025']  C00018025  P7Y7M16DT14H7M
29586_Detail_Change_re  Requested End  ['C00018060']  C00018060  P7Y7M16DT14H6M
----------------------  -------------  -------------  ---------  ----------------


In [36]:
# Flow time for an event and the responsible enabling object.
# Same shape as the sojourn-time query, but the incoming DF relationships are sorted by
# descending duration, so the first one (the longest) is reported as the enabling DF.
# The id of that DF is returned as `enablingObject`; the earlier alias `delayingObject`
# was a copy-paste leftover that mislabelled the column.
# NOTE(review): `objects` is a grouping key of the second WITH, so collect(df) only merges DF
# relationships whose object lists are identical -- confirm this grouping is intended.
flow_time = '''
    MATCH (:Event) -[df:DF]-> (e:Event) -[]-> (o) - [:INSTANCE_OF] -> (ot:ObjectType {objectType: df.objectType})
    MATCH (e) - [:INSTANCE_OF] -> (et:EventType WHERE et.agg is null)
    WITH et, df, e, collect(o.id) AS objects ORDER BY df.duration DESC
    WITH et, e, objects, collect(df)[0] AS enablingDF
    MATCH (e2:Event)-[enablingDF]->(e)
    RETURN e.id, et.eventType AS eventType, objects, enablingDF.id AS enablingObject, enablingDF.duration AS flowTime ORDER BY flowTime DESC LIMIT 10
'''

results = db_connection.exec_query(flow_time)
# headers="keys" uses the keys of the returned records as column headers, so the
# printed table is readable on its own.
print(tabulate(results, headers="keys"))

----------------------  -------------  -------------  ---------  ----------------
14729_Detail_Change_re  Requested End  ['C00008503']  C00008503  P13Y
16966_Detail_Change_re  Requested End  ['C00009941']  C00009941  P9Y11M27DT14H18M
17120_Detail_Change_re  Requested End  ['C00010031']  C00010031  P9Y11M26DT13H48M
29494_Detail_Change_re  Requested End  ['C00017968']  C00017968  P8Y2M5DT3H29M
29624_Detail_Change_re  Requested End  ['C00018098']  C00018098  P8Y2M5DT2H30M
29457_Detail_Change_re  Requested End  ['C00017931']  C00017931  P7Y7M16DT14H9M
29456_Detail_Change_re  Requested End  ['C00017930']  C00017930  P7Y7M16DT14H9M
29489_Detail_Change_re  Requested End  ['C00017963']  C00017963  P7Y7M16DT14H8M
29551_Detail_Change_re  Requested End  ['C00018025']  C00018025  P7Y7M16DT14H7M
29586_Detail_Change_re  Requested End  ['C00018060']  C00018060  P7Y7M16DT14H6M
----------------------  -------------  -------------  ---------  ----------------


In [37]:
# Synchronization time = time between the first and the last preceding event, computed as
# the duration of the longest incoming DF (enabling) minus that of the shortest incoming DF
# (delaying), i.e. the difference between flowTime and sojournTime.
# The incoming DF relationships are sorted by descending duration, so incomingDF[0] is the
# longest and incomingDF[-1] the shortest.
# NOTE(review): `objects` is a grouping key of the second WITH, so collect(df) only merges DF
# relationships whose object lists are identical -- confirm this grouping is intended.
sync_time = '''
    MATCH (:Event) -[df:DF]-> (e:Event) -[]-> (o) - [:INSTANCE_OF] -> (ot:ObjectType {objectType: df.objectType})
    MATCH (e) - [:INSTANCE_OF] -> (et:EventType WHERE et.agg is null)
    WITH et, df, e, collect(o.id) AS objects ORDER BY df.duration DESC
    WITH et, objects, e, collect(df) AS incomingDF
    WITH et, objects, e, incomingDF[0] as enablingDF, incomingDF[-1] as delayingDF
    MATCH (e2_enabling:Event)-[enablingDF]->(e)
    MATCH (e2_delaying:Event)-[delayingDF]->(e)
    RETURN e.id, et.eventType as eventType, objects, enablingDF.id as first, delayingDF.id as last, enablingDF.duration - delayingDF.duration AS synchronizationTime ORDER BY synchronizationTime DESC LIMIT 10
'''

results = db_connection.exec_query(sync_time)
# headers="keys" uses the keys of the returned records as column headers, so the
# printed table is readable on its own.
print(tabulate(results, headers="keys"))

-------------------------  ----------------  --------------------------  ---------  ---------  ----------
229_BPIC14Interaction_ior  Open Interaction  ['SD0000233', 'SD0000233']  SD0000233  SD0000233  PT1H58M22S
298_BPIC14Interaction_ior  Open Interaction  ['SD0000303', 'SD0000303']  SD0000303  SD0000303  PT1H57M47S
123_BPIC14Interaction_ior  Open Interaction  ['SD0000125', 'SD0000125']  SD0000125  SD0000125  PT1H57M35S
173_BPIC14Interaction_ior  Open Interaction  ['SD0000177', 'SD0000177']  SD0000177  SD0000177  PT1H57M17S
249_BPIC14Interaction_ior  Open Interaction  ['SD0000254', 'SD0000254']  SD0000254  SD0000254  PT1H56M55S
148_BPIC14Interaction_ior  Open Interaction  ['SD0000151', 'SD0000151']  SD0000151  SD0000151  PT1H56M54S
264_BPIC14Interaction_ior  Open Interaction  ['SD0000269', 'SD0000269']  SD0000269  SD0000269  PT1H56M48S
228_BPIC14Interaction_ior  Open Interaction  ['SD0000232', 'SD0000232']  SD0000232  SD0000232  PT1H56M31S
251_BPIC14Interaction_ior  Open Interaction  [