In [23]:
from pathlib import Path

import pandas as pd
from promg.modules.db_management import DBManagement
from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

import yaml

import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go

In [24]:
case_study = 'bpic14'

conf_path = Path(case_study, 'config.yaml')
# Read the raw YAML only to show the expected connection settings. The context
# manager closes the file handle again (a bare open() left it dangling).
with open(conf_path) as conf_file:
    raw_config = yaml.safe_load(conf_file)

print("These are the credentials that I expect to be set for the database.")
print(f"db_name: {raw_config['db_name']}")
print(f"uri: {raw_config['uri']}")
# Mask the secret so it is not stored in the saved notebook output;
# the actual value can be looked up in the file named below.
print(f"password: {'*' * len(str(raw_config['password']))}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

# `config` is the promg Configuration object from here on (the raw dict lives in raw_config).
config = Configuration.init_conf_with_config_file(conf_path)
db_connection = DatabaseConnection.set_up_connection(config=config)

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


In [25]:
# Clean-up: remove relationships and nodes created by a previous run of this notebook.
# This cell deletes the START relationships that point from an Event to any node.
# Every matched relationship is deleted inside a CALL ... IN TRANSACTIONS subquery;
# the final RETURN reports how many relationships were matched.
q_clear_start = '''
:auto
MATCH (n)<-[r:START]-(e:Event)
CALL (r) {
    DELETE r
} IN TRANSACTIONS
 RETURN count(r) '''
db_connection.exec_query(q_clear_start)

[{'count(r)': 0}]

In [26]:
# Clean-up: delete the END relationships that point from an Event to any node
# and return the number of matched relationships.
q_clear_end = '''
:auto
MATCH (n)<-[r:END]-(e:Event)
 CALL (r) {
    DELETE r
 } IN TRANSACTIONS

 RETURN count(r)
'''
db_connection.exec_query(q_clear_end)


[{'count(r)': 0}]

In [27]:
# Clean-up of high-level events: delete the START relationships that leave a
# HighLevelEvent node and return the number of matched relationships.
qClearStartHLE = '''
:auto
MATCH (n)<-[r:START]-(e:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS

 RETURN count(r)
 '''
db_connection.exec_query(qClearStartHLE)


[{'count(r)': 0}]

In [28]:
# Clean-up of high-level events: delete the END relationships that leave a
# HighLevelEvent node and return the number of matched relationships.
qClearEndHLE = '''
:auto
MATCH (n)<-[r:END]-(e:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS

 RETURN count(r)
'''
db_connection.exec_query(qClearEndHLE)


[{'count(r)': 0}]

In [29]:
# Clean-up of high-level events: delete every DF relationship whose source and
# target are both HighLevelEvent nodes and return the number of matched relationships.
q_clear_df_hle = '''
:auto
MATCH (e2:HighLevelEvent)<-[r:DF]-(e:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS

 RETURN count(r)
'''
db_connection.exec_query(q_clear_df_hle)


[{'count(r)': 0}]

In [30]:
# Clean-up of high-level events: remove all HighLevelEvent nodes.
# DETACH DELETE also removes any relationship still attached to a deleted node.
qClearHighLevelEvent = '''
:auto
MATCH (h:HighLevelEvent)
CALL (h) {
    DETACH DELETE h
} IN TRANSACTIONS
 RETURN count(h)
'''
db_connection.exec_query(qClearHighLevelEvent)


[{'count(h)': 0}]

In [31]:
# Clean-up: delete the START_CORR relationships that leave a HighLevelEvent.
# Note that the names q_clear_start / q_clear_end are re-bound here; earlier in the
# notebook they held the START / END clean-up queries for Event nodes.
# These queries have no RETURN clause.
q_clear_start = '''
:auto
MATCH (n)<-[r:START_CORR]-(e:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS
'''

db_connection.exec_query(q_clear_start)
# Clean-up: delete the END_CORR relationships that leave a HighLevelEvent.
q_clear_end = '''
:auto
MATCH (n)<-[r:END_CORR]-(e:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS
'''
db_connection.exec_query(q_clear_end)

[]

In [32]:
# Clean-up: delete every DURING relationship between two HighLevelEvent nodes.
# The query has no RETURN clause.
qClearDuring = '''
:auto
MATCH (:HighLevelEvent)<-[r:DURING]-(:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS
'''
db_connection.exec_query(qClearDuring)

[]

In [33]:
# Object types processed by the per-type queries below; each value is passed
# to the queries as the $objectType parameter.
object_types = ["Change", "Incident", "Interaction"]

In [34]:
# Infer the start and end event of every object, one object type at a time.
# An event e correlated to object n becomes the START of n when no DF edge carrying
# n's sysId points into e, and the END of n when no such DF edge leaves e.
q_start_event = '''
    // Infer start event of an object
    MATCH (n) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
    MATCH (n)<-[c:CORR]-(e:Event) WHERE NOT ()-[:DF {id:n.sysId}]->(e)
    MERGE (n)<-[:START]-(e)
    RETURN count(n)
'''

q_end_event = '''
    // Infer end event of an object

    MATCH (n) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
    MATCH (n)<-[c:CORR]-(e:Event) WHERE NOT (e)-[:DF {id:n.sysId}]->()
    MERGE (n)<-[:END]-(e)

    RETURN count(n)
    '''

for object_type in object_types:
    print(f"Inferring start and end events for object type: {object_type}")
    # Despite the "_result" suffix these two names hold the parameterised Query
    # objects; the query results themselves are only printed.
    q_start_event_result = Query(
        query_str=q_start_event,
        parameters={"objectType": object_type}
    )

    print(db_connection.exec_query(q_start_event_result))

    q_end_event_result = Query(
        query_str=q_end_event,
        parameters={"objectType": object_type}
    )
    print(db_connection.exec_query(q_end_event_result))

Inferring start and end events for object type: Change
[{'count(n)': 16611}]
[{'count(n)': 16611}]
Inferring start and end events for object type: Incident
[{'count(n)': 46368}]
[{'count(n)': 46368}]
Inferring start and end events for object type: Interaction
[{'count(n)': 146553}]
[{'count(n)': 146553}]


In [35]:
# Materialize the time between two directly-following events as a `duration`
# property (in whole seconds) on the DF edge that connects them.
# Only DF edges without a duration yet are touched (WHERE df.duration IS NULL);
# `total` is the number of matched edges.
q_materialize_duration_on_df = '''
    :auto
    MATCH (e1:Event)-[df:DF]->(e2:Event) WHERE df.duration IS NULL
    CALL (e1, df, e2) {
        SET df.duration = duration.inSeconds(e1.timestamp, e2.timestamp).seconds
    } IN TRANSACTIONS
    RETURN count(*) AS total
'''

db_connection.exec_query(q_materialize_duration_on_df)

[{'total': 2100886}]

In [36]:
# Create the two indexes on HighLevelEvent nodes (on sysId and on timestamp)
# unless they already exist. The statements need no interpolation, so they are
# written as plain string literals and defined up front before being executed.
qCreateIndexHLE_sys = '''
    CREATE INDEX highLevelEventSysIdIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.sysId);
'''
qCreateIndexHLE_timestamp = '''
        CREATE INDEX highLevelEventEndTimeIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.timestamp);
    '''

db_connection.exec_query(qCreateIndexHLE_sys)
db_connection.exec_query(qCreateIndexHLE_timestamp)

[]

In [37]:

# Build high-level events (HLE), one object type at a time.
# For every object n of the requested type the query takes the Event linked to n
# via START and the Event linked to n via END, and MERGEs one HighLevelEvent per
# event, keyed by sysId = "HLE_" + <event sysId>. The HLE is linked to its
# underlying event with a START resp. END relationship.
# timestamp / activity / subActivity are only set when the HLE node is created.
# If the start and end event of an object are the same Event, both UNION branches
# merge the same HLE node (same sysId).
qBuildHighLevelEvent = '''
    :auto
    MATCH (n) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
    MATCH (eStart:Event)-[st:START]->(n)<-[en:END]-(eEnd:Event)
    WITH DISTINCT eStart, eEnd
    CALL (eStart, eEnd) {
        MERGE (h:HighLevelEvent {sysId: "HLE_" + eStart.sysId})
        ON CREATE SET h.timestamp=eStart.timestamp, h.activity=$objectType, h.subActivity = eStart.activity
        MERGE (h)-[:START]->(eStart)
        RETURN h
        UNION
        MERGE (h:HighLevelEvent {sysId: "HLE_" + eEnd.sysId})
        ON CREATE SET h.timestamp=eEnd.timestamp, h.activity=$objectType, h.subActivity = eEnd.activity
        MERGE (h)-[:END]->(eEnd)
        RETURN h
    } IN TRANSACTIONS
    RETURN count(h)
'''

# Run the query once per object type and print the reported count.
for object_type in object_types:
    query = Query(
        query_str=qBuildHighLevelEvent,
        parameters={
            "objectType": object_type,
        }
    )

    result = db_connection.exec_query(query)
    # The query returns a single row whose only column is named 'count(h)'.
    print(f"{result[0]['count(h)']} high level events for {object_type}")

33217 high level events for Change
92736 high level events for Incident
293106 high level events for Interaction


In [38]:
# Lift CORR to high-level events: every node n that is correlated (CORR) to the
# Event underlying a high-level event (reached via its START or END relationship)
# also gets a CORR relationship directly from that HighLevelEvent.
# The query has no RETURN clause.
qLiftCorrToHL = '''
    :auto
    MATCH (h:HighLevelEvent)-[:START|END]->(:Event)-[:CORR]->(n)
    WITH distinct h, n
    CALL (h, n) {
        MERGE(h) - [:CORR]->(n)
    } IN TRANSACTIONS
'''
db_connection.exec_query(qLiftCorrToHL)

[]

In [39]:
# build DF relations between high-level events
# step 1: build HL-DF relations between events

# Query template that builds DF edges between the high-level events of one object type.
# For every object o of type $objectType, its correlated high-level events are ordered
# by timestamp (internal id as tie-breaker) and each consecutive pair is connected by a
# DF edge carrying the object type and the object's sysId. `total` counts those edges.
qBuildDfRelationsHLE = '''
    :auto
    // get the high-level events correlated to each object of the requested type
    MATCH (ot:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) <-[:CORR]-(h:HighLevelEvent)
    // order by time; the internal id breaks ties between equal timestamps
    WHERE h.timestamp IS NOT NULL
    WITH o, h ORDER BY h.timestamp, ID(h)
    // collect the ordered events per object and connect each consecutive pair
    WITH o, collect(h) AS events
    UNWIND range(0, size(events)-2) AS idx
    WITH events[idx] AS fromEv, events[idx+1] AS toEv, o
    CALL (fromEv,toEv,o) {
        MERGE (fromEv)-[rel:DF {objectType:$objectType, id:o.sysId}]->(toEv)
        RETURN rel
    } IN TRANSACTIONS
    RETURN count(rel) AS total
'''

In [40]:
# Execute the DF-building query for each regular object type and additionally
# for "CI_SC", printing the rows returned for every run.
for object_type in [*object_types, "CI_SC"]:
    query = Query(query_str=qBuildDfRelationsHLE, parameters=dict(objectType=object_type))
    print(f"DF for {object_type}: {db_connection.exec_query(query)}")

DF for Change: [{'total': 16606}]
DF for Incident: [{'total': 46368}]
DF for Interaction: [{'total': 146553}]
DF for CI_SC: [{'total': 426299}]


Fix the DF relations of overlapping high-level events that are 0 seconds apart

In [41]:
# Reorder part of a CI_SC DF chain  hle -> hle2 -> hle3 -> hle4  (df12, df23, df34,
# all carrying the same id as df12).
# The first two MATCH clauses additionally require a DF edge hle -> hle3 whose
# objectType equals hle.activity and a DF edge hle2 -> hle4 whose objectType equals
# hle2.activity.
# When hle3 and hle4 are 0 seconds apart, df23 and df34 are deleted and replaced by
# hle2 -> hle4 and hle4 -> hle3, i.e. hle3 and hle4 swap places in the chain.
# NOTE(review): a CI_SC DF edge with this id that previously left hle4 towards its
# successor is not rewired here — confirm the chain stays linear after this query.
query = '''
    MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle3)
    MATCH (hle2) - [:DF {objectType: hle2.activity}] -> (hle4)
    MATCH (hle:HighLevelEvent) - [df12:DF  {objectType: 'CI_SC'}] -> (hle2:HighLevelEvent)
        - [df23:DF  {objectType: 'CI_SC', id:df12.id}] -> (hle3:HighLevelEvent)
        - [df34:DF  {objectType: 'CI_SC', id:df12.id}] -> (hle4:HighLevelEvent)
    WITH hle, hle2, hle3, hle4, df12, df23, df34, duration.inSeconds(hle3.timestamp, hle4.timestamp).seconds as seconds
    WHERE seconds = 0
    DELETE df23
    DELETE df34
    MERGE (hle2)-[:DF {objectType: 'CI_SC', id:df12.id}] -> (hle4)
    MERGE (hle4)-[:DF {objectType: 'CI_SC', id:df12.id}] -> (hle3)
'''

db_connection.exec_query(query)

[]

In [42]:
# Reorder part of a CI_SC DF chain  hle -> hle2 -> hle3 -> hle4  (df12, df23, df34,
# all carrying the same id as df12), where additionally a DF edge hle -> hle3 with
# objectType = hle.activity and a DF edge hle2 -> hle4 with objectType = hle2.activity
# exist.
# Here the 0-second check is on hle and hle2: when they are 0 seconds apart, df12 and
# df23 are deleted and replaced by hle -> hle3 and hle3 -> hle2 (df34.id equals df12.id
# by the MATCH pattern).
# NOTE(review): df34 (hle3 -> hle4) is kept, so after this query hle3 has two outgoing
# CI_SC DF edges with this id (to hle2 and to hle4) and hle2 gets no new outgoing one —
# confirm this is the intended result.
query = '''
    MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle3)
    MATCH (hle2) - [:DF {objectType: hle2.activity}] -> (hle4)
    MATCH (hle:HighLevelEvent) - [df12:DF  {objectType: 'CI_SC'}] -> (hle2:HighLevelEvent)
        - [df23:DF  {objectType: 'CI_SC', id:df12.id}] -> (hle3:HighLevelEvent)
        - [df34:DF  {objectType: 'CI_SC', id:df12.id}] -> (hle4:HighLevelEvent)
    WITH hle, hle2, hle3, hle4, df12, df23, df34, duration.inSeconds(hle.timestamp, hle2.timestamp).seconds as seconds
    WHERE seconds = 0
    DELETE df12
    DELETE df23
    MERGE (hle)-[:DF {objectType: 'CI_SC', id:df34.id}] -> (hle3)
    MERGE (hle3)-[:DF {objectType: 'CI_SC', id:df34.id}] -> (hle2)
'''

db_connection.exec_query(query)

[]

In [43]:
# Infer the first high-level event of every CI_SC object and mark it with a
# START_CORR relationship.

object_type = "CI_SC"
print(f"Inferring start and end events for object type: {object_type}")

# For each object n of the requested type, the first subquery picks the earliest
# correlated HighLevelEvent. High-level events are created with a `timestamp`
# property (there is no `startTime`), so the ordering must use e.timestamp;
# ordering by the missing property silently degraded to ordering by id(e) only.
q_start_high_level_event_str = '''
:auto
// Infer start event of an object
MATCH (ot:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (n)
CALL (n) {
    MATCH (n) <- [:CORR] - (e:HighLevelEvent)
    RETURN e ORDER BY e.timestamp, id(e) LIMIT 1
}
CALL (n, e) {
    MERGE (n)<-[:START_CORR]-(e)
} IN TRANSACTIONS
RETURN count(n)
'''

q_start_hl_event = Query(
    query_str=q_start_high_level_event_str,
    parameters={
        "objectType": object_type,
    }
)

db_connection.exec_query(q_start_hl_event)

Inferring start and end events for object type: CI_SC


[{'count(n)': 22301}]