In [4]:
from pathlib import Path

import pandas as pd
from promg.modules.db_management import DBManagement
from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

import yaml

import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go

In [5]:
case_study = 'bpic14'

conf_path = Path(case_study, 'config.yaml')
config = yaml.safe_load(open(conf_path))

print(f"These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

config = Configuration.init_conf_with_config_file(conf_path)
db_connection = DatabaseConnection.set_up_connection(config=config)

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


In [6]:
# clear previously created nodes and relations
# start/end relations
qClearStart = f'''MATCH (n)<-[r:START]-(e:Event) DELETE r'''
db_connection.exec_query(qClearStart)
qClearEnd = f'''MATCH (n)<-[r:END]-(e:Event) DELETE r'''
db_connection.exec_query(qClearEnd)
# high-level events
qClearStartHLE = f'''MATCH (n)<-[r:START]-(e:HighLevelEvent) DELETE r'''
db_connection.exec_query(qClearStartHLE)
qClearEndHLE = f'''MATCH (n)<-[r:END]-(e:HighLevelEvent) DELETE r'''
db_connection.exec_query(qClearEndHLE)
qClearHighLevelEvent = f'''MATCH (h:HighLevelEvent) DETACH DELETE h'''
db_connection.exec_query(qClearHighLevelEvent)
qClearHL_DF = f'''MATCH (:Event)-[r:HL_DF]-(e:Event) DELETE r'''
db_connection.exec_query(qClearHL_DF)

[]

In [7]:
qClearStart = '''
:auto
MATCH (n)<-[r:START_CORR]-(e:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS
'''

db_connection.exec_query(qClearStart)
qClearEnd = '''
:auto
MATCH (n)<-[r:END_CORR]-(e:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS
'''
db_connection.exec_query(qClearEnd)

[]

In [8]:
qClearDuring = '''
:auto
MATCH (:HighLevelEvent)<-[r:DURING]-(:HighLevelEvent)
CALL (r) {
    DELETE r
} IN TRANSACTIONS
'''
db_connection.exec_query(qClearDuring)

[]

In [9]:
object_types = ["Change", "Incident", "Interaction"]

In [10]:
# infer start and end events for each object type
q_start_event = '''
    // Infer start event of an object
    MATCH (n) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
    MATCH (n)<-[c:CORR]-(e:Event) WHERE NOT ()-[:DF {id:n.sysId}]->(e)
    MERGE (n)<-[:START]-(e)
    RETURN count(n)
'''

q_end_event = '''
    // Infer start event of an object

    MATCH (n) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
    MATCH (n)<-[c:CORR]-(e:Event) WHERE NOT (e)-[:DF {id:n.sysId}]->()
    MERGE (n)<-[:END]-(e)

    RETURN count(n)
    '''

for object_type in object_types:
    print(f"Inferring start and end events for object type: {object_type}")
    q_start_event_result = Query(
        query_str=q_start_event,
        parameters={"objectType": object_type}
    )

    print(db_connection.exec_query(q_start_event_result))

    q_end_event_result = Query(
        query_str=q_end_event,
        parameters={"objectType": object_type}
    )
    print(db_connection.exec_query(q_end_event_result))

Inferring start and end events for object type: Change
[{'count(n)': 16611}]
[{'count(n)': 16611}]
Inferring start and end events for object type: Incident
[{'count(n)': 46368}]
[{'count(n)': 46368}]
Inferring start and end events for object type: Interaction
[{'count(n)': 146553}]
[{'count(n)': 146553}]


In [11]:
# materialize time between events as duration on DF edge
q_materialize_duration_on_df = '''
    :auto
    MATCH (e1:Event)-[df:DF]->(e2:Event) WHERE df.duration IS NULL
    CALL (e1, df, e2) {
        SET df.duration = duration.inSeconds(e1.timestamp, e2.timestamp).seconds
    } IN TRANSACTIONS
    RETURN count(*) AS total
'''

db_connection.exec_query(q_materialize_duration_on_df)

[{'total': 2100886}]

In [12]:
split_high_level_events = True

In [13]:
# build index for high-level events
qCreateIndexHLE_sys = f'''
    CREATE INDEX highLevelEventSysIdIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.sysId);
'''
db_connection.exec_query(qCreateIndexHLE_sys)

qCreateIndexHLE_start = f'''
    CREATE INDEX highLevelEventStartTimeIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.startTime);
'''
db_connection.exec_query(qCreateIndexHLE_start)
qCreateIndexHLE_end = f'''
    CREATE INDEX highLevelEventEndTimeIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.endTime);
'''
db_connection.exec_query(qCreateIndexHLE_end)


In [None]:
# build high-level events
qBuildHighLevelEvent = '''
    :auto
    MATCH (n) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
    MATCH (eStart:Event)-[st:START]->(n)<-[en:END]-(eEnd:Event)
    WITH DISTINCT eStart, eEnd
    CALL (eStart, eEnd) {
        MERGE (h:HighLevelEvent {sysId: "HLE_" + eStart.sysId + "_" + eEnd.sysId})
        ON CREATE SET h.startTime=eStart.timestamp, h.endTime=eEnd.timestamp, h.activity=$objectType
        MERGE (h)-[:START]->(eStart)
        MERGE (h)-[:END]->(eEnd)
        RETURN h
    } IN TRANSACTIONS
    RETURN count(h)
'''

In [14]:
for object_type in object_types:
    query = Query(
        query_str=qBuildHighLevelEvent,
        parameters={
            "objectType": object_type,
        }
    )

    result = db_connection.exec_query(query)
    print(f"{result[0]['count(h)']} high level events for {object_type}")

33217 high level events for Change
92736 high level events for Incident
293106 high level events for Interaction


In [16]:
# lift CORR to high-level events
qLiftCorrToHL = '''
    :auto
    MATCH (h:HighLevelEvent)-[:START|END]->(:Event)-[:CORR]->(n)
    WITH distinct h, n
    CALL (h, n) {
        MERGE(h) - [:CORR]->(n)
    } IN TRANSACTIONS
'''
db_connection.exec_query(qLiftCorrToHL)

[]

In [12]:
# build DF relations between high-level events
# step 1: build HL-DF relations between events

qBuildDfRelationsHLE = '''
    :auto
    // get only START/END events of each HL event correlated to CI_SC
    MATCH (ot:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) <-[:CORR]-(h:HighLevelEvent)-->(e:Event)
    // order by time
    WHERE e.timestamp IS NOT NULL
    WITH o, e ORDER BY e.timestamp, ID(e)
    WITH o, collect(e) AS events
    UNWIND range(0, size(events)-2) AS idx
    WITH events[idx] AS fromEv, events[idx+1] AS toEv, o
    CALL (fromEv,toEv,o) {
        // get their HL events correlated to the same object
        MATCH (fromEv)<--(h1:HighLevelEvent)-[:CORR]->(o)<-[:CORR]-(h2:HighLevelEvent)-->(toEv)
        WHERE h1 <> h2
        // and add their DF relation
        MERGE (h1)-[rel:DF {objectType:$objectType, id:o.sysId}]->(h2)
        RETURN rel
    } IN TRANSACTIONS
    RETURN count(rel) AS total
'''

In [None]:
for object_type in object_types + ["CI_SC"]:
    query = Query(
        query_str=qBuildDfRelationsHLE,
        parameters={
            "objectType": object_type,
        }
    )

    print(f"DF for {object_type}: {db_connection.exec_query(query)}")

DF for Change: [{'total': 16611}]
DF for Incident: [{'total': 46368}]
DF for Interaction: [{'total': 146553}]


In [14]:
# infer start and end events for each object type

object_type = "CI_SC"
print(f"Inferring start and end events for object type: {object_type}")

q_start_high_level_event_str = '''
:auto
// Infer start event of an object
MATCH (ot:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (n)
CALL (n) {
    MATCH (n) <- [:CORR] - (e:HighLevelEvent)
    RETURN e ORDER BY e.startTime, id(e) LIMIT 1
}
CALL (n, e) {
    MERGE (n)<-[:START_CORR]-(e)
} IN TRANSACTIONS
RETURN count(n)
'''

q_start_hl_event = Query(
    query_str=q_start_high_level_event_str,
    parameters={
        "objectType": object_type,
    }
)

db_connection.exec_query(q_start_hl_event)

Inferring start and end events for object type: CI_SC


[{'count(n)': 22137}]

In [3]:
# find overlapping high-level events and create DURING relations
# TODO FIGURE OUT!!!!!
qFindContainedHLEvents = '''
:auto
    MATCH (h1:HighLevelEvent) - [df:DF] -> (h2:HighLevelEvent) - [:DF {id: df.id}] -> (h1)
    WHERE h1 <> h2 AND h1.startTime < h2.startTime AND h2.endTime < h1.endTime
      CALL (h1, h2) {
        MERGE (h2)-[:DURING]->(h1)
    } IN TRANSACTIONS
    RETURN count(h2) AS total
'''
db_connection.exec_query(qFindContainedHLEvents)

[{'total': 18823}]

In [None]:
# find overlapping high-level events and create DURING relations

qFindContainedHLEvents = '''
:auto
    MATCH (h1:HighLevelEvent) - [df:DF] -> (h2:HighLevelEvent) - [df2:DF {id: df.id}] -> (h1)
    WHERE h1 <> h2 AND h1.startTime < h2.startTime AND h2.endTime < h1.endTime
      CALL (h1, h2) {
        MERGE (h2)-[:DURING]->(h1)
    } IN TRANSACTIONS
    RETURN count(h2) AS total
'''
db_connection.exec_query(qFindContainedHLEvents)

In [None]:
# lift DF relations from events to high-level events
qLiftDfRelations = '''
    :auto
    MATCH (h1:HighLevelEvent)-[:END]->(e1:Event)-[df:DF]->(e2:Event)<-[:START]-(h2:HighLevelEvent)
    CALL (h1, df, h2) {
        MERGE (h1)-[df2:DF]->(h2)
        ON CREATE SET df2 = properties(df)
    }
    YIELD total
    RETURN total
'''
result = db_connection.exec_query(qLiftDfRelations)
print(result)



In [None]:
# lift DF relations from events to high-level events
with db_connection.driver.get_session(database=db_connection.db_name) as session:
    qLiftDfRelations = f'''
        MATCH (h1:HighLevelEvent)-[:END]->(e1:Event)-[df:DF]->(e2:Event)<-[:START]-(h2:HighLevelEvent)
        CALL {{
            WITH h1,df,h2
            MERGE (h1)-[df2:DF]->(h2) ON CREATE SET df2 = properties(df)
        }} IN TRANSACTIONS OF 1000 ROWS
        RETURN count(*) AS total
    '''
    result = session.run(qLiftDfRelations)
    result.consume()
    print(result.single)

<neo4j._sync.work.result.Result object at 0x000002754A4B4DD0>


In [None]:
# find overlapping high-level events and create DURING relations
with db_connection.driver.get_session(database=db_connection.db_name) as session:
    qFindContainedHLEvents = f'''
        MATCH (h1:HighLevelEvent)-[c1:CORR]->(n:CI_SC)<-[c2:CORR]-(h2:HighLevelEvent) WHERE h1 <> h2
        WHERE h1.startTime < h2.startTime AND h2.endTime < h1.endTime
        CALL {{
            WITH h1,h2
            MERGE (h2)-[:DURING]->(h1)
        }} IN TRANSACTIONS OF 1000 ROWS
        RETURN count(*) AS total
    '''
    result = session.run(qFindContainedHLEvents)
    result.consume()
    print(result.single)