In [1]:
from pathlib import Path
from promg.modules.db_management import DBManagement
from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

import yaml

In [2]:
case_study = 'bpic14'

conf_path = Path(case_study, 'config.yaml')
config = yaml.safe_load(open(conf_path))

print(f"These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

config = Configuration.init_conf_with_config_file(conf_path)
db_connection = DatabaseConnection.set_up_connection(config=config)

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


In [4]:
# clear previously created nodes and relations
# start/end relations
qClearStart = f'''MATCH (n)<-[r:START]-(e:Event) DELETE r'''
db_connection.exec_query(qClearStart)
qClearEnd = f'''MATCH (n)<-[r:END]-(e:Event) DELETE r'''
db_connection.exec_query(qClearEnd)
# high-level events
qClearStartHLE = f'''MATCH (n)<-[r:START]-(e:HighLevelEvent) DELETE r'''
db_connection.exec_query(qClearStartHLE)
qClearEndHLE = f'''MATCH (n)<-[r:END]-(e:HighLevelEvent) DELETE r'''
db_connection.exec_query(qClearEndHLE)
qClearHighLevelEvent = f'''MATCH (h:HighLevelEvent) DETACH DELETE h'''
db_connection.exec_query(qClearHighLevelEvent)
qClearHL_DF = f'''MATCH (:Event)-[r:HL_DF]-(e:Event) DELETE r'''
db_connection.exec_query(qClearHL_DF)

[]

In [10]:
# fix DF relations for incidents
qDeleteDFIncidents = f'''
    MATCH (e1:Event)-[df:DF {{objectType:"Incident"}}]->(e2:Event)
    DELETE df
'''
db_connection.exec_query(qDeleteDFIncidents)

with db_connection.driver.get_session(database=db_connection.db_name) as session:
    qBuildDFIncidents = f'''
        MATCH (e:Event)-[:CORR]->(o:Incident)
        WHERE e.timestamp IS NOT NULL
        WITH o, e ORDER BY e.timestamp, ID(e)
        WITH o, collect(e) AS events
        UNWIND range(0, size(events)-2) AS idx
        WITH events[idx] AS fromEv, events[idx+1] AS toEv, o.sysId AS objSysId
        MERGE (fromEv)-[rel:DF {{objectType:"Incident", id:objSysId}}]->(toEv)
    '''
    result = session.run(qBuildDFIncidents)
    result.consume()
    print(str(result.single))

<bound method Result.single of <neo4j._sync.work.result.Result object at 0x00000221C9219910>>


In [13]:
# infer start and end events for each object type
objectTypes = ["Change", "Incident", "Interaction"]
for objectType in objectTypes:
    print(f"Inferring start and end events for object type: {objectType}")

    qStartEvent = f'''
    // Infer start event of an object
    MATCH (n:{objectType})
    MATCH (n)<-[c:CORR]-(e:Event) WHERE NOT ()-[:DF {{id:n.sysId}}]->(e)
    MERGE (n)<-[:START]-(e)
    '''
    result = db_connection.exec_query(qStartEvent)
    print(result)

    qEndEvent = f'''
    // Infer start event of an object
    MATCH (n:{objectType})
    MATCH (n)<-[c:CORR]-(e:Event) WHERE NOT (e)-[:DF {{id:n.sysId}}]->()
    MERGE (n)<-[:END]-(e)
    '''
    db_connection.exec_query(qEndEvent)
    print(result)

Inferring start and end events for object type: Change
[]
[]
Inferring start and end events for object type: Incident
[]
[]
Inferring start and end events for object type: Interaction
[]
[]


In [12]:
# materialize time between events as duration on DF edge
with db_connection.driver.get_session(database=db_connection.db_name) as session:
    qMaterializeDurationonDF = f'''
        MATCH (e1:Event)-[df:DF]->(e2:Event) WHERE df.duration IS NOT NULL
        CALL {{
            WITH e1,e2,df
            SET df.duration = duration.inSeconds(e1.timestamp, e2.timestamp).seconds    
        }} IN TRANSACTIONS OF 1000 ROWS
        RETURN count(*) AS total
    '''
    result = session.run(qMaterializeDurationonDF)
    result.consume()
    print(str(result.single))

<bound method Result.single of <neo4j._sync.work.result.Result object at 0x00000221C97381D0>>


In [15]:
# build index for high-level events
qCreateIndexHLE_sys = f'''
    CREATE INDEX highLevelEventSysIdIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.sysId);
'''
db_connection.exec_query(qCreateIndexHLE_sys) 
qCreateIndexHLE_start = f'''
    CREATE INDEX highLevelEventStartTimeIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.startTime);
'''
db_connection.exec_query(qCreateIndexHLE_start) 
qCreateIndexHLE_end = f'''
    CREATE INDEX highLevelEventEndTimeIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.endTime);
'''
db_connection.exec_query(qCreateIndexHLE_end) 

# build high-level events
qBuildHighLevelEvent = f'''
    MATCH (eStart:Event)-[st:START]->(n)<-[en:END]-(eEnd:Event) 
    MERGE (h:HighLevelEvent {{sysId: eStart.sysId + "_" + eEnd.sysId}})
    ON CREATE SET h.startTime=eStart.timestamp, h.endTime=eEnd.timestamp, h.activity=labels(n)[0]
    MERGE (h)-[:START]->(eStart)
    MERGE (h)-[:END]->(eEnd)
'''
db_connection.exec_query(qBuildHighLevelEvent)

[]

In [16]:
# lift CORR to high-level events
qLiftCorrToHL = f'''
    MATCH (h:HighLevelEvent)-[:START]->(eStart:Event)-[:CORR]->(n)
    MERGE (h)-[:CORR]->(n)
'''
db_connection.exec_query(qLiftCorrToHL)

[]

In [18]:
# build DF relations between high-level events
# step 1: build HL-DF relations between events
with db_connection.driver.get_session(database=db_connection.db_name) as session:
    qBuildDfRelationsHLE = f'''
        // get only START/END events of each HL event correlated to CI_SC
        MATCH (o:CI_SC)<-[:CORR]-(h:HighLevelEvent)-->(e:Event)
        // order by time
        WHERE e.timestamp IS NOT NULL
        WITH o, e ORDER BY e.timestamp, ID(e)
        WITH o, collect(e) AS events
        UNWIND range(0, size(events)-2) AS idx
        WITH events[idx] AS fromEv, events[idx+1] AS toEv, o
        CALL {{
            // for each pair of succeeding START/END events
            WITH fromEv,toEv,o
            //MERGE (fromEv)-[rel:HL_DF {{objectType:"CI_SC", id:o.sysId}}]->(toEv)
            // get their HL events correlated to the same object
            MATCH (fromEv)<--(h1:HighLevelEvent)-[:CORR]->(o)<-[:CORR]-(h2:HighLevelEvent)-->(toEv) WHERE h1 <> h2
            // and add their DF relation
            MERGE (h1)-[rel:DF {{objectType:"CI_SC", id:o.sysId}}]->(h2)
        }} IN TRANSACTIONS OF 10000 ROWS
        RETURN count(*) AS total
    '''
    result = session.run(qBuildDfRelationsHLE)
    result.consume()
    print(str(result.single))

<bound method Result.single of <neo4j._sync.work.result.Result object at 0x00000221C8C224D0>>


In [None]:
# build DF relations between high-level events
with db_connection.driver.get_session(database=db_connection.db_name) as session:
    qBuildDfRelationsHLE = f'''
        // get only START/END events of each HL event correlated to CI_SC
        MATCH (o:CI_SC)<-[:CORR]-(h:HighLevelEvent)-->(e:Event)
        // order by time
        WHERE e.timestamp IS NOT NULL
        WITH o, e ORDER BY e.timestamp, ID(e)
        WITH o, collect(e) AS events
        UNWIND range(0, size(events)-2) AS idx
        WITH events[idx] AS fromEv, events[idx+1] AS toEv, o
        CALL {{
            // for each pair of succeeding START/END events
            WITH fromEv,toEv,o
            // get their HL events correlated to the same object
            MATCH (fromEv)<--(h1:HighLevelEvent)-[:CORR]->(o)<-[:CORR]-(h2:HighLevelEvent)-->(toEv) WHERE h1 <> h2
            // and add their DF relation
            MERGE (h1)-[rel:DF {{objectType:"CI_SC", id:o.sysId}}]->(h2)
        }} IN TRANSACTIONS OF 10000 ROWS
        RETURN count(*) AS total
    '''
    result = session.run(qBuildDfRelationsHLE)
    result.consume()
    print(str(result.single))

KeyboardInterrupt: 

In [None]:
# lift DF relations from events to high-level events
qLiftDfRelations = f'''
    call apoc.periodic.iterate(
    'MATCH (h1:HighLevelEvent)-[:END]->(e1:Event)-[df:DF]->(e2:Event)<-[:START]-(h2:HighLevelEvent)
     RETURN h1,df,h2',
    'MERGE (h1)-[df2:DF]->(h2) ON CREATE SET df2 = properties(df)',
    {{batchSize:1000, parallel:false}})
    YIELD total
    RETURN total
'''
result = db_connection.exec_query(qLiftDfRelations)
print(result)



In [None]:
# lift DF relations from events to high-level events
with db_connection.driver.get_session(database=db_connection.db_name) as session:
    qLiftDfRelations = f'''
        MATCH (h1:HighLevelEvent)-[:END]->(e1:Event)-[df:DF]->(e2:Event)<-[:START]-(h2:HighLevelEvent)
        CALL {{
            WITH h1,df,h2
            MERGE (h1)-[df2:DF]->(h2) ON CREATE SET df2 = properties(df)
        }} IN TRANSACTIONS OF 1000 ROWS
        RETURN count(*) AS total
    '''
    result = session.run(qLiftDfRelations)
    result.consume()
    print(result.single)

<neo4j._sync.work.result.Result object at 0x000002754A4B4DD0>


In [None]:
# find overlapping high-level events and create DURING relations
with db_connection.driver.get_session(database=db_connection.db_name) as session:
    qFindContainedHLEvents = f'''
        MATCH (h1:HighLevelEvent)-[c1:CORR]->(n:CI_SC)<-[c2:CORR]-(h2:HighLevelEvent) WHERE h1 <> h2
        WHERE h1.startTime < h2.startTime AND h2.endTime < h1.endTime
        CALL {{
            WITH h1,h2
            MERGE (h2)-[:DURING]->(h1)
        }} IN TRANSACTIONS OF 1000 ROWS
        RETURN count(*) AS total
    '''
    result = session.run(qFindContainedHLEvents)
    result.consume()
    print(result.single)