In [49]:
import matplotlib.pyplot as plt

from pathlib import Path

from promg.modules.db_management import DBManagement
from tabulate import tabulate
import yaml

from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

import numpy as np
import pandas as pd
import seaborn as sns

pd.set_option('display.width', 2000)
%matplotlib inline


In [50]:
case_study = 'bpic14'
load = False
infer_high_level_events = False


In [51]:
conf_path = Path(case_study, 'config.yaml')
config = yaml.safe_load(open(conf_path))

print(f"These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


In [52]:
config = Configuration.init_conf_with_config_file(conf_path)
db_connection = DatabaseConnection.set_up_connection(config=config)

# Check whether the RELATED_INCIDENT has CI_SC as foreign key.
Be more precise, Ava!

In [53]:
query_check_related = '''
    MATCH (int:Interaction) - [:RELATED_INCIDENT] -> (inc:Incident)
    MATCH (int) - [:AFFECTED_CI_SC] -> (ci_sc_int:CI_SC)
    MATCH (inc) - [:AFFECTED_CI_SC] -> (ci_sc_inc:CI_SC)
    RETURN ci_sc_int = ci_sc_inc, count(inc)
'''

pd.DataFrame(db_connection.exec_query(query_check_related))

Unnamed: 0,ci_sc_int = ci_sc_inc,count(inc)
0,True,49006
1,False,1733


In [54]:
query_check_related = '''
    MATCH (int:Interaction) - [:RELATED_INCIDENT] -> (inc:Incident)
    MATCH (int) - [:AFFECTED_CI_SC] -> (ci_sc_int:CI_SC)
    MATCH (inc) - [:AFFECTED_CI_SC] -> (ci_sc_inc:CI_SC)
    WHERE ci_sc_int <> ci_sc_inc
    RETURN int.sysId, inc.sysId order by inc.sysId
'''

pd.DataFrame(db_connection.exec_query(query_check_related))

Unnamed: 0,int.sysId,inc.sysId
0,SD0000479,IM0000310
1,SD0000504,IM0000328
2,SD0000584,IM0000391
3,SD0004713,IM0000391
4,SD0025853,IM0000393
...,...,...
1728,SD0145941,IM0046710
1729,SD0145961,IM0046710
1730,SD0146005,IM0046805
1731,SD0146833,IM0046857


Not all are related

# Check for PATTERNS

Self-contained objects
e.g. (Change Start) -> (Change End) for the same Change for single CI_SC

In [55]:
query = '''
MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle1)
MATCH (hle) - [:CORR] -> (ci_sc:CI_SC)
MATCH (hle) - [:DF {objectType: 'CI_SC', id: ci_sc.sysId}] -> (hle1)
RETURN ci_sc.sysId as sysId, hle.activity as activity, count(hle) as contained ORDER BY sysId

'''

result = pd.DataFrame(db_connection.exec_query(query))
result

Unnamed: 0,sysId,activity,contained
0,#N/B_WBS000284,Interaction,12
1,ACS000001_WBS000252,Incident,1
2,ADB000003_WBS000253,Change,1
3,ADB000004_WBS000253,Change,3
4,ADB000006_WBS000253,Change,8
...,...,...,...
11786,ZOS000029_WBS000199,Change,1
11787,ZOS000030_WBS000199,Change,1
11788,ZOS000031_WBS000199,Change,1
11789,ZOS000032_WBS000199,Change,1


In [56]:
query = '''
MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle1)
MATCH (hle) - [:CORR] -> (ci_sc:CI_SC)
RETURN ci_sc.sysId as sysId, hle.activity as activity, count(hle) as events ORDER BY sysId

'''

result_events = pd.DataFrame(db_connection.exec_query(query))
result_events

Unnamed: 0,sysId,activity,events
0,#N/B_WBS000284,Interaction,16
1,ACS000001_WBS000252,Incident,1
2,ACS000001_WBS000252,Interaction,1
3,ADB000001_WBS000253,Change,2
4,ADB000002_WBS000253,Change,1
...,...,...,...
16108,ZOS000029_WBS000199,Change,1
16109,ZOS000030_WBS000199,Change,1
16110,ZOS000031_WBS000199,Change,1
16111,ZOS000032_WBS000199,Change,1


In [57]:
combined_result = result.merge(result_events, on=['sysId', 'activity'])
combined_result['%'] = round(combined_result['contained']/combined_result['events']*100, 2)
combined_result

Unnamed: 0,sysId,activity,contained,events,%
0,#N/B_WBS000284,Interaction,12,16,75.00
1,ACS000001_WBS000252,Incident,1,1,100.00
2,ADB000003_WBS000253,Change,1,2,50.00
3,ADB000004_WBS000253,Change,3,5,60.00
4,ADB000006_WBS000253,Change,8,12,66.67
...,...,...,...,...,...
11786,ZOS000029_WBS000199,Change,1,1,100.00
11787,ZOS000030_WBS000199,Change,1,1,100.00
11788,ZOS000031_WBS000199,Change,1,1,100.00
11789,ZOS000032_WBS000199,Change,1,1,100.00


In [64]:
# Pivot the DataFrame
pivot_df = combined_result.pivot(
    index=['sysId'],
    columns='activity',
    values=['contained', 'events', '%']
).reset_index()

columns = [pivot_df.columns[0]]
for object_type in ['Change', 'Incident', 'Interaction']:
    columns.extend([('contained', object_type), ('events', object_type), ('%', object_type)])

pivot_df = pivot_df[columns]
pivot_df = pivot_df.fillna(0)
pivot_df

Unnamed: 0_level_0,sysId,contained,events,%,contained,events,%,contained,events,%
activity,Unnamed: 1_level_1,Change,Change,Change,Incident,Incident,Incident,Interaction,Interaction,Interaction
0,#N/B_WBS000284,0.0,0.0,0.00,0.0,0.0,0.0,12.0,16.0,75.0
1,ACS000001_WBS000252,0.0,0.0,0.00,1.0,1.0,100.0,0.0,0.0,0.0
2,ADB000003_WBS000253,1.0,2.0,50.00,0.0,0.0,0.0,0.0,0.0,0.0
3,ADB000004_WBS000253,3.0,5.0,60.00,0.0,0.0,0.0,0.0,0.0,0.0
4,ADB000006_WBS000253,8.0,12.0,66.67,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
10757,ZOS000029_WBS000199,1.0,1.0,100.00,0.0,0.0,0.0,0.0,0.0,0.0
10758,ZOS000030_WBS000199,1.0,1.0,100.00,0.0,0.0,0.0,0.0,0.0,0.0
10759,ZOS000031_WBS000199,1.0,1.0,100.00,0.0,0.0,0.0,0.0,0.0,0.0
10760,ZOS000032_WBS000199,1.0,1.0,100.00,0.0,0.0,0.0,0.0,0.0,0.0


In [81]:
query = '''
MATCH (hle:HighLevelEvent) - [:DF {objectType: hle.activity}] -> (activity_hle:HighLevelEvent)
MATCH (hle) - [:DF {objectType: 'CI_SC'}] -> (other_hle)
MATCH (hle) - [:CORR] -> (n) - [:IS_OF_TYPE] - (ot:ObjectType {objectType: hle.activity})
WHERE activity_hle <> other_hle
SET n.self_contained = False
RETURN hle.activity as activity, count(distinct n) as not_self_contained
'''

result_not_contained_objects = pd.DataFrame(db_connection.exec_query(query))
result_not_contained_objects

Unnamed: 0,activity,not_self_contained
0,Incident,40396
1,Interaction,79423
2,Change,6233


In [82]:
query = '''
MATCH (hle:HighLevelEvent) - [:CORR] -> (n) - [:IS_OF_TYPE] - (ot:ObjectType {objectType: hle.activity})
WHERE n.self_contained IS NULL OR n.self_contained = True
SET n.self_contained = True
RETURN hle.activity as activity, count(distinct n) as self_contained
'''

result_contained_objects = pd.DataFrame(db_connection.exec_query(query))
result_contained_objects

Unnamed: 0,activity,self_contained
0,Incident,5972
1,Interaction,67130
2,Change,10378


In [85]:
result_objects = result_contained_objects.merge(result_not_contained_objects, on='activity')
result_objects['sum_total'] = result_objects['self_contained'] + result_objects['not_self_contained']
result_objects['% contained'] = round(result_objects['self_contained']/result_objects['sum_total']*100,2)
result_objects

Unnamed: 0,activity,self_contained,not_self_contained,sum_total,% contained
0,Incident,5972,40396,46368,12.88
1,Interaction,67130,79423,146553,45.81
2,Change,10378,6233,16611,62.48


# Overlapping activities
e.g. (Start Incident) -> (Start Interaction) -> (End Incident) -> (End Interaction)
for single CI_SC

In [88]:
query = '''
MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle3)
MATCH (hle2) - [:DF {objectType: hle2.activity}] -> (hle4)
MATCH (hle:HighLevelEvent) - [df:DF  {objectType: 'CI_SC'}] -> (hle2:HighLevelEvent)
- [:DF  {objectType: 'CI_SC', id:df.id}] -> (hle3:HighLevelEvent)
- [:DF  {objectType: 'CI_SC', id:df.id}] -> (hle4:HighLevelEvent)
MATCH (hle) - [:CORR] -> (n) - [:IS_OF_TYPE] - (:ObjectType {objectType: hle.activity})
MATCH (hle2) - [:CORR] -> (n2) - [:IS_OF_TYPE] - (:ObjectType {objectType: hle2.activity})
SET n.overlapped = True
SET n2.overlapped = True
RETURN hle.activity as activity, hle2.activity as second_activity, count(distinct n) as overlapped
'''

overlapped_objects = pd.DataFrame(db_connection.exec_query(query))
overlapped_objects

Unnamed: 0,activity,second_activity,overlapped
0,Interaction,Incident,3863
1,Incident,Incident,14
2,Change,Incident,1
3,Incident,Interaction,14
4,Interaction,Interaction,2241
5,Change,Interaction,2
6,Change,Change,131
7,Interaction,Change,5


In [89]:
query = '''
MATCH (hle:HighLevelEvent) - [:CORR] -> (n) - [:IS_OF_TYPE] - (ot:ObjectType {objectType: hle.activity})
WHERE n.overlapped IS NULL OR n.overlapped = False
SET n.overlapped = False
RETURN hle.activity as activity, count(distinct n) as not_overlapped
'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,activity,not_overlapped
0,Incident,42462
1,Interaction,138187
2,Change,16344


In [92]:
query = '''
MATCH (n) <- [:CORR] - (:HighLevelEvent)
MATCH (n) - [:IS_OF_TYPE] - (ot:ObjectType)
WHERE ot.objectType IN ['Change', 'Incident', 'Interaction']
RETURN  ot.objectType as objectType, n.self_contained as self_contained, n.overlapped as overlapped, count(n) as count ORDER BY objectType, self_contained, overlapped
'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,objectType,self_contained,overlapped,count
0,Change,False,False,11932
1,Change,False,True,534
2,Change,True,False,20751
3,Incident,False,False,72980
4,Incident,False,True,7812
5,Incident,True,False,11944
6,Interaction,False,False,142114
7,Interaction,False,True,16732
8,Interaction,True,False,134260


# Enclosing objects
e.g. (Start Incident) -> (Start Interaction) -> (End Interaction) -> (End Incident)
for single CI_SC

In [109]:
enclosing_objects = []

for i in range(1, 20):
    query = '''
    MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle4)
    MATCH (hle2) - [:DF {objectType: hle2.activity}] -> (hle3)
    MATCH (hle:HighLevelEvent) - [df:DF  {objectType: 'CI_SC'}] -> (hle2:HighLevelEvent)
    - [:DF*$i  {objectType: 'CI_SC', id:df.id}] -> (hle3:HighLevelEvent)
    - [:DF  {objectType: 'CI_SC', id:df.id}] -> (hle4:HighLevelEvent)
    MATCH (hle) - [:CORR] -> (n) - [:IS_OF_TYPE] - (:ObjectType {objectType: hle.activity})
    SET n.enclosing = True
    RETURN hle.activity as activity, hle2.activity as second_activity, count(distinct n) as enclosing
    '''

    query = Query(
        query_str=query,
        template_string_parameters={'i': i}
    )

    enclosing_object = db_connection.exec_query(query)
    enclosing_objects.extend(enclosing_object)
    print(f"Fixed {i} iteration, found {sum([object['enclosing'] for object in enclosing_object])} objects")

result = pd.DataFrame(enclosing_objects)


Fixed 1 iteration, found 8804 objects
Fixed 2 iteration, found 807 objects
Fixed 3 iteration, found 1755 objects
Fixed 4 iteration, found 322 objects
Fixed 5 iteration, found 921 objects
Fixed 6 iteration, found 226 objects
Fixed 7 iteration, found 603 objects
Fixed 8 iteration, found 172 objects
Fixed 9 iteration, found 434 objects
Fixed 10 iteration, found 128 objects
Fixed 11 iteration, found 351 objects
Fixed 12 iteration, found 152 objects
Fixed 13 iteration, found 276 objects
Fixed 14 iteration, found 115 objects
Fixed 15 iteration, found 251 objects
Fixed 16 iteration, found 100 objects
Fixed 17 iteration, found 204 objects
Fixed 18 iteration, found 81 objects
Fixed 19 iteration, found 164 objects


In [110]:
result = pd.pivot_table(
    result,
    index=['activity', 'second_activity'],
    aggfunc='sum'
)
result

Unnamed: 0_level_0,Unnamed: 1_level_0,enclosing
activity,second_activity,Unnamed: 2_level_1
Change,Change,379
Change,Incident,1
Change,Interaction,41
Incident,Change,31
Incident,Incident,86
Incident,Interaction,858
Interaction,Change,7
Interaction,Incident,10655
Interaction,Interaction,3808


In [111]:
query = '''
MATCH (n) <- [:CORR] - (:HighLevelEvent)
MATCH (n) - [:IS_OF_TYPE] - (ot:ObjectType)
WHERE ot.objectType IN ['Change', 'Incident', 'Interaction']
RETURN  ot.objectType as objectType, n.self_contained as self_contained, n.overlapped as overlapped, n.enclosing as enclosing, count(n) as count ORDER BY objectType, self_contained, overlapped
'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,objectType,self_contained,overlapped,enclosing,count
0,Change,False,False,,11102
1,Change,False,False,True,830
2,Change,False,True,,528
3,Change,False,True,True,6
4,Change,True,False,,20751
5,Incident,False,False,,71030
6,Incident,False,False,True,1950
7,Incident,False,True,,7812
8,Incident,True,False,,11944
9,Interaction,False,False,,113174


# CREATE CONTAINMENT GRAPH

In [None]:
# build index for high-level events
qCreateIndexHLE_sys = f'''
    CREATE INDEX higherLevelEventSysIdIndex IF NOT EXISTS FOR (h:HigherLevelEvent) ON (h.sysId);
'''
db_connection.exec_query(qCreateIndexHLE_sys)

qCreateIndexHLE_start = f'''
    CREATE INDEX higherLevelEventStartTimeIndex IF NOT EXISTS FOR (h:HigherLevelEvent) ON (h.startTime);
'''
db_connection.exec_query(qCreateIndexHLE_start)
qCreateIndexHLE_end = f'''
    CREATE INDEX higherLevelEventEndTimeIndex IF NOT EXISTS FOR (h:HigherLevelEvent) ON (h.endTime);
'''
db_connection.exec_query(qCreateIndexHLE_end)


In [117]:
query = '''
:auto
    MATCH (hle_start:HighLevelEvent) - [:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
    WHERE NOT EXISTS ((hle_start) <- [:CONTAINS] - (:HigherLevelEvent))
    CALL (hle_start, hle_end) {
        CREATE (hler:HigherLevelEvent {sysId: "HLE_" + hle_start.sysId + "_" + hle_end.sysId, activity: hle_start.activity, startTime: hle_start.timestamp, endTime: hle_end.timestamp})
        CREATE (hler) - [:CONTAINS] -> (hle_start)
        CREATE (hler) - [:CONTAINS] -> (hle_end)
    } IN TRANSACTIONS
    RETURN hle_start.activity, count(hle_start)
'''
db_connection.exec_query(query)

[{'hle_start.activity': 'Change', 'count(hle_start)': 7190},
 {'hle_start.activity': 'Incident', 'count(hle_start)': 55608},
 {'hle_start.activity': 'Interaction', 'count(hle_start)': 176121}]

In [None]:
query = '''
MATCH (hler:HigherLevelEvent) - [:CONTAINS] -> (hle_start) - [df_act:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
MATCH (hle_start) - [df:DF {objectType: 'CI_SC'}] -> (hle_end) - [df2:DF {objectType: 'CI_SC', id: df.id}] -> (new_hle_start:HighLevelEvent)
MATCH (new_hle_start)  - [new_df:DF {objectType: new_hle_start.activity}] -> (new_hle_end:HighLevelEvent)
MATCH (next_hler) - [:CONTAINS] -> (new_hle_start)
CALL (hler, next_hler, df) {
    MERGE (hler) - [:DF {type: 'subsequent', objectType: 'CI_SC', id: df.id}] -> (next_hler)
} IN TRANSACTIONS

'''
db_connection.exec_query(query)

In [120]:
query = '''
:auto
MATCH (hler:HigherLevelEvent) - [:CONTAINS] -> (hle_start) - [df_act:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
MATCH (hle_start) - [df:DF {objectType: 'CI_SC'}] -> (hle_other_start) WHERE hle_end <> hle_other_start
MATCH (hle_other_start) - [df_act_other {objectType: hle_other_start.activity}] -> (hle_other_end)
WITH hler, hle_start, hle_end, hle_other_start, hle_other_end, df_act, df, df_act_other, CASE
WHEN hle_other_end.timestamp <= hle_end.timestamp THEN 'contains'
ELSE 'overlaps' END AS type
MATCH (next_hler:HigherLevelEvent) - [:CONTAINS] -> (hle_other_start)
CALL (hler, next_hler, df, type) {
    MERGE (hler) - [new_df:DF {objectType: 'CI_SC', id: df.id}] -> (next_hler)
    SET new_df.type = type
} IN TRANSACTIONS

'''
db_connection.exec_query(query)

[]

In [121]:
query = '''
:auto
MATCH (hler:HigherLevelEvent) - [:CONTAINS] -> (hle_start) - [df_act:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
MATCH (hle_end) - [df:DF {objectType: 'CI_SC'}] -> (hle_other_end)
MATCH (hle_other_start) - [df_act_other {objectType: hle_other_start.activity}] -> (hle_other_end)
WITH hler, hle_start, hle_end, hle_other_start, hle_other_end, df_act, df, df_act_other, CASE
WHEN hle_other_start.timestamp <= hle_start.timestamp THEN 'contains'
ELSE 'overlaps' END AS type
MATCH (next_hler:HigherLevelEvent) - [:CONTAINS] -> (hle_other_start)
CALL (hler, next_hler, df, type) {
    MERGE (hler) - [new_df:DF {objectType: 'CI_SC', id: df.id}] -> (next_hler)
    SET new_df.type = type
} IN TRANSACTIONS

'''
db_connection.exec_query(query)

[]

In [123]:
query = '''
:auto
MATCH (hler:HigherLevelEvent) - [:CONTAINS] -> (hle_start) - [df_act:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
MATCH (hle_end) - [df:DF {objectType: 'CI_SC'}] -> (hle_other_start)
MATCH (hle_other_start) - [df_act_other {objectType: hle_other_start.activity}] -> (hle_other_end)
MATCH (next_hler:HigherLevelEvent) - [:CONTAINS] -> (hle_other_start)
CALL (hler, next_hler, df) {
    MERGE (hler) - [new_df:DF {objectType: 'CI_SC', id: df.id}] -> (next_hler)
    SET new_df.type = 'subsequent'
} IN TRANSACTIONS

'''
db_connection.exec_query(query)

[]