In [1]:
import matplotlib.pyplot as plt

from pathlib import Path

from promg.modules.db_management import DBManagement
from tabulate import tabulate
import yaml

from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

import numpy as np
import pandas as pd
import seaborn as sns

pd.set_option('display.width', 2000)
%matplotlib inline


In [2]:
case_study = 'bpic14'
load = False
infer_high_level_events = False


In [3]:
conf_path = Path(case_study, 'config.yaml')
config = yaml.safe_load(open(conf_path))

print(f"These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


In [4]:
config = Configuration.init_conf_with_config_file(conf_path)
db_connection = DatabaseConnection.set_up_connection(config=config)

# Check whether the RELATED_INCIDENT has CI_SC as foreign key.
Be more precise, Ava!

In [5]:
query_check_related = '''
    MATCH (int:Interaction) - [:RELATED_INCIDENT] -> (inc:Incident)
    MATCH (int) - [:AFFECTED_CI_SC] -> (ci_sc_int:CI_SC)
    MATCH (inc) - [:AFFECTED_CI_SC] -> (ci_sc_inc:CI_SC)
    RETURN ci_sc_int = ci_sc_inc, count(inc)
'''

pd.DataFrame(db_connection.exec_query(query_check_related))

Unnamed: 0,ci_sc_int = ci_sc_inc,count(inc)
0,True,49006
1,False,1733


In [6]:
query_check_related = '''
    MATCH (int:Interaction) - [:RELATED_INCIDENT] -> (inc:Incident)
    MATCH (int) - [:AFFECTED_CI_SC] -> (ci_sc_int:CI_SC)
    MATCH (inc) - [:AFFECTED_CI_SC] -> (ci_sc_inc:CI_SC)
    WHERE ci_sc_int <> ci_sc_inc
    RETURN int.sysId, inc.sysId order by inc.sysId
'''

pd.DataFrame(db_connection.exec_query(query_check_related))

Unnamed: 0,int.sysId,inc.sysId
0,SD0000479,IM0000310
1,SD0000504,IM0000328
2,SD0000584,IM0000391
3,SD0004713,IM0000391
4,SD0025853,IM0000393
...,...,...
1728,SD0145941,IM0046710
1729,SD0145961,IM0046710
1730,SD0146005,IM0046805
1731,SD0146833,IM0046857


Not all are related

# Check for PATTERNS

Self-contained objects
e.g. (Change Start) -> (Change End) for the same Change for single CI_SC

In [7]:
query = '''
MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle1)
MATCH (hle) - [:CORR] -> (ci_sc:CI_SC)
MATCH (hle) - [:DF {objectType: 'CI_SC', id: ci_sc.sysId}] -> (hle1)
RETURN ci_sc.sysId as sysId, hle.activity as activity, count(hle) as contained ORDER BY sysId

'''

result = pd.DataFrame(db_connection.exec_query(query))
result

Unnamed: 0,sysId,activity,contained
0,#N/B_WBS000284,Interaction,12
1,ACS000001_WBS000252,Incident,1
2,ADB000001_WBS000253,Change,2
3,ADB000002_WBS000253,Change,1
4,ADB000003_WBS000253,Change,2
...,...,...,...
13376,ZOS000029_WBS000199,Change,1
13377,ZOS000030_WBS000199,Change,1
13378,ZOS000031_WBS000199,Change,1
13379,ZOS000032_WBS000199,Change,1


In [8]:
query = '''
MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle1)
MATCH (hle) - [:CORR] -> (ci_sc:CI_SC)
RETURN ci_sc.sysId as sysId, hle.activity as activity, count(hle) as events ORDER BY sysId

'''

result_events = pd.DataFrame(db_connection.exec_query(query))
result_events

Unnamed: 0,sysId,activity,events
0,#N/B_WBS000284,Interaction,16
1,ACS000001_WBS000252,Incident,1
2,ACS000001_WBS000252,Interaction,1
3,ADB000001_WBS000253,Change,2
4,ADB000002_WBS000253,Change,1
...,...,...,...
16108,ZOS000029_WBS000199,Change,1
16109,ZOS000030_WBS000199,Change,1
16110,ZOS000031_WBS000199,Change,1
16111,ZOS000032_WBS000199,Change,1


In [9]:
combined_result = result.merge(result_events, on=['sysId', 'activity'])
combined_result['%'] = round(combined_result['contained']/combined_result['events']*100, 2)
combined_result

Unnamed: 0,sysId,activity,contained,events,%
0,#N/B_WBS000284,Interaction,12,16,75.0
1,ACS000001_WBS000252,Incident,1,1,100.0
2,ADB000001_WBS000253,Change,2,2,100.0
3,ADB000002_WBS000253,Change,1,1,100.0
4,ADB000003_WBS000253,Change,2,2,100.0
...,...,...,...,...,...
13376,ZOS000029_WBS000199,Change,1,1,100.0
13377,ZOS000030_WBS000199,Change,1,1,100.0
13378,ZOS000031_WBS000199,Change,1,1,100.0
13379,ZOS000032_WBS000199,Change,1,1,100.0


In [10]:
# Pivot the DataFrame
pivot_df = combined_result.pivot(
    index=['sysId'],
    columns='activity',
    values=['contained', 'events', '%']
).reset_index()

columns = [pivot_df.columns[0]]
for object_type in ['Change', 'Incident', 'Interaction']:
    columns.extend([('contained', object_type), ('events', object_type), ('%', object_type)])

pivot_df = pivot_df[columns]
pivot_df = pivot_df.fillna(0)
pivot_df

Unnamed: 0_level_0,sysId,contained,events,%,contained,events,%,contained,events,%
activity,Unnamed: 1_level_1,Change,Change,Change,Incident,Incident,Incident,Interaction,Interaction,Interaction
0,#N/B_WBS000284,0.0,0.0,0.0,0.0,0.0,0.0,12.0,16.0,75.0
1,ACS000001_WBS000252,0.0,0.0,0.0,1.0,1.0,100.0,0.0,0.0,0.0
2,ADB000001_WBS000253,2.0,2.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
3,ADB000002_WBS000253,1.0,1.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ADB000003_WBS000253,2.0,2.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
12267,ZOS000029_WBS000199,1.0,1.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
12268,ZOS000030_WBS000199,1.0,1.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
12269,ZOS000031_WBS000199,1.0,1.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
12270,ZOS000032_WBS000199,1.0,1.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0


In [81]:
query = '''
MATCH (hle:HighLevelEvent) - [:DF {objectType: hle.activity}] -> (activity_hle:HighLevelEvent)
MATCH (hle) - [:DF {objectType: 'CI_SC'}] -> (other_hle)
MATCH (hle) - [:CORR] -> (n) - [:IS_OF_TYPE] - (ot:ObjectType {objectType: hle.activity})
WHERE activity_hle <> other_hle
SET n.self_contained = False
RETURN hle.activity as activity, count(distinct n) as not_self_contained
'''

result_not_contained_objects = pd.DataFrame(db_connection.exec_query(query))
result_not_contained_objects

Unnamed: 0,activity,not_self_contained
0,Incident,40396
1,Interaction,79423
2,Change,6233


In [82]:
query = '''
MATCH (hle:HighLevelEvent) - [:CORR] -> (n) - [:IS_OF_TYPE] - (ot:ObjectType {objectType: hle.activity})
WHERE n.self_contained IS NULL OR n.self_contained = True
SET n.self_contained = True
RETURN hle.activity as activity, count(distinct n) as self_contained
'''

result_contained_objects = pd.DataFrame(db_connection.exec_query(query))
result_contained_objects

Unnamed: 0,activity,self_contained
0,Incident,5972
1,Interaction,67130
2,Change,10378


In [85]:
result_objects = result_contained_objects.merge(result_not_contained_objects, on='activity')
result_objects['sum_total'] = result_objects['self_contained'] + result_objects['not_self_contained']
result_objects['% contained'] = round(result_objects['self_contained']/result_objects['sum_total']*100,2)
result_objects

Unnamed: 0,activity,self_contained,not_self_contained,sum_total,% contained
0,Incident,5972,40396,46368,12.88
1,Interaction,67130,79423,146553,45.81
2,Change,10378,6233,16611,62.48


# Overlapping activities
e.g. (Start Incident) -> (Start Interaction) -> (End Incident) -> (End Interaction)
for single CI_SC

In [88]:
query = '''
MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle3)
MATCH (hle2) - [:DF {objectType: hle2.activity}] -> (hle4)
MATCH (hle:HighLevelEvent) - [df:DF  {objectType: 'CI_SC'}] -> (hle2:HighLevelEvent)
- [:DF  {objectType: 'CI_SC', id:df.id}] -> (hle3:HighLevelEvent)
- [:DF  {objectType: 'CI_SC', id:df.id}] -> (hle4:HighLevelEvent)
MATCH (hle) - [:CORR] -> (n) - [:IS_OF_TYPE] - (:ObjectType {objectType: hle.activity})
MATCH (hle2) - [:CORR] -> (n2) - [:IS_OF_TYPE] - (:ObjectType {objectType: hle2.activity})
SET n.overlapped = True
SET n2.overlapped = True
RETURN hle.activity as activity, hle2.activity as second_activity, count(distinct n) as overlapped
'''

overlapped_objects = pd.DataFrame(db_connection.exec_query(query))
overlapped_objects

Unnamed: 0,activity,second_activity,overlapped
0,Interaction,Incident,3863
1,Incident,Incident,14
2,Change,Incident,1
3,Incident,Interaction,14
4,Interaction,Interaction,2241
5,Change,Interaction,2
6,Change,Change,131
7,Interaction,Change,5


In [89]:
query = '''
MATCH (hle:HighLevelEvent) - [:CORR] -> (n) - [:IS_OF_TYPE] - (ot:ObjectType {objectType: hle.activity})
WHERE n.overlapped IS NULL OR n.overlapped = False
SET n.overlapped = False
RETURN hle.activity as activity, count(distinct n) as not_overlapped
'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,activity,not_overlapped
0,Incident,42462
1,Interaction,138187
2,Change,16344


In [92]:
query = '''
MATCH (n) <- [:CORR] - (:HighLevelEvent)
MATCH (n) - [:IS_OF_TYPE] - (ot:ObjectType)
WHERE ot.objectType IN ['Change', 'Incident', 'Interaction']
RETURN  ot.objectType as objectType, n.self_contained as self_contained, n.overlapped as overlapped, count(n) as count ORDER BY objectType, self_contained, overlapped
'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,objectType,self_contained,overlapped,count
0,Change,False,False,11932
1,Change,False,True,534
2,Change,True,False,20751
3,Incident,False,False,72980
4,Incident,False,True,7812
5,Incident,True,False,11944
6,Interaction,False,False,142114
7,Interaction,False,True,16732
8,Interaction,True,False,134260


# Enclosing objects
e.g. (Start Incident) -> (Start Interaction) -> (End Interaction) -> (End Incident)
for single CI_SC

In [109]:
enclosing_objects = []

for i in range(1, 20):
    query = '''
    MATCH (hle) - [:DF {objectType: hle.activity}] -> (hle4)
    MATCH (hle2) - [:DF {objectType: hle2.activity}] -> (hle3)
    MATCH (hle:HighLevelEvent) - [df:DF  {objectType: 'CI_SC'}] -> (hle2:HighLevelEvent)
    - [:DF*$i  {objectType: 'CI_SC', id:df.id}] -> (hle3:HighLevelEvent)
    - [:DF  {objectType: 'CI_SC', id:df.id}] -> (hle4:HighLevelEvent)
    MATCH (hle) - [:CORR] -> (n) - [:IS_OF_TYPE] - (:ObjectType {objectType: hle.activity})
    SET n.enclosing = True
    RETURN hle.activity as activity, hle2.activity as second_activity, count(distinct n) as enclosing
    '''

    query = Query(
        query_str=query,
        template_string_parameters={'i': i}
    )

    enclosing_object = db_connection.exec_query(query)
    enclosing_objects.extend(enclosing_object)
    print(f"Fixed {i} iteration, found {sum([object['enclosing'] for object in enclosing_object])} objects")

result = pd.DataFrame(enclosing_objects)


Fixed 1 iteration, found 8804 objects
Fixed 2 iteration, found 807 objects
Fixed 3 iteration, found 1755 objects
Fixed 4 iteration, found 322 objects
Fixed 5 iteration, found 921 objects
Fixed 6 iteration, found 226 objects
Fixed 7 iteration, found 603 objects
Fixed 8 iteration, found 172 objects
Fixed 9 iteration, found 434 objects
Fixed 10 iteration, found 128 objects
Fixed 11 iteration, found 351 objects
Fixed 12 iteration, found 152 objects
Fixed 13 iteration, found 276 objects
Fixed 14 iteration, found 115 objects
Fixed 15 iteration, found 251 objects
Fixed 16 iteration, found 100 objects
Fixed 17 iteration, found 204 objects
Fixed 18 iteration, found 81 objects
Fixed 19 iteration, found 164 objects


In [110]:
result = pd.pivot_table(
    result,
    index=['activity', 'second_activity'],
    aggfunc='sum'
)
result

Unnamed: 0_level_0,Unnamed: 1_level_0,enclosing
activity,second_activity,Unnamed: 2_level_1
Change,Change,379
Change,Incident,1
Change,Interaction,41
Incident,Change,31
Incident,Incident,86
Incident,Interaction,858
Interaction,Change,7
Interaction,Incident,10655
Interaction,Interaction,3808


In [111]:
query = '''
MATCH (n) <- [:CORR] - (:HighLevelEvent)
MATCH (n) - [:IS_OF_TYPE] - (ot:ObjectType)
WHERE ot.objectType IN ['Change', 'Incident', 'Interaction']
RETURN  ot.objectType as objectType, n.self_contained as self_contained, n.overlapped as overlapped, n.enclosing as enclosing, count(n) as count ORDER BY objectType, self_contained, overlapped
'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,objectType,self_contained,overlapped,enclosing,count
0,Change,False,False,,11102
1,Change,False,False,True,830
2,Change,False,True,,528
3,Change,False,True,True,6
4,Change,True,False,,20751
5,Incident,False,False,,71030
6,Incident,False,False,True,1950
7,Incident,False,True,,7812
8,Incident,True,False,,11944
9,Interaction,False,False,,113174


# CREATE CONTAINMENT GRAPH

In [None]:
# build index for high-level events
qCreateIndexHLE_sys = f'''
    CREATE INDEX higherLevelEventSysIdIndex IF NOT EXISTS FOR (h:HigherLevelEvent) ON (h.sysId);
'''
db_connection.exec_query(qCreateIndexHLE_sys)

qCreateIndexHLE_start = f'''
    CREATE INDEX higherLevelEventStartTimeIndex IF NOT EXISTS FOR (h:HigherLevelEvent) ON (h.startTime);
'''
db_connection.exec_query(qCreateIndexHLE_start)
qCreateIndexHLE_end = f'''
    CREATE INDEX higherLevelEventEndTimeIndex IF NOT EXISTS FOR (h:HigherLevelEvent) ON (h.endTime);
'''
db_connection.exec_query(qCreateIndexHLE_end)


In [16]:
query = '''
:auto
    MATCH (hle_start:HighLevelEvent) - [:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
    WHERE NOT EXISTS ((hle_start) <- [:CONTAINS] - (:HigherLevelEvent))
    CALL (hle_start, hle_end) {
        CREATE (hler:HigherLevelEvent {sysId: "HLE_" + hle_start.sysId + "_" + hle_end.sysId, activity: hle_start.activity, startTime: hle_start.timestamp, endTime: hle_end.timestamp})
        CREATE (hler) - [:CONTAINS] -> (hle_start)
        CREATE (hler) - [:CONTAINS] -> (hle_end)
    } IN TRANSACTIONS
    RETURN hle_start.activity, count(hle_start)
'''
db_connection.exec_query(query)

[{'hle_start.activity': 'Change', 'count(hle_start)': 16606},
 {'hle_start.activity': 'Incident', 'count(hle_start)': 46368},
 {'hle_start.activity': 'Interaction', 'count(hle_start)': 146553}]

In [98]:
query = '''
:auto
MATCH (hler:HigherLevelEvent) - [:CONTAINS] -> (hle_start) - [df_act:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
MATCH (hle_prev) - [df:DF {objectType: 'CI_SC'}] -> (hle_start)
MATCH (hle_prev) - [df_act_other {objectType: hle_prev.activity}] - (hle_prev2)
WITH hler, hle_start, hle_end, hle_prev, hle_prev2, df, CASE
// hle_prev.timestamp <= hle_start.timestamp per definition (and hle_prev2 cannot be between hle_prev and hle_start, so no hle_prev < hle_prev2 < hle_start)
WHEN hle_prev2.timestamp <= hle_prev.timestamp THEN 'precedes' // both events happen before hle: hle_prev2 < hle_prev < hle_start < hle_end
// hle_prev_2 happens after hle_prev, and therefore after hle_start
WHEN hle_end.timestamp <= hle_prev2.timestamp THEN 'contains' // hle is contained in hle_prev: hle_prev < hle_start < hle_end < hle_prev2
WHEN hle_prev2.timestamp < hle_end.timestamp THEN 'overlaps' // hle overlaps with hle_prev: hle_prev < hle_start < hle_prev2 < hle_end
ELSE 'undefined'
END AS transition_type
MATCH (prev_hler:HigherLevelEvent) - [:CONTAINS] -> (hle_prev)
CALL (prev_hler, hler, df, transition_type) {
    MERGE (prev_hler) - [new_df:DF {objectType: 'CI_SC', id: df.id}] -> (hler)
    SET new_df.type = transition_type
} IN TRANSACTIONS
RETURN transition_type, count(prev_hler)
'''

db_connection.exec_query(query)

[{'transition_type': 'precedes', 'count(prev_hler)': 117323},
 {'transition_type': 'contains', 'count(prev_hler)': 51146},
 {'transition_type': 'overlaps', 'count(prev_hler)': 38219}]

[{'transition_type': 'contains', 'count(next_hler)': 67049},
 {'transition_type': 'overlaps', 'count(next_hler)': 30161}]

[]

In [33]:
query = '''
:auto
MATCH (hler1:HigherLevelEvent) - [df:DF {objectType: 'CI_SC'}] -> (hler2:HigherLevelEvent)
RETURN hler1.activity as first_activity, hler2.activity as second_activity, df.type as type, count(df) as count order by type, count desc
'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,first_activity,second_activity,type,count
0,Interaction,Incident,contains,18329
1,Incident,Interaction,contains,15302
2,Interaction,Interaction,contains,14254
3,Incident,Incident,contains,1552
4,Change,Change,contains,1058
5,Incident,Change,contains,348
6,Change,Interaction,contains,207
7,Interaction,Change,contains,87
8,Change,Incident,contains,9
9,Interaction,Incident,overlaps,18059


In [80]:
query = '''
:auto
MATCH (hler1:HigherLevelEvent) - [df:DF {objectType: 'CI_SC'}] -> (hler2:HigherLevelEvent)
MATCH (ci_sc:CI_SC {sysId:df.id})
RETURN ci_sc.type as type, ci_sc.subtype as subtype, count(distinct ci_sc) as num_ci
'''
total_result = pd.DataFrame(db_connection.exec_query(query))
total_result

Unnamed: 0,type,subtype,num_ci
0,networkcomponents,Switch,225
1,application,Server Based Application,592
2,software,System Software,223
3,database,RAC Service,80
4,computer,Linux Server,602
...,...,...,...
82,storage,Virtual Tape Server,1
83,computer,NonStop Harddisk,2
84,computer,NonStop Storage,1
85,#N/B,#N/B,1


In [81]:
query = '''
:auto
MATCH (hler1:HigherLevelEvent) - [df:DF {objectType: 'CI_SC'}] -> (hler2:HigherLevelEvent)
WHERE hler1.activity <> 'Change' AND hler2.activity = 'Change' AND df.type = 'contains'
WITH hler1, df, hler2
MATCH (ci_sc:CI_SC {sysId: df.id})
RETURN ci_sc.type as type, ci_sc.subtype as subtype, count(distinct ci_sc) as contained
'''
result = pd.DataFrame(db_connection.exec_query(query))
# result
total_result = total_result.merge(result, on=['type', 'subtype'])
total_result

Unnamed: 0,type,subtype,num_ci,contained
0,application,Server Based Application,592,70
1,software,System Software,223,2
2,computer,Linux Server,602,1
3,software,Automation Software,22,2
4,application,Web Based Application,112,25
5,computer,Windows Server,907,5
6,application,Desktop Application,273,8
7,networkcomponents,Network Component,176,1
8,hardware,DataCenterEquipment,95,4
9,networkcomponents,Protocol,2,1


In [40]:
query = '''
:auto
MATCH (ci_sc:CI_SC )
RETURN count(ci_sc)
'''
db_connection.exec_query(query)

[{'count(ci_sc)': 15307}]

Unnamed: 0,type,subtype,num_ci
0,networkcomponents,Switch,225
1,application,Server Based Application,592
2,software,System Software,223
3,database,RAC Service,80
4,computer,Linux Server,602
...,...,...,...
82,storage,Virtual Tape Server,1
83,computer,NonStop Harddisk,2
84,computer,NonStop Storage,1
85,#N/B,#N/B,1


In [82]:
query = '''
:auto
MATCH (hler1:HigherLevelEvent) - [df:DF {objectType: 'CI_SC'}] -> (hler2:HigherLevelEvent)
WHERE hler1.activity = 'Change' AND hler2.activity <> 'Change' AND df.type = 'precedes'
WITH hler1, df, hler2
MATCH (ci_sc:CI_SC {sysId: df.id})
RETURN ci_sc.type as type, ci_sc.subtype as subtype, count(distinct ci_sc) as precedes
'''
result = pd.DataFrame(db_connection.exec_query(query))

# result
total_result = total_result.merge(result, on=['type', 'subtype'])



In [85]:
total_result

Unnamed: 0,type,subtype,num_ci,contained,precedes
0,application,Server Based Application,592,70,179
1,software,System Software,223,2,6
2,computer,Linux Server,602,1,12
3,software,Automation Software,22,2,2
4,application,Web Based Application,112,25,40
5,computer,Windows Server,907,5,58
6,application,Desktop Application,273,8,34
7,networkcomponents,Network Component,176,1,4
8,hardware,DataCenterEquipment,95,4,11
9,networkcomponents,Protocol,2,1,2


In [94]:
query = '''
:auto
MATCH (hler1:HigherLevelEvent) - [df:DF {objectType: 'CI_SC'}] -> (hler2:HigherLevelEvent)
WHERE hler1.activity <> 'Change' AND hler2.activity = 'Change' AND df.type = 'contains'
WITH hler1, df, hler2
MATCH (ci_sc:CI_SC {sysId: df.id})
MATCH (ci_sc) <- [:CORR] - (hle:HighLevelEvent)
RETURN ci_sc.type as type, ci_sc.subtype as subtype, ci_sc.sysId as id, count(distinct hle) as event_count, count(distinct df) as contains
'''
contains_result = pd.DataFrame(db_connection.exec_query(query))
contains_result

Unnamed: 0,type,subtype,id,event_count,contains
0,software,Automation Software,ASW000010_WBS000284,206,9
1,application,Web Based Application,WBA000133_WBS000073,16764,21
2,application,Web Based Application,WBA000017_WBS000138,324,2
3,application,Server Based Application,SBA000678_WBS000255,46,1
4,application,Server Based Application,SBA000296_WBS000053,126,2
...,...,...,...,...,...
150,application,Web Based Application,WBA000126_WBS000148,232,1
151,application,Web Based Application,WBA000092_WBS000073,474,1
152,computer,Windows Server,WSR000834_WBS000102,10,1
153,application,Web Based Application,WBA000005_WBS000302,28,1


In [95]:
query = '''
:auto
MATCH (hler1:HigherLevelEvent) - [df:DF {objectType: 'CI_SC'}] -> (hler2:HigherLevelEvent)
WHERE hler1.activity = 'Change' AND hler2.activity <> 'Change' AND df.type = 'precedes'
WITH hler1, df, hler2
MATCH (ci_sc:CI_SC {sysId: df.id})
MATCH (ci_sc) <- [:CORR] - (hle:HighLevelEvent)
RETURN ci_sc.type as type, ci_sc.subtype as subtype, ci_sc.sysId as id,  count(distinct hle) as event_count, count(distinct df) as precedes
'''
precedes_result = pd.DataFrame(db_connection.exec_query(query))
merged_result = contains_result.merge(precedes_result, on=['type', 'subtype', 'id', 'event_count'])
merged_result


Unnamed: 0,type,subtype,id,event_count,contains,precedes
0,software,Automation Software,ASW000010_WBS000284,206,9,10
1,application,Web Based Application,WBA000133_WBS000073,16764,21,29
2,application,Web Based Application,WBA000017_WBS000138,324,2,6
3,application,Server Based Application,SBA000678_WBS000255,46,1,3
4,application,Server Based Application,SBA000296_WBS000053,126,2,7
...,...,...,...,...,...,...
135,application,Web Based Application,WBA000126_WBS000148,232,1,7
136,application,Web Based Application,WBA000092_WBS000073,474,1,3
137,computer,Windows Server,WSR000834_WBS000102,10,1,1
138,application,Web Based Application,WBA000005_WBS000302,28,1,1


In [119]:
query = '''
:auto
MATCH (hle:HighLevelEvent) - [:CORR] -> (ci_sc:CI_SC {sysId: $sysId})
WITH ci_sc, hle, EXISTS((hle) - [:DF {objectType: hle.activity} ] -> (:HighLevelEvent)) as is_open
 WITH ci_sc, hle, CASE is_open
WHEN TRUE THEN hle.activity + ' OPEN'
ELSE hle.activity + ' CLOSE' END AS activity
RETURN ci_sc.sysId as `case:concept:name`, activity as `concept:name`, hle.timestamp as `time:timestamp` ORDER BY hle.timestamp
'''

event_log_query = Query(
    query_str=query,
    parameters={'sysId': 'WBA000082_WBS000055'}
)

result = pd.DataFrame(db_connection.exec_query(event_log_query))
result

Unnamed: 0,case:concept:name,concept:name,time:timestamp
0,WBA000082_WBS000055,Interaction OPEN,2013-08-22T12:56:00.000000000+01:00
1,WBA000082_WBS000055,Incident OPEN,2013-08-22T13:09:42.000000000+01:00
2,WBA000082_WBS000055,Interaction OPEN,2013-08-26T15:13:00.000000000+01:00
3,WBA000082_WBS000055,Incident OPEN,2013-08-26T15:38:26.000000000+01:00
4,WBA000082_WBS000055,Interaction OPEN,2013-09-11T13:53:00.000000000+01:00
...,...,...,...
495,WBA000082_WBS000055,Incident CLOSE,2014-03-28T10:01:57.000000000+01:00
496,WBA000082_WBS000055,Interaction CLOSE,2014-03-28T10:02:00.000000000+01:00
497,WBA000082_WBS000055,Interaction CLOSE,2014-03-28T12:58:00.000000000+01:00
498,WBA000082_WBS000055,Incident CLOSE,2014-03-28T12:58:36.000000000+01:00


In [135]:
query = '''
:auto
MATCH (hle:HighLevelEvent) - [:CORR] -> (ci_sc:CI_SC)
WITH ci_sc, hle, EXISTS((hle) - [:DF {objectType: hle.activity} ] -> (:HighLevelEvent)) as is_open
 WITH ci_sc, hle, CASE is_open
WHEN TRUE THEN 'OPEN'
ELSE 'CLOSE' END AS open_type
SET hle.open_type = open_type
'''

result = pd.DataFrame(db_connection.exec_query(query))
result

In [None]:
# TODO COUNT NUMBER OF OPEN ITEMS OVER TIME

In [132]:
query = '''
:auto
MATCH (hle:HighLevelEvent) - [:CORR] -> (ci_sc:CI_SC {sysId: $sysId})
MATCH (hle) - [:DF {objectType: hle.activity} ] -> (hle2:HighLevelEvent)
WITH hle.activity as activity, duration.inSeconds(hle.timestamp, hle2.timestamp).seconds*1.0/60 as difference_minutes, count(hle) as count ORDER BY activity, difference_minutes
RETURN activity, min(difference_minutes), avg(difference_minutes), max(difference_minutes)
'''

event_log_query = Query(
    query_str=query,
    parameters={'sysId': 'WBA000082_WBS000055'}
)

result = pd.DataFrame(db_connection.exec_query(event_log_query))
result

Unnamed: 0,activity,min(difference_minutes),avg(difference_minutes),max(difference_minutes)
0,Change,0.0,483.333333,2880.0
1,Incident,8.35,62638.191237,230599.883333
2,Interaction,1.0,56000.455357,230627.0


In [133]:
event_log_query = Query(
    query_str=query,
    parameters={'sysId': 'ASW000010_WBS000235'}
)

result = pd.DataFrame(db_connection.exec_query(event_log_query))
result

Unnamed: 0,activity,min(difference_minutes),avg(difference_minutes),max(difference_minutes)
0,Change,0.0,1771.0,11422.0
1,Incident,6.8,5674.163636,60025.033333
2,Interaction,3.0,3688.028571,60028.0


In [131]:
query = '''
:auto
MATCH (ci_sc:CI_SC)
WITH distinct ci_sc
CALL (ci_sc) {
    MATCH (all_hle:HighLevelEvent) - [:CORR] -> (ci_sc)
    RETURN count(all_hle) as count}
MATCH (hle:HighLevelEvent) - [:CORR] -> (ci_sc:CI_SC)
MATCH (hle) - [:DF {objectType: hle.activity} ] -> (hle2:HighLevelEvent)
WITH ci_sc.sysId as id, hle.activity as activity, duration.inSeconds(hle.timestamp, hle2.timestamp).seconds*1.0/60 as difference_minutes, count ORDER BY activity, difference_minutes
RETURN id, activity, min(difference_minutes), avg(difference_minutes), max(difference_minutes), count order by id, activity
'''

event_log_query = Query(
    query_str=query,
    parameters={'sysId': 'WBA000082_WBS000055'}
)

result = pd.DataFrame(db_connection.exec_query(event_log_query))
result

Latest transaction was rolled back
This was your latest query: 

MATCH (hle:HighLevelEvent) - [:CORR] -> (ci_sc:CI_SC)
CALL (ci_sc) {
    MATCH (all_hle:HighLevelEvent) - [:CORR] -> (ci_sc)
    RETURN count(all_hle) as count}
MATCH (hle) - [:DF {objectType: hle.activity} ] -> (hle2:HighLevelEvent)
WITH ci_sc.sysId as id, hle.activity as activity, duration.inSeconds(hle.timestamp, hle2.timestamp).seconds*1.0/60 as difference_minutes, count ORDER BY activity, difference_minutes
RETURN id, activity, min(difference_minutes), avg(difference_minutes), max(difference_minutes), count order by id, activity

{neo4j_code: Neo.TransientError.General.DatabaseUnavailable} {message: The transaction has been terminated. Retry your operation in a new transaction, and you should see a successful result. The database is not currently available to serve your request, refer to the database logs for more details. Retrying your request at a later time may succeed. } {gql_status: 50N42} {gql_status_descriptio

In [114]:
import pandas as pd
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualizer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.visualization.petri_net import visualizer as pn_visualizer
import pm4py

In [115]:
import pm4py
log = log_converter.apply(result)

In [118]:
pm4py.visualization.dotted_chart.visualizer.apply(log, attributes=['case:concept:name'])

TypeError: list indices must be integers or slices, not list

In [97]:
query = '''
:auto
MATCH (hler:HigherLevelEvent) - [:CONTAINS] -> (hle_start) - [df_act:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
MATCH  (hle_end) - [df:DF {objectType: 'CI_SC'}] -> (hle_next)
MATCH (next_hler:HigherLevelEvent) - [:CONTAINS] -> (hle_next)
WHERE NOT EXISTS ((hler) - [:DF] -> (next_hler))

MATCH (hle_next) - [df_act_other {objectType: hle_next.activity}] - (hle_next2)
WITH hler, next_hler, hle_start, hle_end, hle_next, hle_next2, df, CASE
//  hle_end.timestamp <= hle_next.timestamp  per definition (and hle_next2 cannot be between hle_next and hle_end, so no hle_end < hle_next2 < hle_next)
WHEN hle_next.timestamp <= hle_next2.timestamp THEN 'precedes' // both events happen after hle:  hle_start < hle_end < hle_next < hle_next2
// hle_next2 happens before hle_next, and therefore after hle_end
WHEN hle_next2.timestamp <= hle_start.timestamp THEN 'contains' // hle is contained in hle_next: hle_next_2 < hle_start < hle_end < hle_next
WHEN hle_start.timestamp < hle_next2.timestamp THEN 'overlaps' // hle overlaps with hle_next: hle_start < hle_next2 < hle_end < hle_next
ELSE 'undefined'
END AS transition_type

CALL (next_hler, hler, df, transition_type) {
    MERGE (hler) - [new_df:DF {objectType: 'CI_SC', id: df.id}] -> (next_hler)
    SET new_df.type = transition_type
} IN TRANSACTIONS
RETURN transition_type, count(next_hler)
'''

db_connection.exec_query(query)

[{'transition_type': 'contains', 'count(next_hler)': 67049},
 {'transition_type': 'overlaps', 'count(next_hler)': 30161}]

In [121]:
query = '''
:auto
MATCH (hler:HigherLevelEvent) - [:CONTAINS] -> (hle_start) - [df_act:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
MATCH (hle_end) - [df:DF {objectType: 'CI_SC'}] -> (hle_prev_end)
MATCH (hle_prev_start) - [df_act_other {objectType: hle_prev_start.activity}] -> (hle_prev_end)
WITH hler, hle_start, hle_end, hle_prev_start, hle_prev_end, df_act, df, df_act_other, CASE
WHEN hle_prev_start.timestamp <= hle_start.timestamp THEN 'contains'
ELSE 'overlaps' END AS type
MATCH (next_hler:HigherLevelEvent) - [:CONTAINS] -> (hle_prev_start)
CALL (hler, next_hler, df, type) {
    MERGE (hler) - [new_df:DF {objectType: 'CI_SC', id: df.id}] -> (next_hler)
    SET new_df.type = type
} IN TRANSACTIONS

'''
db_connection.exec_query(query)

[]

In [123]:
query = '''
:auto
MATCH (hler:HigherLevelEvent) - [:CONTAINS] -> (hle_start) - [df_act:DF {objectType: hle_start.activity}] -> (hle_end:HighLevelEvent)
MATCH (hle_end) - [df:DF {objectType: 'CI_SC'}] -> (hle_prev_start)
MATCH (hle_prev_start) - [df_act_other {objectType: hle_prev_start.activity}] -> (hle_prev_end)
MATCH (next_hler:HigherLevelEvent) - [:CONTAINS] -> (hle_prev_start)
CALL (hler, next_hler, df) {
    MERGE (hler) - [new_df:DF {objectType: 'CI_SC', id: df.id}] -> (next_hler)
    SET new_df.type = 'subsequent'
} IN TRANSACTIONS

'''
db_connection.exec_query(query)

[]