In [5]:
import matplotlib.pyplot as plt

from pathlib import Path

from promg.modules.db_management import DBManagement
from tabulate import tabulate
import yaml

from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

import numpy as np
import pandas as pd
import seaborn as sns

pd.set_option('display.width', 2000)
%matplotlib inline


In [6]:
case_study = 'bpic14'
load = False


In [7]:
conf_path = Path(case_study, 'config.yaml')
config = yaml.safe_load(open(conf_path))

print(f"These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


In [8]:
config = Configuration.init_conf_with_config_file(conf_path)
db_connection = DatabaseConnection.set_up_connection(config=config)

In [9]:
if load:
    # Import the original modeled data
    %run./ 0_analysis_and_model.ipynb

In [10]:
# infer start and end events for each object type
objectTypes = ["Change", "Incident", "Interaction"]
for objectType in objectTypes:
    print(f"Inferring start and end events for object type: {objectType}")

    qStartEvent = f'''
    // Infer start event of an object
    MATCH (n:{objectType})
    MATCH (n)<-[c:CORR]-(e:Event) WHERE NOT ()-[:DF {{id:n.sysId}}]->(e)
    MERGE (n)<-[:START]-(e)
    '''
    result = db_connection.exec_query(qStartEvent)

    qEndEvent = f'''
    // Infer start event of an object
    MATCH (n:{objectType})
    MATCH (n)<-[c:CORR]-(e:Event) WHERE NOT (e)-[:DF {{id:n.sysId}}]->()
    MERGE (n)<-[:END]-(e)
    '''
    db_connection.exec_query(qEndEvent)

Inferring start and end events for object type: Change
Inferring start and end events for object type: Incident
Inferring start and end events for object type: Interaction


In [7]:
# Explore start and end activities for each type

# infer start and end events for each object type
print(f"Inferring start and end events activities per object type")

qNumberStart = f'''
// Infer start event of an object
MATCH (n)<-[:START]-(e:Event)
RETURN labels(n)[0] as label, e.activity as start_activity, count(e) as number_of_start_events order by label, number_of_start_events DESC
'''
start_result = pd.DataFrame(db_connection.exec_query(qNumberStart))


qNumberEnd = f'''
// Infer start event of an object
MATCH (n)<-[:END]-(e:Event)
RETURN labels(n)[0] as label, e.activity as end_activity, count(e) as number_of_end_events order by label, number_of_end_events DESC
'''
end_result = pd.DataFrame(db_connection.exec_query(qNumberEnd))

Inferring start and end events activities per object type


In [8]:
start_result

Unnamed: 0,label,start_activity,number_of_start_events
0,Change,Start,16610
1,Change,End,1
2,Incident,Opened,46368
3,Incident,Update,15
4,Incident,Assignment,11
5,Incident,Closed,10
6,Incident,Status Change,10
7,Incident,Open,10
8,Incident,Operator Update,6
9,Incident,Caused By CI,4


In [9]:
end_result

Unnamed: 0,label,end_activity,number_of_end_events
0,Change,End,16607
1,Change,Start,4
2,Incident,Closed,27515
3,Incident,Caused By CI,15524
4,Incident,Quality Indicator Fixed,2654
5,Incident,Quality Indicator,675
6,Incident,Update,20
7,Incident,Assignment,14
8,Incident,Status Change,10
9,Incident,Open,10


In [10]:
# CHECK the cardinality between incident and interaction

query = '''
MATCH (int:Interaction)
OPTIONAL MATCH (int) - [:RELATED_INCIDENT] -> (inc:Incident)
WITH int, count(inc) as number_of_related_incidents
RETURN number_of_related_incidents, count(int) ORDER BY number_of_related_incidents
'''
pd.DataFrame(db_connection.exec_query(query))



Unnamed: 0,number_of_related_incidents,count(int)
0,0,95250
1,1,51303


All Interactions have at most one incident

In [11]:
# CHECK the cardinality between incident and interaction

query = '''
MATCH (inc:Incident)
OPTIONAL MATCH (int:Interaction) - [:RELATED_INCIDENT] -> (inc)
WITH inc, count(int) as number_of_related_interactions
RETURN number_of_related_interactions, count(inc) ORDER BY number_of_related_interactions
'''
pd.DataFrame(db_connection.exec_query(query))



Unnamed: 0,number_of_related_interactions,count(inc)
0,0,1008
1,1,42532
2,2,2625
3,3,422
4,4,110
5,5,31
6,6,17
7,7,7
8,8,5
9,9,8


In [12]:
query = '''
    MATCH (sc:CI_SC)
    RETURN  EXISTS ((sc) <- [:AFFECTED_CI_SC] - (:Interaction)) as has_interaction,
            EXISTS ((sc) <- [:AFFECTED_CI_SC] - (:Change)) as has_change,
            EXISTS ((sc) <- [:AFFECTED_CI_SC] - (:Incident)) as has_incident,
    count(sc) as count ORDER BY has_interaction, has_change, has_incident
'''

result = pd.DataFrame(db_connection.exec_query(query))
result

Unnamed: 0,has_interaction,has_change,has_incident,count
0,False,False,False,1025
1,False,False,True,472
2,False,True,False,9584
3,False,True,True,32
4,True,False,False,1542
5,True,False,True,2001
6,True,True,False,83
7,True,True,True,568


In [23]:

import plotly.express as px

fig = px.treemap(
    result,
    path = [px.Constant("all"), "has_change", "has_interaction", "has_incident"],
    values = "count"
)
fig.update_traces(root_color="lightgrey")
fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
fig.show();

In [41]:
query = '''
    MATCH (sc:CI_SC) - [:RELATED_CI] - (ci:ConfigurationItem)
    RETURN  collect(distinct ci.ciType + ' ' + ci.ciSubtype) as types,
            size(collect(distinct ci.ciType + ' ' + ci.ciSubtype)) as size,
            EXISTS ((sc) <- [:AFFECTED_CI_SC] - (:Interaction)) as has_interaction,
            EXISTS ((sc) <- [:AFFECTED_CI_SC] - (:Change)) as has_change,
            EXISTS ((sc) <- [:AFFECTED_CI_SC] - (:Incident)) as has_incident,
    count(sc) as count ORDER BY has_interaction, has_change, has_incident
'''

result = pd.DataFrame(db_connection.exec_query(query))
result

Unnamed: 0,types,size,has_interaction,has_change,has_incident,count
0,"[application Standard Application, subapplicat...",40,False,False,False,1024
1,"[computer VDI, application Server Based Applic...",24,False,False,True,472
2,"[subapplication Web Based Application, applica...",77,False,True,False,9584
3,"[storage Controller, application Server Based ...",13,False,True,True,32
4,"[application Server Based Application, compute...",37,True,False,False,1542
5,"[application Server Based Application, subappl...",54,True,False,True,2001
6,"[storage Controller, application Server Based ...",17,True,True,False,83
7,"[hardware MigratieDummy, software Automation S...",47,True,True,True,568


In [47]:
# ARE INCIDENT + INTERACTION on the same CI_SC
# Look at timeline CI_SC!!
# Push into sequence, from one change to another.
# ORDERING OR SET (dependent on perspective CHANGE/INCIDENT).
# is there always one CI_SC picked or is a random distribution.
# changes over multiple CI_SC, Incident/Interaction on multiple CI_SC.

query = '''MATCH (int:Interaction) - [:RELATED_INCIDENT] -> (inc:Incident)
CALL (inc) {
    MATCH (e:Event) - [:CORR] -> (all_int:Interaction) - [:RELATED_INCIDENT] -> (inc)
    RETURN e as first_int_event ORDER BY e.timestamp LIMIT 1
}

CALL (inc) {
    MATCH (e:Event) - [:CORR] -> (inc)
    RETURN e as first_inc_event ORDER BY e.timestamp LIMIT 1
}

RETURN count(distinct inc) as number_of_inc, first_int_event.timestamp <= first_inc_event.timestamp as int_before_inc'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,number_of_inc,int_before_inc
0,45291,True
1,126,False


Most Incidents (99.7%) have a preceding Interaction.
Hypothesis, these incidents were caused by an interaction preceding the event collection.


Incidents are CAUSED by an interaction if there was no incident yet, but otherwise we refer to an already opened incident.


In [46]:

query = '''MATCH (int:Interaction) - [:RELATED_INCIDENT] -> (inc:Incident)
CALL (inc) {
    MATCH (e:Event) - [:CORR] -> (first_int:Interaction) - [:RELATED_INCIDENT] -> (inc)
    RETURN first_int, e as first_int_event ORDER BY e.timestamp LIMIT 1
}
RETURN inc.sysId, first_int.sysId, first_int_event LIMIT 10

'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,inc.sysId,first_int.sysId,first_int_event
0,IM0000299,SD0000456,"{'interactionId': 'SD0000456', 'activity': 'Op..."
1,IM0000301,SD0000459,"{'interactionId': 'SD0000459', 'activity': 'Op..."
2,IM0000303,SD0000458,"{'interactionId': 'SD0000458', 'activity': 'Op..."
3,IM0000304,SD0000462,"{'interactionId': 'SD0000462', 'activity': 'Op..."
4,IM0000305,SD0000466,"{'interactionId': 'SD0000466', 'activity': 'Op..."
5,IM0000306,SD0000467,"{'interactionId': 'SD0000467', 'activity': 'Op..."
6,IM0000307,SD0000471,"{'interactionId': 'SD0000471', 'activity': 'Op..."
7,IM0000308,SD0000477,"{'interactionId': 'SD0000477', 'activity': 'Op..."
8,IM0000308,SD0000477,"{'interactionId': 'SD0000477', 'activity': 'Op..."
9,IM0000309,SD0000481,"{'interactionId': 'SD0000481', 'activity': 'Op..."


In [31]:
query = '''MATCH (int:Interaction) - [:RELATED_INCIDENT] -> (inc:Incident)
CALL (inc) {
    MATCH (e:Event) - [:CORR] -> (int:Interaction) - [:RELATED_INCIDENT] -> (inc)
    RETURN e as first_int_event ORDER BY e.timestamp LIMIT 1
}

CALL (inc) {
    MATCH (e:Event) - [:CORR] -> (inc)
    RETURN e as first_inc_event ORDER BY e.timestamp LIMIT 1
}
WITH int, inc, first_int_event, first_inc_event
WHERE first_int_event.timestamp > first_inc_event.timestamp
RETURN inc.sysId, duration.inSeconds(first_inc_event.timestamp, first_int_event.timestamp).minutes as dif_minutes'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,inc.sysId,dif_minutes
0,IM0000320,35828
1,IM0000321,47362
2,IM0000321,47362
3,IM0000321,47362
4,IM0000322,38678
...,...,...
225,IM0046889,67
226,IM0046889,67
227,IM0046889,67
228,IM0046889,67
