## Prepare Project
### Import Libraries and set up database connection

In [1]:
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

# Import pandas
import pandas as pd

pd.set_option('display.width', 2000)

# Import logging and suppress warnings
import logging

logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("pd").setLevel(logging.ERROR)

# Import Path
from pathlib import Path

# Import promg
from promg import Query

In [2]:
from util.db_helper_functions import get_db_connection, get_graph_statistics
from util.assign_types_functions import add_object_type_node
from util.enrichment_methods import materialize_objects, extend_relationships, build_df_edges

## Set up connection

In [3]:
conf_path = Path('bpic14', 'config.yaml')
db_connection = get_db_connection(conf_path)

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


# 3. Analysis

## Iteration 0: Filter Data based on Timestamps

_Note: We did explain this filtering in the paper as this is focused on data preparation and not so much on enrichment_

**Exploration**
Since events for the Incidents can come from two different data sources (`BPIC14Incident.csv` and `Detail_Incident_Activity.csv`) we check whether the start and end of the Incidents termporally align.

Every Incident should start with its opening which is recorded in `BPIC14Incident.csv`.
Then it should be followed by an `Open` event on the `:DetailIncident` level.

Let's check whether that is the case.

In [9]:
succeeding_events_q = '''
    MATCH (e1:Event {activity: 'Open'}) - [:IS_OF_TYPE] -> (et:EventType {eventType: 'IncidentEvent'})
    CALL (e1) {
        MATCH (e1) - [:CORR] -> (:Incident) <- [:CORR] - (e2)
        WHERE e1.timestamp <= e2.timestamp AND e1 <> e2
        MATCH (e2) - [:IS_OF_TYPE] -> (et2:EventType)
        RETURN e2, et2 ORDER BY e2.timestamp ASC LIMIT 1
    }
    RETURN et.eventType + ':' + e1.activity as first_event, et2.eventType + ':' + e2.activity as second_event, count(e1) as count order by count DESC'''

df = pd.DataFrame(db_connection.exec_query(succeeding_events_q))
print(f"Total number of events: {sum(df['count'])}")
df

Total number of events: 46606


Unnamed: 0,first_event,second_event,count
0,IncidentEvent:Open,IncidentActivityEvent:Open,46369
1,IncidentEvent:Open,IncidentActivityEvent:Assignment,60
2,IncidentEvent:Open,IncidentActivityEvent:Operator Update,46
3,IncidentEvent:Open,IncidentActivityEvent:Reassignment,42
4,IncidentEvent:Open,IncidentActivityEvent:Status Change,27
5,IncidentEvent:Open,IncidentActivityEvent:Update,22
6,IncidentEvent:Open,IncidentActivityEvent:Update from customer,18
7,IncidentEvent:Open,IncidentEvent:Resolve,4
8,IncidentEvent:Open,IncidentActivityEvent:Communication with customer,4
9,IncidentEvent:Open,IncidentActivityEvent:Analysis/Research,3


Indeed, most `(IncidentEvent:Open)` are followed by a `(IncidentActivityEvent:Open)`, however there are some 46606-46369=237 events that are not followed by such event.

Let's check the minimum timestamp of the preceding event. It could be that for some cases, the `(IncidentActivityEvent:Open)` is not recorded because it falls outside the recording period.

In [10]:
succeeding_events_timestamp_q = '''
    MATCH (e1:Event {activity: 'Open'}) - [:IS_OF_TYPE] -> (et:EventType {eventType: 'IncidentEvent'})
    CALL (e1) {
        MATCH (e1) - [:CORR] -> (:Incident) <- [:CORR] - (e2)
        WHERE e1.timestamp <= e2.timestamp AND e1 <> e2
        MATCH (e2) - [:IS_OF_TYPE] -> (et2:EventType)
        RETURN e2, et2 ORDER BY e2.timestamp, e2.activity ASC LIMIT 1
    }
    WITH e1, e2, et2
    WHERE et2.eventType = 'IncidentActivityEvent'
    WITH min(e2.timestamp) as first_occurrence, et2.eventType + ':' + e2.activity as event, count(e1) as count order by count DESC
    RETURN first_occurrence, event, count order by first_occurrence DESC'''

df = pd.DataFrame(db_connection.exec_query(succeeding_events_timestamp_q))
df

Unnamed: 0,first_occurrence,event,count
0,2013-08-19T09:59:53.000000000+01:00,IncidentActivityEvent:Open,46369
1,2013-07-15T09:58:13.000000000+01:00,IncidentActivityEvent:Reopen,1
2,2013-06-20T14:05:30.000000000+01:00,IncidentActivityEvent:Communication with customer,2
3,2013-05-23T12:50:01.000000000+01:00,IncidentActivityEvent:Description Update,3
4,2013-05-10T13:26:58.000000000+01:00,IncidentActivityEvent:Analysis/Research,4
5,2013-04-19T07:54:57.000000000+01:00,IncidentActivityEvent:Communication with vendor,1
6,2013-03-11T15:37:45.000000000+01:00,IncidentActivityEvent:Impact Change,2
7,2013-03-01T13:02:40.000000000+01:00,IncidentActivityEvent:External Vendor Assignment,3
8,2013-02-21T07:12:50.000000000+01:00,IncidentActivityEvent:Caused By CI,4
9,2013-02-11T15:08:15.000000000+01:00,IncidentActivityEvent:Update,12


The `(IncidentActivityEvent:Open)` occurs from 2013-08-19 and from then onwards, it is always the first event after the `(IncidentEvent:Open)`.

Let's check how many events we have before this date and after this date for any `(:Event)` node.


In [11]:
query_str = '''
        MATCH (e:Event) - [:IS_OF_TYPE] -> (et:EventType)
        RETURN et.eventType as eventType, date(e.timestamp) < date("2013-08-19") as before, count(e) as cnt ORDER BY eventType, before DESC
'''
result = pd.DataFrame(db_connection.exec_query(query_str))
table = pd.pivot_table(result, index=['eventType', 'before'], aggfunc="sum")
table['%'] = (round(table.cnt / table.groupby(level=0).cnt.transform("sum") * 100, 2)).astype(str) + '%'
print(table)

                                 cnt       %
eventType             before                
ChangeEvent           False    33254  99.62%
                      True       127   0.38%
IncidentActivityEvent False   461826  98.95%
                      True      4911   1.05%
IncidentEvent         False   137801  99.83%
                      True       237   0.17%
InteractionEvent      False   293561  99.85%
                      True       447   0.15%


Most events (99.4%) happen after 2013-08-19.

Let's check how many objects have at least one event before this date.


In [12]:
query = '''
        MATCH (o) - [:CORR] - (e:Event)
        MATCH (e) - [:IS_OF_TYPE] -> (et:EventType)
        WITH e, o, e.timestamp < dateTime("2013-08-19T09:59:53.000000000+01:00") as before_cutoff
        WITH o, collect(distinct before_cutoff) as before_cutoffs
        MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType)
        RETURN ot.objectType as objectType, True in before_cutoffs as before, count(distinct o) as cnt
    '''

df_result = pd.DataFrame(db_connection.exec_query(query))
table = pd.pivot_table(df_result, index=['objectType', 'before'], aggfunc="sum")
table['%'] = (round(table.cnt / table.groupby(level=0).cnt.transform("sum") * 100, 2)).astype(str) + '%'
print(table)



                       cnt       %
objectType  before                
Change      False    16611  99.51%
            True        82   0.49%
Incident    False    46378  99.49%
            True       238   0.51%
Interaction False   146553  99.69%
            True       451   0.31%


Most object (99.6%) have only events after 2013-08-19.

**Filtering**
From domain knowledge (and verified by the data) we see that most `(IncidentEvent:Open)` are followed by a `(IncidentActivityEvent:Open)`. The very few that don't have a first succeeding event before 2013-08-19.

We decide to remove those objects that have at least one event before said date, as this only affects a small amount of objects. We also remove all associated events.

In [13]:
results = []

count_query = '''
    MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - [] - (e) - [:IS_OF_TYPE] -> (et:EventType)
    WHERE ot.objectType in ['Incident', 'Interaction', 'Change']
    WITH e, ot, o, e.timestamp < dateTime("2013-08-19T09:59:53.000000000+01:00") as before_cutoff
    WITH ot, o, collect(distinct before_cutoff) as before_cutoffs
    WHERE True in before_cutoffs
    MATCH (o) - [] - (all_e) - [:IS_OF_TYPE] -> (et:EventType)
    RETURN ot.objectType as objectType, count(distinct o) as object_deleted, count(distinct all_e) as events_deleted
'''

df_deleted = pd.DataFrame(db_connection.exec_query(count_query))

delete_query_str = '''
    MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - [] - (e) - [:IS_OF_TYPE] -> (et:EventType)
    WHERE ot.objectType in ['Incident', 'Interaction', 'Change']
    WITH e, o, e.timestamp < dateTime("2013-08-19T09:59:53.000000000+01:00") as before_cutoff
    WITH o, collect(distinct before_cutoff) as before_cutoffs
    WHERE True in before_cutoffs
    MATCH (o) - [] - (all_e) - [:IS_OF_TYPE] -> (et:EventType)
    DETACH DELETE o
    DETACH DELETE all_e
'''

db_connection.exec_query(delete_query_str);

Let's also count the number of objects and events we kept.

In [14]:
kept_count_query = '''
    MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - []  - (e) - [:IS_OF_TYPE] -> (et:EventType)
    WHERE ot.objectType in  ['Incident', 'Interaction', 'Change']
    RETURN ot.objectType as objectType, count(distinct o) as objects_kept, count(distinct e) as events_kept
'''

df_kept = pd.DataFrame(db_connection.exec_query(kept_count_query))

In [15]:
df_both = df_deleted.set_index('objectType').join(df_kept.set_index('objectType'))
df_both['total_objects'] = df_both['objects_kept'] + df_both['object_deleted']
df_both['total_events'] = df_both['events_kept'] + df_both['events_deleted']
df_both['% events deleted'] = round(df_both['events_deleted'] / df_both['total_events'] * 100, 2)
df_both['% objects deleted'] = round(df_both['object_deleted'] / df_both['total_objects'] * 100, 2)
df_both = df_both[
    ['object_deleted', 'objects_kept', 'total_objects', '% objects deleted', 'events_deleted', 'events_kept',
     'total_events', '% events deleted']]
df_both

Unnamed: 0_level_0,object_deleted,objects_kept,total_objects,% objects deleted,events_deleted,events_kept,total_events,% events deleted
objectType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Incident,238,46378,46616,0.51,8691,596084,604775,1.44
Interaction,451,146553,147004,0.31,902,293106,294008,0.31
Change,82,16611,16693,0.49,164,33217,33381,0.49


## Iteration 1: Enrich with a dedicated CI-SC object

**Materialize CI-SC object**

To analyze the lifecycle of a CI, we first assessed its global uniqueness. Our domain model reveals a many-to-many relationship between CIs and SCs. Upon reflection, we found that CIs are locally unique within each SC, making the CI-SC pair the true object of interest. We materialized this relationship as a distinct object, CI-SC, using _Enrich Method 1: Materializing Objects_.

In [4]:
objects_to_materialize = {
    "CI_SC": [{
        "from_object": {
            "label": "ServiceComponent"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "attributes": {  # copy over attributes
                "ciType": "ciType",
                "ciSubtype": "ciSubtype"
            }
        },
        "relation_type": "CONTAINS"
    }]
}

In [5]:
materialize_objects(_db_connection=db_connection,
                    _objects_to_materialize=objects_to_materialize)


=== Materializing Relationships into Objects ===
→ 15327 CI_SC nodes created.
→ (:ObjectType {objectType: "CI_SC"}) created.


Results should show that we created 15327 `(:CI_SC)` nodes.

**Materialize O2O relationships**

We lift the relations from Incidents, Interactions, Changes _o_ to the newly materialized CI-SC pair _ci-sc_ if _o_ is related to both the _ci_ and _sc_ of which the _ci-sc_ pair is composed of using _Enrichment Method 3: Infer O2O_ relationship.

More specifically, we lift the `:AFFECTED_CI` and `:AFFECTED_SC` relationship into `:AFFECTED_CISC` and `:CAUSED_BY_CI` and `:CAUSED_BY_SC` into `:CAUSED_BY_CI_SC`.

In [6]:
o2o_relationships_to_extend = {
    "AFFECTED_CI_SC": [{
        "from_object": {
            "label": "Incident|Interaction|Change",
            "relationships": [
                {
                    "related_label": "ServiceComponent",
                    "related_object": "sc",
                    "relation_type": "AFFECTED_SC"
                },
                {
                    "related_label": "ConfigurationItem",
                    "related_object": "ci",
                    "relation_type": "AFFECTED_CI"
                }]
        },
        "to_object": {
            "label": "CI_SC",
            "relationships": [
                {
                    "related_label": "ServiceComponent",
                    "related_object": "sc",
                    "relation_type": "RELATED"
                },
                {
                    "related_label": "ConfigurationItem",
                    "related_object": "ci",
                    "relation_type": "RELATED"
                }]
        }
    }],
    "CAUSED_BY_CI_SC": [{
        "from_object": {
            "label": "Incident",
            "relationships": [
                {
                    "related_label": "ServiceComponent",
                    "related_object": "sc",
                    "relation_type": "CAUSED_BY_SC"
                },
                {
                    "related_label": "ConfigurationItem",
                    "related_object": "ci",
                    "relation_type": "CAUSED_BY_CI"
                }]
        },
        "to_object": {
            "label": "CI_SC",
            "relationships": [
                {
                    "related_label": "ServiceComponent",
                    "related_object": "sc",
                    "relation_type": "RELATED"
                },
                {
                    "related_label": "ConfigurationItem",
                    "related_object": "ci",
                    "relation_type": "RELATED"
                }]
        }
    }]
}

In [7]:
extend_relationships(db_connection, o2o_relationships_to_extend)

→ 222884 (:Incident|Interaction|Change) - [:AFFECTED_CI_SC] -> (:CI_SC) Relationship built
→ 42928 (:Incident) - [:CAUSED_BY_CI_SC] -> (:CI_SC) Relationship built


Results should show that we extended with
- 222884 `(:Incident|Interaction|Change) - [:AFFECTED_CI_SC] -> (:CI_SC)` Relationships
- 42928 `(:Incident) - [:CAUSED_BY_CI_SC] -> (:CI_SC)` Relationships

**Materialize E2O relationships**

Since Incidents, Interactions, and Changes relate to one or more CI-SCs, analysts must track CI-SCs across process steps to understand recurring disruptions and their impact on Changes. Isolated views of Interactions, Incidents, or Changes are insufficient. Thus, we linked these events directly to CI-SC objects using _Enrichment Method 2: Infer E2O relations_.

In [8]:
e2o_relationships_to_extend = {
    "CORR": [
        {
            "from_object": {
                "label": "Event",
                "relationships": [
                    {
                        "related_label": "Change",
                        "related_object": "change",
                        "relation_type": "CORR"
                    }]
            },
            "to_object": {
                "label": "CI_SC",
                "relationships": [
                    {
                        "related_label": "Change",
                        "related_object": "change",
                        "relation_type": "AFFECTED_CI_SC"
                    }]
            }
        },
        {
            "from_object": {
                "label": "Event",
                "relationships": [
                    {
                        "related_label": "Incident",
                        "related_object": "incident",
                        "relation_type": "CORR"
                    }]
            },
            "to_object": {
                "label": "CI_SC",
                "relationships": [
                    {
                        "related_label": "Incident",
                        "related_object": "incident",
                        "relation_type": "AFFECTED_CI_SC"
                    }]
            }
        },
        {
            "from_object": {
                "label": "Event",
                "relationships": [
                    {
                        "related_label": "Interaction",
                        "related_object": "interaction",
                        "relation_type": "CORR"
                    }]
            },
            "to_object": {
                "label": "CI_SC",
                "relationships": [
                    {
                        "related_label": "Interaction",
                        "related_object": "interaction",
                        "relation_type": "AFFECTED_CI_SC"
                    }]
            }
        }
    ],
}

In [9]:
extend_relationships(db_connection, e2o_relationships_to_extend)

→ 53413 (:Event) - [:CORR] -> (:CI_SC) Relationship built
→ 596012 (:Event) - [:CORR] -> (:CI_SC) Relationship built
→ 293106 (:Event) - [:CORR] -> (:CI_SC) Relationship built


Results should show that we extend with
- 53413 `(:Event) - [:CORR] -> (:CI_SC)` Relationships (via `:Change`)
- 596012 `(:Event) - [:CORR] -> (:CI_SC)` Relationships (via `:Incident`)
- 293106 `(:Event) - [:CORR] -> (:CI_SC)` Relationships (via `:Interaction`)

**Materialize DF relationships**
We inferred the temporal order of the events as DF relationships with _Enrichment method 4: Inferring DF relationships_.

DF relationships are inferred for objects of a specific object type, we infer the relationships for the following objects as they are of interest to better understand the CI-SC lifecycle.
- Interaction
- Incident
- Change
- CI-SC

In [10]:
object_types_for_df = ['Interaction', 'Incident', 'Change', 'CI_SC']

In [11]:
build_df_edges(_db_connection=db_connection,
               _object_types=object_types_for_df)

→ Index for :Event (timestamp)
-> Interaction DF creation result: 146553
-> Incident DF creation result: 549706
-> Change DF creation result: 16606
-> CI_SC DF creation result: 929621


Complex nesting patterns can be found when exploring the DF-edges visually.
For instance, Fig.6 (left) shows the trace of CI_SC with sysId: WBS000098_APP000003.

To visualize it for yourself, you can do the following steps:
1) In Neo4j Bloom, request CI_SC sysId: WBS000098_APP000003
2) Right-click the node to expand all related :Events by expressing Expand, :CORR
3) Select all events and reveal DF relationships.

To color the event nodes per event type, we first need to add an eventType property using the following query. Then on the right hand side, you can select Event, then go to rule based, add a rule-based styling based on eventType with unique colours.

In [12]:
query = '''
MATCH (e:Event) - [:IS_OF_TYPE] -> (et:EventType)
SET e.eventType = et.eventType
'''

db_connection.exec_query(query);

[]

## Iteration 2: Enrich with High-Level Events
TODO: add comments

In [13]:
def infer_start_event(_db_connection, _object_type):
    # infer start and end events for each object type
    q_start_event = '''
        :auto
        // Infer start event of an object
        MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        MATCH (o)<-[]-(e:Event)
        WHERE NOT ()-[:DF {id:o.sysId}]->(e)
        CALL (o, e){
            MERGE (o)<-[rel:START]-(e)
            RETURN rel
        } IN TRANSACTIONS
        RETURN count(rel) as count
    '''

    q_start_event_result = Query(
        query_str=q_start_event,
        parameters={"objectType": _object_type}
    )

    res = _db_connection.exec_query(q_start_event_result)

    print(f'→ Inferred Start Events for {res[0]["count"]} objects ({_object_type})')

In [14]:
def infer_end_event(_db_connection, _object_type):
    # infer start and end events for each object type
    q_end_event = '''
        :auto
        // Infer start event of an object
        MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        MATCH (o)<-[]-(e:Event)
        WHERE NOT (e)-[:DF {id:o.sysId}]->()
        CALL (o, e){
            MERGE (o)<-[rel:END]-(e)
            RETURN rel
        } IN TRANSACTIONS
        RETURN count(rel) as count
    '''

    q_end_event_result = Query(
        query_str=q_end_event,
        parameters={"objectType": _object_type}
    )

    res = _db_connection.exec_query(q_end_event_result)

    print(f'→ Inferred End Events for {res[0]["count"]} objects ({_object_type})')

In [15]:
object_types = ['Interaction', 'Incident', 'Change']
for object_type in object_types:
    infer_start_event(db_connection, object_type)
    infer_end_event(db_connection, object_type)

→ Inferred Start Events for 146553 objects (Interaction)
→ Inferred End Events for 146553 objects (Interaction)
→ Inferred Start Events for 46378 objects (Incident)
→ Inferred End Events for 46378 objects (Incident)
→ Inferred Start Events for 16611 objects (Change)
→ Inferred End Events for 16616 objects (Change)


In [16]:
# build index for high-level events
qCreateIndexHLE_sys = f'''
    CREATE INDEX highLevelEventSysIdIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.sysId);
'''
db_connection.exec_query(qCreateIndexHLE_sys)

qCreateIndexHLE_start = f'''
    CREATE INDEX highLevelEventStartTimeIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.startTime);
'''
db_connection.exec_query(qCreateIndexHLE_start)
qCreateIndexHLE_end = f'''
    CREATE INDEX highLevelEventEndTimeIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.endTime);
'''
db_connection.exec_query(qCreateIndexHLE_end)


[]

[]

[]

In [17]:
def infer_high_level_events(_db_connection, _object_type):
    # build high-level events
    q_build_high_level_event_str = '''
        :auto
        MATCH (n) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        MATCH (eStart:Event)-[st:START]->(n)<-[en:END]-(eEnd:Event)
        WITH DISTINCT eStart, eEnd
        CALL (eStart, eEnd) {
            MERGE (h:HighLevelEvent {sysId: "HLE_" + eStart.sysId + "_" + eEnd.sysId})
            MERGE (h_et:EventType {eventType: 'HighLevelEvent'})
            MERGE (h) - [:IS_OF_TYPE] -> (h_et)
            ON CREATE SET h.startTime=eStart.timestamp, h.endTime=eEnd.timestamp, h.activity=$objectType
            MERGE (h)-[:START]->(eStart)
            MERGE (h)-[:END]->(eEnd)
            RETURN h
        } IN TRANSACTIONS
        RETURN count(h) as count
    '''

    q_build_high_level_event_result = Query(
        query_str=q_build_high_level_event_str,
        parameters={"objectType": _object_type}
    )

    res = _db_connection.exec_query(q_build_high_level_event_result)
    print(f'→ Inferred {res[0]["count"]} HighLevelEvent for ObjectType ({_object_type})')


In [18]:
object_types = ['Interaction', 'Incident', 'Change']
for object_type in object_types:
    infer_high_level_events(db_connection, object_type)

→ Inferred 146553 HighLevelEvent for ObjectType (Interaction)
→ Inferred 46378 HighLevelEvent for ObjectType (Incident)
→ Inferred 16611 HighLevelEvent for ObjectType (Change)


In [20]:
def lift_e2o_relationship_to_hle(_db_connection, _object_type):
    # lift CORR to high-level events
    q_lift_e2o_str = '''
        :auto
        MATCH (h:HighLevelEvent) - [:START|END] -> () --> (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        WITH distinct h, o
        CALL (h, o) {
            MERGE(h) - [c:CORR] -> (o)
            RETURN c
        } IN TRANSACTIONS
        RETURN count(c) as count
    '''

    q_lift_e2o = Query(
        query_str=q_lift_e2o_str,
        parameters={"objectType": _object_type}
    )

    res = db_connection.exec_query(q_lift_e2o)
    print(f'→ Lifted {res[0]["count"]} E2O relationships for ObjectType ({_object_type})')

In [21]:
object_types = ['Interaction', 'Incident', 'Change', 'CI_SC']
for object_type in object_types:
    lift_e2o_relationship_to_hle(db_connection, object_type)

→ Lifted 146553 E2O relationships for ObjectType (Interaction)
→ Lifted 46378 E2O relationships for ObjectType (Incident)
→ Lifted 16611 E2O relationships for ObjectType (Change)
→ Lifted 219630 E2O relationships for ObjectType (CI_SC)


In [22]:
# build DF relations between high-level events
# step 1: build HL-DF relations between events

def discover_df_hle(_db_connection, _object_type):
    q_build_df_relations_hle_str = '''
        :auto
        // get only START/END events of each HL event correlated to CI_SC
        MATCH (ot:ObjectType {objectType: 'CI_SC'}) <- [:IS_OF_TYPE] - (o) <- [:CORR] - (h:HighLevelEvent)
        MATCH (o) <-[:CORR]- (h) -[:START|END] -> (e:Event)
        WITH o, e ORDER BY e.timestamp, elementId(e) // order by time
        WITH o, collect(e) AS events
        UNWIND range(0, size(events)-2) AS idx
        WITH idx, events[idx] AS fromEv, events[idx+1] AS toEv, o
        CALL (o, idx, fromEv, toEv) {
            // get their HL events correlated to the same object
            MATCH (fromEv)<-[:START|END]-(h1:HighLevelEvent)-[:CORR]->(o)<-[:CORR]-(h2:HighLevelEvent)-[:START|END]->(toEv)
            WHERE h1 <> h2
            // and add their DF relation
            MERGE (h1)-[rel:DF {objectType:'CI_SC', id:o.sysId}]->(h2)
            RETURN count(rel) as rel
        } IN TRANSACTIONS
        RETURN sum(rel) AS total

    '''

    q_build_df_relations_hle = Query(
        query_str=q_build_df_relations_hle_str,
        parameters={
            "objectType": _object_type,
        }
    )

    return db_connection.exec_query(q_build_df_relations_hle)

In [23]:
discover_df_hle(db_connection, _object_type='CI_SC')

[{'total': 334901}]

In [24]:
count_events = '''
MATCH (e:HighLevelEvent)
RETURN e.activity, count(e)
'''

db_connection.exec_query(count_events)

[{'e.activity': 'Interaction', 'count(e)': 146553},
 {'e.activity': 'Incident', 'count(e)': 46378},
 {'e.activity': 'Change', 'count(e)': 16611}]

## Iteration 3: Enrich Domain-Perspective with Set Variants Feature
TODO: add comments

In [25]:
def get_activity_set_variants(_db_connection, _object_type, _event_types):
    # get the bag variants on the high_level
    q_set_activity_variants_str = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e) - [:IS_OF_TYPE] -> (et:EventType)
        WHERE et.eventType IN $eventTypes
        WITH o, e.activity AS activity ORDER BY activity
        WITH o, collect(distinct activity) as set_variant
        RETURN ltrim(reduce(initial = "", activity in set_variant | initial + " - (" + activity + ")" ), " - " ) as set_variant, count(o) as count_objects order by count_objects DESC
    '''

    q_set_activity_variants = Query(query_str=q_set_activity_variants_str,
                                    parameters={
                                        'objectType': _object_type,
                                        'eventTypes': event_types
                                    })

    _result = pd.DataFrame(db_connection.exec_query(q_set_activity_variants))
    _result['%_set_variant'] = round(
        _result.groupby(['set_variant']).count_objects.transform("sum") / sum(_result['count_objects']) * 100, 2)
    return _result



In [26]:
event_types = ['HighLevelEvent']
result = get_activity_set_variants(_db_connection=db_connection,
                                   _object_type='CI_SC',
                                   _event_types=event_types)

In [27]:
print(result)

                             set_variant  count_objects  %_set_variant
0                               (Change)           8212          63.61
1             (Incident) - (Interaction)           2038          15.79
2                          (Interaction)           1554          12.04
3  (Change) - (Incident) - (Interaction)            531           4.11
4                             (Incident)            472           3.66
5               (Change) - (Interaction)             71           0.55
6                  (Change) - (Incident)             32           0.25


In [28]:
def assign_exposure_level(_db_connection, _object_type, _event_types):
    q_assign_exposure_level_str = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e) - [:IS_OF_TYPE] -> (et:EventType)
        WHERE et.eventType IN $eventTypes
        WITH o, e.activity AS activity ORDER BY activity
        WITH o, collect(distinct activity) as set_variant
        WITH o, set_variant,
        CASE
            WHEN size(set_variant) = 1 AND 'Change' in set_variant THEN 'internal'
            WHEN size(set_variant) > 1 AND 'Change' in set_variant THEN 'combined'
            ELSE 'exposed'
        END AS exposure
        SET o.exposure_level = exposure'''

    q_assign_exposure_level = Query(query_str=q_assign_exposure_level_str,
                                    parameters={
                                        'objectType': _object_type,
                                        'eventTypes': _event_types
                                    })

    db_connection.exec_query(q_assign_exposure_level)

In [29]:
event_types = ['HighLevelEvent']
assign_exposure_level(_db_connection=db_connection,
                      _object_type='CI_SC',
                      _event_types=event_types)

In [30]:
query = '''
MATCH (o:CI_SC)
WHERE o.exposure_level is not null AND not o.ciType in ['no type', '#N/B']
RETURN o.ciType as ciType, o.exposure_level as exposure_level, count(o) as count
'''

exposure_level_per_type = pd.DataFrame(db_connection.exec_query(query))

In [31]:
# Group by ciType and exposure_level, summing the counts
grouped = exposure_level_per_type.groupby(['ciType', 'exposure_level'], as_index=False).sum()

# Calculate the total count for each ciType
total_counts = grouped.groupby('ciType')['count'].transform('sum')

# Calculate the relative proportion
grouped['relative_proportion'] = round(grouped['count'] / total_counts, 2)

# Pivot the table to get one row per ciType
pivoted = grouped.pivot(
    index='ciType',
    columns='exposure_level',
    values='relative_proportion'
).reset_index()

# Fill NaN values with 0 (for ciTypes missing certain exposure_levels)
pivoted = pivoted.fillna(0)
pivoted = pivoted.sort_values(by=['combined'], ascending=[False])

# Display the result
print(pivoted)

exposure_level                ciType  combined  exposed  internal
1                        application      0.19     0.21      0.60
10                           storage      0.16     0.07      0.77
6                           hardware      0.09     0.20      0.71
11                    subapplication      0.09     0.18      0.73
9                           software      0.02     0.08      0.90
7                  networkcomponents      0.02     0.10      0.88
3                           computer      0.02     0.45      0.53
4                           database      0.01     0.02      0.97
8                  officeelectronics      0.01     0.89      0.11
0                              Phone      0.00     0.02      0.98
2               applicationcomponent      0.00     0.00      0.99
5                      displaydevice      0.00     0.93      0.07


## Iteration 4: Explore how CI-SCs of selected subset are managed together

Determine number of affected CI_SCs per change

In [32]:
ci_scs_per_change_q = '''
    MATCH (ci_sc:CI_SC) <- [:AFFECTED_CI_SC] - (c:Change)
    WITH c, count(distinct ci_sc) as count

    RETURN c.sysId, count
'''

ci_scs_per_change = pd.DataFrame(db_connection.exec_query(ci_scs_per_change_q))

In [34]:
ci_scs_per_change_q = '''
    MATCH (ci_sc:CI_SC) <- [:AFFECTED_CI_SC] - (c:Change)
    WITH c, count(distinct ci_sc) as count

    RETURN count(c) as num_changes, count order by count
'''

change_count = pd.DataFrame(db_connection.exec_query(ci_scs_per_change_q))

# Assuming your table is in a DataFrame called df
total_changes = change_count['num_changes'].sum()

# Calculate the proportion for each group
change_count['proportion'] = change_count['num_changes'] / total_changes

# Optionally, convert to percentage
change_count['percentage'] = change_count['proportion'] * 100

change_count

Unnamed: 0,num_changes,count,proportion,percentage
0,13980,1,0.780221,78.022101
1,2071,2,0.115582,11.55821
2,619,3,0.034546,3.454627
3,563,4,0.031421,3.142092
4,156,5,0.008706,0.870633
5,173,6,0.009655,0.96551
6,50,7,0.00279,0.279049
7,93,8,0.00519,0.519031
8,35,9,0.001953,0.195334
9,36,10,0.002009,0.200915


## Iteration 5: Study how CI-SC pairs are managed jointly or seperately.
Todo: add comments

In [35]:
def get_ci_scs_in_sync(db_connection):
    query = '''
            :auto


        MATCH (e:HighLevelEvent) - [:CORR] -> (o1) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType:'CI_SC'})
MATCH (e) - [:CORR] -> (o2) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType:'CI_SC'})
WHERE o1 < o2
WITH e, o1, o2
CALL (e, o1, o2){
  MATCH (f:HighLevelEvent)  - [:CORR] -> (o1)
  MATCH (f) - [:CORR] -> (o2)
  WHERE f.startTime >= e.startTime AND e <> f
  RETURN f ORDER BY f.startTime limit 1
} IN TRANSACTIONS
CALL (o1, e, f){
  MATCH p = SHORTEST 1 (e) - [:DF* {id: o1.sysId}] -> (f)
  WITH p, nodes(p)[1..-1] as events
  CALL (events) {
    UNWIND events as event
    WITH event ORDER BY event.activity
    RETURN collect(distinct event.activity) as set_variant
    }
  RETURN nodes(p) as p_o1, set_variant as set_variant_o1
} IN TRANSACTIONS
CALL (o2, e, f){
    MATCH p = SHORTEST 1 (e) - [:DF* {id: o2.sysId}] -> (f)
  WITH p, nodes(p)[1..-1] as events
  CALL (events) {
    UNWIND events as event
    WITH event ORDER BY event.activity
    RETURN collect(distinct event.activity) as set_variant
    }

  RETURN nodes(p) as p_o2,  set_variant as  set_variant_o2

} IN TRANSACTIONS
    MATCH (o1) - [:RELATED] -> (ci1:ConfigurationItem)
    MATCH (o2) - [:RELATED] -> (ci2:ConfigurationItem)
    WITH o1, ci1, o2, ci2, p_o1, p_o2, set_variant_o1, set_variant_o2

    CALL (set_variant_o1, set_variant_o2) {
    WITH set_variant_o1, set_variant_o2, CASE
        WHEN size(set_variant_o1) <= size(set_variant_o2) THEN set_variant_o1
        ELSE set_variant_o2 END AS first_interruptions
    WITH set_variant_o1, set_variant_o2, first_interruptions, CASE
        WHEN first_interruptions = set_variant_o1 THEN set_variant_o2
        ELSE set_variant_o1 END AS second_interruptions
    WITH set_variant_o1, set_variant_o2, CASE
        WHEN size(first_interruptions) = 0 THEN " None"
        ELSE ltrim(reduce(init="", item in first_interruptions | init + "-" + item), '-')  END AS interruptions_o1,
    CASE
        WHEN size(second_interruptions) = 0 THEN " None"
        ELSE ltrim(reduce(init="", item in second_interruptions | init + "-" + item), '-') END AS interruptions_o2
    RETURN interruptions_o1, interruptions_o2}
    RETURN o1.ciType as c1Type, o1.ciType as c2Type, interruptions_o1, interruptions_o2, count(o1) as occurrences

    '''

    result = pd.DataFrame(db_connection.exec_query(query=query))
    table = pd.pivot_table(result, index=['interruptions_o1', 'interruptions_o2', 'c1Type', 'c2Type'], aggfunc="sum")
    table['%'] = (
        round(
            table.groupby(['c1Type', 'c2Type', 'interruptions_o1', 'interruptions_o2']).occurrences.transform("sum") /
            table.groupby(['c1Type', 'c2Type']).occurrences.transform("sum") * 100,
            2
        )
    )
    return table


In [36]:
result = get_ci_scs_in_sync(db_connection)
result = result.reset_index()


In [39]:
result = result[
    (result['interruptions_o1'].str.contains('Incident') | result['interruptions_o1'].str.contains('Interaction')) & (
            result['interruptions_o2'].str.contains('Incident') | result['interruptions_o2'].str.contains(
        'Interaction'))]
table = pd.pivot_table(result, index=['interruptions_o1', 'interruptions_o2', 'c1Type', 'c2Type'], aggfunc="sum")
table.occurrences

interruptions_o1             interruptions_o2             c1Type       c2Type     
Change-Incident              Change-Incident-Interaction  application  application     1
                                                          storage      storage         1
Change-Incident-Interaction  Change-Incident-Interaction  application  application    19
                                                          computer     computer        3
                                                          hardware     hardware        1
Change-Interaction           Change-Incident              computer     computer        1
                             Change-Incident-Interaction  application  application    12
                                                          computer     computer       11
                             Change-Interaction           computer     computer        1
Incident                     Change-Incident-Interaction  application  application     1
                           

In [38]:
new_result = result.reset_index()
new_result = new_result.drop(columns=['c1Type', 'c2Type', '%'])
table = pd.pivot_table(new_result, index=['interruptions_o1', 'interruptions_o2'], aggfunc="sum")
table['%'] = (
    round(
        table.groupby(['interruptions_o1', 'interruptions_o2']).occurrences.transform("sum") /
        sum(table.occurrences) * 100,
        2
    )
)
table


Unnamed: 0_level_0,Unnamed: 1_level_0,index,occurrences,%
interruptions_o1,interruptions_o2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Change-Incident,Change-Incident-Interaction,159,2,1.57
Change-Incident-Interaction,Change-Incident-Interaction,246,23,18.11
Change-Interaction,Change-Incident,84,1,0.79
Change-Interaction,Change-Incident-Interaction,171,23,18.11
Change-Interaction,Change-Interaction,87,1,0.79
Incident,Change-Incident-Interaction,89,1,0.79
Incident,Change-Interaction,90,1,0.79
Incident-Interaction,Change-Incident-Interaction,183,8,6.3
Incident-Interaction,Change-Interaction,187,4,3.15
Incident-Interaction,Incident-Interaction,191,26,20.47
