# 0. Prepare Project

In [2]:
from typing import List
%matplotlib inline
import matplotlib.pyplot as plt

from pathlib import Path

from promg.modules.db_management import DBManagement
from tabulate import tabulate
import yaml

from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

import pandas as pd

pd.set_option('display.width', 2000)

### Define the project that you want to do analysis on

In [3]:
case_study = 'bpic14'
use_sample = False

In [4]:
# retrieve configuration for case_study
conf_path = Path(case_study, 'config.yaml')
config = yaml.safe_load(open(conf_path))

print(f"These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


### Prepare so we can use PromG to load the data and execute queries

In [5]:
config = Configuration.init_conf_with_config_file(conf_path)
db_connection = DatabaseConnection.set_up_connection(config=config)
perf = Performance.set_up_performance(config=config)
dataset_descriptions = DatasetDescriptions(config=config)

0it [00:00, ?it/s]

In [5]:
def reset_pbar(pbar=perf.pbar, total=None):
    # clear db
    pbar.reset()
    # TODO update dragons in PromG, #update method to set total for pbar
    pbar.total = total
    pbar.set_postfix_str()

#### Prepare the DB

In [6]:
# read the semantic header --> this details how the data should be structured
semantic_header = SemanticHeader.create_semantic_header(config=config)

In [7]:
# Clear the DB (if use_sample = False, this should not take long on a loaded database)
db_manager = DBManagement(db_connection=db_connection, semantic_header=None)
db_manager.clear_db(
    replace=True);  # in the community version of neo4j, replace is not allowed. In that case, set replace=False

1it [00:06,  6.93s/it, clear_db: took 6.93 seconds]

  perf.perf = pd.concat([perf.perf, pd.DataFrame.from_records([


### Statistics

In [8]:
def get_graph_statistics(_db_connection):
    """
    Statistics about nodes and relations.
    """

    with _db_connection.driver.get_session(database=_db_connection.db_name) as session:
        print("\n=== GRAPH STATISTICS ===")

        try:
            node_query = """
            MATCH (n)
            WITH n, labels(n) as labels
            RETURN reduce(label_str = "(", l in labels | label_str + ":" + l) + ")" as label, count(n) as count ORDER BY count DESC
            """
            node_counts = session.run(node_query)
            print("\n--- Node counts ---")
            for record in node_counts:
                print(f"{record['label']:<30} {record['count']}")

            rel_query = """
            MATCH (n) - [r] -> (n2)
            RETURN "[:" + type(r) + "]" as  type, count(r) as count ORDER BY count DESC
            """
            rel_counts = session.run(rel_query)
            print("\n--- Relationship counts ---")
            for record in rel_counts:
                print(f"{record['type']:<30} {record['count']}")

            total_nodes = session.run("MATCH (n) RETURN count(n) AS total").single()["total"]
            total_rels = session.run("MATCH ()-[r]->() RETURN count(r) AS total").single()["total"]

            print("\n--- Totals ---")
            print(f"Total nodes: {total_nodes}")
            print(f"Total relationships: {total_rels}")

        except Exception as e:
            print(f"Failed to get graph statistics: {e}")

# 0. Load the data

We load every record in our data as a `(:Record)` node. We also create for each of the four logs a `(:Log)` node and create a relationship `(:Log) - [:CONTAINS] -> (:Record)` to indicate from which log the record comes from.

In [9]:
use_sample = False
oced_pg = OcedPg(database_connection=db_connection,
                 dataset_descriptions=dataset_descriptions,
                 semantic_header=semantic_header,
                 use_sample=use_sample)


In [10]:
# first, we load all records
# (if use_sample = False, this should take less than 2 minutes)
reset_pbar(total=11)
oced_pg.load();

  0%|          | 0/11 [00:00<?, ?it/s]             



100%|██████████| 11/11 [00:41<00:00,  3.97s/it, _filter_nodes for BPIC14Interaction: took 0.0 seconds]                      

In [11]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:Log)                         4

--- Relationship counts ---
[:CONTAINS]                    690622

--- Totals ---
Total nodes: 690626
Total relationships: 690622


# 1. Split Entities into Objects and Events

### Logs
For each log, we create a variable.

In [12]:
bpic14_incident = "BPIC14Incident.csv"
bpic14_interaction = "BPIC14Interaction.csv"
bpic14_change = "Detail_Change.csv"
bpic14_incident_activity = "Detail_Incident_Activity.csv"

### Objects Nodes

Create objects directly from the records available in the input files.<br>
We take all entities in the domain model that refer to an object. Those are:

- Incident
- Interaction
- Change
- Knowledge Document
- Resource
- Configuration Item
- Service Component

For every entity, we define how it should be created, considering:
- which log to read from.
- which field to use as the unique sysId
- which attributes to keep
- any constant properties that should be added to the node

**Primary Entities**
The following entities can be directly extracted from their primary logs, where their ID serves as the primary key:
   Entity      | Primary Log Table   | Primary Key   |
 |-------------|---------------------|---------------|
 | Incident    | bpic14_incident     | incidentId    |
 | Interaction | bpic14_interaction  | interactionId |
 | Change      | bpic14_change       | (ID column)   |

---

**Foreign Key References**
The primary entities are also referenced as foreign keys in other logs:
 | Entity      | Referenced In               | Foreign Key Field   |
 |-------------|-----------------------------|---------------------|
 | Incident    | bpic14_interaction          | relatedIncident     |
 |             | bpic14_incident_activity    | incidentId          |
 | Interaction | bpic14_incident             | relatedInteraction |
 | Change      | bpic14_incident             | relatedChange       |

Since there are primary entities that are only referenced as foreign key, we have to include them to make sure we create an entity node for them.
We will also use this information to create relationships at a later stage.

---

**Supporting Entities (Referenced Only as Foreign Keys)**
The following entities are **not** extracted from a primary log but are referenced as foreign keys in other logs:
 | Entity                     | Referenced In               | Foreign Key Field      | Notes                                      |
 |----------------------------|-----------------------------|------------------------|--------------------------------------------|
 | Knowledge Document         | All logs                    | kmNumber               | No primary log; referenced across all logs.|
 | Resource                   | bpic14_incident_activity    | assignmentGroup        |                                            |
 | (Affected) Configuration Item | All logs                  | CINameAff              | For CIs affected by the primary entity.    |
 | (CausedBy) Configuration Item | bpic14_incident           | CINameCBy              |                                            |
 | (Affected) Service Component | All logs                  | serviceComponentAff    | For SCs affected by the log.               |
 | (CausedBy) Service Component | bpic14_incident           | serviceComponentCBy    |                                            |

We will also use this information to create relationships at a later stage.

In [13]:
objects = {
    "Incident": [
        {
            "log": bpic14_incident,
            "sysId": "incidentId",
            "attributes": {
                "incidentId": "incidentId",
                "status": "status",
                "impact": "impact",
                "priority": "priority",
                "category": "category",
                "handleTimeHours": "handleTimeHours",
                "closureCode": "closureCode",
                "alertStatus": "alertStatus",
                "numReassignments": "numReassignments",
                "numRelatedInteractions": "numRelatedInteractions",
                "numRelatedIncidents": "numRelatedIncidents",
                "numRelatedChanges": "numRelatedChanges"
            },
        },
        {
            "log": bpic14_interaction,
            "sysId": "relatedIncident",
            "attributes": {
                "incidentId": "relatedIncident"  #foreign_key
            },
            "constants": {
                "derivedFromInteraction": True
            }
        },
        {
            "log": bpic14_incident_activity,
            "sysId": "incidentId",
            "attributes": {
                "incidentId": "incidentId"  #foreign_key
            }
        }
    ],
    "Interaction": [
        {
            "log": bpic14_interaction,
            "sysId": "interactionId",
            "attributes": {
                "interactionId": "interactionId",
                "status": "status",
                "impact": "impact",
                "priority": "priority",
                "category": "category",
                "handleTimeSecs": "handleTimeSecs",
                "closureCode": "closureCode",
                "firstCallResolution": "firstCallResolution"
            },
        },
        {
            "log": bpic14_incident,
            "sysId": "relatedInteraction",
            "attributes": {
                "interactionId": "relatedInteraction"  #foreign key
            },
        }
    ],
    "Change": [
        {
            "log": bpic14_change,
            "sysId": "changeId",
            "attributes": {
                "changeId": "changeId",
                "type": "changeType",
                "riskAssessment": "riskAssessment",
                "cabApprovalNeeded": "cabApprovalNeeded",
                "plannedStart": "plannedStart",
                "plannedEnd": "plannedEnd",
                "scheduledDowntimeStart": "scheduledDowntimeStart",
                "scheduledDowntimeEnd": "scheduledDowntimeEnd",
                "requestedEndDate": "requestedEndDate",
                "originatedFrom": "originatedFrom",
                "numRelatedInteractions": "numRelatedInteractions",
                "numRelatedIncidents": "numRelatedIncidents"
            },
        }, {
            "log": bpic14_incident,
            "sysId": "relatedChange",
            "attributes": {
                "changeId": "relatedChange"
            },
            "constants": {
                "derivedFromIncident": True
            }
        }
    ],
    "KnowledgeDocument": [
        {
            "log": None,
            "sysId": "kmNumber",
            "attributes": {"kmNumber": "kmNumber"}
        }
    ],
    "Resource": [
        {
            "log": bpic14_incident_activity,
            "sysId": "assignmentGroup",
            "attributes": {"assignmentGroup": "assignmentGroup"}
        }
    ],
    "ConfigurationItem": [
        {  # affected CIs
            "log": None,
            "sysId": "ciNameAff",
            "attributes": {
                "ciName": "ciNameAff",
                "ciType": "ciTypeAff",
                "ciSubtype": "ciSubtypeAff"
            },
            "constants": {
                "affected": True
            }
        },
        {  # caused by CIs
            "log": bpic14_incident,
            "sysId": "ciNameCby",
            "attributes": {
                "ciName": "ciNameCby",
                "ciType": "ciTypeCby",
                "ciSubtype": "ciSubtypeCby"
            },
            "constants": {
                "caused": True
            }

        }

    ],
    "ServiceComponent": [
        {  # affected SCs
            "log": None,
            "sysId": "serviceComponentAff",
            "attributes": {
                "scName": "serviceComponentAff"
            },
            "constants": {
                "affected": True
            }
        },
        {  # caused by SCs
            "log": bpic14_incident,
            "sysId": "serviceComponentCBy",
            "attributes": {
                "scName": "serviceComponentCBy"
            },
            "constants": {
                "caused": True
            }
        },
    ]
}

Define functions to create the nodes in Neo4j using the previous configuration to extract the data from the specified logs.

In [61]:
def create_index(_db_connection, _label):
    index_query_str = f"""
        CREATE INDEX $index_name IF NOT EXISTS
        FOR (n:$label)
        ON (n.sysId)
    """

    index_query = Query(query_str=index_query_str,
                        parameters={
                            "index_name": f"{_label.lower()}_sysId_index"
                        },
                        template_string_parameters={
                            "label": _label
                        })

    _db_connection.exec_query(index_query)
    print(f"Index for :{_label}(sysId)")


def build_entity(_label, _config):
    iterate_query = """
        :auto
        MATCH (l:Log)-[:CONTAINS]->(r:Record)
        WHERE r.$sysId_field IS NOT NULL $log_name_condition $time_field_condition
        WITH r.$sysId_field $id_addition AS sysId, r
        CALL (sysId, r) {
             MERGE (n:$label {sysId: sysId})
             MERGE (n)-[:EXTRACTED_FROM]->(r)
             $attr_updates
             $constants_updates
        } IN TRANSACTIONS
    """
    attr_updates = ""
    time_field_condition = ""

    if "attributes" in _config:
        attr_updates += "SET "
        attr_updates += ", ".join(
            [f"n.{key} = COALESCE(n.{key}, r.{attr})" for key, attr in _config["attributes"].items()])

        if "timestamp" in _config["attributes"]:
            time_field_condition = f"AND r.{_config['attributes']['timestamp']} IS NOT NULL"

    constants_updates = ""
    if "constants" in _config:
        constants_updates += "SET "
        constants_updates += ", ".join(
            [f"n.{key} = COALESCE(n.{key}, {attr})" for key, attr in _config["constants"].items()])

    query = Query(
        query_str=iterate_query,
        parameters={
            "log_name": _config["log"],
        },
        template_string_parameters={
            "label": _label,
            "sysId_field": _config["sysId"],
            "log_name_condition": "AND l.name = $log_name" if _config["log"] else "",
            "time_field_condition": time_field_condition,
            "attr_updates": attr_updates,
            "constants_updates": constants_updates,
            "id_addition": f"+ '{_config['id_addition']}'" if 'id_addition' in _config else ""
        }
    )
    db_connection.exec_query(query)
    print(f"→ {_label} nodes created.")


def build_entities(_db_connection, entities):
    """
    Create entities. Includes indexing.
    """
    print("\n=== INDEXES ===")
    for _label in entities.keys():
        try:
            create_index(_db_connection=_db_connection,
                         _label=_label)
        except Exception as e:
            print(f"Failed to create index for {_label}: {e}")

    print(f"\n=== Building ENTITY NODES ===")

    for _label, _configs in entities.items():
        for _config in _configs:
            try:
                build_entity(_label=_label,
                             _config=_config)
            except Exception as e:
                print(f"Failed for {_label}: {e}")


Create the object nodes in Neo4j.

In [15]:
build_entities(db_connection, entities=objects)


=== INDEXES ===
Index for :Incident(sysId)
Index for :Interaction(sysId)
Index for :Change(sysId)
Index for :KnowledgeDocument(sysId)
Index for :Resource(sysId)
Index for :ConfigurationItem(sysId)
Index for :ServiceComponent(sysId)

=== Building ENTITY NODES ===
→ Incident nodes created.
→ Incident nodes created.
→ Incident nodes created.
→ Interaction nodes created.
→ Interaction nodes created.
→ Change nodes created.
→ Change nodes created.
→ KnowledgeDocument nodes created.
→ Resource nodes created.
→ ConfigurationItem nodes created.
→ ConfigurationItem nodes created.
→ ServiceComponent nodes created.
→ ServiceComponent nodes created.


In [16]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:Interaction)                 147172
(:Incident)                    47057
(:Change)                      18026
(:ConfigurationItem)           15134
(:KnowledgeDocument)           2373
(:ServiceComponent)            340
(:Resource)                    242
(:Log)                         4

--- Relationship counts ---
[:EXTRACTED_FROM]              2371470
[:CONTAINS]                    690622

--- Totals ---
Total nodes: 920970
Total relationships: 3062092


## Object-to-Object (O2O) Relationships

Similarly to the objects, we define and specify the following Object-to-Object (O2O) relations:
- (Incident|Interaction)-[:USED_KM]->(KnowledgeDocument)
- (Incident)-[:RELATED_CHANGE]->(Change)
- (Interaction)-[:RELATED_INCIDENT]->(Incident)
- (Incident|Interaction|Change)-[:AFFECTED_CI_SC]->(CI_SC)
- (Incident|Interaction|Change)<-[:CAUSED_BY_CI_SC]-(CI_SC)

In [17]:
o2o_relationships = {
    "USED_KM": [{
        "from_object": {
            "label": "Incident|Interaction"
        },
        "to_object": {
            "label": "KnowledgeDocument",
            "foreign_key": "kmNumber"
        }
    }],
    "RELATED_CHANGE": [{
        "from_object": {
            "label": "Incident"
        },
        "to_object": {
            "label": "Change",
            "foreign_key": "relatedChange"
        }
    }],
    "RELATED_INCIDENT": [
        {
            "from_object": {
                "label": "Interaction"
            },
            "to_object": {
                "label": "Incident",
                "foreign_key": "relatedIncident"
            }
        },
        {
            "from_object": {
                "label": "Interaction",
                "foreign_key": "relatedInteraction"
            },
            "to_object": {
                "label": "Incident"
            },
            "constants": {
                "primary": True
            }
        }],
    "AFFECTED_CI": [{
        "from_object": {
            "label": "Incident"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "foreign_key": "ciNameAff",
        },
        "log": bpic14_incident
    },
        {
            "from_object": {
                "label": "Interaction"
            },
            "to_object": {
                "label": "ConfigurationItem",
                "foreign_key": "ciNameAff",
            },
            "log": bpic14_interaction
        },
        {
            "from_object": {
                "label": "Change"
            },
            "to_object": {
                "label": "ConfigurationItem",
                "foreign_key": "ciNameAff",
            },
            "log": bpic14_change
        }],
    "AFFECTED_SC": [{
        "from_object": {
            "label": "Incident"
        },
        "to_object": {
            "label": "ServiceComponent",
            "foreign_key": "serviceComponentAff",
        },
        "log": bpic14_incident
    },
        {
            "from_object": {
                "label": "Interaction"
            },
            "to_object": {
                "label": "ServiceComponent",
                "foreign_key": "serviceComponentAff",
            },
            "log": bpic14_interaction
        },
        {
            "from_object": {
                "label": "Change"
            },
            "to_object": {
                "label": "ServiceComponent",
                "foreign_key": "serviceComponentAff"
            },
            "log": bpic14_change
        }],
    "CAUSED_BY_CI": [{
        "from_object": {
            "label": "Incident"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "foreign_key": "ciNameCby"
        },
    }],
    "CAUSED_BY_SC": [{
        "from_object": {
            "label": "Incident"
        },
        "to_object": {
            "label": "ServiceComponent",
            "foreign_key": "serviceComponentCBy"
        },
    }],
    "CONTAINS": [{
        "from_object": {
            "label": "ServiceComponent",
            "foreign_key": "serviceComponentAff"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "foreign_key": "ciNameAff"
        },
    }, {
        "from_object": {
            "label": "ServiceComponent",
            "foreign_key": "serviceComponentCBy"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "foreign_key": "ciNameCby"
        },
    }]
}

In [18]:
def build_foreign_key_index(_db_connection, _config):
    foreign_key_query_str = '''
                            CREATE INDEX $index_name IF NOT EXISTS
                                FOR (n:Record) ON (n.$foreign_key) \
                            '''

    for _type in ["from_object", "to_object"]:
        if "foreign_key" in _config[_type]:
            foreign_key = _config[_type]["foreign_key"]

            foreign_key_index_query = Query(
                query_str=foreign_key_query_str,
                parameters={
                    "index_name": f"record_{foreign_key}_index"
                },
                template_string_parameters={
                    "foreign_key": foreign_key
                }
            )

            _db_connection.exec_query(foreign_key_index_query)
            print(f"Index ensured for :Record({foreign_key})")


def build_relationship(_db_connection, _type, _config):
    o2o_query_str = '''
        :auto
         MATCH (from:$from_object) - [:EXTRACTED_FROM] -> (r:Record) <- [:EXTRACTED_FROM] - (to:$to_object)
         $log_condition
         WHERE $condition
         CALL (from, to, r) {
            MERGE (from) - [rel:$type] -> (to)
            $attr_updates
            $constants_updates
        } IN TRANSACTIONS
    '''

    attr_updates = ""
    if "attributes" in _config:
        attr_updates = "SET "
        attr_updates += ", ".join(
            [f"rel.{key} = COALESCE(rel.{key}, r.{attr})" for key, attr in _config["attributes"].items()])
    constants_updates = ""
    if "constants" in _config:
        constants_updates += "SET "
        constants_updates += ", ".join(
            [f"rel.{key} = COALESCE(rel.{key}, {attr})" for key, attr in _config["constants"].items()])

    from_object = _config["from_object"]
    to_object = _config["to_object"]

    from_foreign_key = ""
    to_foreign_key = ""
    log = ""
    conditions = []
    log_condition = ""

    if "foreign_key" in from_object:
        from_foreign_key = from_object["foreign_key"]
        conditions.append("r[$from_foreign_key] IS NOT NULL AND from.sysId = r[$from_foreign_key]")
    if "foreign_key" in to_object:
        to_foreign_key = to_object["foreign_key"]
        conditions.append("r[$to_foreign_key] IS NOT NULL AND to.sysId = r[$to_foreign_key]")
    if "log" in _config:
        log = _config["log"]
        log_condition = "MATCH (r) <- [:CONTAINS] - (:Log {name: $log_name})"

    o2o_query = Query(
        query_str=o2o_query_str,
        parameters={
            "from_foreign_key": from_foreign_key,
            "to_foreign_key": to_foreign_key,
            "log_name": log
        },

        template_string_parameters={
            "condition": " AND ".join(conditions),
            "from_object": from_object["label"],
            "to_object": to_object["label"],
            "type": _type,
            "attr_updates": attr_updates,
            "constants_updates": constants_updates,
            "log_condition": log_condition
        }
    )

    db_connection.exec_query(o2o_query)
    print(f"→ (:{_config['from_object']}) - [:{_type}] -> (:{_config['to_object']}) Relationship built")


def build_relationships(_db_connection, _relationships):
    print("\n=== INDEXES ===")
    for _type, _configs in _relationships.items():
        for _config in _configs:
            build_foreign_key_index(_db_connection=_db_connection,
                                    _config=_config)

    print("\n=== O2O RELATIONSHIPS ===")
    for _type, _configs in _relationships.items():
        for _config in _configs:
            build_relationship(_db_connection=_db_connection,
                               _type=_type,
                               _config=_config)

In [19]:
build_relationships(_db_connection=db_connection,
                    _relationships=o2o_relationships)


=== INDEXES ===
Index ensured for :Record(kmNumber)
Index ensured for :Record(relatedChange)
Index ensured for :Record(relatedIncident)
Index ensured for :Record(relatedInteraction)
Index ensured for :Record(ciNameAff)
Index ensured for :Record(ciNameAff)
Index ensured for :Record(ciNameAff)
Index ensured for :Record(serviceComponentAff)
Index ensured for :Record(serviceComponentAff)
Index ensured for :Record(serviceComponentAff)
Index ensured for :Record(ciNameCby)
Index ensured for :Record(serviceComponentCBy)
Index ensured for :Record(serviceComponentAff)
Index ensured for :Record(ciNameAff)
Index ensured for :Record(serviceComponentCBy)
Index ensured for :Record(ciNameCby)

=== O2O RELATIONSHIPS ===
→ (:{'label': 'Incident|Interaction'}) - [:USED_KM] -> (:{'label': 'KnowledgeDocument', 'foreign_key': 'kmNumber'}) Relationship built
→ (:{'label': 'Incident'}) - [:RELATED_CHANGE] -> (:{'label': 'Change', 'foreign_key': 'relatedChange'}) Relationship built
→ (:{'label': 'Interaction'

In [20]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:Interaction)                 147172
(:Incident)                    47057
(:Change)                      18026
(:ConfigurationItem)           15134
(:KnowledgeDocument)           2373
(:ServiceComponent)            340
(:Resource)                    242
(:Log)                         4

--- Relationship counts ---
[:EXTRACTED_FROM]              2371470
[:CONTAINS]                    705949
[:AFFECTED_CI]                 223734
[:AFFECTED_SC]                 212948
[:USED_KM]                     194437
[:RELATED_INCIDENT]            52687
[:CAUSED_BY_CI]                45499
[:CAUSED_BY_SC]                43123
[:RELATED_CHANGE]              536

--- Totals ---
Total nodes: 920970
Total relationships: 3850383


## Event Nodes

In our data, we model the following four events.

There are four types of events Incident Events, Incident Activity Events, Change Events and Interaction Events.

FZE: WHY??? How do you infer this from the raw data? #TODO --> Add explanation

In [21]:
EVENTS = {
    "IncidentEvent": [
        {
            "log": bpic14_incident,
            "sysId": "incidentId",
            "id_addition": "_Open",
            "attributes": {
                "timestamp": "openTime"
            },
            "constants": {
                "activity": "'Open'"
            }
        }, {
            "log": bpic14_incident,
            "sysId": "incidentId",
            "id_addition": "_Resolve",
            "attributes": {
                "timestamp": "resolvedTime"
            },
            "constants": {
                "activity": "'Resolve'"
            }
        }, {
            "log": bpic14_incident,
            "sysId": "incidentId",
            "id_addition": "_Close",
            "attributes": {
                "timestamp": "closeTime"
            },
            "constants": {
                "activity": "'Close'"
            }
        }
    ],
    "ChangeEvent": [
        {
            "log": bpic14_change,
            "sysId": "changeId",
            "id_addition": "_Start",
            "attributes": {
                "timestamp": "actualStart"
            },
            "constants": {
                "activity": "'Start'"
            }
        }, {
            "log": bpic14_change,
            "sysId": "changeId",
            "id_addition": "_End",
            "attributes": {
                "timestamp": "actualEnd"
            },
            "constants": {
                "activity": "'End'"
            }
        }
    ],
    "InteractionEvent": [
        {
            "log": bpic14_interaction,
            "sysId": "interactionId",
            "id_addition": "_Open",
            "attributes": {
                "timestamp": "openTime"
            },
            "constants": {
                "activity": "'Open'"
            }
        }, {
            "log": bpic14_interaction,
            "sysId": "interactionId",
            "id_addition": "_Close",
            "attributes": {
                "timestamp": "closeTime"
            },
            "constants": {
                "activity": "'Close'"
            }
        }],
    "IncidentActivityEvent": [
        {
            "log": bpic14_incident_activity,
            "sysId": "activityNumber",
            "attributes": {
                "activity": "incidentActivityType",
                "timestamp": "dateStamp"
            }
        }
    ],
}


In [22]:
build_entities(db_connection, entities=EVENTS)


=== INDEXES ===
Index for :IncidentEvent(sysId)
Index for :ChangeEvent(sysId)
Index for :InteractionEvent(sysId)
Index for :IncidentActivityEvent(sysId)

=== Building ENTITY NODES ===
→ IncidentEvent nodes created.
→ IncidentEvent nodes created.
→ IncidentEvent nodes created.
→ ChangeEvent nodes created.
→ ChangeEvent nodes created.
→ InteractionEvent nodes created.
→ InteractionEvent nodes created.
→ IncidentActivityEvent nodes created.


In [23]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:IncidentActivityEvent)       466737
(:InteractionEvent)            294008
(:Interaction)                 147172
(:IncidentEvent)               138038
(:Incident)                    47057
(:ChangeEvent)                 33381
(:Change)                      18026
(:ConfigurationItem)           15134
(:KnowledgeDocument)           2373
(:ServiceComponent)            340
(:Resource)                    242
(:Log)                         4

--- Relationship counts ---
[:EXTRACTED_FROM]              3324284
[:CONTAINS]                    705949
[:AFFECTED_CI]                 223734
[:AFFECTED_SC]                 212948
[:USED_KM]                     194437
[:RELATED_INCIDENT]            52687
[:CAUSED_BY_CI]                45499
[:CAUSED_BY_SC]                43123
[:RELATED_CHANGE]              536

--- Totals ---
Total nodes: 1853134
Total relationships: 4803197


## Event-to-Object (E2O) Relationships

FZE: add here what these relationships are and how they are built

In [24]:
e2o_relationships = {
    "CORR": [
        {
            "from_object": {
                "label": "IncidentEvent"
            },
            "to_object": {
                "label": "Incident",
                "foreign_key": "incidentId"
            }
        },
        {
            "from_object": {
                "label": "ChangeEvent"
            },
            "to_object": {
                "label": "Change",
                "foreign_key": "changeId"
            }
        },
        {
            "from_object": {
                "label": "InteractionEvent"
            },
            "to_object": {
                "label": "Interaction",
                "foreign_key": "interactionId"
            }
        },
        {
            "from_object": {
                "label": "IncidentActivityEvent"
            },
            "to_object": {
                "label": "Incident",
                "foreign_key": "incidentId"
            }
        }
    ],
    "EXECUTED_BY": [
        {
            "from_object": {
                "label": "IncidentActivityEvent"
            },
            "to_object": {
                "label": "Resource",
                "foreign_key": "assignmentGroup"
            }
        }
    ]

}

In [25]:
build_relationships(db_connection, _relationships=e2o_relationships)


=== INDEXES ===
Index ensured for :Record(incidentId)
Index ensured for :Record(changeId)
Index ensured for :Record(interactionId)
Index ensured for :Record(incidentId)
Index ensured for :Record(assignmentGroup)

=== O2O RELATIONSHIPS ===
→ (:{'label': 'IncidentEvent'}) - [:CORR] -> (:{'label': 'Incident', 'foreign_key': 'incidentId'}) Relationship built
→ (:{'label': 'ChangeEvent'}) - [:CORR] -> (:{'label': 'Change', 'foreign_key': 'changeId'}) Relationship built
→ (:{'label': 'InteractionEvent'}) - [:CORR] -> (:{'label': 'Interaction', 'foreign_key': 'interactionId'}) Relationship built
→ (:{'label': 'IncidentActivityEvent'}) - [:CORR] -> (:{'label': 'Incident', 'foreign_key': 'incidentId'}) Relationship built
→ (:{'label': 'IncidentActivityEvent'}) - [:EXECUTED_BY] -> (:{'label': 'Resource', 'foreign_key': 'assignmentGroup'}) Relationship built


In [26]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:IncidentActivityEvent)       466737
(:InteractionEvent)            294008
(:Interaction)                 147172
(:IncidentEvent)               138038
(:Incident)                    47057
(:ChangeEvent)                 33381
(:Change)                      18026
(:ConfigurationItem)           15134
(:KnowledgeDocument)           2373
(:ServiceComponent)            340
(:Resource)                    242
(:Log)                         4

--- Relationship counts ---
[:EXTRACTED_FROM]              3324284
[:CORR]                        932164
[:CONTAINS]                    705949
[:EXECUTED_BY]                 466737
[:AFFECTED_CI]                 223734
[:AFFECTED_SC]                 212948
[:USED_KM]                     194437
[:RELATED_INCIDENT]            52687
[:CAUSED_BY_CI]                45499
[:CAUSED_BY_SC]                43123
[:RELATED_CHANGE]              536

--- Totals ---
Total nodes: 18531

# 2. Assign Types

This function creates an ObjectType node (e.g., "Incident", "Interaction") and then links every node of that label in the graph to this type node with an IS_OF_TYPE relationship.

In [64]:
def add_object_type_node(object_type):
    query_create_ot = '''
        MERGE (ot:ObjectType {objectType: $objectType})
    '''

    db_connection.exec_query(
        Query(query_str=query_create_ot,
              parameters={'objectType': object_type}
              )
    )

    query_str = '''
        :auto
        MATCH (ot:ObjectType {objectType: $objectType })
        MATCH (o:$label)
        CALL (o, ot) {
            MERGE (o) - [:IS_OF_TYPE] -> (ot)
            } IN TRANSACTIONS
    '''

    query = Query(
        query_str=query_str,
        parameters={'objectType': object_type},
        template_string_parameters={"label": object_type}
    )

    db_connection.exec_query(query)
    print(f'-> (:ObjectType {{objectType: "{object_type}"}}) created.')

In [28]:
for label in objects.keys():
    add_object_type_node(object_type=label)

-> (:ObjectType {objectType: "Incident"}) created.
-> (:ObjectType {objectType: "Interaction"}) created.
-> (:ObjectType {objectType: "Change"}) created.
-> (:ObjectType {objectType: "KnowledgeDocument"}) created.
-> (:ObjectType {objectType: "Resource"}) created.
-> (:ObjectType {objectType: "ConfigurationItem"}) created.
-> (:ObjectType {objectType: "ServiceComponent"}) created.


This function creates an EventType node (e.g., "IncidentEvent", "InteractionEvent") and then links every node of that label in the graph to this type node with an IS_OF_TYPE relationship.

In [29]:
def add_event_type_node(_db_connection, event_type):
    create_index(_db_connection, 'Event')

    query_create_et = '''
        MERGE (et:EventType {eventType: $eventType})
    '''

    _db_connection.exec_query(
        Query(query_str=query_create_et,
              parameters={'eventType': event_type}
              )
    )

    query_str = '''
        :auto
        MATCH (et:EventType {eventType: $eventType })
        MATCH (e:$label)
        CALL (e, et) {
            MERGE (e) - [:IS_OF_TYPE] -> (et)
            REMOVE e:$label
            SET e:Event
        }
        IN TRANSACTIONS
    '''

    query = Query(
        query_str=query_str,
        parameters={'eventType': event_type},
        template_string_parameters={"label": event_type}
    )

    _db_connection.exec_query(query)
    print(f'-> (:EventType {{eventType: "{event_type}"}}) created.')

In [30]:
for label in EVENTS.keys():
    add_event_type_node(_db_connection=db_connection, event_type=label)

Index for :Event(sysId)
-> (:EventType {eventType: "IncidentEvent"}) created.
Index for :Event(sysId)
-> (:EventType {eventType: "ChangeEvent"}) created.
Index for :Event(sysId)
-> (:EventType {eventType: "InteractionEvent"}) created.
Index for :Event(sysId)
-> (:EventType {eventType: "IncidentActivityEvent"}) created.


# 3. Enrichment

FZE: I thought this was model completion. Or have you decided based on the last meeting to keep only Enrichment and distinguish two types of enrichment?
Yes

## 3.0 Filtering - Period

FZE: This is not enrichment... Is "simple" filtering, data cleaning, right? Which I think you do because of a specific analysis question you have in mind. I wonder if you could get rid of these traces before you materialize the objects. <br>
--> Yes, that's a good idea!

FZE: Important: What is a _TRACE_ in this context? In OCPM I am not sure if this is clear... I guess you refer to all events connected to one specific object, where you consider only INCIDENT, INTERACTION and CHANGE as object types?
--> Todo, rename

GOAL: We want to check if we don't have incomplete traces at the start of our dataset as these might influence our findings.
Since our data comes from different sources, they might capture data covering different time periods.
For instance, an incident might be opened before the start of our recording and as a result, not all incidentDetailActivities were recorded, TODO: work this sentence better out.
We can have incomplete traces because data comes from different sources and while we might have events from the incidentDetailActivity log, the incident might not be recorded in Incident log. (TODO: Work out)

Steps:
1) Infer and materialize DF relationships
2) Check for infrequent starting behavior
3) Filter out infrequent starting behavior,

#### 3.0.0 Infer and materialize DF

Each DF has attributes:
- objectType
- object identifier (sysid) (which object it belongs incidentId, interactionsId, changeId, etc.)

In [31]:
object_types_for_df = ['Interaction', 'Incident', 'Change']

In [32]:
def create_event_timestamp_index(_db_connection, _label):
    index_query_str = f"""
        CREATE INDEX $index_name IF NOT EXISTS
        FOR (n:$label)
        ON (n.timestamp)
    """

    index_query = Query(query_str=index_query_str,
                        parameters={
                            "index_name": f"{_label.lower()}_timestamp_index"
                        },
                        template_string_parameters={
                            "label": _label
                        })

    _db_connection.exec_query(index_query)
    print(f"Index for :{_label}(timestamp)")


In [33]:
for event_labels in EVENTS.keys():
    create_event_timestamp_index(db_connection, event_labels)

Index for :IncidentEvent(timestamp)
Index for :ChangeEvent(timestamp)
Index for :InteractionEvent(timestamp)
Index for :IncidentActivityEvent(timestamp)


In [34]:
def build_df_edges_for_object_type(_db_connection, _object_type):
    """
    Build :DF:* edges for all events related to objects of type :_object_type.
    Creates separate DF edges for each object type and incident event type.
    """

    discover_df_query_str = '''
        :auto
        MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        WITH o, ot.objectType as oType
        MATCH (e:Event) -- (o)
        WITH o, oType, e ORDER BY e.timestamp, elementId(e)
        WITH o.sysId as sysId, oType, collect(e) as events
        UNWIND range(0, size(events) - 2) AS index
        WITH events[index] as fromEvent, events[index+1] as toEvent, sysId, oType
        CALL (fromEvent, toEvent, sysId, oType) {
            MERGE (fromEvent) -[rel:DF {objectType:oType, id:sysId}]->(toEvent)
            RETURN count(rel) as count
        } IN TRANSACTIONS
        RETURN sum(count) as count
       '''

    discover_df = Query(query_str=discover_df_query_str,
                        parameters={'objectType': _object_type})

    res = _db_connection.exec_query(discover_df)
    print(f"-> {_object_type} DF creation result: {res[0]['count']}")


def build_df_edges(_db_connection, _object_types):
    for _object_type in _object_types:
        try:
            build_df_edges_for_object_type(_db_connection, _object_type)
        except Exception as e:
            print(f"Failed to build DFs for {_object_type}: {e}")


In [35]:
build_df_edges(_db_connection=db_connection,
               _object_types=object_types_for_df)

-> Interaction DF creation result: 147004
-> Incident DF creation result: 558159
-> Change DF creation result: 16688


#### 3.0.1 Check for infrequent starting behavior

We have three different types of objects that are involved with Events.
For all three of them, we know their expected starting activity
For Incidents, they are supposed to start with an `Open` from EventType IncidentEvent.

In [36]:
def get_count(_activity, _event_type):
    number_events_query_str = '''
        MATCH (e:Event {activity: $activity}) - [:IS_OF_TYPE] -> (et:EventType {eventType: $eventType })
        RETURN count(e) as cnt'''
    number_events_query = Query(query_str=number_events_query_str,
                                parameters={
                                    'activity': _activity,
                                    'eventType': _event_type
                                }, )

    res = db_connection.exec_query(number_events_query)
    print(f"Number of (:Event {{activity: {_activity}}}) (eventType: {_event_type}): {res[0]['cnt']}")

Let's check how many IncidentEvents we have that have the activity `opened`

In [37]:
event_type = 'IncidentEvent'
activity = 'Open'
get_count(activity, event_type)

Number of (:Event {activity: Open}) (eventType: IncidentEvent): 46606


Let's check whether there are events preceding this Event

In [38]:
def get_before_events(_activity, _event_type):
    print(
        f"Number of (e:Event) - [:DF] -> (:IncidentEvents {{activity: {_activity}}})  (both for same Incident) per e.activity")
    print(
        f"and some statistics on transition time of DF in minutes")

    before_query_str = '''
        MATCH (e1) - [df:DF] -> (e2 {activity: $activity}) - [:IS_OF_TYPE] -> (et:EventType {eventType: $eventType })
        MATCH (e1) - [:IS_OF_TYPE] -> (et1:EventType)
        WITH e1, et1, df, e2, duration.inSeconds(e1.timestamp, e2.timestamp).minutes as diff_minutes
        RETURN df.objectType as objectType, et1.eventType as eventType, e1.activity as previous_activity, min(diff_minutes) as min, round(avg(diff_minutes),2) as avg, max(diff_minutes) as max, stDev(diff_minutes) as stDev, count(df) as cnt
            ORDER by objectType DESC, cnt DESC
    '''

    before_query = Query(query_str=before_query_str,
                         parameters={
                             'activity': _activity,
                             'eventType': _event_type
                         })

    df = pd.DataFrame(db_connection.exec_query(before_query))
    print(f"Activity: {_activity}")
    print(df)

In [39]:
get_before_events(activity, event_type)

Number of (e:Event) - [:DF] -> (:IncidentEvents {activity: Open})  (both for same Incident) per e.activity
and some statistics on transition time of DF in minutes
Activity: Open
   objectType              eventType            previous_activity  min     avg  max      stDev   cnt
0    Incident  IncidentActivityEvent                         Open    0    2.48  119  16.492492  1196
1    Incident  IncidentActivityEvent                   Assignment    4   66.32  116  31.636473    66
2    Incident  IncidentActivityEvent                Status Change    7   70.38  117  36.464727    40
3    Incident  IncidentActivityEvent                 Reassignment    5   73.79  119  39.353920    19
4    Incident  IncidentActivityEvent              Operator Update    3   63.05  119  39.816906    19
5    Incident  IncidentActivityEvent         Update from customer    4   57.41  118  44.085228    17
6    Incident  IncidentActivityEvent                       Update    1   74.00  119  38.586123    10
7    Incident 

There are a few (:IncidentEvent {activity: 'Open'}) that have a preceding (:IncidentActivityEvent {activity: 'Open'}) at the same time (as min = 0). Let's explore how many.

In [40]:
query = '''
        MATCH (e1 {activity: 'Open'}) - [df:DF] -> (e2 {activity: 'Open'}) - [:IS_OF_TYPE] -> (et:EventType {eventType: 'IncidentEvent' })
        MATCH (e1) - [:IS_OF_TYPE] -> (et1:EventType)
        RETURN df.objectType as objectType, et1.eventType as eventType, e1.activity as previous_activity, e1.timestamp = e2.timestamp as at_same_time, count(df) as cnt
            ORDER by objectType DESC, cnt DESC
'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,objectType,eventType,previous_activity,at_same_time,cnt
0,Incident,IncidentActivityEvent,Open,True,1168
1,Incident,IncidentActivityEvent,Open,False,28


Majority, let's swap these cases

In [41]:
def repair_df_edges(_db_connection):
    query = '''
            MATCH (:EventType {eventType: 'IncidentActivityEvent'}) <- [:IS_OF_TYPE] - (e1 {activity: 'Open'}) - [df12:DF] -> (e2 {activity: 'Open'}) - [:IS_OF_TYPE] -> (et:EventType {eventType: 'IncidentEvent' })
            MATCH (e2) - [df23:DF {id:df12.id}] -> (e3)
            MATCH (e1) - [:IS_OF_TYPE] -> (et1:EventType)
            WHERE e1.timestamp = e2.timestamp
            MERGE (e2) - [df21:DF] -> (e1)
            MERGE (e1) - [df13:DF] -> (e3)
            SET df21 = properties(df12) // copy over properties
            SET df13 = properties(df23) // copy over properties
            DELETE df12
            DELETE df23
    '''
    pd.DataFrame(db_connection.exec_query(query))


repair_df_edges(db_connection)

In [42]:
query = '''
        MATCH (e1 {activity: 'Open'}) - [df:DF] -> (e2 {activity: 'Open'}) - [:IS_OF_TYPE] -> (et:EventType {eventType: 'IncidentEvent' })
        MATCH (e1) - [:IS_OF_TYPE] -> (et1:EventType)
        RETURN df.objectType as objectType, et1.eventType as eventType, e1.activity as previous_activity, e1.timestamp = e2.timestamp as at_same_time, count(df) as cnt
            ORDER by objectType DESC, cnt DESC
'''
pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,objectType,eventType,previous_activity,at_same_time,cnt
0,Incident,IncidentActivityEvent,Open,False,28


In [43]:
get_before_events(activity, event_type)

Number of (e:Event) - [:DF] -> (:IncidentEvents {activity: Open})  (both for same Incident) per e.activity
and some statistics on transition time of DF in minutes
Activity: Open
   objectType              eventType            previous_activity  min     avg  max      stDev  cnt
0    Incident  IncidentActivityEvent                   Assignment    4   66.32  116  31.636473   66
1    Incident  IncidentActivityEvent                Status Change    7   70.38  117  36.464727   40
2    Incident  IncidentActivityEvent                         Open   59  106.14  119  25.071327   28
3    Incident  IncidentActivityEvent                 Reassignment    5   73.79  119  39.353920   19
4    Incident  IncidentActivityEvent              Operator Update    3   63.05  119  39.816906   19
5    Incident  IncidentActivityEvent         Update from customer    4   57.41  118  44.085228   17
6    Incident  IncidentActivityEvent                       Update    1   74.00  119  38.586123   10
7    Incident  Inciden

Let's check whether there are events succeeding this Event

In [44]:
def get_after_events(_activity, _event_type):
    print(
        f"Number of (:IncidentEvents {{activity: {_activity}}}) - [:DF] -> (e:Event) (both for same Incident) per e.activity")

    print(
        f"and some statistics on transition time of DF in minutes")

    after_query_str = '''
        MATCH (et:EventType {eventType: $eventType}) <- [:IS_OF_TYPE] - (e1:Event {activity: $activity}) - [df:DF] -> (e2)
        MATCH (et2:EventType) <- [:IS_OF_TYPE] - (e2)
        WITH e1, et2, df, e2, duration.inSeconds(e1.timestamp, e2.timestamp).minutes as diff_minutes
        RETURN df.objectType as objectType, et2.eventType as next_eventType, e2.activity as next_activity, min(diff_minutes) as min, round(avg(diff_minutes),2) as avg, max(diff_minutes) as max, stDev(diff_minutes) as stDev, count(df) as cnt
            ORDER by objectType DESC, cnt DESC
    '''

    after_query = Query(query_str=after_query_str,
                        parameters={
                            'activity': _activity,
                            "eventType": _event_type
                        })

    df = pd.DataFrame(db_connection.exec_query(after_query))
    print(df)


In [45]:
event_type = 'IncidentEvent'
activity = 'Open'
get_after_events(activity, event_type)

Number of (:IncidentEvents {activity: Open}) - [:DF] -> (e:Event) (both for same Incident) per e.activity
and some statistics on transition time of DF in minutes
   objectType         next_eventType                next_activity     min        avg     max          stDev    cnt
0    Incident  IncidentActivityEvent                         Open       0       0.00       0       0.000000  46369
1    Incident  IncidentActivityEvent                   Assignment       1   14337.90  289347   52230.590031     60
2    Incident  IncidentActivityEvent              Operator Update       6   11542.13  120958   25345.996429     46
3    Incident  IncidentActivityEvent                 Reassignment       0   35974.90  484964  105811.667640     42
4    Incident  IncidentActivityEvent                Status Change       0    1650.74   22684    4450.809517     27
5    Incident  IncidentActivityEvent                       Update       0   31155.59  275138   77619.083018     22
6    Incident  IncidentActivityEv

Almost all :IncidentEvents are followed by an `:IncidentActivityEvent` with activity `Open`.

Namely, 46369/46606 = 99.5%.

Let's check the first and last timestamps of the subsequent events.

Let's check which `(:Incidents)` are not DFed by a `(:IncidentActivityEvent {activity:Opened})`.

If they are at the beginning of the timeline, then it could be that we miss the open 'IncidentActivityEvent', even though we do have the Opened `IncidentEvent`.

In [46]:
query_str = '''
         MATCH (et:EventType {eventType: 'IncidentEvent'}) <- [:IS_OF_TYPE] - (e1:Event {activity: 'Open'}) - [df:DF] -> (e2)
        MATCH (et2:EventType) <- [:IS_OF_TYPE] - (e2)
        RETURN date(min(e1.timestamp)) as min_timestamp_e1, date(max(e1.timestamp)) as max_timestamp_e1, e2.activity, et2.eventType as next_event_type, date(min(e2.timestamp)) as min_timestamp_e2, date(max(e2.timestamp)) as max_timestamp_e2, count(e2) as cnt order by cnt DESC
'''

result = pd.DataFrame(db_connection.exec_query(query_str))
print(result)

   min_timestamp_e1 max_timestamp_e1                  e2.activity        next_event_type min_timestamp_e2 max_timestamp_e2    cnt
0        2013-08-19       2014-03-31                         Open  IncidentActivityEvent       2013-08-19       2014-03-31  46369
1        2013-01-22       2013-08-14                   Assignment  IncidentActivityEvent       2013-01-23       2013-11-12     60
2        2012-10-15       2013-08-13              Operator Update  IncidentActivityEvent       2013-01-07       2013-09-16     46
3        2012-02-05       2013-08-15                 Reassignment  IncidentActivityEvent       2013-01-07       2013-09-11     42
4        2013-01-23       2013-08-16                Status Change  IncidentActivityEvent       2013-01-23       2013-08-20     27
5        2012-10-01       2013-08-12                       Update  IncidentActivityEvent       2013-02-11       2013-11-12     22
6        2012-08-10       2013-08-14         Update from customer  IncidentActivityEvent  

There's a clear cutt off, if `(e1:IncidentEvent {activity: 'Open'})` is DFed by `(:IncidentActivityEvent {activity: 'Open'})`, then the date is after or on 2013-08-19.

This could be a result of drift or of how the data was recorded, but let's explore what happens if we would remove all objects (Incident, Change, Interaction) that have at least one event before said cut off date.

In [47]:
query_str = '''
        MATCH (o) - [:CORR] - (e:Event)
        WITH e, o, e.timestamp < dateTime("2013-08-19T09:59:53.000000000+01:00") as before_cutoff
        WITH o, collect(distinct before_cutoff) as before_cutoffs
        MATCH (o) - [:CORR] - (all_e:Event)- [:IS_OF_TYPE] -> (et:EventType)
        RETURN et.eventType as eventType, True in before_cutoffs as before, count(all_e) as cnt ORDER BY eventType, before DESC
'''

result = pd.DataFrame(db_connection.exec_query(query_str))
table = pd.pivot_table(result, index=['eventType', 'before'], aggfunc="sum")
table['%'] = (round(table.cnt / table.groupby(level=0).cnt.transform("sum") * 100, 2)).astype(str) + '%'
print(table)

                                 cnt       %
eventType             before                
ChangeEvent           False    33217  99.51%
                      True       164   0.49%
IncidentActivityEvent False   458748  98.29%
                      True      7989   1.71%
IncidentEvent         False   137336  99.49%
                      True       702   0.51%
InteractionEvent      False   293106  99.69%
                      True       902   0.31%


In [48]:
query_str = '''
        MATCH (o) - [:CORR] - (e:Event)
        WITH e, o, e.timestamp < dateTime("2013-08-19T09:59:53.000000000+01:00") as before_cutoff
        WITH o, collect(distinct before_cutoff) as before_cutoffs
        MATCH (o) - [:IS_OF_TYPE] - (ot:ObjectType)
        RETURN ot.objectType as objectType, True in before_cutoffs as before, count(distinct o) as cnt ORDER BY objectType, before DESC
'''

result = pd.DataFrame(db_connection.exec_query(query_str))
table = pd.pivot_table(result, index=['objectType', 'before'], aggfunc="sum")
table['%'] = (round(table.cnt / table.groupby(level=0).cnt.transform("sum") * 100, 2)).astype(str) + '%'
print(table)

                       cnt       %
objectType  before                
Change      False    16611  99.51%
            True        82   0.49%
Incident    False    46378  99.49%
            True       238   0.51%
Interaction False   146553  99.69%
            True       451   0.31%


There are only very few objects that have at least one event before 2013-08-19. So we will remove these events and also there associated objects.

#### 3.0.2 Filter Interactions, Changes and Incidents that have at least one event before cutoff

Find specific cut_off point

In [49]:
query_str = '''
         MATCH (et:EventType {eventType: 'IncidentEvent'}) <- [:IS_OF_TYPE] - (e1:Event {activity: 'Open'}) - [df:DF] -> (e2)
        MATCH (et2:EventType) <- [:IS_OF_TYPE] - (e2)
        RETURN (max(e1.timestamp)) as max_timestamp_e1, e2.activity, et2.eventType as next_event_type, count(e2) as cnt order by cnt DESC
'''

result = pd.DataFrame(db_connection.exec_query(query_str))
print(result)

                       max_timestamp_e1                  e2.activity        next_event_type    cnt
0   2014-03-31T17:24:49.000000000+01:00                         Open  IncidentActivityEvent  46369
1   2013-08-14T13:04:53.000000000+01:00                   Assignment  IncidentActivityEvent     60
2   2013-08-13T13:37:31.000000000+01:00              Operator Update  IncidentActivityEvent     46
3   2013-08-15T18:40:54.000000000+01:00                 Reassignment  IncidentActivityEvent     42
4   2013-08-16T09:10:05.000000000+01:00                Status Change  IncidentActivityEvent     27
5   2013-08-12T10:16:16.000000000+01:00                       Update  IncidentActivityEvent     22
6   2013-08-14T10:02:31.000000000+01:00         Update from customer  IncidentActivityEvent     18
7   2013-04-17T14:31:53.000000000+01:00                      Resolve          IncidentEvent      4
8   2013-08-08T09:30:14.000000000+01:00  Communication with customer  IncidentActivityEvent      4
9   2013-0

We use as cutoff 2013-08-19 00:00:00

In [50]:
date = '2013-08-19T00:00:00+01:00'
object_types = ['Interaction', 'Change', 'Incident']

In [51]:
def filter_objects_with_events_before_date(_object_types, _date):
    # first count the objects and events we want to delete
    count_query = '''
        MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - [] - (e:Event)
        WHERE ot.objectType in $objectTypes
        WITH e, ot, o, e.timestamp < dateTime($date) as before_cutoff
        WITH ot, o, collect(distinct before_cutoff) as before_cutoffs
        WHERE True in before_cutoffs
        MATCH (o) - [] - (all_e) - [:IS_OF_TYPE] -> (et:EventType)
        RETURN ot.objectType as objectType, count(distinct o) as object_deleted, count(distinct all_e) as events_deleted
    '''
    q_number_kept_str = Query(
        query_str=count_query,
        parameters={
            'objectTypes': _object_types,
            'date': date
        }
    )
    df_removed = pd.DataFrame(db_connection.exec_query(q_number_kept_str))

    # now actually delete those objects and events
    delete_query_str = '''
        MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - [] - (e) - [:IS_OF_TYPE] -> (et:EventType)
        WHERE ot.objectType in $objectTypes
        WITH e, o, e.timestamp < dateTime($date) as before_cutoff
        WITH o, collect(distinct before_cutoff) as before_cutoffs
        WHERE True in before_cutoffs
        MATCH (o) - [] - (all_e) - [:IS_OF_TYPE] -> (et:EventType)
        DETACH DELETE o
        DETACH DELETE all_e
    '''

    delete_query = Query(
        query_str=delete_query_str,
        parameters={
            'objectTypes': _object_types,
            'date': date
        }
    )
    db_connection.exec_query(delete_query)

    # count what is left, these are the objects and events we kept
    q_number_kept_str = '''
        MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - []  - (e) - [:IS_OF_TYPE] -> (et:EventType)
        WHERE ot.objectType in $objectTypes
        RETURN ot.objectType as objectType, count(distinct o) as objects_kept, count(distinct e) as events_kept
    '''
    q_number_kept = Query(
        query_str=q_number_kept_str,
        parameters={'objectTypes': _object_types}
    )
    df_kept = pd.DataFrame(db_connection.exec_query(q_number_kept))

    df_both = df_removed.set_index('objectType').join(df_kept.set_index('objectType'))
    df_both['total_objects'] = df_both['objects_kept'] + df_both['object_deleted']
    df_both['total_events'] = df_both['events_kept'] + df_both['events_deleted']
    df_both['% events deleted'] = round(df_both['events_deleted'] / df_both['total_events'] * 100, 2)
    df_both['% objects deleted'] = round(df_both['object_deleted'] / df_both['total_objects'] * 100, 2)
    df_both = df_both[
        ['object_deleted', 'objects_kept', 'total_objects', '% objects deleted', 'events_deleted', 'events_kept',
         'total_events', '% events deleted']]
    return df_both



In [52]:
filter_objects_with_events_before_date(object_types, date)

Unnamed: 0_level_0,object_deleted,objects_kept,total_objects,% objects deleted,events_deleted,events_kept,total_events,% events deleted
objectType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Incident,237,46379,46616,0.51,8675,596100,604775,1.43
Interaction,447,146557,147004,0.3,894,293114,294008,0.3
Change,79,16614,16693,0.47,158,33223,33381,0.47


## 3.1 Materialize CI_SC Objects and its Relationships

As for analysis question we want to understand how Configuration Items are managed over time and how to interact together.

For this, we want to look at the behavior of CIs, but the CIs belong to a specific Service Component and this SC might change over time.
To simplify the analysis on our end, we decide to reify the time-constrained relationship between the CI and SC into a new object CI_SC and then we can analyze the behavior on these CI_SCs over time.



### 3.1.1 Materialize CI_SC Objects

We want to materialize the time-constrained relationship between the CI and SC.
We can use the generic materialize_relationship_into_object method for this.

This method reifies a relationship using a configuration file.
We want to materialize the `(:ServiceComponent) - [:CONTAINS] -> (:ConfigurationItem)` relationship into a new `(:CI_SC)` node.

So the `from_object` is `ServiceComponent`, `to_object` is `ConfigurationItem` and relation_type is `CONTAINS`.

We can also copy over some relationships from the two objects into the new materialized object.

We decide to copy over the `ciType` and `ciSubtype` from the `ConfigurationItem`.

In [53]:
relationships_to_materialize = {
    "CI_SC": [{
        "from_object": {
            "label": "ServiceComponent",
        },
        "to_object": {
            "label": "ConfigurationItem",
            "attributes": {
                "ciType": "ciType",
                "ciSubtype": "ciSubtype"
            }
        },
        "relation_type": "CONTAINS"
    }]
}

In [54]:
def materialize_relationship_into_object(_db_connection, _label, _config):
    from_object = _config["from_object"]
    to_object = _config["to_object"]
    set_attributes = []

    for _type, _object in {"from": from_object, "to": to_object}.items():
        if "attributes" in _object:
            set_attributes.extend(
                [f"new.{key} = COALESCE(new.{key}, {_type}.{attr})" for key, attr in _object["attributes"].items()])

    materialize_relationship_query = '''
        :auto
        MATCH (from) - [ :IS_OF_TYPE] -> (:ObjectType {objectType: $from_object})
        MATCH (to) - [ :IS_OF_TYPE] -> (:ObjectType {objectType: $to_object})
        MATCH (from) - [r WHERE type(r) = $relation_type] -> (to)
        CALL (from, r, to) {
            MERGE (new:$materialized_object {sysId: from.sysId + '_' + to.sysId})
            MERGE (from) <- [:RELATED] - (new) - [:RELATED] -> (to)
            SET new[$from_object] = from.sysId,
                new[$to_object] = to.sysId
            $set_attributes
        } IN TRANSACTIONS
    '''

    materialize_query = Query(
        query_str=materialize_relationship_query,
        parameters={
            "from_object": from_object["label"],
            "to_object": to_object["label"],
            "relation_type": _config["relation_type"]
        },
        template_string_parameters={
            "materialized_object": _label,
            "set_attributes": "SET " + ", ".join(set_attributes)
        }
    )

    _db_connection.exec_query(materialize_query)
    print(f"→ {_label} nodes created.")


First, we ensure we add an index for the new materialized object

In [55]:
print("\n=== INDEXES ===")
for _label in relationships_to_materialize.keys():
    try:
        create_index(_db_connection=db_connection,
                     _label=_label)
    except Exception as e:
        print(f"Failed to create index for {_label}: {e}")


=== INDEXES ===
Index for :CI_SC(sysId)


Then, we can materialize the relationship

In [56]:
print("\n=== Materializing Relationships ===")
for _label, _configs in relationships_to_materialize.items():
    for _config in _configs:
        try:
            materialize_relationship_into_object(
                _db_connection=db_connection,
                _label=_label,
                _config=_config)
        except Exception as e:
            print(f"Failed for {_label}: {e}")


=== Materializing Relationships ===
→ CI_SC nodes created.


Add the object type to adhere to schema

In [57]:
for _object in relationships_to_materialize.keys():
    add_object_type_node(_object)

-> (:ObjectType {objectType: "CI_SC"}) created.


#### 3.1.2 Extend Relationships to CI_SC Objects

In our data model, Incidents, Interactions and Changes were related to the ConfigurationItem and ServiceComponent they affected or were caused by.
To keep this knowledge, we decide to extend these relationships to the new materialized `(:CI_SC)` object.

So, more specifically, if a single Incident was affected by a Service Component A and a ConfigurationItem B, then it is affected by the reified :CI_SC of SC A and CI B.

More specific, that means we want to extend the following relationships

 | Extended Relationship to CI_SC                      | Relationship to SC                                       | Relationship to CI                                        |
 |-----------------------------------------------------|----------------------------------------------------------|-----------------------------------------------------------|
 | `(inc:Incident) - [:AFFECTED_CI_SC] -> (:CI_SC)`    | `(inc:Incident) - [:AFFECTED_SC] -> (:ServiceComponent)` | `(inc:Incident) - [:AFFECTED_CI] -> (:ConfigurationItem)` |
 | `(int:Interaction) - [:AFFECTED_CI_SC] -> (:CI_SC)` | `(int:Interaction) - [:AFFECTED_SC] -> (:ServiceComponent)` | `(int:Interaction) - [:AFFECTED_CI] -> (:ConfigurationItem)`|
 | `(c:Change) - [:AFFECTED_CI_SC] -> (:CI_SC)`        | `(c:Change) - [:AFFECTED_SC] -> (:ServiceComponent)` | `(c:Change) - [:AFFECTED_CI] -> (:ConfigurationItem)` |
 | `(c:Change) - [:CAUSED_BY_CI_SC] -> (:CI_SC)`        | `(c:Change) - [:CAUSED_BY_CI_SC] -> (:ServiceComponent)` | `(c:Change) - [:CAUSED_BY_CI_SC] -> (:ConfigurationItem)` |


In [58]:
o2o_relationships_to_extend = {
    "AFFECTED_CI_SC": [{
        "from_object": {
            "label": "Incident|Interaction|Change",
            "relationships": [
                {
                    "related_label": "ServiceComponent",
                    "related_object": "sc",
                    "relation_type": "AFFECTED_SC"
                },
                {
                    "related_label": "ConfigurationItem",
                    "related_object": "ci",
                    "relation_type": "AFFECTED_CI"
                }]
        },
        "to_object": {
            "label": "CI_SC",
            "relationships": [
                {
                    "related_label": "ServiceComponent",
                    "related_object": "sc",
                    "relation_type": "RELATED"
                },
                {
                    "related_label": "ConfigurationItem",
                    "related_object": "ci",
                    "relation_type": "RELATED"
                }]
        }
    }],
    "CAUSED_BY_CI_SC": [{
        "from_object": {
            "label": "Incident",
            "relationships": [
                {
                    "related_label": "ServiceComponent",
                    "related_object": "sc",
                    "relation_type": "CAUSED_BY_SC"
                },
                {
                    "related_label": "ConfigurationItem",
                    "related_object": "ci",
                    "relation_type": "CAUSED_BY_CI"
                }]
        },
        "to_object": {
            "label": "CI_SC",
            "relationships": [
                {
                    "related_label": "ServiceComponent",
                    "related_object": "sc",
                    "relation_type": "RELATED"
                },
                {
                    "related_label": "ConfigurationItem",
                    "related_object": "ci",
                    "relation_type": "RELATED"
                }]
        }
    }]
}

In [59]:
def extend_relationship(_db_connection, _type, _config):
    from_object = _config["from_object"]
    to_object = _config["to_object"]

    query_str = '''
        :auto
        MATCH (from:$from_object)
        MATCH (to:$to_object)
        $relation_conditions
        WITH distinct from, to
        CALL (from, to) {
            MERGE (from) - [r:$type] -> (to)
            RETURN r
        } IN TRANSACTIONS
        RETURN count(r) as count
    '''

    relation_conditions = []
    for _object_type, _object in {"from": from_object, "to": to_object}.items():
        if "relationships" in _object:
            for relationship in _object['relationships']:
                rel_type = relationship["relation_type"]
                related_object = relationship["related_object"]
                related_label = relationship["related_label"]
                relation_conditions.append(
                    f"MATCH ({_object_type}) - [:{rel_type}] - ({related_object}:{related_label})")

    query = Query(
        query_str=query_str,
        template_string_parameters={
            "from_object": from_object["label"],
            "to_object": to_object["label"],
            "type": _type,
            "relation_conditions": "\n".join(relation_conditions)
        }
    )

    res = _db_connection.exec_query(query)
    print(f'→ {res[0]["count"]} (:{from_object["label"]}) - [:{_type}] -> (:{to_object["label"]}) Relationship built')

In [60]:
for _type, _configs in o2o_relationships_to_extend.items():
    for _config in _configs:
        try:
            extend_relationship(db_connection, _type, _config)
        except Exception as e:
            print(f"Failed for {_type}: {e}")

→ 222894 (:Incident|Interaction|Change) - [:AFFECTED_CI_SC] -> (:CI_SC) Relationship built
→ 42929 (:Incident) - [:CAUSED_BY_CI_SC] -> (:CI_SC) Relationship built


CI_SCs are impacted by Interactions, Changes, and Incidents. To understand how CI_SCs are managed over time, we can analyze these related objects and their events. However, examining these objects in isolation misses the bigger picture.

A more effective approach is to directly link events, such as opening a change, to the specific CI_SCs they affect. This allows us to track how CI_SCs are managed through these three types of events and observe their interactions over time.

To achieve this, we aim to extend the following Event-to-Object (E2O) relationships:

 | Extended Relationship to CI_SC                      | Relationship to related object             | Relationship to from related object to CI_SC             |
 |-----------------------------------------------------|--------------------------------------------|----------------------------------------------------------|
 | `(e:Event) - [:CORR] -> (ci_sc:CI_SC)`              | `(e:Event) - [:CORR] -> (c:Change)`        | `(c:Change) - [:AFFECTED_CI_SC] -> (ci_sc:CI_SC)`        |
 | `(e:Event) - [:CORR] -> (ci_sc:CI_SC)`              | `(e:Event) - [:CORR] -> (int:Interaction)` | `(int:Interaction) - [:AFFECTED_CI_SC] -> (ci_sc:CI_SC)` |
 | `(e:Event) - [:CORR] -> (ci_sc:CI_SC)`              | `(e:Event) - [:CORR] -> (inc:Incident)`    | `(inc:Incident) - [:AFFECTED_CI_SC] -> (ci_sc:CI_SC)`    |


In [61]:
e2o_relationships_to_extend = {
    "CORR": [
        {
            "from_object": {
                "label": "Event",
                "relationships": [
                    {
                        "related_label": "Change",
                        "related_object": "change",
                        "relation_type": "CORR"
                    }]
            },
            "to_object": {
                "label": "CI_SC",
                "relationships": [
                    {
                        "related_label": "Change",
                        "related_object": "change",
                        "relation_type": "AFFECTED_CI_SC"
                    }]
            }
        },
        {
            "from_object": {
                "label": "Event",
                "relationships": [
                    {
                        "related_label": "Incident",
                        "related_object": "incident",
                        "relation_type": "CORR"
                    }]
            },
            "to_object": {
                "label": "CI_SC",
                "relationships": [
                    {
                        "related_label": "Incident",
                        "related_object": "incident",
                        "relation_type": "AFFECTED_CI_SC"
                    }]
            }
        },
        {
            "from_object": {
                "label": "Event",
                "relationships": [
                    {
                        "related_label": "Interaction",
                        "related_object": "interaction",
                        "relation_type": "CORR"
                    }]
            },
            "to_object": {
                "label": "CI_SC",
                "relationships": [
                    {
                        "related_label": "Interaction",
                        "related_object": "interaction",
                        "relation_type": "AFFECTED_CI_SC"
                    }]
            }
        }
    ],
}

We can use the same generic function `extend_relationship` but now for the E2O relationships.

In [62]:
for _type, _configs in e2o_relationships_to_extend.items():
    for _config in _configs:
        try:
            extend_relationship(db_connection, _type, _config)
        except Exception as e:
            print(f"Failed for {_type}: {e}")

→ 53423 (:Event) - [:CORR] -> (:CI_SC) Relationship built
→ 596028 (:Event) - [:CORR] -> (:CI_SC) Relationship built
→ 293114 (:Event) - [:CORR] -> (:CI_SC) Relationship built


## 3.2 Understand Behavior of CI_SC over time

Now, we have materialized the CI_SC object, and extended the relationships so that the CI_SCs are directly related to the events.
Using this information, we can understand the behavior of the CI_SC over time by looking at how the events are ordered in time.
For this, we will materialize the DF relationship for the CI_SC.

We can reuse the generic query for the materialization of DF relationships

#### 3.2.1 Materialize DF edges

In [63]:
build_df_edges_for_object_type(_db_connection=db_connection, _object_type='CI_SC')

-> CI_SC DF creation result: 929655


Recall that because some events happen at the same time, they can be incorrectly ordered. Let's fix these as well.

In [64]:
repair_df_edges(db_connection)

#### 3.2.1 Understand the main flow: Explore Directly-Follows Relations

To better understand the process, we can query the DF paths in Neo4j bloom with the following query
```
    MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
    MATCH (start:Event) -- (o)
    MATCH (end:Event) -- (o)
    WHERE NOT EXISTS (() - [:DF {id: o.sysId}] -> (start))
    AND NOT EXISTS ((end) - [:DF {id: o.sysId}] -> ())
    MATCH p = (start) - [:DF* {id: o.sysId}] -> (end)
    RETURN p limit 10
```

#### 3.2.2 Set Variant Analysis per CI_SC

Let's explore the set variants
TODO AVA Add explanation

In [22]:
def get_activity_set_variants(_db_connection, _object_type, _event_types):
    # get the bag variants on the high_level
    q_set_activity_variants_str = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e) - [:IS_OF_TYPE] -> (et:EventType)
        WHERE et.eventType IN $eventTypes
        WITH o, e.activity AS activity ORDER BY activity
        WITH o, collect(distinct activity) as set_variant
        RETURN ltrim(reduce(initial = "", activity in set_variant | initial + " - (" + activity + ")" ), " - " ) as set_variant, count(distinct o) as count_objects order by count_objects DESC
    '''

    q_set_activity_variants = Query(query_str=q_set_activity_variants_str,
                                    parameters={
                                        'objectType': _object_type,
                                        'eventTypes': event_types
                                    })

    _result = pd.DataFrame(db_connection.exec_query(q_set_activity_variants))
    _result['%_set_variant'] = round(
        _result.groupby(['set_variant']).count_objects.transform("sum") / sum(_result['count_objects']) * 100, 2)
    return _result



In [66]:
event_types = ['ChangeEvent', 'InteractionEvent', 'IncidentEvent', 'IncidentActivityEvent']
get_activity_set_variants(_db_connection=db_connection,
                          _object_type='CI_SC',
                          _event_types=event_types)

Unnamed: 0,set_variant,count_objects,%_set_variant
0,(End) - (Start),8212,63.61
1,(Close) - (Open),1554,12.04
2,(Assignment) - (Close) - (Closed) - (Open) - (...,159,1.23
3,(Assignment) - (Close) - (Closed) - (Open) - (...,115,0.89
4,(Close) - (End) - (Open) - (Start),71,0.55
...,...,...,...
1309,(Assignment) - (Close) - (Closed) - (Communica...,1,0.01
1310,(Assignment) - (Caused By CI) - (Close) - (Clo...,1,0.01
1311,(Assignment) - (Caused By CI) - (Close) - (Clo...,1,0.01
1312,(Assignment) - (Close) - (Closed) - (Communica...,1,0.01


These results are difficult to interpret as the activities refer to different objects in the process, which are "lost" at this level of abstraction. <br> Let's then abstract and look at the event types:

In [67]:
def get_event_type_set_variants(_db_connection, _object_type, _event_types):
    # get the bag variants on the high_level
    q_set_event_type_variants_str = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e:Event) - [:IS_OF_TYPE] -> (et:EventType)
        WHERE et.eventType IN $eventTypes
        WITH o, et.eventType as event_type ORDER BY event_type
        WITH o, collect(distinct event_type) as set_variant
        RETURN ltrim(reduce(initial = "", activity in set_variant | initial + " - (" + activity + ")" ), " - " ) as set_variant, count(o) as count_objects order by count_objects DESC
    '''

    q_set_event_type_variants = Query(query_str=q_set_event_type_variants_str,
                                      parameters={
                                          'objectType': 'CI_SC',
                                          'eventTypes': event_types
                                      })

    _result = pd.DataFrame(db_connection.exec_query(q_set_event_type_variants))
    _result['%_set_variant'] = round(
        _result.groupby(['set_variant']).count_objects.transform("sum") / sum(_result['count_objects']) * 100, 2)
    return _result

In [68]:
event_types = ['ChangeEvent', 'InteractionEvent', 'IncidentEvent', 'IncidentActivityEvent']
get_event_type_set_variants(_db_connection=db_connection,
                            _object_type='CI_SC',
                            _event_types=event_types)

Unnamed: 0,set_variant,count_objects,%_set_variant
0,(ChangeEvent),8212,63.61
1,(IncidentActivityEvent) - (IncidentEvent) - (I...,2038,15.79
2,(InteractionEvent),1554,12.04
3,(ChangeEvent) - (IncidentActivityEvent) - (Inc...,531,4.11
4,(IncidentActivityEvent) - (IncidentEvent),472,3.66
5,(ChangeEvent) - (InteractionEvent),71,0.55
6,(ChangeEvent) - (IncidentActivityEvent) - (Inc...,32,0.25


Now we only have 7 variants.

- Variant 0:  Most CI_SCs (63.6%) are only involved in ChangeEvents;
- Variant 1: A large part of CI_SCs (15.8%) are only involved in Incident(Activity)Events and InteractionEvents
- Variant 2: 12.0% of CI_SCs in just InteractionEvents
- Variant 3: 4.1% of CI_SCs are involved in all types of events
- Variant 4: 3.7% of CI_SCs are involved just Incident(Activity)Events
- Variant 5: 0.6% of CI_SCs are involved in Change and Interaction Event
- Variant 6: 3.7% of CI_SCs are involved in Change an Incident(Activity)Events

This is already an interesting finding that many CI_SCs are either managed through Changes or either through Incident and Interactions.


#### 3.2.4 Inferring HighLevelEvents

To better understand this behavior, we aggregate the events to a higher level.
So instead of looking at the individual events performed on a CI_SC through an Interaction, we are only interested in when said Interaction opens and closes.

Therefore, we will infer High Level Events.
This inference can be done on many levels, but we will do it by creating a new HighLevelEvent for every Interaction, Change and Incident.
For each Interaction, Change and Incident, we will store when it was opened as timestamp, and when it was closed as EndTime.

Instead of aggregating on the eventType level, we will aggregate on the main object they operate. So, that's for
- Change --> ChangeEvent
- Interaction --> InteractionEvent
- Incident --> IncidentActivityEvent and IncidentEvent

In [69]:
def infer_start_event(_db_connection, _object_type):
    # infer start and end events for each object type
    q_start_event = '''
        :auto
        // Infer start event of an object
        MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        MATCH (o)<-[]-(e:Event)
        WHERE NOT ()-[:DF {id:o.sysId}]->(e)
        CALL (o, e){
            MERGE (o)<-[rel:START]-(e)
            RETURN rel
        } IN TRANSACTIONS
        RETURN count(rel) as count
    '''

    q_start_event_result = Query(
        query_str=q_start_event,
        parameters={"objectType": _object_type}
    )

    res = _db_connection.exec_query(q_start_event_result)

    print(f'-> Inferred Start Events for {res[0]["count"]} objects ({_object_type})')

In [70]:
def infer_end_event(_db_connection, _object_type):
    # infer start and end events for each object type
    q_end_event = '''
        :auto
        // Infer start event of an object
        MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        MATCH (o)<-[]-(e:Event)
        WHERE NOT (e)-[:DF {id:o.sysId}]->()
        CALL (o, e){
            MERGE (o)<-[rel:END]-(e)
            RETURN rel
        } IN TRANSACTIONS
        RETURN count(rel) as count
    '''

    q_end_event_result = Query(
        query_str=q_end_event,
        parameters={"objectType": _object_type}
    )

    res = _db_connection.exec_query(q_end_event_result)

    print(f'-> Inferred End Events for {res[0]["count"]} objects ({_object_type})')

In [71]:
object_types = ['Interaction', 'Incident', 'Change']
for object_type in object_types:
    infer_start_event(db_connection, object_type)
    infer_end_event(db_connection, object_type)

-> Inferred Start Events for 146557 objects (Interaction)
-> Inferred End Events for 146557 objects (Interaction)
-> Inferred Start Events for 48133 objects (Incident)
-> Inferred End Events for 49887 objects (Incident)
-> Inferred Start Events for 16614 objects (Change)
-> Inferred End Events for 16619 objects (Change)


In [None]:
# materialize time between events as duration on DF edge
q_materialize_duration_on_df = '''
    :auto
    MATCH (e1:Event) - [df:DF] -> (e2:Event)
    WHERE df.duration IS NULL
    CALL (e1, df, e2) {
        SET df.duration = duration.inSeconds(e1.timestamp, e2.timestamp).seconds
    } IN TRANSACTIONS
    RETURN count(*) AS total
'''

db_connection.exec_query(q_materialize_duration_on_df)

In [72]:
# build index for high-level events
qCreateIndexHLE_sys = f'''
    CREATE INDEX highLevelEventSysIdIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.sysId);
'''
db_connection.exec_query(qCreateIndexHLE_sys)

qCreateIndexHLE_start = f'''
    CREATE INDEX timestampIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.timestamp);
'''
db_connection.exec_query(qCreateIndexHLE_start)
qCreateIndexHLE_end = f'''
    CREATE INDEX highLevelEventEndTimeIndex IF NOT EXISTS FOR (h:HighLevelEvent) ON (h.endTime);
'''
db_connection.exec_query(qCreateIndexHLE_end)


[]

[]

[]

In [73]:
def infer_high_level_events(_db_connection, _object_type):
    # build high-level events
    q_build_high_level_event_str = '''

                :auto
        MATCH (n) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        MATCH (eStart:Event)-[st:START]->(n)<-[en:END]-(eEnd:Event)
        WITH DISTINCT eStart, eEnd
        CALL (eStart, eEnd) {
            MERGE (hEventType:EventType {eventType: 'HighLevelEvent', subEventType:  $objectType})
            MERGE (h:HighLevelEvent {sysId: "HLE_" + eStart.sysId})
            MERGE (h) - [:IS_OF_TYPE] -> (hEventType)
            ON CREATE SET h.timestamp=eStart.timestamp, h.activity=$objectType, h.subActivity = 'Open'
            MERGE (h)-[:START]->(eStart)
            RETURN h
            UNION
            MERGE (hEventType:EventType {eventType: 'HighLevelEvent', subEventType:  $objectType})
            MERGE (h:HighLevelEvent {sysId: "HLE_" + eEnd.sysId})
            MERGE (h) - [:IS_OF_TYPE] -> (hEventType)
            ON CREATE SET h.timestamp=eEnd.timestamp, h.activity=$objectType, h.subActivity = 'Close'
            MERGE (h)-[:END]->(eEnd)
            RETURN h
        } IN TRANSACTIONS
        RETURN count(h) as count
    '''

    q_build_high_level_event_result = Query(
        query_str=q_build_high_level_event_str,
        parameters={"objectType": _object_type}
    )

    res = _db_connection.exec_query(q_build_high_level_event_result)
    print(f'-> Inferred {res[0]["count"]} HighLevelEvent for ObjectType ({_object_type})')


In [74]:
object_types = ['Interaction', 'Incident', 'Change']
for object_type in object_types:
    infer_high_level_events(db_connection, object_type)

-> Inferred 293114 HighLevelEvent for ObjectType (Interaction)
-> Inferred 103872 HighLevelEvent for ObjectType (Incident)
-> Inferred 33223 HighLevelEvent for ObjectType (Change)


#### Lift the relationships

After aggregating Events into HighLevelEvents, we of course also need to lift the E2O relationship.
That is, if an event was correlated to a CI_SC, then its HighLevelEvent should also be correlated to the same CI_SC

In [75]:
def lift_e2o_relationship_to_hle(_db_connection, _object_type):
    # lift CORR to high-level events
    q_lift_e2o_str = '''
        :auto
        MATCH (h:HighLevelEvent) - [:START|END] -> () --> (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        WITH distinct h, o
        CALL (h, o) {
            MERGE(h) - [c:CORR] -> (o)
            RETURN c
        } IN TRANSACTIONS
        RETURN count(c) as count
    '''

    q_lift_e2o = Query(
        query_str=q_lift_e2o_str,
        parameters={"objectType": _object_type}
    )

    res = db_connection.exec_query(q_lift_e2o)
    print(f'-> Lifted {res[0]["count"]} E2O relationships for ObjectType ({_object_type})')

In [76]:
object_types = ['Interaction', 'Incident', 'Change', 'CI_SC']
for object_type in object_types:
    lift_e2o_relationship_to_hle(db_connection, object_type)

-> Lifted 293114 E2O relationships for ObjectType (Interaction)
-> Lifted 94512 E2O relationships for ObjectType (Incident)
-> Lifted 33223 E2O relationships for ObjectType (Change)
-> Lifted 441029 E2O relationships for ObjectType (CI_SC)


#### Retreive the Set Variants for the High Level Events

Now, we can do the same set variant analysis, but for the high level events.

In [23]:
event_types = ['HighLevelEvent']
get_activity_set_variants(_db_connection=db_connection,
                          _object_type='CI_SC',
                          _event_types=event_types)

Unnamed: 0,set_variant,count_objects,%_set_variant
0,(Change),8212,63.61
1,(Incident) - (Interaction),2038,15.79
2,(Interaction),1554,12.04
3,(Change) - (Incident) - (Interaction),531,4.11
4,(Incident),472,3.66
5,(Change) - (Interaction),71,0.55
6,(Change) - (Incident),32,0.25


We find the same set variants as before, but now they indicate through what objects a CI_SC was managed.

In [27]:
g_get_affected_objects_sum_over_events = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e) - [:IS_OF_TYPE] -> (et:EventType)

        WHERE et.eventType IN $eventTypes
        RETURN e.activity as activity, count(e) as event_count, count(distinct o) as unique_affected_object_count ORDER BY activity
    '''

g_get_affected_objects_sum_over_events = Query(query_str=g_get_affected_objects_sum_over_events,
                                               parameters={
                                                   'objectType': 'CI_SC',
                                                   'eventTypes': event_types
                                               })

result = pd.DataFrame(db_connection.exec_query(g_get_affected_objects_sum_over_events))

g_get_only_has_this_activity = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e) - [:IS_OF_TYPE] -> (et:EventType)
        MATCH (e) - [:START] -> (:Event)
        WHERE et.eventType IN $eventTypes
        WITH o, collect(distinct e.activity) as activities
        WHERE size(activities) = 1
        RETURN activities[0] as activity, count(distinct o) as only_has_this_activity
    '''

g_get_only_has_this_activity = Query(query_str=g_get_only_has_this_activity,
                                     parameters={
                                         'objectType': 'CI_SC',
                                         'eventTypes': event_types
                                     })

only_this_activity_result = pd.DataFrame(db_connection.exec_query(g_get_only_has_this_activity))
pd.merge(result, only_this_activity_result)

Unnamed: 0,activity,event_count,unique_affected_object_count,only_has_this_activity
0,Change,160269,8846,8212
1,Incident,283476,3073,472
2,Interaction,879342,4194,1554


**Conclusion**

Though most CI_SCs are only managed through changes, the number of Changes that happen are not that many compared to the Incidents and Interactions.
Only 600 CI_SCs that have Changes, also have the other Incident and Interaction.

Then, though there are only have of the CI_SCs that have an Interaction, there are way more Interaction Events.

In [35]:
count_types_of_events= '''
    MATCH (ci_sc:CI_SC) <- [:CORR] - (e:HighLevelEvent {subActivity: 'Open'})
    RETURN ci_sc.sysId as sysId, e.activity as activity, count(e) as count ORDER BY ci_sc.sysId
'''

result = pd.DataFrame(db_connection.exec_query(count_types_of_events))
pivot_df = result.pivot_table(
    index="sysId",
    columns=["activity"],
    values="count",
    fill_value=0,
)
# Normalize per type
normalized_df = pivot_df.div(pivot_df.sum(axis=1), axis=0)


In [40]:
normalized_df.loc[normalized_df['Change'] == 0, 'exposure_level'] = 2
normalized_df.loc[(0 < normalized_df['Change']) & (normalized_df['Change'] < 1), 'exposure_level'] = 1
normalized_df.loc[normalized_df['Change'] == 1, 'exposure_level'] = 0
normalized_df = normalized_df.reset_index()
normalized_df

activity,sysId,Change,Incident,Interaction,exposure_level
0,WBS000001_SBA000135,0.777778,0.111111,0.111111,1.0
1,WBS000001_SBA000664,1.000000,0.000000,0.000000,0.0
2,WBS000001_SUB000125,0.000000,0.000000,1.000000,2.0
3,WBS000002_OVR000059,0.000000,0.500000,0.500000,2.0
4,WBS000002_OVR000093,0.000000,0.200000,0.800000,2.0
...,...,...,...,...,...
12905,WBS000340_SBA000297,1.000000,0.000000,0.000000,0.0
12906,WBS000341_SBA000722,1.000000,0.000000,0.000000,0.0
12907,WBS000341_SBA000723,1.000000,0.000000,0.000000,0.0
12908,WBS000342_SBA000660,1.000000,0.000000,0.000000,0.0


In [41]:
query = '''
    UNWIND $ci_scs as ci_sc_value
    MATCH (ci_sc:CI_SC {sysId: ci_sc_value[0]})
    SET ci_sc.exposed = ci_sc_value[1]
'''

query = Query(query_str=query,
              parameters={
                  "ci_scs": [[id, exposed] for id, exposed in zip(list(normalized_df['sysId']), list(normalized_df['exposure_level']))],
              })
db_connection.exec_query(query)

[]

In [42]:
query = '''
MATCH (ci_sc:CI_SC)
WHERE ci_sc.exposed IS NOT NULL AND not ci_sc.ciType  in ['#N/B', 'no type']
RETURN ci_sc.ciType as type, ci_sc.exposed as exposed, count(ci_sc) as count order by type
'''

result = pd.DataFrame(db_connection.exec_query(query))
pivot_df = result.pivot_table(
    index="type",
    columns=["exposed"],
    values="count",
    fill_value=0,
)
# Normalize per type
normalized_df = pivot_df.div(pivot_df.sum(axis=1), axis=0)

In [44]:
pivot_df

exposed,0.0,1.0,2.0
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Phone,120.0,0.0,3.0
application,1011.0,313.0,363.0
applicationcomponent,405.0,1.0,2.0
computer,3740.0,153.0,3159.0
database,558.0,3.0,13.0
displaydevice,2.0,0.0,25.0
hardware,168.0,20.0,47.0
networkcomponents,884.0,17.0,103.0
officeelectronics,16.0,1.0,135.0
software,567.0,14.0,49.0


In [46]:
normalized_df

exposed,0.0,1.0,2.0
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Phone,0.97561,0.0,0.02439
application,0.599289,0.185536,0.215175
applicationcomponent,0.992647,0.002451,0.004902
computer,0.530346,0.021696,0.447958
database,0.972125,0.005226,0.022648
displaydevice,0.074074,0.0,0.925926
hardware,0.714894,0.085106,0.2
networkcomponents,0.880478,0.016932,0.10259
officeelectronics,0.105263,0.006579,0.888158
software,0.9,0.022222,0.077778


- There are a few CI_SCs that are rarely exposed (i.e. phone, database, software, networkcomponents) --> database
- Then there are a few that are really exposed (displaydevice, officeelectronics) --> Office electronics
- Few inbetween (storage, subapplication, hardware, application) --> Storage

We focus on a few of these.

In [207]:
result

Unnamed: 0,type,exposed,count
0,#N/B,True,1
1,Phone,False,120
2,Phone,True,3
3,application,False,1070
4,application,True,617
5,applicationcomponent,False,405
6,applicationcomponent,True,3
7,computer,False,3804
8,computer,True,3248
9,database,False,560


In [53]:
types = ['hardware', 'subapplication', 'storage', 'application']

In [57]:
# immediately solved
query = '''
MATCH (e1:HighLevelEvent) - [df:DF where df.objectType = 'Interaction'] -> (e4:HighLevelEvent)
MATCH (e1) - [:CORR] -> (o:CI_SC)
WHERE o.ciType IN $ciTypes
RETURN o.ciType as type, e1.activity as activity, exists((e1:HighLevelEvent) - [:DF {objectType:'CI_SC', id: o.sysId}] -> (e4:HighLevelEvent)) as immediately_solved, count(e1) as count, avg(df.duration)*1.0/3600 as duration_hours order by type, activity, immediately_solved
'''

object_type = 'Change'
ci_type = 'storage'

query = Query(
    query_str=query,
    parameters = {
        'objectType': object_type,
        'ciTypes': types
    }
)
pd.DataFrame(db_connection.exec_query(query))


Unnamed: 0,type,activity,immediately_solved,count,duration_hours
0,application,Interaction,False,58836,62.622824
1,application,Interaction,True,38717,0.404885
2,hardware,Interaction,False,467,82.140293
3,hardware,Interaction,True,511,0.779028
4,storage,Interaction,False,6580,11.463924
5,storage,Interaction,True,5248,0.066899
6,subapplication,Interaction,False,12181,55.871891
7,subapplication,Interaction,True,8519,0.221098


In [83]:
# build DF relations between high-level events
# step 1: build HL-DF relations between events

def discover_df_hle(_db_connection, _object_type):
    q_build_df_relations_hle_str = '''
        :auto
        // get only START/END events of each HL event correlated to CI_SC
        MATCH (ot:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) <- [:CORR] - (h:HighLevelEvent)
        MATCH (o) <-[:CORR]- (h) -[:START|END] -> (e:Event)
        WITH o, e ORDER BY e.timestamp, elementId(e) // order by time
        WITH o, collect(e) AS events
        UNWIND range(0, size(events)-2) AS idx
        WITH idx, events[idx] AS fromEv, events[idx+1] AS toEv, o
        CALL (o, idx, fromEv, toEv) {
            // get their HL events correlated to the same object
            MATCH (fromEv)<-[:START|END]-(h1:HighLevelEvent)-[:CORR]->(o)<-[:CORR]-(h2:HighLevelEvent)-[:START|END]->(toEv)
            WHERE h1 <> h2
            // and add their DF relation
            MERGE (h1)-[rel:DF {objectType:$objectType, id:o.sysId}]->(h2)
            RETURN count(rel) as rel
        } IN TRANSACTIONS
        RETURN sum(rel) AS total

    '''

    q_build_df_relations_hle = Query(
        query_str=q_build_df_relations_hle_str,
        parameters={
            "objectType": _object_type,
        }
    )

    return db_connection.exec_query(q_build_df_relations_hle)

In [87]:
def delete_immediate_resolved(object_type):
    query = '''
    :auto
    MATCH (e1:HighLevelEvent) - [df:DF {objectType: $objectType}] -> (e4:HighLevelEvent)
    MATCH (e1) -  [:DF {objectType: 'CI_SC'}] -> (e4)
    CALL (e1, e4){
    DETACH DELETE e1, e4
    } IN TRANSACTIONS
    RETURN count(e1) as count
    '''

    query = Query(query_str=query,
                  parameters={'objectType': object_type})

    res = db_connection.exec_query(query)
    print(f"deleted {res[0]['count']} events")

In [94]:
for object_type in ('Interaction', 'Incident'):
    delete_immediate_resolved(object_type)
discover_df_hle(db_connection, 'CI_SC')

deleted 0 events
deleted 0 events


[{'total': 275490}]

In [106]:
query = '''
MATCH (e1:HighLevelEvent) - [df:DF where df.objectType <> 'CI_SC'] -> (e4:HighLevelEvent)
MATCH (e1 WHERE e1.activity IN ['Incident', 'Interaction']) - [:CORR] -> (o:CI_SC)
WHERE NOT exists((e1:HighLevelEvent) - [:DF {objectType:'CI_SC'}] -> (e4:HighLevelEvent)) AND o.ciType IN ['hardware', 'subapplication', 'storage', 'application']
MATCH (e1:HighLevelEvent) - [df2:DF {objectType:'CI_SC', id: o.sysId}] -> (e2:HighLevelEvent)
RETURN o.ciType, e2.activity = 'Change' and e2.subActivity = 'Open' as open_change, count(e1), avg(df.duration)*1.0/3600 as duration_hours'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,o.ciType,open_change,count(e1),duration_hours
0,storage,False,6598,20.071065
1,application,False,81385,83.88885
2,application,True,466,239.424906
3,subapplication,False,18122,69.83198
4,subapplication,True,75,307.603674
5,hardware,False,636,100.288739
6,hardware,True,16,84.365955
7,storage,True,2,37.113194


In [48]:
event_types = ['HighLevelEvent']
g_get_affected_objects = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e) - [:IS_OF_TYPE] -> (et:EventType)
        MATCH (e) - [:START] -> (:Event)
        WHERE et.eventType IN $eventTypes
        RETURN  o.ciType as type, e.activity as activity, count(distinct e) as event_count, count(distinct o) as unique_affected_object_count ORDER BY type
    '''

g_get_affected_objects = Query(query_str=g_get_affected_objects,
                                               parameters={
                                                   'objectType': 'CI_SC',
                                                   'eventTypes': event_types
                                               })

affected_result = pd.DataFrame(db_connection.exec_query(g_get_affected_objects))

g_get_cum_affected_objects = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e) - [:IS_OF_TYPE] -> (et:EventType)
        MATCH (e) - [:START] -> (:Event)
        WHERE et.eventType IN $eventTypes
        WITH  o.ciType as type, e.activity as activity, e, count(distinct o) as affected_o ORDER BY type
        RETURN type, activity, sum(affected_o) as cumulative_affected
    '''

g_get_cum_affected_objects = Query(query_str=g_get_cum_affected_objects,
                                               parameters={
                                                   'objectType': 'CI_SC',
                                                   'eventTypes': event_types
                                               })

cum_affected_result = pd.DataFrame(db_connection.exec_query(g_get_cum_affected_objects))

result = pd.merge(affected_result, cum_affected_result)

g_get_only_has_this_activity = '''
        MATCH (:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) -- (e) - [:IS_OF_TYPE] -> (et:EventType)
        MATCH (e) - [:START] -> (:Event)
        WHERE et.eventType IN $eventTypes
        WITH o, collect(distinct e.activity) as activities
        WHERE size(activities) = 1
        RETURN o.ciType as type, activities[0] as activity, count(distinct o) as only_has_this_activity ORDER BY type
    '''

g_get_only_has_this_activity = Query(query_str=g_get_only_has_this_activity,
                                     parameters={
                                         'objectType': 'CI_SC',
                                         'eventTypes': event_types
                                     })

only_this_activity_result = pd.DataFrame(db_connection.exec_query(g_get_only_has_this_activity))
combined_per_type = pd.merge(result, only_this_activity_result)
combined_per_type[combined_per_type['type'].isin(types)]

Unnamed: 0,type,activity,event_count,unique_affected_object_count,cumulative_affected,only_has_this_activity
11,database,Change,2027,561,2622,558
12,database,Interaction,339,15,339,5
13,displaydevice,Change,2,2,2,2
14,displaydevice,Interaction,1661,24,1661,6
15,displaydevice,Incident,220,19,220,1
30,storage,Interaction,11828,87,11828,2
31,storage,Incident,733,87,733,2
32,storage,Change,274,355,728,293


In [49]:
combined_per_type['avg_affected_event'] = combined_per_type['cumulative_affected']/combined_per_type['event_count']
combined_per_type['avg_affected_total'] = combined_per_type['unique_affected_object_count']/combined_per_type['event_count']
selected = combined_per_type[combined_per_type['type'].isin(types)]
selected = combined_per_type[['type', 'activity', 'event_count', 'unique_affected_object_count',  'avg_affected_event', 'avg_affected_total']]


In [50]:
# materialize time between events as duration on DF edge
q_materialize_duration_on_df = '''
    :auto
    MATCH (e1:HighLevelEvent) - [df:DF] -> (e2:HighLevelEvent)
    WHERE df.duration IS NULL
    CALL (e1, df, e2) {
        SET df.duration = duration.inSeconds(e1.timestamp, e2.timestamp).seconds
    } IN TRANSACTIONS
    RETURN count(*) AS total
'''

db_connection.exec_query(q_materialize_duration_on_df)

[{'total': 0}]

In [51]:
# materialize time between events as duration on DF edge
q_materialize_duration_on_df = '''
    :auto
    MATCH (e1:HighLevelEvent) - [df:DF] -> (e2:HighLevelEvent) - [:CORR] -> (o:CI_SC)
    WHERE df.objectType in ['Change', 'Interaction', 'Incident']
    RETURN e1.activity as activity, o.ciType as type, avg(df.duration)*1.0/60 as avg_minutes
'''

result = pd.DataFrame(db_connection.exec_query(q_materialize_duration_on_df))
selected_result = result[result['type'].isin(types)]

In [52]:
pd.merge(selected, selected_result)

Unnamed: 0,type,activity,event_count,unique_affected_object_count,avg_affected_event,avg_affected_total,avg_minutes
0,database,Change,2027,561,1.293537,0.276764,3940.197941
1,database,Interaction,339,15,1.0,0.044248,1374.185841
2,displaydevice,Change,2,2,1.0,1.0,16142.5
3,displaydevice,Interaction,1661,24,1.0,0.014449,1170.390126
4,displaydevice,Incident,220,19,1.0,0.086364,7912.166288
5,storage,Interaction,11828,87,1.0,0.007355,384.428644
6,storage,Incident,733,87,1.0,0.11869,5181.964075
7,storage,Change,274,355,2.656934,1.29562,2993.21978


Unnamed: 0,type,activity,immediately_solved,count,duration_hours
0,database,Change,False,166,709.694076
1,database,Change,True,2456,22.140649
2,database,Incident,False,167,31.713159
3,database,Incident,True,49,23.336525
4,database,Interaction,False,244,31.494057
5,database,Interaction,True,95,0.837895
6,displaydevice,Change,True,2,269.041667
7,displaydevice,Incident,False,186,144.699131
8,displaydevice,Incident,True,34,61.683472
9,displaydevice,Interaction,False,675,46.006074


In [235]:
query = '''
MATCH (ci_sc:CI_SC) <- [:CORR] - (hle:HighLevelEvent {subActivity: 'Open'})
WHERE ci_sc.ciType = 'storage'
RETURN ci_sc.sysId as sysId, ci_sc.exposed as exposed, hle.activity, count(hle) as count order by count DESC
'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,sysId,exposed,hle.activity,count
0,WBS000128_SAN000182,True,Interaction,10667
1,WBS000128_SAN000178,True,Interaction,862
2,WBS000128_SAN000182,True,Incident,410
3,WBS000128_SAN000178,True,Incident,75
4,WBS000128_SAN000184,True,Interaction,67
...,...,...,...,...
524,WBS000127_SAN000089,True,Incident,1
525,WBS000127_SAN000108,True,Interaction,1
526,WBS000127_SAN000108,True,Incident,1
527,WBS000127_SAN000130,True,Incident,1


In [238]:
query = '''
MATCH (ci_sc:CI_SC) <- [:CORR] - (hle:HighLevelEvent {subActivity: 'Open'})
WHERE ci_sc.ciType = 'storage' AND ci_sc.sysId = 'WBS000128_SAN000178'
RETURN ci_sc.sysId as sysId, ci_sc.exposed as exposed, hle.activity, count(hle) as count order by count DESC
'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,sysId,exposed,hle.activity,count
0,WBS000128_SAN000178,True,Interaction,862
1,WBS000128_SAN000178,True,Incident,75


In [239]:
query = '''
MATCH (ci_sc:CI_SC) <- [:CORR] - (hle:HighLevelEvent {subActivity: 'Open'})
WHERE ci_sc.ciType = 'database'
RETURN ci_sc.sysId as sysId, ci_sc.exposed as exposed, hle.activity, count(hle) as count order by count DESC
'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,sysId,exposed,hle.activity,count
0,WBS000224_DBR00114,False,Change,296
1,WBS000239_ADB000010,True,Interaction,225
2,WBS000224_DBR00113,False,Change,217
3,WBS000239_ADB000010,True,Incident,173
4,WBS000253_ADB000094,True,Interaction,64
...,...,...,...,...
581,WBS000253_ADB000011,True,Interaction,1
582,WBS000253_ADB000053,True,Interaction,1
583,WBS000253_ADB000012,True,Interaction,1
584,WBS000253_ADB000025,True,Interaction,1


In [243]:
query = '''
MATCH (ci_sc:CI_SC) <- [:CORR] - (hle:HighLevelEvent {subActivity: 'Open'})
WHERE ci_sc.sysId = 'WBS000239_ADB000010'
RETURN ci_sc.sysId as sysId, ci_sc.exposed as exposed, hle.activity, count(hle) as count order by count DESC
'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,sysId,exposed,hle.activity,count
0,WBS000239_ADB000010,True,Interaction,225
1,WBS000239_ADB000010,True,Incident,173


In [252]:
query = '''MATCH (e1:HighLevelEvent) - [df:DF where df.objectType <> 'CI_SC'] -> (e4:HighLevelEvent)
MATCH (e1 {activity: 'Interaction'}) - [:CORR] -> (o:CI_SC {ciType:  'storage'})
WHERE NOT exists((e1:HighLevelEvent) - [:DF {objectType:'CI_SC'}] -> (e4:HighLevelEvent))
MATCH (e1:HighLevelEvent) - [df2:DF {objectType:'CI_SC', id: o.sysId}] -> (e2:HighLevelEvent)
RETURN e2.activity, e2.subActivity, count(e1), avg(df2.duration)*1.0/3600 as duration_hours'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,e2.activity,e2.subActivity,count(e1),duration_hours
0,Incident,Open,525,0.079308
1,Incident,Close,53,0.40968
2,Interaction,Open,3359,0.165952
3,Interaction,Close,2643,0.010859


Closes or opens another interaction (most often, very few actually lead to an opened incident)

In [142]:
query = '''MATCH (e1:HighLevelEvent) - [df:DF where df.objectType <> 'CI_SC'] -> (e4:HighLevelEvent)
MATCH (e1 {activity: 'Incident'}) - [:CORR] -> (o:CI_SC {ciType:  'storage'})
WHERE NOT exists((e1:HighLevelEvent) - [:DF {objectType:'CI_SC'}] -> (e4:HighLevelEvent))
MATCH (e1:HighLevelEvent) - [df2:DF {objectType:'CI_SC', id: o.sysId}] -> (e2:HighLevelEvent)
RETURN e2.activity, e2.subActivity, count(e1), avg(df2.duration)*1.0/3600 as duration_hours'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,e2.activity,e2.subActivity,count(e1),duration_hours
0,Interaction,Close,216,8.890769
1,Interaction,Open,342,3.316759
2,Incident,Close,21,2.25168
3,Change,Open,2,1.415833
4,Incident,Open,9,0.161019


Only 2 incidents lead to an opened change

In [143]:
query = '''MATCH (e1:HighLevelEvent) - [df:DF where df.objectType <> 'CI_SC'] -> (e4:HighLevelEvent)
MATCH (e1 {activity: 'Interaction'}) - [:CORR] -> (o:CI_SC {ciType:  'networkcomponents'})
WHERE NOT exists((e1:HighLevelEvent) - [:DF {objectType:'CI_SC'}] -> (e4:HighLevelEvent))
MATCH (e1:HighLevelEvent) - [df2:DF {objectType:'CI_SC', id: o.sysId}] -> (e2:HighLevelEvent)
RETURN e2.activity, e2.subActivity, count(e1), avg(df2.duration)*1.0/3600 as duration_hours'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,e2.activity,e2.subActivity,count(e1),duration_hours
0,Incident,Open,78,0.132977
1,Change,Open,1,0.033333
2,Incident,Close,1,0.088611
3,Interaction,Open,1,362.566667


In [175]:
query = '''MATCH (e1:HighLevelEvent) - [df:DF where df.objectType <> 'CI_SC'] -> (e4:HighLevelEvent)
MATCH (e1 {activity: 'Incident'}) - [:CORR] -> (o:CI_SC)
WHERE NOT exists((e1:HighLevelEvent) - [:DF {objectType:'CI_SC'}] -> (e4:HighLevelEvent))
MATCH (e1:HighLevelEvent) - [df2:DF {objectType:'CI_SC', id: o.sysId}] -> (e2:HighLevelEvent)
RETURN o.ciType, e2.activity, e2.subActivity, count(e1), avg(df2.duration)*1.0/3600 as duration_hours order by e2.activity'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,o.ciType,e2.activity,e2.subActivity,count(e1),duration_hours
0,subapplication,Change,Open,49,84.673084
1,application,Change,Open,287,36.592026
2,subapplication,Change,Close,14,7.987976
3,software,Change,Open,26,20.158088
4,application,Change,Close,51,6.26427
5,hardware,Change,Close,10,9.820778
6,hardware,Change,Open,14,23.040258
7,storage,Change,Open,2,1.415833
8,computer,Change,Close,6,5.038519
9,computer,Change,Open,17,52.758693


In [178]:
query = '''MATCH (e1:HighLevelEvent) - [df:DF where df.objectType <> 'CI_SC'] -> (e4:HighLevelEvent)
MATCH (e1 {activity: 'Incident'}) - [:CORR] -> (o:CI_SC)
WHERE NOT exists((e1:HighLevelEvent) - [:DF {objectType:'CI_SC'}] -> (e4:HighLevelEvent))
MATCH (e1:HighLevelEvent) - [df2:DF {objectType:'CI_SC', id: o.sysId}] -> (e2:HighLevelEvent {activity:'Change'})
MATCH (e2:HighLevelEvent {activity:'Change'}) - [:CORR] -> (new_o:CI_SC)
WITH o, e2, df2, e1, count(new_o) as affected_objects
RETURN o.ciType, e2.activity, e2.subActivity, count(e1), sum(affected_objects), avg(df2.duration)*1.0/3600 as duration_hours order by e2.activity'''

pd.DataFrame(db_connection.exec_query(query))

Unnamed: 0,o.ciType,e2.activity,e2.subActivity,count(e1),sum(affected_objects),duration_hours
0,subapplication,Change,Open,49,54,84.673084
1,application,Change,Open,287,422,36.592026
2,subapplication,Change,Close,14,16,7.987976
3,software,Change,Open,26,31,20.158088
4,application,Change,Close,51,62,6.26427
5,hardware,Change,Close,10,10,9.820778
6,hardware,Change,Open,14,20,23.040258
7,storage,Change,Open,2,3,1.415833
8,computer,Change,Close,6,13,5.038519
9,computer,Change,Open,17,47,52.758693


Most lead to an opened incident

## Building Directly-Follows Relationships between high-level events

In [88]:
# build DF relations between high-level events
# step 1: build HL-DF relations between events

def discover_df_hle(_db_connection, _object_type):
    q_build_df_relations_hle_str = '''
        :auto
        // get only START/END events of each HL event correlated to CI_SC
        MATCH (ot:ObjectType {objectType: $objectType}) <- [:IS_OF_TYPE] - (o) <- [:CORR] - (h:HighLevelEvent)
        MATCH (o) <-[:CORR]- (h) -[:START|END] -> (e:Event)
        WITH o, e ORDER BY e.timestamp, elementId(e) // order by time
        WITH o, collect(e) AS events
        UNWIND range(0, size(events)-2) AS idx
        WITH idx, events[idx] AS fromEv, events[idx+1] AS toEv, o
        CALL (o, idx, fromEv, toEv) {
            // get their HL events correlated to the same object
            MATCH (fromEv)<-[:START|END]-(h1:HighLevelEvent)-[:CORR]->(o)<-[:CORR]-(h2:HighLevelEvent)-[:START|END]->(toEv)
            WHERE h1 <> h2
            // and add their DF relation
            MERGE (h1)-[rel:DF {objectType:$objectType, id:o.sysId}]->(h2)
            RETURN count(rel) as rel
        } IN TRANSACTIONS
        RETURN sum(rel) AS total

    '''

    q_build_df_relations_hle = Query(
        query_str=q_build_df_relations_hle_str,
        parameters={
            "objectType": _object_type,
        }
    )

    return db_connection.exec_query(q_build_df_relations_hle)

In [89]:
discover_df_hle(db_connection, _object_type='CI_SC')
discover_df_hle(db_connection, _object_type='Incident')
discover_df_hle(db_connection, _object_type='Interaction')
discover_df_hle(db_connection, _object_type='Change')

[{'total': 432202}]

[{'total': 51641}]

[{'total': 146557}]

[{'total': 16609}]

In [145]:
def infer_hle_start_event(_db_connection, _object_type):
    # infer start and end events for each object type
    q_start_event = '''
        :auto
        // Infer start event of an object
        MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        MATCH (o) <- [] - (e:HighLevelEvent)
        WHERE NOT EXISTS ((:Event) - [:DF {id:o.sysId}] -> (e))
        CALL (o, e){
            MERGE (o)<-[rel:START]-(e)
            RETURN rel
        } IN TRANSACTIONS
        RETURN count(rel) as count
    '''

    q_start_event_result = Query(
        query_str=q_start_event,
        parameters={"objectType": _object_type}
    )

    res = _db_connection.exec_query(q_start_event_result)

    print(f'-> Inferred High Level Start Events for {res[0]["count"]} objects ({_object_type})')

In [146]:
def infer_hle_end_event(_db_connection, _object_type):
    # infer start and end events for each object type
    q_end_event = '''
        :auto
        // Infer start event of an object
        MATCH (o) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType: $objectType})
        MATCH (o) <- [] - (e:HighLevelEvent)
        WHERE NOT EXISTS ((e) - [:DF {id:o.sysId}] -> (:Event))
        CALL (o, e){
            MERGE (o)<-[rel:END]-(e)
            RETURN rel
        } IN TRANSACTIONS
        RETURN count(rel) as count
    '''

    q_end_event_result = Query(
        query_str=q_end_event,
        parameters={"objectType": _object_type}
    )

    res = _db_connection.exec_query(q_end_event_result)

    print(f'-> Inferred High Level End Events for {res[0]["count"]} objects ({_object_type})')

In [147]:
infer_hle_start_event(_db_connection=db_connection,
                      _object_type='CI_SC')
infer_hle_end_event(_db_connection=db_connection,
                    _object_type='CI_SC')

-> Inferred High Level Start Events for 1323087 objects (CI_SC)
-> Inferred High Level End Events for 1323087 objects (CI_SC)


# Discover different NBags
For every shortest trace from the first event of a CI_SC until the last event of a CI_SC, we determine sequences of a specific length, so called nbags.
Then for every CI_SC, we also request the type


FZE: Why? What is a Nbag, why is the type important all of the sudden

In [148]:
def get_nbag(size):
    q_get_nbags_str = '''
    :auto
        MATCH (o) - [:IS_OF_TYPE] -> (:ObjectType {objectType: 'CI_SC'})
        WHERE o.ciType IN ['storage', 'networkcomponents']
        CALL (o){
            MATCH (o) <- [:CORR] - (event:HighLevelEvent)
            WITH o, event.activity as activity order by event.timestamp
            WITH o, collect(activity) as activities
            WITH o, [i in range(0, size(activities) - 1) WHERE i = 0 OR activities[i] <> activities[i-1] | activities[i]] AS activities
            UNWIND range(0, size(activities) - $size) as i
            RETURN activities[i..i+$size] as nbag_list
        } IN TRANSACTIONS
        RETURN o.ciType as type, ltrim(reduce(init="", t in nbag_list| init + "-" + t), '-') as nbag, count(nbag_list) as nbag_count
    '''

    q_get_nbags = Query(query_str=q_get_nbags_str,
                        template_string_parameters={
                            "size": size
                        })

    results = db_connection.exec_query(q_get_nbags)
    nbags = pd.DataFrame(results)
    table = pd.pivot_table(nbags, index=['type', 'nbag'], aggfunc="sum")
    table['%'] = (
        round(
            table.groupby(['nbag', 'type']).nbag_count.transform("sum") /
            table.groupby(['type']).nbag_count.transform("sum") * 100,
            2
        )
    )
    return table



In [149]:
get_nbag(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,nbag_count,%
type,nbag,Unnamed: 2_level_1,Unnamed: 3_level_1
networkcomponents,Change,913,73.75
networkcomponents,Incident,146,11.79
networkcomponents,Interaction,179,14.46
storage,Change,376,12.91
storage,Incident,1242,42.65
storage,Interaction,1294,44.44


In [150]:
get_nbag(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,nbag_count,%
type,nbag,Unnamed: 2_level_1,Unnamed: 3_level_1
networkcomponents,Change-Incident,5,2.14
networkcomponents,Change-Interaction,15,6.41
networkcomponents,Incident-Change,4,1.71
networkcomponents,Incident-Interaction,85,36.32
networkcomponents,Interaction-Change,10,4.27
networkcomponents,Interaction-Incident,115,49.15
storage,Change-Incident,3,0.12
storage,Change-Interaction,48,1.9
storage,Incident-Change,20,0.79
storage,Incident-Interaction,1187,46.94


In [151]:
get_nbag(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,nbag_count,%
type,nbag,Unnamed: 2_level_1,Unnamed: 3_level_1
networkcomponents,Change-Incident-Change,1,0.65
networkcomponents,Change-Incident-Interaction,3,1.95
networkcomponents,Change-Interaction-Change,4,2.6
networkcomponents,Change-Interaction-Incident,10,6.49
networkcomponents,Incident-Change-Incident,3,1.95
networkcomponents,Incident-Interaction-Change,5,3.25
networkcomponents,Incident-Interaction-Incident,42,27.27
networkcomponents,Interaction-Change-Incident,1,0.65
networkcomponents,Interaction-Change-Interaction,1,0.65
networkcomponents,Interaction-Incident-Change,3,1.95


FZE: What do we see here? Is there anything interesting to report/conclude?

# CI_SC Check
CI_SC are often handled simultaneously. Sometimes, they go then on to a different route and come back later at a different point in time. <br>
Let's check what happens in between these two events

FZE: the description above is cryptical. Where do they go? :-) I am not sure I understand it

In [157]:
def get_ci_scs_in_sync(db_connection):
    query = '''
            :auto


        MATCH (e:HighLevelEvent) - [:CORR] -> (o1) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType:'CI_SC'})
MATCH (e) - [:CORR] -> (o2) - [:IS_OF_TYPE] -> (ot:ObjectType {objectType:'CI_SC'})
WHERE o1 < o2
WITH e, o1, o2
CALL (e, o1, o2){
  MATCH (f:HighLevelEvent)  - [:CORR] -> (o1)
  MATCH (f) - [:CORR] -> (o2)
  WHERE f.timestamp >= e.timestamp AND e <> f
  RETURN f ORDER BY f.timestamp limit 1
} IN TRANSACTIONS
CALL (o1, e, f){
  MATCH p = (e) - [:DF* {id: o1.sysId}] -> (f)
  WITH p, nodes(p)[1..-1] as events
  CALL (events) {
    UNWIND events as event
    WITH event ORDER BY event.activity
    RETURN collect(distinct event.activity) as set_variant
    }
  RETURN nodes(p) as p_o1, set_variant as set_variant_o1
} IN TRANSACTIONS
CALL (o2, e, f){
    MATCH p = (e) - [:DF* {id: o2.sysId}] -> (f)
  WITH p, nodes(p)[1..-1] as events
  CALL (events) {
    UNWIND events as event
    WITH event ORDER BY event.activity
    RETURN collect(distinct event.activity) as set_variant
    }

  RETURN nodes(p) as p_o2,  set_variant as  set_variant_o2

} IN TRANSACTIONS
    MATCH (o1) - [:RELATED] -> (ci1:ConfigurationItem)
    MATCH (o2) - [:RELATED] -> (ci2:ConfigurationItem)
    WITH o1, ci1, o2, ci2, p_o1, p_o2, set_variant_o1, set_variant_o2

    CALL (set_variant_o1, set_variant_o2) {
    WITH set_variant_o1, set_variant_o2, CASE
        WHEN size(set_variant_o1) <= size(set_variant_o2) THEN set_variant_o1
        ELSE set_variant_o2 END AS first_interruptions
    WITH set_variant_o1, set_variant_o2, first_interruptions, CASE
        WHEN first_interruptions = set_variant_o1 THEN set_variant_o2
        ELSE set_variant_o1 END AS second_interruptions
    WITH set_variant_o1, set_variant_o2, CASE
        WHEN size(first_interruptions) = 0 THEN " None"
        ELSE ltrim(reduce(init="", item in first_interruptions | init + "-" + item), '-')  END AS interruptions_o1,
    CASE
        WHEN size(second_interruptions) = 0 THEN " None"
        ELSE ltrim(reduce(init="", item in second_interruptions | init + "-" + item), '-') END AS interruptions_o2
    RETURN interruptions_o1, interruptions_o2}
    RETURN ci1.ciType as c1Type,ci2.ciType as c2Type, interruptions_o1, interruptions_o2, count(o1) as occurrences

    '''

    result = pd.DataFrame(db_connection.exec_query(query=query))
    table = pd.pivot_table(result,
                           index=['interruptions_o1', 'interruptions_o2', 'c1Type', 'c2Type'],
                           aggfunc="sum")
    table['%'] = (
        round(
            table.groupby(['interruptions_o1', 'interruptions_o2', 'c1Type', 'c2Type',]).occurrences.transform("sum") /
            sum(table.occurrences) * 100,
            2
        )
    )
    return table


In [162]:
result = get_ci_scs_in_sync(db_connection)
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,occurrences,%
interruptions_o1,interruptions_o2,c1Type,c2Type,Unnamed: 4_level_1,Unnamed: 5_level_1
,,Phone,Phone,18,0.03
,,Phone,networkcomponents,27,0.04
,,application,application,3089,4.55
,,application,applicationcomponent,3,0.00
,,application,computer,180,0.27
...,...,...,...,...,...
Interaction,Change-Interaction,application,application,3,0.00
Interaction,Change-Interaction,computer,computer,1,0.00
Interaction,Incident-Interaction,application,application,9,0.01
Interaction,Incident-Interaction,computer,computer,2,0.00


In [171]:
reset = result.reset_index()
new = reset[reset['c1Type'].isin(['storage', 'networkcomponents']) | reset['c2Type'].isin(['storage', 'networkcomponents'])]
table = pd.pivot_table(new,
                           index=['c1Type', 'c2Type', 'interruptions_o1', 'interruptions_o2'],
                           aggfunc="sum")
table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,%,occurrences
c1Type,c2Type,interruptions_o1,interruptions_o2,Unnamed: 4_level_1,Unnamed: 5_level_1
Phone,networkcomponents,,,0.04,27
application,networkcomponents,,,0.05,31
application,networkcomponents,,Change,0.04,27
application,storage,,,0.0,2
application,storage,,Change,0.0,2
computer,networkcomponents,,,0.04,30
computer,networkcomponents,,Change,0.01,8
computer,storage,,,0.44,301
computer,storage,,Change,0.09,62
database,storage,,,0.01,4


In [173]:
new_result = result.reset_index()
new_result = new_result.drop(columns=['c1Type', 'c2Type', '%'])
table = pd.pivot_table(new_result, index=['interruptions_o1', 'interruptions_o2'], aggfunc="sum")
table['%'] = (
    round(
        table.groupby(['interruptions_o1', 'interruptions_o2']).occurrences.transform("sum") /
        sum(table.occurrences) * 100,
        2
    )
)
table


Unnamed: 0_level_0,Unnamed: 1_level_0,occurrences,%
interruptions_o1,interruptions_o2,Unnamed: 2_level_1,Unnamed: 3_level_1
,,58863,86.66
,Change,6066,8.93
,Change-Incident-Interaction,119,0.18
,Change-Interaction,10,0.01
,Incident,11,0.02
,Incident-Interaction,401,0.59
,Interaction,158,0.23
Change,Change,1998,2.94
Change,Change-Incident,7,0.01
Change,Change-Incident-Interaction,95,0.14


In [102]:
new_result.columns