# 0. Prepare Project

In [314]:
%matplotlib inline
import matplotlib.pyplot as plt

from pathlib import Path

from promg.modules.db_management import DBManagement
from tabulate import tabulate
import yaml

from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

import pandas as pd

pd.set_option('display.width', 2000)

### Define the project that you want to do analysis on

In [315]:
case_study = 'bpic14'
use_sample = False

In [316]:
# retrieve configuration for case_study
conf_path = Path(case_study, 'config.yaml')
# Read the YAML inside a context manager so the file handle is closed right away
# instead of being left open until garbage collection.
with open(conf_path, encoding="utf-8") as conf_file:
    config = yaml.safe_load(conf_file)

# NOTE(review): this prints the database password in clear text; it ends up in the
# saved notebook output, so avoid committing real credentials.
print("These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


### Prepare so we can use PromG to load the data and execute queries

In [317]:
config = Configuration.init_conf_with_config_file(conf_path)
db_connection = DatabaseConnection.set_up_connection(config=config)
perf = Performance.set_up_performance(config=config)
dataset_descriptions = DatasetDescriptions(config=config)

In [318]:
def reset_pbar(pbar=perf.pbar, total=None):
    """
    Reset a progress bar so it can be reused for the next processing step.

    :param pbar: progress bar to reset. Defaults to ``perf.pbar`` as it was when this
        function was defined (default arguments are evaluated once, at definition time).
    :param total: value assigned to ``pbar.total`` after the reset.
    """
    # rewind the bar (presumably a tqdm progress bar -- confirm against PromG's Performance)
    pbar.reset()
    # TODO update dragons in PromG, #update method to set total for pbar
    pbar.total = total
    # called without arguments; presumably clears the postfix text of the previous step
    pbar.set_postfix_str()

#### Prepare the DB

In [319]:
# read the semantic header --> this details how the data should be structured
semantic_header = SemanticHeader.create_semantic_header(config=config)

In [320]:
# Clear the DB (if use_sample = False, this should not take long on a loaded database)
db_manager = DBManagement(db_connection=db_connection, semantic_header=None)
db_manager.clear_db(
    replace=True);  # in the community version of neo4j, replace is not allowed. In that case, set replace=False

13it [36:38, 430.76s/it, clear_db: took 2163.65 seconds]                                              

### Statistics

In [321]:
def get_graph_statistics(db_connection):
    """
    Print node counts per label set, relationship counts per type and the grand totals.

    Any error raised while querying is reported on stdout instead of being propagated.
    """
    nodes_per_label_cypher = """
            MATCH (n)
            WITH n, labels(n) as labels
            RETURN reduce(label_str = "(", l in labels | label_str + ":" + l) + ")" as label, count(n) as count ORDER BY count DESC
            """
    rels_per_type_cypher = """
            MATCH (n) - [r] -> (n2)
            RETURN "[:" + type(r) + "]" as  type, count(r) as count ORDER BY count DESC
            """

    def _print_counts(heading, rows, name_key):
        # one left-aligned line per row: name padded to 30 characters, then the count
        print(heading)
        for row in rows:
            print(f"{row[name_key]:<30} {row['count']}")

    with db_connection.driver.get_session(database=db_connection.db_name) as session:
        print("\n=== GRAPH STATISTICS ===")

        try:
            # each query runs before its heading is printed
            _print_counts("\n--- Node counts ---", session.run(nodes_per_label_cypher), "label")
            _print_counts("\n--- Relationship counts ---", session.run(rels_per_type_cypher), "type")

            node_total = session.run("MATCH (n) RETURN count(n) AS total").single()["total"]
            rel_total = session.run("MATCH ()-[r]->() RETURN count(r) AS total").single()["total"]

            print("\n--- Totals ---")
            print(f"Total nodes: {node_total}")
            print(f"Total relationships: {rel_total}")
        except Exception as err:
            print(f"Failed to get graph statistics: {err}")

# 0. Load the data

In [322]:
use_sample = False
oced_pg = OcedPg(database_connection=db_connection,
                 dataset_descriptions=dataset_descriptions,
                 semantic_header=semantic_header,
                 use_sample=use_sample)


In [323]:
# first, we load all records
# (if use_sample = False, this should take less than 2 minutes)
reset_pbar(total=11)
oced_pg.load();

  0%|          | 0/11 [00:00<?, ?it/s]                                



100%|██████████| 11/11 [00:34<00:00,  3.12s/it, _filter_nodes for BPIC14Interaction: took 0.0 seconds]                      

In [324]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:Log)                         4

--- Relationship counts ---
[:CONTAINS]                    690622

--- Totals ---
Total nodes: 690626
Total relationships: 690622


In [325]:
bpic14_incident = "BPIC14Incident.csv"
bpic14_interaction = "BPIC14Interaction.csv"
bpic14_change = "Detail_Change.csv"
bpic14_incident_activity = "Detail_Incident_Activity.csv"

# 1. Split Entities into Objects and Events

### Objects Nodes

Create objects out of the Record nodes directly.
Those are:
- Incident
- Interaction
- Change
- Knowledge Document
- Resource
- Configuration Item
- Service Component

In [326]:
# Object extraction configuration, consumed by build_entities().
# Each key is the node label to create; each list entry is one extraction pass over the
# Record nodes, with the following fields (as used by build_entity()):
#   "log":        name of the Log whose records are used; None means no filter on the log
#   "sysId":      record field whose value becomes the node's sysId (records where it is
#                 null are skipped)
#   "attributes": node property -> record field; written via COALESCE, so an already
#                 set property is kept
#   "constants":  node property -> value interpolated verbatim into the Cypher SET clause
#                 (also via COALESCE)
objects = {
    "Incident": [
        {
            "log": bpic14_incident,
            "sysId": "incidentId",
            "attributes": {
                "incidentId": "incidentId",
                "status": "status",
                "impact": "impact",
                "priority": "priority",
                "category": "category",
                "handleTimeHours": "handleTimeHours",
                "closureCode": "closureCode",
                "alertStatus": "alertStatus",
                "numReassignments": "numReassignments",
                "numRelatedInteractions": "numRelatedInteractions",
                "numRelatedIncidents": "numRelatedIncidents",
                "numRelatedChanges": "numRelatedChanges"
            },
        },
        {
            "log": bpic14_interaction,
            "sysId": "relatedIncident",
            "attributes": {
                "incidentId": "relatedIncident"
            },
            "constants": {
                "derivedFromInteraction": True
            }
        },

        {
            "log": bpic14_incident_activity,
            "sysId": "incidentId",
            "attributes": {
                "incidentId": "incidentId"
            }
        }
    ],
    "Interaction": [
        {
            "log": bpic14_interaction,
            "sysId": "interactionId",
            "attributes": {
                "interactionId": "interactionId",
                "status": "status",
                "impact": "impact",
                "priority": "priority",
                "category": "category",
                "handleTimeSecs": "handleTimeSecs",
                "closureCode": "closureCode",
                "firstCallResolution": "firstCallResolution"
            },
        },
        {
            "log": bpic14_incident,
            "sysId": "relatedInteraction",
            "attributes": {
                "interactionId": "relatedInteraction"
            },
        }
    ],
    "Change": [
        {
            "log": bpic14_change,
            "sysId": "changeId",
            "attributes": {
                "changeId": "changeId",
                "type": "changeType",
                "riskAssessment": "riskAssessment",
                "cabApprovalNeeded": "cabApprovalNeeded",
                "plannedStart": "plannedStart",
                "plannedEnd": "plannedEnd",
                "scheduledDowntimeStart": "scheduledDowntimeStart",
                "scheduledDowntimeEnd": "scheduledDowntimeEnd",
                "requestedEndDate": "requestedEndDate",
                "originatedFrom": "originatedFrom",
                "numRelatedInteractions": "numRelatedInteractions",
                "numRelatedIncidents": "numRelatedIncidents"
            },
        }, {
            "log": bpic14_incident,
            "sysId": "relatedChange",
            "attributes": {
                "changeId": "relatedChange"
            },
            "constants": {
                "derivedFromIncident": True
            }
        }
    ],
    "KnowledgeDocument": [
        {
            "log": None,
            "sysId": "kmNumber",
            "attributes": {"kmNumber": "kmNumber"}
        }
    ],
    "Resource": [
        {
            "log": bpic14_incident_activity,
            "sysId": "assignmentGroup",
            "attributes": {"assignmentGroup": "assignmentGroup"}
        }
    ],
    "ConfigurationItem": [
        {  # affected CIs
            "log": None,
            "sysId": "ciNameAff",
            "attributes": {
                "ciName": "ciNameAff",
                "ciType": "ciTypeAff",
                "ciSubtype": "ciSubtypeAff"
            },
            "constants": {
                "affected": True
            }
        },
        {  # caused by CIs
            "log": bpic14_incident,
            "sysId": "ciNameCby",
            "attributes": {
                "ciName": "ciNameCby",
                "ciType": "ciTypeCby",
                "ciSubtype": "ciSubtypeCby"
            },
            "constants": {
                "caused": True
            }

        }

    ],
    "ServiceComponent": [
        {  # affected SCs
            "log": None,
            "sysId": "serviceComponentAff",
            "attributes": {
                "scName": "serviceComponentAff"
            },
            "constants": {
                "affected": True
            }
        },
        {  # caused by SCs
            "log": bpic14_incident,
            "sysId": "serviceComponentCBy",
            "attributes": {
                "scName": "serviceComponentCBy"
            },
            "constants": {
                "caused": True
            }
        },
    ]
}

In [327]:
def create_index(_db_connection, _label):
    """
    Ensure an index on the ``sysId`` property exists for nodes labelled ``_label``.

    The index is named ``<lower-cased label>_sysId_index``.
    """
    cypher = """
        CREATE INDEX $index_name IF NOT EXISTS
        FOR (n:$label)
        ON (n.sysId)
    """
    name_of_index = f"{_label.lower()}_sysId_index"

    _db_connection.exec_query(
        Query(
            query_str=cypher,
            parameters={"index_name": name_of_index},
            template_string_parameters={"label": _label},
        )
    )
    print(f"Index for :{_label}(sysId)")


def _cypher_set_clause(alias, assignments, source_prefix=""):
    """Return ``SET alias.key = COALESCE(alias.key, <source_prefix><value>), ...`` or "" if empty."""
    if not assignments:
        # a bare "SET " is invalid Cypher, so emit nothing at all
        return ""
    return "SET " + ", ".join(
        f"{alias}.{key} = COALESCE({alias}.{key}, {source_prefix}{value})"
        for key, value in assignments.items()
    )


def build_entity(_label, _config, _db_connection=None):
    """
    Create (or complete) ``_label`` nodes from the Record nodes, following one config entry.

    :param _label: label of the nodes to MERGE.
    :param _config: extraction config with the keys
        ``sysId`` (required; record field used as node id),
        ``log`` (optional; log name to restrict the records to),
        ``attributes`` (optional; node property -> record field),
        ``constants`` (optional; node property -> value interpolated verbatim into Cypher),
        ``id_addition`` (optional; suffix appended to the sysId).
    :param _db_connection: connection used to run the query; when omitted, the
        notebook-level ``db_connection`` is used (the previous behavior).
    """
    iterate_query = """
        :auto
        MATCH (l:Log)-[:CONTAINS]->(r:Record)
        WHERE r.$sysId_field IS NOT NULL $log_name_condition $time_field_condition
        WITH r.$sysId_field $id_addition AS sysId, r
        CALL (sysId, r) {
             MERGE (n:$label {sysId: sysId})
             MERGE (n)-[:EXTRACTED_FROM]->(r)
             $attr_updates
             $constants_updates
        } IN TRANSACTIONS
    """
    attributes = _config.get("attributes") or {}
    constants = _config.get("constants") or {}
    log_name = _config.get("log")

    # Nodes with a timestamp attribute are only created from records that have that field.
    # The field name is taken into a local first: reusing the same quote character inside
    # an f-string expression is a SyntaxError before Python 3.12.
    timestamp_field = attributes.get("timestamp")
    time_field_condition = f"AND r.{timestamp_field} IS NOT NULL" if timestamp_field else ""

    id_addition = f"+ '{_config['id_addition']}'" if "id_addition" in _config else ""

    query = Query(
        query_str=iterate_query,
        parameters={
            "log_name": log_name,
        },
        template_string_parameters={
            "label": _label,
            "sysId_field": _config["sysId"],
            "log_name_condition": "AND l.name = $log_name" if log_name else "",
            "time_field_condition": time_field_condition,
            "attr_updates": _cypher_set_clause("n", attributes, source_prefix="r."),
            "constants_updates": _cypher_set_clause("n", constants),
            "id_addition": id_addition,
        }
    )

    connection = db_connection if _db_connection is None else _db_connection
    connection.exec_query(query)
    print(f"→ {_label} nodes created.")


def build_entities(_db_connection, entities):
    """
    Create the nodes for every label in ``entities``, after ensuring a sysId index per label.

    :param _db_connection: connection used for both the index and the node queries.
    :param entities: mapping of node label -> list of extraction configs (see ``build_entity``).

    Failures are reported on stdout per label/config and do not stop the remaining work.
    """
    print("\n=== INDEXES ===")
    for _label in entities:
        try:
            create_index(_db_connection=_db_connection,
                         _label=_label)
        except Exception as e:
            print(f"Failed to create index for {_label}: {e}")

    print("\n=== Building ENTITY NODES ===")

    for _label, _configs in entities.items():
        for _config in _configs:
            try:
                # pass the connection on so the nodes are built on the same database
                # the indexes were created on
                build_entity(_label=_label,
                             _config=_config,
                             _db_connection=_db_connection)
            except Exception as e:
                print(f"Failed for {_label}: {e}")


In [328]:
build_entities(db_connection, entities=objects)


=== INDEXES ===
Index for :Incident(sysId)
Index for :Interaction(sysId)
Index for :Change(sysId)
Index for :KnowledgeDocument(sysId)
Index for :Resource(sysId)
Index for :ConfigurationItem(sysId)
Index for :ServiceComponent(sysId)

=== Building ENTITY NODES ===
→ Incident nodes created.
→ Incident nodes created.
→ Incident nodes created.
→ Interaction nodes created.
→ Interaction nodes created.
→ Change nodes created.
→ Change nodes created.
→ KnowledgeDocument nodes created.
→ Resource nodes created.
→ ConfigurationItem nodes created.
→ ConfigurationItem nodes created.
→ ServiceComponent nodes created.
→ ServiceComponent nodes created.


In [329]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:Interaction)                 147172
(:Incident)                    47057
(:Change)                      18026
(:ConfigurationItem)           15134
(:KnowledgeDocument)           2373
(:ServiceComponent)            340
(:Resource)                    242
(:Log)                         4

--- Relationship counts ---
[:EXTRACTED_FROM]              2371470
[:CONTAINS]                    690622

--- Totals ---
Total nodes: 920970
Total relationships: 3062092


### O2O Relations

- (Incident|Interaction)-[:USED_KM]->(KnowledgeDocument)
- (Incident)-[:RELATED_CHANGE]->(Change)
- (Interaction)-[:RELATED_INCIDENT]->(Incident)
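- (Incident|Interaction|Change)-[:AFFECTED_CI]->(ConfigurationItem)
- (Incident|Interaction|Change)-[:AFFECTED_SC]->(ServiceComponent)
- (Incident)-[:CAUSED_BY_CI]->(ConfigurationItem)
- (Incident)-[:CAUSED_BY_SC]->(ServiceComponent)
- (ServiceComponent)-[:CONTAINS]->(ConfigurationItem)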

In [330]:
# Object-to-object relationship configuration, consumed by build_relationships().
# Each key is the relationship type; each list entry describes one way to derive it
# (fields as used by build_relationship()):
#   "from_object" / "to_object":
#       "label":       label expression interpolated into the MATCH pattern
#                      ("A|B" matches either label)
#       "foreign_key": record property that must be non-null and equal to that
#                      endpoint's sysId; an endpoint without it is only required to be
#                      extracted from the same Record
#   "constants": relationship property -> value interpolated verbatim into Cypher
o2o_relationships = {
    "USED_KM": [{
        "from_object": {
            "label": "Incident|Interaction"
        },
        "to_object": {
            "label": "KnowledgeDocument",
            "foreign_key": "kmNumber"
        }
    }],
    "RELATED_CHANGE": [{
        "from_object": {
            "label": "Incident"
        },
        "to_object": {
            "label": "Change",
            "foreign_key": "relatedChange"
        }
    }],
    "RELATED_INCIDENT": [
        {
            "from_object": {
                "label": "Interaction"
            },
            "to_object": {
                "label": "Incident",
                "foreign_key": "relatedIncident"
            }
        },
        {
            "from_object": {
                "label": "Interaction",
                "foreign_key": "relatedInteraction"
            },
            "to_object": {
                "label": "Incident"
            },
            "constants": {
                "primary": True
            }
        }],
    "AFFECTED_CI": [{
        "from_object": {
            "label": "Incident|Interaction|Change"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "foreign_key": "ciNameAff"
        },
    }],
    "AFFECTED_SC": [{
        "from_object": {
            "label": "Incident|Interaction|Change"
        },
        "to_object": {
            "label": "ServiceComponent",
            "foreign_key": "serviceComponentAff"
        },
    }],
    "CAUSED_BY_CI": [{
        "from_object": {
            "label": "Incident"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "foreign_key": "ciNameCby"
        },
    }],
    "CAUSED_BY_SC": [{
        "from_object": {
            "label": "Incident"
        },
        "to_object": {
            "label": "ServiceComponent",
            "foreign_key": "serviceComponentCBy"
        },
    }],
    "CONTAINS": [{
        "from_object": {
            "label": "ServiceComponent",
            "foreign_key": "serviceComponentAff"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "foreign_key": "ciNameAff"
        },
    }, {
        "from_object": {
            "label": "ServiceComponent",
            "foreign_key": "serviceComponentCBy"
        },
        "to_object": {
            "label": "ConfigurationItem",
            "foreign_key": "ciNameCby"
        },
    }]
}

In [331]:
def build_foreign_key_index(_db_connection, _config):
    """
    Ensure a ``:Record`` index exists for every foreign key named in a relationship config.

    Both endpoints (``from_object`` and ``to_object``) are inspected; an endpoint without
    a ``foreign_key`` entry is skipped.
    """
    index_cypher = '''
                            CREATE INDEX $index_name IF NOT EXISTS
                                FOR (n:Record) ON (n.$foreign_key) \
                            '''

    for side in ("from_object", "to_object"):
        endpoint = _config[side]
        if "foreign_key" not in endpoint:
            continue

        key = endpoint["foreign_key"]
        _db_connection.exec_query(
            Query(
                query_str=index_cypher,
                parameters={"index_name": f"record_{key}_index"},
                template_string_parameters={"foreign_key": key},
            )
        )
        print(f"Index ensured for :Record({key})")


def build_relationship(_db_connection, _type, _config):
    """
    MERGE ``_type`` relationships between nodes that were extracted from the same Record.

    :param _db_connection: connection used to run the query.
    :param _type: relationship type to create.
    :param _config: relationship config with ``from_object`` and ``to_object`` (each with
        a ``label`` and optionally a ``foreign_key``), plus optional ``attributes``
        (relationship property -> record field) and ``constants`` (relationship
        property -> value interpolated verbatim into Cypher).
    :raises ValueError: if neither endpoint declares a ``foreign_key``; the query would
        otherwise be sent with an empty WHERE clause.
    """
    o2o_query_str = '''
        :auto
         MATCH (from:$from_object) - [:EXTRACTED_FROM] -> (r:Record) <- [:EXTRACTED_FROM] - (to:$to_object)
         WHERE $condition
         CALL (from, to, r) {
            MERGE (from) - [rel:$type] -> (to)
            $attr_updates
            $constants_updates
        } IN TRANSACTIONS
    '''

    # Only emit a SET clause when there is something to set: a bare "SET " is invalid Cypher.
    attr_updates = ""
    if _config.get("attributes"):
        attr_updates = "SET " + ", ".join(
            f"rel.{key} = COALESCE(rel.{key}, r.{attr})" for key, attr in _config["attributes"].items())
    constants_updates = ""
    if _config.get("constants"):
        constants_updates = "SET " + ", ".join(
            f"rel.{key} = COALESCE(rel.{key}, {attr})" for key, attr in _config["constants"].items())

    from_object = _config["from_object"]
    to_object = _config["to_object"]

    # Both query parameters are always supplied; an unused one stays the empty string.
    parameters = {"from_foreign_key": "", "to_foreign_key": ""}
    conditions = []
    for alias, endpoint in (("from", from_object), ("to", to_object)):
        if "foreign_key" in endpoint:
            parameters[f"{alias}_foreign_key"] = endpoint["foreign_key"]
            conditions.append(
                f"r[${alias}_foreign_key] IS NOT NULL AND {alias}.sysId = r[${alias}_foreign_key]")

    if not conditions:
        raise ValueError(
            f"Relationship config for [:{_type}] needs a 'foreign_key' on from_object or to_object")

    o2o_query = Query(
        query_str=o2o_query_str,
        parameters=parameters,
        template_string_parameters={
            "condition": " AND ".join(conditions),
            "from_object": from_object["label"],
            "to_object": to_object["label"],
            "type": _type,
            "attr_updates": attr_updates,
            "constants_updates": constants_updates
        }
    )

    # use the connection that was passed in, not the notebook-level global
    _db_connection.exec_query(o2o_query)
    # report the endpoint labels rather than the whole endpoint dicts
    print(f"→ (:{from_object['label']}) - [:{_type}] -> (:{to_object['label']}) Relationship built")


def build_relationships(_db_connection, _relationships):
    """
    Build every configured relationship in two passes.

    The first pass ensures the Record indexes for all foreign keys, the second pass
    creates the relationships themselves. ``_relationships`` maps a relationship type to
    a list of relationship configs.
    """
    print("\n=== INDEXES ===")
    for config_list in _relationships.values():
        for relationship_config in config_list:
            build_foreign_key_index(_db_connection=_db_connection,
                                    _config=relationship_config)

    print("\n=== O2O RELATIONSHIPS ===")
    for relationship_type, config_list in _relationships.items():
        for relationship_config in config_list:
            build_relationship(_db_connection=_db_connection,
                               _type=relationship_type,
                               _config=relationship_config)

In [332]:
build_relationships(_db_connection=db_connection,
                    _relationships=o2o_relationships)


=== INDEXES ===
Index ensured for :Record(kmNumber)
Index ensured for :Record(relatedChange)
Index ensured for :Record(relatedIncident)
Index ensured for :Record(relatedInteraction)
Index ensured for :Record(ciNameAff)
Index ensured for :Record(serviceComponentAff)
Index ensured for :Record(ciNameCby)
Index ensured for :Record(serviceComponentCBy)
Index ensured for :Record(serviceComponentAff)
Index ensured for :Record(ciNameAff)
Index ensured for :Record(serviceComponentCBy)
Index ensured for :Record(ciNameCby)

=== O2O RELATIONSHIPS ===
→ (:{'label': 'Incident|Interaction'}) - [:USED_KM] -> (:{'label': 'KnowledgeDocument', 'foreign_key': 'kmNumber'}) Relationship built
→ (:{'label': 'Incident'}) - [:RELATED_CHANGE] -> (:{'label': 'Change', 'foreign_key': 'relatedChange'}) Relationship built
→ (:{'label': 'Interaction'}) - [:RELATED_INCIDENT] -> (:{'label': 'Incident', 'foreign_key': 'relatedIncident'}) Relationship built
→ (:{'label': 'Interaction', 'foreign_key': 'relatedInteractio

In [333]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:Interaction)                 147172
(:Incident)                    47057
(:Change)                      18026
(:ConfigurationItem)           15134
(:KnowledgeDocument)           2373
(:ServiceComponent)            340
(:Resource)                    242
(:Log)                         4

--- Relationship counts ---
[:EXTRACTED_FROM]              2371470
[:CONTAINS]                    705949
[:AFFECTED_CI]                 226751
[:AFFECTED_SC]                 214729
[:USED_KM]                     194437
[:RELATED_INCIDENT]            52687
[:CAUSED_BY_CI]                45499
[:CAUSED_BY_SC]                43123
[:RELATED_CHANGE]              536

--- Totals ---
Total nodes: 920970
Total relationships: 3855181


## Build Events

There are four types of events: Incident Events, Incident Activity Events, Change Events, and Interaction Events.

In [334]:
# Event extraction configuration, consumed by build_entities() in the same way as the
# object configuration. Each key is the event node label; each list entry is one pass:
#   "id_addition": suffix appended to the record's sysId field to form the event's sysId
#   "attributes":  a "timestamp" entry also restricts the pass to records where that
#                  record field is not null
#   "constants":   values are interpolated verbatim into Cypher, which is why string
#                  constants carry their own single quotes (e.g. "'Open'")
EVENTS = {
    "IncidentEvent": [
        {
            "log": bpic14_incident,
            "sysId": "incidentId",
            "id_addition": "_Open",
            "attributes": {
                "timestamp": "openTime"
            },
            "constants": {
                "activity": "'Open'"
            }
        }, {
            "log": bpic14_incident,
            "sysId": "incidentId",
            "id_addition": "_Resolve",
            "attributes": {
                "timestamp": "resolvedTime"
            },
            "constants": {
                "activity": "'Resolve'"
            }
        }, {
            "log": bpic14_incident,
            "sysId": "incidentId",
            "id_addition": "_Close",
            "attributes": {
                "timestamp": "closeTime"
            },
            "constants": {
                "activity": "'Close'"
            }
        }
    ],
    "ChangeEvent": [
        {
            "log": bpic14_change,
            "sysId": "changeId",
            "id_addition": "_Start",
            "attributes": {
                "timestamp": "actualStart"
            },
            "constants": {
                "activity": "'Start'"
            }
        }, {
            "log": bpic14_change,
            "sysId": "changeId",
            "id_addition": "_End",
            "attributes": {
                "timestamp": "actualEnd"
            },
            "constants": {
                "activity": "'End'"
            }
        }
    ],
    "InteractionEvent": [
        {
            "log": bpic14_interaction,
            "sysId": "interactionId",
            "id_addition": "_Open",
            "attributes": {
                "timestamp": "openTime"
            },
            "constants": {
                "activity": "'Open'"
            }
        }, {
            "log": bpic14_interaction,
            "sysId": "interactionId",
            "id_addition": "_Close",
            "attributes": {
                "timestamp": "closeTime"
            },
            "constants": {
                "activity": "'Close'"
            }
        }],
    "IncidentActivityEvent": [
        {
            "log": bpic14_incident_activity,
            "sysId": "activityNumber",
            "attributes": {
                "activity": "incidentActivityType",
                "timestamp": "dateStamp"
            }
        }
    ],
}


In [335]:
build_entities(db_connection, entities=EVENTS)


=== INDEXES ===
Index for :IncidentEvent(sysId)
Index for :ChangeEvent(sysId)
Index for :InteractionEvent(sysId)
Index for :IncidentActivityEvent(sysId)

=== Building ENTITY NODES ===
→ IncidentEvent nodes created.
→ IncidentEvent nodes created.
→ IncidentEvent nodes created.
→ ChangeEvent nodes created.
→ ChangeEvent nodes created.
→ InteractionEvent nodes created.
→ InteractionEvent nodes created.
→ IncidentActivityEvent nodes created.


In [336]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:IncidentActivityEvent)       466737
(:InteractionEvent)            294008
(:Interaction)                 147172
(:IncidentEvent)               138038
(:Incident)                    47057
(:ChangeEvent)                 33381
(:Change)                      18026
(:ConfigurationItem)           15134
(:KnowledgeDocument)           2373
(:ServiceComponent)            340
(:Resource)                    242
(:Log)                         4

--- Relationship counts ---
[:EXTRACTED_FROM]              3324284
[:CONTAINS]                    705949
[:AFFECTED_CI]                 226751
[:AFFECTED_SC]                 214729
[:USED_KM]                     194437
[:RELATED_INCIDENT]            52687
[:CAUSED_BY_CI]                45499
[:CAUSED_BY_SC]                43123
[:RELATED_CHANGE]              536

--- Totals ---
Total nodes: 1853134
Total relationships: 4807995


## E2O Relationships

In [337]:
# Event-to-object relationship configuration, passed to build_relationships().
# Same structure as the object-to-object configuration: each key is the relationship
# type, "from_object" is the event label and "to_object" the object label, whose
# "foreign_key" names the record property that must equal the object's sysId.
e2o_relationships = {
    "CORR": [
        {
            "from_object": {
                "label": "IncidentEvent"
            },
            "to_object": {
                "label": "Incident",
                "foreign_key": "incidentId"
            }
        },
        {
            "from_object": {
                "label": "ChangeEvent"
            },
            "to_object": {
                "label": "Change",
                "foreign_key": "changeId"
            }
        },
        {
            "from_object": {
                "label": "InteractionEvent"
            },
            "to_object": {
                "label": "Interaction",
                "foreign_key": "interactionId"
            }
        },
        {
            "from_object": {
                "label": "IncidentActivityEvent"
            },
            "to_object": {
                "label": "Incident",
                "foreign_key": "incidentId"
            }
        }
    ],
    "EXECUTED_BY": [
        {
            "from_object": {
                "label": "IncidentActivityEvent"
            },
            "to_object": {
                "label": "Resource",
                "foreign_key": "assignmentGroup"
            }
        }
    ]

}

In [338]:
build_relationships(db_connection, _relationships=e2o_relationships)


=== INDEXES ===
Index ensured for :Record(incidentId)
Index ensured for :Record(changeId)
Index ensured for :Record(interactionId)
Index ensured for :Record(incidentId)
Index ensured for :Record(assignmentGroup)

=== O2O RELATIONSHIPS ===
→ (:{'label': 'IncidentEvent'}) - [:CORR] -> (:{'label': 'Incident', 'foreign_key': 'incidentId'}) Relationship built
→ (:{'label': 'ChangeEvent'}) - [:CORR] -> (:{'label': 'Change', 'foreign_key': 'changeId'}) Relationship built
→ (:{'label': 'InteractionEvent'}) - [:CORR] -> (:{'label': 'Interaction', 'foreign_key': 'interactionId'}) Relationship built
→ (:{'label': 'IncidentActivityEvent'}) - [:CORR] -> (:{'label': 'Incident', 'foreign_key': 'incidentId'}) Relationship built
→ (:{'label': 'IncidentActivityEvent'}) - [:EXECUTED_BY] -> (:{'label': 'Resource', 'foreign_key': 'assignmentGroup'}) Relationship built


In [339]:
get_graph_statistics(db_connection)


=== GRAPH STATISTICS ===

--- Node counts ---
(:Record)                      690622
(:IncidentActivityEvent)       466737
(:InteractionEvent)            294008
(:Interaction)                 147172
(:IncidentEvent)               138038
(:Incident)                    47057
(:ChangeEvent)                 33381
(:Change)                      18026
(:ConfigurationItem)           15134
(:KnowledgeDocument)           2373
(:ServiceComponent)            340
(:Resource)                    242
(:Log)                         4

--- Relationship counts ---
[:EXTRACTED_FROM]              3324284
[:CORR]                        932164
[:CONTAINS]                    705949
[:EXECUTED_BY]                 466737
[:AFFECTED_CI]                 226751
[:AFFECTED_SC]                 214729
[:USED_KM]                     194437
[:RELATED_INCIDENT]            52687
[:CAUSED_BY_CI]                45499
[:CAUSED_BY_SC]                43123
[:RELATED_CHANGE]              536

--- Totals ---
Total nodes: 18531

# 2. ASSIGN TYPES

In [340]:
def add_object_type_node(object_type):
    """
    Ensure an ``ObjectType`` node exists for ``object_type`` and link all objects to it.

    Every node carrying the label ``object_type`` gets an ``IS_OF_TYPE`` relationship to
    the type node. Uses the notebook-level ``db_connection``.
    """
    # step 1: make sure the type node itself exists
    merge_type_cypher = '''
        MERGE (ot:ObjectType {objectType: $objectType})
    '''
    merge_type_query = Query(query_str=merge_type_cypher,
                             parameters={'objectType': object_type})
    db_connection.exec_query(merge_type_query)

    # step 2: attach every node with that label to the type node
    link_cypher = '''
        :auto
        MATCH (ot:ObjectType {objectType: $objectType })
        MATCH (o:$label)
        CALL (o, ot) {
            MERGE (o) - [:IS_OF_TYPE] -> (ot)
            } IN TRANSACTIONS
    '''
    link_query = Query(query_str=link_cypher,
                       parameters={'objectType': object_type},
                       template_string_parameters={"label": object_type})
    db_connection.exec_query(link_query)

    print(f'-> (:ObjectType {{objectType: "{object_type}"}}) created.')

In [341]:
for label in objects.keys():
    add_object_type_node(object_type=label)

-> (:ObjectType {objectType: "Incident"}) created.
-> (:ObjectType {objectType: "Interaction"}) created.
-> (:ObjectType {objectType: "Change"}) created.
-> (:ObjectType {objectType: "KnowledgeDocument"}) created.
-> (:ObjectType {objectType: "Resource"}) created.
-> (:ObjectType {objectType: "ConfigurationItem"}) created.
-> (:ObjectType {objectType: "ServiceComponent"}) created.


In [342]:
def add_event_type_node(event_type):
    """
    Ensure an ``EventType`` node exists for ``event_type`` and link all events to it.

    Every node carrying the label ``event_type`` gets an ``IS_OF_TYPE`` relationship to
    the type node. Uses the notebook-level ``db_connection``.
    """
    # step 1: make sure the type node itself exists
    merge_type_cypher = '''
        MERGE (et:EventType {eventType: $eventType})
    '''
    merge_type_query = Query(query_str=merge_type_cypher,
                             parameters={'eventType': event_type})
    db_connection.exec_query(merge_type_query)

    # step 2: attach every node with that label to the type node
    link_cypher = '''
        :auto
        MATCH (et:EventType {eventType: $eventType })
        MATCH (e:$label)
        CALL (e, et) {
            MERGE (e) - [:IS_OF_TYPE] -> (et)
        }
        IN TRANSACTIONS
    '''
    link_query = Query(query_str=link_cypher,
                       parameters={'eventType': event_type},
                       template_string_parameters={"label": event_type})
    db_connection.exec_query(link_query)

    print(f'-> (:EventType {{eventType: "{event_type}"}}) created.')

In [343]:
for label in EVENTS.keys():
    add_event_type_node(event_type=label)

-> (:EventType {eventType: "IncidentEvent"}) created.
-> (:EventType {eventType: "ChangeEvent"}) created.
-> (:EventType {eventType: "InteractionEvent"}) created.
-> (:EventType {eventType: "IncidentActivityEvent"}) created.


# 3) Enrichment

## 3.1 Materialize CI_SC Objects

In [344]:
def materialize_object(_db_connection, _label, _config):
    """Materialize a relationship between two object types as nodes labelled `_label`.

    For every ``(from) -[relation_type]-> (to)`` pair, where ``from`` and ``to``
    are of the configured object types, one node with label `_label` and
    ``sysId = from.sysId + '_' + to.sysId`` is merged and connected to both
    endpoints through ``RELATED`` relationships. The sysIds of both endpoints
    are stored on the new node under properties named after the endpoint labels.

    :param _db_connection: connection whose ``exec_query`` runs the query.
    :param _label: label of the materialized nodes.
    :param _config: dict with the keys ``from_object`` and ``to_object`` (each a
        dict with a ``label`` and an optional ``attributes`` mapping of
        ``new property -> endpoint property``) and ``relation_type``.
    """
    from_object = _config["from_object"]
    to_object = _config["to_object"]

    # Each endpoint's attributes are read from its own query variable: `from`
    # for from_object and `to` for to_object. COALESCE keeps a value that is
    # already present on the materialized node.
    set_attributes = [
        f"new.{key} = COALESCE(new.{key}, {alias}.{attr})"
        for alias, _object in (("from", from_object), ("to", to_object))
        for key, attr in (_object.get("attributes") or {}).items()
    ]
    # Without attributes the extra clause must disappear completely; a lone
    # "SET " would make the query syntactically invalid.
    set_attributes_clause = f"SET {', '.join(set_attributes)}" if set_attributes else ""

    materialize_relationship_query = '''
        :auto
        MATCH (from) - [ :IS_OF_TYPE] -> (:ObjectType {objectType: $from_object})
        MATCH (to) - [ :IS_OF_TYPE] -> (:ObjectType {objectType: $to_object})
        MATCH (from) - [r WHERE type(r) = $relation_type] -> (to)
        CALL (from, r, to) {
            MERGE (new:$materialized_object {sysId: from.sysId + '_' + to.sysId})
            MERGE (from) <- [:RELATED] - (new) - [:RELATED] -> (to)
            SET new[$from_object] = from.sysId,
                new[$to_object] = to.sysId
            $set_attributes
        } IN TRANSACTIONS
    '''

    materialize_query = Query(
        query_str=materialize_relationship_query,
        parameters={
            "from_object": from_object["label"],
            "to_object": to_object["label"],
            "relation_type": _config["relation_type"]
        },
        template_string_parameters={
            "materialized_object": _label,
            "set_attributes": set_attributes_clause
        }
    )

    _db_connection.exec_query(materialize_query)
    print(f"→ {_label} nodes created.")


In [345]:
def materialize_objects(_db_connection, _objects):
    """
    Create an index and the materialized nodes for every entry of `_objects`.

    First an index is created for each label in `_objects`, then
    `materialize_object` is called for each configuration listed under that
    label. A failure for one label or configuration is printed and does not
    stop the remaining ones.

    :param _db_connection: connection passed on to `create_index` and
        `materialize_object`.
    :param _objects: mapping of materialized label -> list of configurations.
    """
    print("\n=== INDEXES ===")
    # Iterate the argument, not a notebook global, so the function indexes
    # exactly the labels it is asked to materialize.
    for _label in _objects:
        try:
            create_index(_db_connection=_db_connection,
                         _label=_label)
        except Exception as e:
            print(f"Failed to create index for {_label}: {e}")

    print("\n=== Materializing Relationships ===")
    for _label, _configs in _objects.items():
        for _config in _configs:
            try:
                materialize_object(
                    _db_connection=_db_connection,
                    _label=_label,
                    _config=_config)
            except Exception as e:
                print(f"Failed for {_label}: {e}")

In [346]:
# CI_SC: materialized from (ServiceComponent) -[:CONTAINS]-> (ConfigurationItem).
# The `attributes` mapping lists properties to set on the new node (new name -> source name).
objects_to_materialize = {
    "CI_SC": [
        dict(
            from_object=dict(
                label="ServiceComponent",
                attributes=dict(ciType="ciType", ciSubtype="ciSubtype"),
            ),
            to_object=dict(label="ConfigurationItem"),
            relation_type="CONTAINS",
        ),
    ],
}

In [347]:
# Create the indexes and the materialized nodes configured in objects_to_materialize.
materialize_objects(db_connection, objects_to_materialize)


=== INDEXES ===
Index for :CI_SC(sysId)

=== Materializing Relationships ===
→ CI_SC nodes created.


In [348]:
# Register an :ObjectType node for every materialized object label.
# NOTE(review): `materialized_objects` is not defined in this section (the
# configuration dict here is `objects_to_materialize`) — confirm it is defined
# elsewhere and holds the intended labels.
for _object in materialized_objects:
    add_object_type_node(object_type=_object)

-> (:ObjectType {objectType: "CI_SC"}) created.


## 3.2 Extend Relationships to Materialized CI_SC Objects

In [349]:
def _related(related_label, related_object, relation_type):
    """Describe one hop from an endpoint to a related node."""
    return {
        "related_label": related_label,
        "related_object": related_object,
        "relation_type": relation_type,
    }


def _object_to_ci_sc(from_label, sc_relation, ci_relation):
    """Link `from_label` nodes to the CI_SC node sharing their ServiceComponent and ConfigurationItem."""
    return {
        "from_object": {
            "label": from_label,
            "relationships": [
                _related("ServiceComponent", "sc", sc_relation),
                _related("ConfigurationItem", "ci", ci_relation),
            ],
        },
        "to_object": {
            "label": "CI_SC",
            "relationships": [
                _related("ServiceComponent", "sc", "RELATED"),
                _related("ConfigurationItem", "ci", "RELATED"),
            ],
        },
    }


def _event_to_ci_sc(event_label, object_label, object_alias):
    """Link `event_label` events to the CI_SC nodes affected by the object they are correlated to."""
    return {
        "from_object": {
            "label": event_label,
            "relationships": [_related(object_label, object_alias, "CORR")],
        },
        "to_object": {
            "label": "CI_SC",
            "relationships": [_related(object_label, object_alias, "AFFECTED_CI_SC")],
        },
    }


# Relationship type -> list of configurations, each describing which (from, to)
# pairs get connected. Endpoints that use the same `related_object` alias are
# joined on that shared related node.
relationships_to_extend = {
    "AFFECTED_CI_SC": [
        _object_to_ci_sc("Incident|Interaction|Change", "AFFECTED_SC", "AFFECTED_CI"),
    ],
    "CAUSED_BY_CI_SC": [
        _object_to_ci_sc("Incident", "CAUSED_BY_SC", "CAUSED_BY_CI"),
    ],
    "CORR": [
        _event_to_ci_sc("ChangeEvent", "Change", "change"),
        _event_to_ci_sc("IncidentEvent", "Incident", "incident"),
        _event_to_ci_sc("IncidentActivityEvent", "Incident", "incident"),
        _event_to_ci_sc("InteractionEvent", "Interaction", "interaction"),
    ],
}

In [350]:
def extend_relationship(_db_connection, _type, _config):
    """Merge a `_type` relationship between every matching (from, to) node pair.

    The query matches nodes labelled ``from_object["label"]`` and
    ``to_object["label"]``, adds one extra MATCH per entry in either endpoint's
    ``relationships`` list, and merges ``(from) -[:_type]-> (to)`` for each
    distinct pair that satisfies all of them.

    :param _db_connection: connection whose ``exec_query`` runs the query.
    :param _type: type of the relationship to create.
    :param _config: dict with ``from_object`` and ``to_object``; each has a
        ``label`` and an optional ``relationships`` list of dicts with the keys
        ``related_label``, ``related_object`` and ``relation_type``.
    """
    from_object = _config["from_object"]
    to_object = _config["to_object"]
    # Pulled into locals so the final f-string needs no nested double quotes,
    # which are only valid syntax from Python 3.12 onwards.
    from_label = from_object["label"]
    to_label = to_object["label"]

    query_str = '''
        :auto
        MATCH (from:$from_object)
        MATCH (to:$to_object)
        $relation_conditions
        WITH distinct from, to
        CALL (from, to) {
            MERGE (from) - [r:$type] -> (to)
        } IN TRANSACTIONS
    '''

    # One undirected MATCH per configured hop; hops that use the same
    # `related_object` alias on both endpoints bind to the same node.
    relation_conditions = [
        f"MATCH ({endpoint}) - [:{relationship['relation_type']}] - "
        f"({relationship['related_object']}:{relationship['related_label']})"
        for endpoint, _object in (("from", from_object), ("to", to_object))
        for relationship in _object.get("relationships", [])
    ]

    query = Query(
        query_str=query_str,
        template_string_parameters={
            "from_object": from_label,
            "to_object": to_label,
            "type": _type,
            "relation_conditions": "\n".join(relation_conditions)
        }
    )

    _db_connection.exec_query(query)
    print(f"→ (:{from_label}) - [:{_type}] -> (:{to_label}) Relationship built")

def extend_relationships(_db_connection, _relationships):
    """Run `extend_relationship` for every configuration in `_relationships`.

    :param _db_connection: connection passed on to `extend_relationship`.
    :param _relationships: mapping of relationship type -> list of configurations.

    A failing configuration is reported with a printed message and the
    remaining ones are still processed.
    """
    # Lazily flatten the mapping into (type, configuration) pairs, in mapping order.
    pending = (
        (rel_type, rel_config)
        for rel_type, rel_configs in _relationships.items()
        for rel_config in rel_configs
    )
    for rel_type, rel_config in pending:
        try:
            extend_relationship(_db_connection, rel_type, rel_config)
        except Exception as exc:
            print(f"Failed for {rel_type}: {exc}")


In [351]:
# Build every relationship configured in relationships_to_extend.
extend_relationships(_db_connection=db_connection,
                     _relationships=relationships_to_extend)

→ (:Incident|Interaction|Change) - [:AFFECTED_CI_SC] -> (:CI_SC) Relationship built
→ (:Incident) - [:CAUSED_BY_CI_SC] -> (:CI_SC) Relationship built
→ (:ChangeEvent) - [:CORR] -> (:CI_SC) Relationship built
→ (:IncidentEvent) - [:CORR] -> (:CI_SC) Relationship built
→ (:IncidentActivityEvent) - [:CORR] -> (:CI_SC) Relationship built
→ (:InteractionEvent) - [:CORR] -> (:CI_SC) Relationship built


## 3.3 Check Incomplete Traces at Start And End of Period

# Remove every object that has at least one event before the cutoff, together with all of its events
Cutoff = 2013-08-19T09:59:53.000000000+01:00

In [None]:
results = []
labels = ['Incident', 'Interaction', 'Change']
# Single definition of the cutoff: both queries below receive it as $cutoff, so
# the reported counts and the actual deletion cannot drift apart.
cutoff = "2013-08-19T09:59:53.000000000+01:00"

# Per object type: the number of objects with at least one event before the
# cutoff and the number of events connected to those objects. This has to run
# before the delete query, otherwise there is nothing left to count.
count_query = '''
    MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - [] - (e:Event)
    WHERE ot.objectType in $labels
    WITH e, ot, o, e.timestamp < dateTime($cutoff) as before_cutoff
    WITH ot, o, collect(distinct before_cutoff) as before_cutoffs
    WHERE True in before_cutoffs
    MATCH (o) - [] - (all_e:Event)
    RETURN ot.objectType as _label, count(distinct o) as object_deleted, count(distinct all_e) as events_deleted
'''
query = Query(
    query_str=count_query,
    parameters={'labels': labels, 'cutoff': cutoff}
)
result = pd.DataFrame(db_connection.exec_query(query))
results.append(result)

# Same selection as the count query, but the selected objects and all events
# connected to them are removed.
delete_query_str = '''
    MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - [] - (e:Event)
    WHERE ot.objectType in $labels
    WITH e, o, e.timestamp < dateTime($cutoff) as before_cutoff
    WITH o, collect(distinct before_cutoff) as before_cutoffs
    WHERE True in before_cutoffs
    MATCH (o) - [] - (all_e:Event)
    DETACH DELETE o
    DETACH DELETE all_e
'''

delete_query = Query(
    query_str=delete_query_str,
    parameters={'labels': labels, 'cutoff': cutoff}
)
db_connection.exec_query(delete_query)

df_result = pd.concat(results)
df_result

In [None]:

# Per object type: how many objects and events are still in the graph.
kept_query_str = '''
    MATCH (ot:ObjectType) <- [:IS_OF_TYPE] - (o) - [] - (e:Event)
    WHERE ot.objectType in $labels
    RETURN ot.objectType as _label, count(distinct o) as objects_kept, count(distinct e) as events_kept
'''
query = Query(query_str=kept_query_str, parameters={'labels': labels})

kept_records = db_connection.exec_query(query)
df_result_kept = pd.DataFrame(kept_records)
print(df_result_kept)

In [None]:
# Combine the deleted and kept counts per object type into a single report.
report_columns = [
    'object_deleted', 'objects_kept', 'total_objects', '% objects deleted',
    'events_deleted', 'events_kept', 'total_events', '% events deleted',
]

df_both = df_result.set_index('_label').join(df_result_kept.set_index('_label'))
df_both = df_both.assign(
    total_objects=lambda df: df['objects_kept'] + df['object_deleted'],
    total_events=lambda df: df['events_kept'] + df['events_deleted'],
)
# Share of deleted items as a percentage, rounded to two decimals.
df_both['% events deleted'] = (df_both['events_deleted'] / df_both['total_events'] * 100).round(2)
df_both['% objects deleted'] = (df_both['object_deleted'] / df_both['total_objects'] * 100).round(2)

df_both = df_both[report_columns]
print(df_both)

## Delete Unreferenced CI_SC Nodes

In [None]:
# Delete CI_SC nodes that no event points to and that are not connected to any
# Incident, Change or Interaction; `cnt` is the number of nodes removed.
# The statement is wrapped in a Query object because every other exec_query
# call in this notebook is given a Query rather than a raw string.
query = Query(query_str='''MATCH (ci_sc:CI_SC)
WHERE NOT EXISTS((ci_sc) <- [] - (:Event)) AND NOT EXISTS ((ci_sc) -- (:Incident|Change|Interaction))
DETACH DELETE ci_sc
RETURN count(ci_sc) as cnt''')

pd.DataFrame(db_connection.exec_query(query))

### DF edges

Each DF edge has the following attributes:
- object type (which kind of object it correlates: Incident, Interaction, Change, etc.)
- object identifier (sysId) (which object it belongs to: incidentId, interactionId, changeId, etc.)
- dftype, an attribute that exists only for events of Incident objects and specifies whether the DF relation connects IncidentActivityEvent or IncidentEvent nodes

In [None]:
def build_df_edges(db_connection):
    """
    Build :DF edges between consecutive events of every object, one object type at a time.

    The object types are taken from the notebook-level `objects`, `RESOURCES`
    and `COMPLEX_OBJECTS` mappings, minus KnowledgeDocument, ConfigurationItem
    and ServiceComponent. For each remaining type, the events pointing to an
    object are ordered by timestamp (ties broken by node id) and each
    consecutive pair is connected by a ``DF`` edge carrying ``objectType`` and
    ``id`` (the object's sysId value). The work is batched through
    ``apoc.periodic.iterate`` and the result of each run is printed.

    :param db_connection: connection providing ``driver.get_session`` and ``db_name``.
    """

    # add here objects for which DF are not needed
    excluded_objects = {"KnowledgeDocument", "ConfigurationItem", "ServiceComponent"}
    all_objects = {
        label: data
        for label, data in {**objects, **RESOURCES, **COMPLEX_OBJECTS}.items()
        if label not in excluded_objects
    }

    with db_connection.driver.get_session(database=db_connection.db_name) as session:
        print("=== BUILDING DIRECTLY_FOLLOWS (DF) EDGES ===")

        for obj_label, obj_data in all_objects.items():
            sys_id_attr = obj_data.get("sysId")
            if not sys_id_attr:
                # Without an identifier property the query would read `o.None`
                # and try to merge DF edges with a null id, so skip explicitly.
                print(f"{obj_label} skipped: no 'sysId' attribute configured.")
                continue

            query = f"""
            CALL apoc.periodic.iterate(
                '
                MATCH (e:Event)-[]-> (o) - [:IS_OF_TYPE] -> (ot:ObjectType {{objectType:"{obj_label}" }})
                WHERE e.timestamp IS NOT NULL
                WITH o, ot.objectType as oType, e ORDER BY e.timestamp, ID(e)
                WITH o, oType, collect(e) AS events
                UNWIND range(0, size(events)-2) AS idx
                WITH events[idx] AS fromEv, events[idx+1] AS toEv, o.{sys_id_attr} AS objSysId, oType
                RETURN fromEv, toEv, objSysId, oType
                ',
                '
                WITH fromEv, toEv, objSysId, oType
                MERGE (fromEv)-[rel:DF {{objectType:oType, id:objSysId}}]->(toEv)
                ',
                {{batchSize:1000, parallel:false}}
            ) YIELD total
            RETURN total
            """

            res = session.run(query).data()
            print(f"{obj_label} DF creation result:", res)


In [None]:
# Create the DF edges for all object types handled by build_df_edges.
build_df_edges(db_connection=db_connection)

### Statistics

In [None]:
# Show statistics for the graph built so far.
# NOTE(review): get_graph_statistics is defined outside this section; what it reports was not verified here.
get_graph_statistics(db_connection)

In [None]:



# Register :ObjectType nodes for all object and resource labels.
# add_object_type_node prints its own confirmation line, so no extra message is
# printed here (the previous one duplicated it and lacked the closing parenthesis).
for dictionary in (objects, RESOURCES):
    for label in dictionary:
        add_object_type_node(object_type=label)