In [1]:
import matplotlib.pyplot as plt

from pathlib import Path

from promg.modules.db_management import DBManagement
from tabulate import tabulate
import yaml

from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

import numpy as np
import pandas as pd

# Raise pandas' display width (in characters) so wide result frames are not wrapped when rendered as text.
pd.set_option('display.width', 2000)


In [2]:
# Name of the case-study directory; the config file is looked up as <case_study>/config.yaml.
case_study = 'bpic14'
# When True, the modelling notebook is (re-)run below before the checks; False skips that step.
load = False


In [3]:
# Location of the case study's configuration file, relative to the notebook's working directory.
conf_path = Path(case_study, 'config.yaml')

# Read the YAML inside a context manager so the file handle is closed as soon as parsing is done
# (a bare `open(...)` passed to `safe_load` is never closed explicitly).
with conf_path.open() as conf_file:
    config = yaml.safe_load(conf_file)

# Show the connection settings the rest of the notebook expects, so they can be compared with the local database.
# NOTE(review): this echoes the password in clear text into the saved notebook output — acceptable only
# for local default credentials; confirm before sharing the notebook.
print("These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


In [4]:
# Re-bind `config`: from here on it is the object returned by promg's Configuration loader for the same
# file, no longer the raw YAML dict used for the credential preview.
config = Configuration.init_conf_with_config_file(conf_path)
# NOTE(review): presumably sets up the connection to the database described in the config — confirm against promg.
# `db_connection.exec_query` is what the query cells below use.
db_connection = DatabaseConnection.set_up_connection(config=config)

In [5]:
if load:
    # Import the original modeled data
    %run./ 0_analysis_and_model.ipynb

# Check Objects without Any Events

In [6]:
# For every node that IS_OF_TYPE some ObjectType (excluding nodes labelled 'KnowledgeDocument'), count per
# first label how many nodes do / do not have a relationship coming in from an Event node.
query = '''
    MATCH (i) - [:IS_OF_TYPE] -> (ot:ObjectType)
    WHERE not 'KnowledgeDocument' in labels(i)
    RETURN EXISTS((i) <- [] - (:Event)) as has_events, labels(i)[0] as _label, count(i) as cnt order by _label, has_events
'''

df_result = pd.DataFrame(db_connection.exec_query(query))

# One row per (label, has_events) pair, with the counts summed.
table = df_result.pivot_table(index=['_label', 'has_events'], aggfunc="sum")

# Share of each row within its label, formatted as a percentage string with two decimals.
label_totals = table.groupby(level=0)['cnt'].transform("sum")
share = table['cnt'].div(label_totals).mul(100).round(2)
table['%'] = share.astype(str) + '%'
table

Unnamed: 0_level_0,Unnamed: 1_level_0,cnt,%
_label,has_events,Unnamed: 2_level_1,Unnamed: 3_level_1
CI_SC,False,1353,8.81%
CI_SC,True,14003,91.19%
Change,True,17318,100.0%
ConfigurationItem,False,1318,8.71%
ConfigurationItem,True,13816,91.29%
Incident,True,46368,100.0%
Interaction,True,146553,100.0%
Resource,False,2,0.83%
Resource,True,240,99.17%
ServiceComponent,False,5,1.47%


The numbers found for 'Incident', 'Interaction', and 'Change' correspond to the objects whose events were deleted before the cutoff.

After deleting these objects, we might now have CI_SC that have no other object referring to them.

In [13]:
# Break the CI_SC nodes down by two flags: whether any Event node has a relationship into them, and whether
# they are connected (in either direction) to an Incident, Change or Interaction node.
query = '''MATCH (ci_sc:CI_SC)
RETURN EXISTS((ci_sc) <- [] - (:Event)) as has_events, EXISTS ((ci_sc) -- (:Incident|Change|Interaction)) as is_refered_to, count(ci_sc) as cnt'''

# Fetch the records first, then render them as a frame (the frame is the cell's displayed result).
ci_sc_records = db_connection.exec_query(query)
pd.DataFrame(ci_sc_records)

Unnamed: 0,has_events,is_refered_to,cnt
0,True,True,14003
1,False,False,323
2,False,True,1030


## FINDING
We can also delete the 323 CI_SC that have no events and are no longer referred to by any Incident, Change, or Interaction.
