In [1]:
import matplotlib.pyplot as plt

from pathlib import Path

from promg.modules.db_management import DBManagement
from tabulate import tabulate
import yaml

from promg import Configuration, DatabaseConnection, Performance, SemanticHeader, DatasetDescriptions, OcedPg, Query

import numpy as np
import pandas as pd
import seaborn as sns

# Widen the pandas text display (in characters) so wide frames are not wrapped when printed.
pd.set_option('display.width', 2000)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


In [2]:
# Directory (relative to this notebook) that holds the case study's config.yaml.
case_study = 'bpic14'
# When True, the notebook 0_analysis_and_model.ipynb is executed first (see the `if load:` cell).
load = False
# When True, the notebook 0_3_infer_high_level_events.ipynb is executed before the analysis.
infer_high_level_events = False


In [3]:
# Location of the case-study configuration file.
conf_path = Path(case_study, 'config.yaml')

# Read the YAML inside a context manager so the file handle is closed as soon as
# parsing is done (the bare open(...) call previously leaked the handle).
with open(conf_path) as conf_file:
    config = yaml.safe_load(conf_file)

# Show the connection settings the notebook expects, so the reader can align
# their database with them.
# NOTE(review): this writes the password in clear text into the saved notebook
# output — acceptable for a local demo credential, but clear the output before
# sharing if a real password is configured.
print("These are the credentials that I expect to be set for the database.")
print(f"db_name: {config['db_name']}")
print(f"uri: {config['uri']}")
print(f"password: {config['password']}")
print("----------------------")
print(f"If you have other credentials, please change them at: {conf_path}")

These are the credentials that I expect to be set for the database.
db_name: neo4j
uri: bolt://localhost:7687
password: bpic2014
----------------------
If you have other credentials, please change them at: bpic14\config.yaml


In [4]:
# Build the promg Configuration from the same YAML file. This rebinds `config`,
# which until now held the raw mapping returned by yaml.safe_load.
config = Configuration.init_conf_with_config_file(conf_path)
# Set up the database connection from that configuration; the query cells
# further down call db_connection.exec_query(...).
db_connection = DatabaseConnection.set_up_connection(config=config)

In [5]:
if load:
    # Import the original modeled data
    %run./ 0_analysis_and_model.ipynb

In [6]:
if infer_high_level_events:
    # Import the original modeled data
    %run./ 0_3_infer_high_level_events.ipynb

In [85]:
# Variants of high-level behaviour per CI_SC node, over the whole data set.
#
# The query
#   1. matches every HighLevelEvent correlated (:CORR) to a CI_SC node and the
#      ConfigurationItem that CI_SC is related to (:RELATED_CI),
#   2. counts the events per (ci_sc, ci, activity),
#   3. replaces the raw count by a coarse bucket label ("1", "2", "3-10", ...),
#   4. collects, per sysId/type/subtype, the distinct activities (bag_variant)
#      and the distinct "activity (bucket)" strings (set_variant),
#   5. joins each collection into a comma-separated string and counts the
#      distinct sysIds per (bag_variant, set_variant) pair.
#
# The last bucket is everything above 1000, so it is labelled ">1000"
# (it used to read ">1001", which mislabels a count of exactly 1001).
q_bag_variants = '''
MATCH (ci_sc:CI_SC) <- [:CORR] - (e:HighLevelEvent)
MATCH (ci_sc) - [:RELATED_CI] -> (ci:ConfigurationItem)
WITH ci_sc, ci, e.activity AS activity, count(e) as event_count ORDER BY activity, event_count
WITH ci_sc, ci, activity,
CASE
WHEN event_count = 1 THEN "1"
WHEN event_count = 2 THEN "2"
WHEN event_count <= 10 THEN "3-10"
WHEN event_count <= 20 THEN "11-20"
WHEN event_count <= 100 THEN "21-100"
WHEN event_count <= 1000 THEN "101-1000"
ELSE ">1000" END AS event_count
WITH ci_sc.sysId as sysId, ci.ciType as type, ci.ciSubtype as subtype, collect(distinct activity) as bag_variant, collect(distinct activity + " (" + event_count + ")") as set_variant
RETURN  rtrim(reduce(str = "", act in bag_variant | str + act + ", "),", ") as bag_variant,
        rtrim(reduce(str = "", act in set_variant | str + act + ", "),", ") as set_variant,
        count(distinct sysId) as num_ci_sc
'''

# One row per (bag_variant, set_variant) with its num_ci_sc count.
# Presumably exec_query returns a list of record dicts — confirm against promg.
result = pd.DataFrame(db_connection.exec_query(q_bag_variants))

In [86]:
# Collapse the query result to one row per (bag_variant, set_variant), summing
# the counts; sort=False keeps the groups in their order of appearance.
table = pd.pivot_table(result, index=['bag_variant', 'set_variant'], aggfunc="sum", sort=False)

# Percentage of all CI_SC nodes that belong to each bag variant.
total_ci_sc = table['num_ci_sc'].sum()
ci_sc_per_bag = table.groupby(['bag_variant'])['num_ci_sc'].transform("sum")
table['%_bag_variant'] = (ci_sc_per_bag / total_ci_sc * 100).round(2)

# Turn the index levels back into ordinary columns for the steps below.
table = table.reset_index()

# Percentage of each set variant within its bag variant.
bag_keys = ['bag_variant', '%_bag_variant']
ci_sc_within_bag = table.groupby(bag_keys)['num_ci_sc'].transform("sum")
table['%_set_variant'] = (table['num_ci_sc'] / ci_sc_within_bag * 100).round(2)

# Summary: total number of CI_SC nodes per bag variant (displayed as cell output).
bag_variants = pd.pivot_table(table, index=bag_keys, aggfunc={'num_ci_sc': "sum"})
bag_variants

Unnamed: 0_level_0,Unnamed: 1_level_0,num_ci_sc
bag_variant,%_bag_variant,Unnamed: 2_level_1
Change,63.61,8212
"Change, Incident",0.25,32
"Change, Incident, Interaction",4.11,531
"Change, Interaction",0.55,71
Incident,3.66,472
"Incident, Interaction",15.79,2038
Interaction,12.04,1554


In [90]:
# One row per (bag_variant, %_bag_variant, set_variant). No aggfunc is passed, so
# pandas applies its default aggregation ("mean") to the remaining numeric columns.
# NOTE(review): this reads `table` as left by whichever cell ran last; the
# per-type cell further down rebinds `table` with extra type/subtype columns —
# confirm the intended execution order.
set_variants = pd.pivot_table(table, index=['bag_variant', '%_bag_variant', 'set_variant'])

In [91]:
# Variants of high-level behaviour per CI_SC node, broken down by the type and
# subtype of the related ConfigurationItem.
# Differences from the overall query above: `type` and `subtype` are returned as
# extra columns, the set variant embeds the raw event count instead of a bucket
# label, and activities are ordered descending before being collected.
# NOTE(review): this rebinds `q_bag_variants` and `result`, overwriting the
# overall query and its result.
q_bag_variants = '''
MATCH (ci_sc:CI_SC) <- [:CORR] - (e:HighLevelEvent)
MATCH (ci_sc) - [:RELATED_CI] -> (ci:ConfigurationItem)
WITH ci_sc, ci, e.activity AS activity, count(e) as event_count ORDER BY activity DESC
WITH ci_sc.sysId as sysId, ci.ciType as type, ci.ciSubtype as subtype, collect(distinct activity) as bag_variant, collect(distinct activity + " (" + event_count + ")") as set_variant
RETURN  type,
        subtype,
        rtrim(reduce(str = "", act in bag_variant | str + act + ", "),", ") as bag_variant,
        rtrim(reduce(str = "", act in set_variant | str + act + ", "),", ") as set_variant,
        count(distinct sysId) as num_ci_sc
'''

result = pd.DataFrame(db_connection.exec_query(q_bag_variants))

In [94]:
# Collapse the per-type query result to one row per
# (type, subtype, bag_variant, set_variant), summing num_ci_sc.
table = pd.pivot_table(result, index=['type', 'subtype', 'bag_variant', 'set_variant'], aggfunc="sum")

# Share of each bag variant within its own (type, subtype), formatted as "12.34%".
table['%_bag_variant'] = (
    round(
        table.groupby(['type', 'subtype', 'bag_variant']).num_ci_sc.transform("sum") /
        table.groupby(['type', 'subtype']).num_ci_sc.transform("sum") * 100,
        2
    ).astype(str) + '%'
)
table.reset_index(inplace=True)

# Share of each set variant within its bag variant *of the same (type, subtype)*.
# The denominator must be grouped by type and subtype as well: grouping only by
# (bag_variant, %_bag_variant) pooled unrelated configuration-item types that
# happened to share a bag variant and percentage string (e.g. "Change", "100.0%").
table['%_set_variant'] = round(
    table.num_ci_sc /
    table.groupby(['type', 'subtype', 'bag_variant']).num_ci_sc.transform("sum") * 100,
    2
)

# Summary: number of CI_SC nodes per bag variant within each (type, subtype);
# displayed as the cell output.
bag_variants = pd.pivot_table(table, index=['type', 'subtype', 'bag_variant', '%_bag_variant'], aggfunc={'num_ci_sc': "sum"})
bag_variants

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,num_ci_sc
type,subtype,bag_variant,%_bag_variant,Unnamed: 4_level_1
#N/B,#N/B,Interaction,100.0%,1
Phone,Number,Change,97.56%,120
Phone,Number,Incident,0.81%,1
Phone,Number,Interaction,0.81%,1
Phone,Number,"Interaction, Incident",0.81%,1
...,...,...,...,...
subapplication,Web Based Application,Incident,1.83%,2
subapplication,Web Based Application,"Incident, Change",1.83%,2
subapplication,Web Based Application,Interaction,7.34%,8
subapplication,Web Based Application,"Interaction, Incident",29.36%,32
