In [1]:
import pickle
import networkx as nx
import pandas as pd
import os

dir_path = os.getcwd()
pcg_data_path = f"{dir_path}/result/nodes_and_edges_PCG.pkl"
lotus_south_data_path = f"{dir_path}/result/nodes_and_edges_lotus_south.pkl"
# data_path_dict = {"PCG": pcg_data_path, "Lotus South": lotus_south_data_path}
data_path_dict = {"PCG": pcg_data_path}
# data_path_dict = {"Lotus South": lotus_south_data_path}


def filter_non_arrow_edges(edges):
    return [edge for edge in edges if edge["data"]["arrow_weight"] != "none"]


debug_list = []
all_data_list = []
for company, data_path in data_path_dict.items():
    G = nx.DiGraph()
    data = pickle.load(open(data_path, "rb"))

    nodes = data[0]
    edges = data[1]
    print(len(edges))
    filter_edges = filter_non_arrow_edges(edges)
    print(len(filter_edges))
    for edge in filter_edges:
        G.add_edge(
            edge["data"]["source"],
            edge["data"]["target"],
            weight=edge["data"]["raw_weight"],
        )
        if edge["data"]["source"] == "risk_PCG_40":
            debug_list.append(edge)
    in_degree_centrality_dict = nx.in_degree_centrality(G)
    out_degree_centrality_dict = nx.out_degree_centrality(G)
    betweenness_dict_weight = nx.betweenness_centrality(G, weight="weight")
    betweenness_dict_non_weight = nx.betweenness_centrality(G)

    # create list of data so I can convert to dataframe later
    for node in nodes:
        row_data = {
            "company": company,
            "risk_id": node["data"]["id"],
            "risk_name": node["data"]["label"],
            "risk_level": node["data"]["risk_level"],
            "in_degree": G.in_degree(node["data"]["id"]),
            "out_degree": G.out_degree(node["data"]["id"]),
            "in_degree_centrality": in_degree_centrality_dict.get(
                node["data"]["id"], None
            ),
            "out_degree_centrality": out_degree_centrality_dict.get(
                node["data"]["id"], None
            ),
            "betweenness_centrality_weight": betweenness_dict_weight.get(
                node["data"]["id"], None
            ),
            "betweenness_centrality_non_weight": betweenness_dict_non_weight.get(
                node["data"]["id"], None
            ),
        }
        all_data_list.append(row_data)
all_data_df = pd.DataFrame(all_data_list)
all_data_df.head()

2193
174


Unnamed: 0,company,risk_id,risk_name,risk_level,in_degree,out_degree,in_degree_centrality,out_degree_centrality,betweenness_centrality_weight,betweenness_centrality_non_weight
0,PCG,risk_PCG_0,Accounting errors,1,1,0,0.018519,0.0,0.0,0.0
1,PCG,risk_PCG_1,Business interruption from fire hazards,2,0,1,0.0,0.018519,0.0,0.0
2,PCG,risk_PCG_2,Business interruption from flood,1,1,0,0.018519,0.0,0.0,0.0
3,PCG,risk_PCG_3,Business interruption from labor dispute,1,(),(),,,,
4,PCG,risk_PCG_4,Business interruption from natural disasters,2,0,2,0.0,0.037037,0.0,0.0


In [2]:
# sort all_data_df out_degree
all_data_df = all_data_df.sort_values(by="out_degree_centrality", ascending=False)
all_data_df.head()

Unnamed: 0,company,risk_id,risk_name,risk_level,in_degree,out_degree,in_degree_centrality,out_degree_centrality,betweenness_centrality_weight,betweenness_centrality_non_weight
48,PCG,risk_PCG_48,Product-related dissatisfaction,2,6,6,0.111111,0.111111,0.072676,0.065514
40,PCG,risk_PCG_40,Poor product quality,2,2,6,0.037037,0.111111,0.031097,0.039483
46,PCG,risk_PCG_46,Product safety concern,2,2,5,0.037037,0.092593,0.025856,0.025507
36,PCG,risk_PCG_36,Operational inefficiency,2,4,4,0.074074,0.074074,0.043676,0.043676
29,PCG,risk_PCG_29,Machinery / equipment failure and damage,2,2,4,0.037037,0.074074,0.012928,0.013452


In [3]:
from collections import Counter

print(len(debug_list))
Counter(edge["data"]["target"] for edge in debug_list)
target_list = [edge["data"]["target"] for edge in debug_list]
# get label from nodes
nodes_list = [node for node in nodes if node["data"]["id"] in target_list]
nodes_list
# get label from nodes_list
label_list = [node["data"]["label"] for node in nodes_list]
label_list

11


['Non-compliance with laws or standards regarding product quality and safety',
 'Poor service quality',
 'Poor supply quality',
 'Product-related dissatisfaction',
 'Service-related dissatisfaction',
 'Unable to deliver product']

In [4]:
# 1 ['Non-compliance with laws or standards regarding product quality and safety',
# 2 'Operational inefficiency',
# 1 'Poor service quality',
# 1 'Poor supply quality',
# 2 'Product safety concern',
# 1 'Product-related dissatisfaction',
# 1 'Service-related dissatisfaction',
# 1 'Unable to deliver product']

In [5]:
node["data"]["id"]

'risk_PCG_65'

In [6]:
in_degree_centrality_dict

{'risk_PCG_9': 0.018518518518518517,
 'risk_PCG_0': 0.018518518518518517,
 'risk_PCG_21': 0.037037037037037035,
 'risk_PCG_36': 0.07407407407407407,
 'risk_PCG_37': 0.037037037037037035,
 'risk_PCG_39': 0.037037037037037035,
 'risk_PCG_41': 0.05555555555555555,
 'risk_PCG_42': 0.07407407407407407,
 'risk_PCG_48': 0.1111111111111111,
 'risk_PCG_54': 0.05555555555555555,
 'risk_PCG_59': 0.0,
 'risk_PCG_1': 0.0,
 'risk_PCG_12': 0.05555555555555555,
 'risk_PCG_4': 0.0,
 'risk_PCG_2': 0.018518518518518517,
 'risk_PCG_5': 0.0,
 'risk_PCG_23': 0.018518518518518517,
 'risk_PCG_10': 0.037037037037037035,
 'risk_PCG_43': 0.018518518518518517,
 'risk_PCG_45': 0.07407407407407407,
 'risk_PCG_58': 0.05555555555555555,
 'risk_PCG_65': 0.0,
 'risk_PCG_11': 0.037037037037037035,
 'risk_PCG_35': 0.037037037037037035,
 'risk_PCG_38': 0.05555555555555555,
 'risk_PCG_18': 0.037037037037037035,
 'risk_PCG_29': 0.037037037037037035,
 'risk_PCG_62': 0.037037037037037035,
 'risk_PCG_14': 0.0,
 'risk_PCG_15': 

In [7]:
all_data_df

Unnamed: 0,company,risk_id,risk_name,risk_level,in_degree,out_degree,in_degree_centrality,out_degree_centrality,betweenness_centrality_weight,betweenness_centrality_non_weight
48,PCG,risk_PCG_48,Product-related dissatisfaction,2,6,6,0.111111,0.111111,0.072676,0.065514
40,PCG,risk_PCG_40,Poor product quality,2,2,6,0.037037,0.111111,0.031097,0.039483
46,PCG,risk_PCG_46,Product safety concern,2,2,5,0.037037,0.092593,0.025856,0.025507
36,PCG,risk_PCG_36,Operational inefficiency,2,4,4,0.074074,0.074074,0.043676,0.043676
29,PCG,risk_PCG_29,Machinery / equipment failure and damage,2,2,4,0.037037,0.074074,0.012928,0.013452
...,...,...,...,...,...,...,...,...,...,...
31,PCG,risk_PCG_31,Negative media coverage,2,(),(),,,,
34,PCG,risk_PCG_34,Non-compliance with laws regarding migrant wor...,1,(),(),,,,
49,PCG,risk_PCG_49,Protest,2,(),(),,,,
57,PCG,risk_PCG_57,Theft/Loss of Physical Media Containing Sensit...,2,(),(),,,,


In [8]:
data

[[{'data': {'id': 'risk_PCG_0',
    'label': 'Accounting errors',
    'raw_size': np.float64(760.6634398205223),
    'size_level': 1,
    'size': 1,
    'color': 'rgb(54, 162, 235)',
    'risk_level': 1,
    'story': ''},
   'position': {'x': 455.0008644929565, 'y': 447.3915367498325}},
  {'data': {'id': 'risk_PCG_1',
    'label': 'Business interruption from fire hazards',
    'raw_size': np.float64(5949.1581742316075),
    'size_level': 2,
    'size': 50,
    'color': 'rgb(54, 162, 235)',
    'risk_level': 2,
    'story': 'A fire incident within the workplace can lead to significant business interruptions, resulting in damage to property and equipment, which may halt operations temporarily or permanently. This risk is primarily driven by negligence in handling flammable materials, inadequate fire safety training, and insufficient maintenance of fire safety equipment. Without a robust fire emergency response plan and effective communication procedures, the likelihood of such incidents 

In [9]:
tmp = all_data_df[all_data_df["company"] == "PCG"]
df_sorted = tmp.sort_values(by="betweenness_centrality_non_weight", ascending=False)
df_sorted
# df_sorted['in_degree'] = df_sorted['in_degree_centrality']*(len(df_sorted)-1)
# df_sorted['out_degree'] = df_sorted['out_degree_centrality']*(len(df_sorted)-1)
df_sorted

Unnamed: 0,company,risk_id,risk_name,risk_level,in_degree,out_degree,in_degree_centrality,out_degree_centrality,betweenness_centrality_weight,betweenness_centrality_non_weight
48,PCG,risk_PCG_48,Product-related dissatisfaction,2,6,6,0.111111,0.111111,0.072676,0.065514
58,PCG,risk_PCG_58,Unable to deliver product,2,3,2,0.055556,0.037037,0.052411,0.051188
41,PCG,risk_PCG_41,Poor service quality,1,3,3,0.055556,0.055556,0.045423,0.046122
62,PCG,risk_PCG_62,Vehicles failure and damage,2,2,2,0.037037,0.037037,0.045772,0.045248
36,PCG,risk_PCG_36,Operational inefficiency,2,4,4,0.074074,0.074074,0.043676,0.043676
...,...,...,...,...,...,...,...,...,...,...
31,PCG,risk_PCG_31,Negative media coverage,2,(),(),,,,
34,PCG,risk_PCG_34,Non-compliance with laws regarding migrant wor...,1,(),(),,,,
49,PCG,risk_PCG_49,Protest,2,(),(),,,,
57,PCG,risk_PCG_57,Theft/Loss of Physical Media Containing Sensit...,2,(),(),,,,
