In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from pyvis.network import Network
import json
import plotly.express as px
import plotly.graph_objects as go

In [2]:
with open('dict_entities.json', 'r') as f:
    db_dict = json.load(f)

In [3]:
db_dict

{'prod_017.PM152': {'cod_articolo': '017.PM152',
  'variante': 0,
  'costo': 10000.0,
  'data_riferimento': '2023-08-28',
  'categoria': 'Vetri',
  'descrizione': 'VETRO STRATIFICATO 33.1'},
 'prod_090.VST33': {'cod_articolo': '090.VST33',
  'variante': 0,
  'costo': 24.5,
  'data_riferimento': '2023-08-28',
  'categoria': 'Vetri',
  'descrizione': 'VETRO TRASPARENTE STRATIFICATO 3+3 F.G. ANTA INTELAIATA'},
 'prod_090.VST55': {'cod_articolo': '090.VST55',
  'variante': 0,
  'costo': 37.0,
  'data_riferimento': '2023-08-28',
  'categoria': 'Vetri',
  'descrizione': 'VETRO STRATIFICATO TRASPARENTE 5+5'},
 'prod_045.OB.P1T': {'cod_articolo': '045.OB.P1T',
  'variante': 0,
  'costo': 75.0,
  'data_riferimento': '2023-08-28',
  'categoria': 'Visive',
  'descrizione': "VETROCAMERA OBLO' PER PORTA 400X700 R100 TRASPARENTE S45"},
 'prod_052.MP.VT.POR.TR.45.04': {'cod_articolo': '052.MP.VT.POR.TR.45.04',
  'variante': 0,
  'costo': 112.0,
  'data_riferimento': '2023-08-28',
  'categoria': 'Visi

In [4]:
graph_db = nx.DiGraph()
for key in db_dict.keys():
    key_start = key.split('_')[0]

    if key_start == 'documento':
        cod_fornitore = db_dict[key]['cod_fornitore']
        azienda = db_dict[key]['azienda']
        importo = db_dict[key]['importo']
        id_documento = db_dict[key]['id_documento']
        fornitore = "fornitore_"+ str(cod_fornitore)
        if not graph_db.has_node(fornitore):
            graph_db.add_node(fornitore, type='fornitore')
        if not graph_db.has_node(azienda):
            graph_db.add_node(azienda, type='azienda')
        for prod in db_dict[key]['venduto']:
            codice_articolo = prod['codice_articolo']
            graph_db.add_edge(fornitore, azienda, type='materia', label= codice_articolo, name= codice_articolo, weight= 1)
        graph_db.add_edge(azienda, fornitore, type='money_neg', label= str(importo), name= str(importo), weight= importo)
        graph_db.add_edge(fornitore, azienda, type='document', label= id_documento, name= id_documento, weight= 1)

    elif key_start == 'preventivo':
        documento = db_dict[key]['id_documento']
        cliente = db_dict[key]['id_cliente']
        azienda = db_dict[key]['id_azienda']
        status = db_dict[key]['status']
        if not graph_db.has_node(cliente):
            graph_db.add_node(cliente, type='cliente')
        if not graph_db.has_node(azienda):
            graph_db.add_node(azienda, type='azienda')
        graph_db.add_edge(azienda, cliente, type='document', label= documento + "(" + status + ")", name= documento + "(" + status + ")", weight= 1)

    elif key_start == 'fattura':
        documento = db_dict[key]['id_documento']
        cliente = db_dict[key]['id_cliente']
        azienda = db_dict[key]['id_azienda']
        importo = db_dict[key]['importo']
        if not graph_db.has_node(cliente):
            graph_db.add_node(cliente, type='cliente')
        if not graph_db.has_node(azienda):
            graph_db.add_node(azienda, type='azienda')
        graph_db.add_edge(azienda, cliente, type='document', label= id_documento, name= id_documento, weight= 1)
        graph_db.add_edge(cliente, azienda, type='money_pos', label= str(importo), name= str(importo), weight= importo)
        for prod in db_dict[key]['venduto']:
            codice_articolo = prod['codice_articolo']
            graph_db.add_edge(azienda, cliente, type='materia', label= codice_articolo, name= codice_articolo, weight= 1)


In [5]:
graph_db_money = graph_db.copy()
for edge in graph_db.edges(data=True):
    if edge[2]['type'] not in ['money_pos','money_neg']:
        graph_db_money.remove_edge(edge[0], edge[1])
        
graph_db_materia = graph_db.copy()
for edge in graph_db.edges(data=True):
    if edge[2]['type'] != 'materia':
        graph_db_materia.remove_edge(edge[0], edge[1])

graph_db_document = graph_db.copy()
for edge in graph_db.edges(data=True):
    if edge[2]['type'] != 'document':
        graph_db_document.remove_edge(edge[0], edge[1])

In [6]:
graph_db_product = nx.DiGraph()
for key in db_dict.keys():
    key_start = key.split('_')[0]

    if key_start == 'documento':
        cod_fornitore = db_dict[key]['cod_fornitore']
        fornitore = "fornitore_"+ str(cod_fornitore)
        importo = db_dict[key]['importo']
        if not graph_db_product.has_node(fornitore):
            graph_db_product.add_node(fornitore, type='fornitore')
        for prod in db_dict[key]['venduto']:
            codice_articolo = prod['codice_articolo']
            articolo = "prod_" + codice_articolo
            if not graph_db_product.has_node(articolo):
                graph_db_product.add_node(articolo, type='materia', label= articolo, name= articolo, weight= 1)
        graph_db_product.add_edge(articolo, fornitore, type='money_neg', label= importo, name= importo, weight= importo)

    elif key_start == 'fattura':
        cliente = db_dict[key]['id_cliente']
        importo = db_dict[key]['importo']
        if not graph_db_product.has_node(cliente):
            graph_db_product.add_node(cliente, type='cliente')
        for prod in db_dict[key]['venduto']:
            codice_articolo = prod['codice_articolo']
            articolo = "prod_" + codice_articolo
            if not graph_db_product.has_node(articolo):
                graph_db_product.add_node(articolo, type='materia', label= codice_articolo, name= codice_articolo, weight= 1)
            graph_db_product.add_edge(cliente, articolo, type='money_pos', label= importo, name= importo, weight= importo)

In [7]:
graph_dict = {'all': graph_db, 'money': graph_db_money, 'materia': graph_db_materia, 'document': graph_db_document, 'prod': graph_db_product}

In [8]:
def create_graph(graph, name):
    # create pyvis network
    nt = Network(height="1024px", width="100%",
                 notebook=True, directed=True, 
                 neighborhood_highlight=True, select_menu=False, 
                 filter_menu=True, cdn_resources='in_line', bgcolor="#f4f4f4")
    #nt.from_nx(mrg)
    node_colors = {'fornitore': '#e0e098', 'azienda': '#76cc76', 'cliente': '#9d6ec9', 'materia': '#ff8c8c'}
    edge_colors = {'materia': 'black', 'money_neg': 'red', 'money_pos': 'green', 'document': 'blue'}
    for node in graph.nodes(data=True):
        node_name = node[0]
        node_type = node[1]['type']
        nt.add_node(node_name, title=node, color=node_colors[node_type])
    for edge in graph.edges(data=True):
        val = edge[2]['weight']
        edge_type = edge[2]['type']
        val = round(val, 2)
        label = edge[2]['label']
        try :
            label = float(label)
        except:
            label = edge[2]['label']
        if isinstance(label, float):
            label = round(label, 0)
        nt.add_edge(edge[0], edge[1], 
                    value=val, 
                    weight=val,
                    title=str(val),
                    label=str(label),
                    color=edge_colors[edge_type],
        )
    nt.force_atlas_2based(gravity=-50, overlap=1.0, central_gravity=0.005)
    nt.show_buttons(filter_=['physics'])#,'nodes'
    #nt.set_options("""
    #const options = {
    #  "borderWidth": 3,
    #  "borderWidthSelected": 5,
    #  "nodes": {
    #    "font": {
    #      "size": 42,
    #      "face": "verdana",
    #      "strokeWidth": 2
    #    },
    #    "size": 30
    #  },
    #  "physics": {
    #    "forceAtlas2Based": {
    #      "gravitationalConstant": -100,
    #      "centralGravity": 0.005,
    #      "springLength": 100,
    #      "avoidOverlap": 1
    #    },
    #    "minVelocity": 0.75,
    #    "solver": "forceAtlas2Based",
    #    "timestep": 1
    #  }
    #}
    #                """)
    nt.show("graph_analysis/"+name+".html")

In [9]:
for key in graph_dict.keys():
    create_graph(graph_dict[key], key)

graph_analysis/all.html
graph_analysis/money.html
graph_analysis/materia.html
graph_analysis/document.html
graph_analysis/prod.html


In [10]:
# for graph_money calculate in and out degree, betweenness and closeness, vitaility

def calculate_metrics(graph):
    in_degree = dict(graph.in_degree(weight='weight'))
    out_degree = dict(graph.out_degree(weight='weight'))
    betweenness = nx.betweenness_centrality(graph, weight='weight')
    closeness = nx.closeness_centrality(graph, distance='weight')
    #vitality = nx.closeness_vitality(graph, weight='weight')
    metrics = {}
    for node in graph.nodes():
        metrics[node] = {'in_degree': in_degree[node], 'out_degree': out_degree[node], 'betweenness': betweenness[node], 'closeness': closeness[node]}#, 'vitality': vitality[node]}
    return pd.DataFrame(metrics).T

In [11]:
df_metrics = calculate_metrics(graph_db)
df_metrics.to_csv('graph_analysis/metrics_money.csv')
df_metrics

Unnamed: 0,in_degree,out_degree,betweenness,closeness
fornitore_1964,47.5,2.0,0.073099,4.9e-05
mangini_3,1.0,0.0,0.0,4.9e-05
mangini_1,130055.864647,56.5,0.336257,4.9e-05
fornitore_1571,11040.0,1.0,0.0,1.9e-05
mangini_2,4.0,21762.12,0.035088,0.210526
fornitore_597,8500.0,1.0,0.0,2.5e-05
fornitore_5624,0.0,1.0,0.0,4.9e-05
fornitore_3493,620.77,1.0,0.0,0.000339
fornitore_584,1601.35,1.0,0.0,0.000131
cliente_1,2.0,50000.839623,0.0,6.3e-05


In [12]:
df_metrics = calculate_metrics(graph_db_product)
df_metrics.to_csv('graph_analysis/metrics_product.csv')
df_metrics

Unnamed: 0,in_degree,out_degree,betweenness,closeness
fornitore_1964,249.5,0.0,0.0,6.1e-05
prod_045.OB.P1T,0.0,0.0,0.0,0.0
fornitore_1571,11040.0,0.0,0.0,7e-06
prod_017.PM001,60029.086094,11040.0,0.006494,7e-06
fornitore_597,8500.0,0.0,0.0,4e-06
prod_017.PM507,30000.62137,8500.0,0.004329,2e-06
fornitore_5624,0.0,0.0,0.0,6e-06
prod_090.VST55,0.0,202.0,0.0,0.0
fornitore_3493,620.77,0.0,0.0,2.6e-05
prod_090.VST33,60151.090176,668.27,0.021645,1.9e-05


In [39]:
# create a dataframe from graph_money
df_product = pd.DataFrame(graph_db_product.edges(data=True), columns=['source', 'target', 'data'])
df_product['type'] = df_product['data'].apply(lambda x: x['type'])
df_product['label'] = df_product['data'].apply(lambda x: x['label'])
df_product['label'] = df_product['label'].apply(lambda x: round(float(x), 2) if isinstance(x, str) else x)
money_neg_sum = df_product[df_product['type'] == 'money_neg']["label"].sum()
money_pos_sum = df_product[df_product['type'] == 'money_pos']["label"].sum()
df_product["label_percentage"] = df_product.apply(lambda x: x['label']/money_neg_sum if x['type'] == 'money_neg' else x['label']/money_pos_sum, axis=1)
df_product.drop(columns=['data'], inplace=True)
#df_money.to_csv('graph_analysis/df_money.csv')3
entities = list(set(df_product['source'].unique().tolist() + df_product['target'].unique().tolist()))
# assign a number to each entity
entity_dict = {entities[i]: i for i in range(len(entities))}
# assign category to each entity
entity_type = {entities[i]: graph_db_product.nodes[entities[i]]['type'] for i in range(len(entities))}
# assign a color to each entity type
entity_color = {'fornitore': '#e0e098', 'cliente': '#9d6ec9', 'materia': '#686868'}
# create the list of colors for each entity
entity_colors = [entity_color[entity_type[entity]] for entity in entities]
df_product['source_id'] = df_product['source'].apply(lambda x: entity_dict[x])
df_product['target_id'] = df_product['target'].apply(lambda x: entity_dict[x])
# assign a color to each type
df_product['color'] = df_product['type'].apply(lambda x: '#ff7c7f' if x == 'money_neg' else '#7cff96')
# label percentage must be negative for money_neg
df_product

Unnamed: 0,source,target,type,label,label_percentage,source_id,target_id,color
0,prod_045.OB.P1T,fornitore_1964,money_neg,0.0,0.0,13,20,#ff7c7f
1,prod_017.PM001,fornitore_1571,money_neg,11040.0,0.501553,14,15,#ff7c7f
2,prod_017.PM507,fornitore_597,money_neg,8500.0,0.38616,1,22,#ff7c7f
3,prod_017.PM507,fornitore_5624,money_neg,0.0,0.0,1,0,#ff7c7f
4,prod_090.VST55,fornitore_1964,money_neg,202.0,0.009177,19,20,#ff7c7f
5,prod_090.VST33,fornitore_3493,money_neg,620.77,0.028202,21,8,#ff7c7f
6,prod_090.VST33,fornitore_1964,money_neg,47.5,0.002158,21,20,#ff7c7f
7,prod_017.PM597,fornitore_584,money_neg,1601.35,0.07275,7,11,#ff7c7f
8,prod_052.MP.VT.POR.TR.45.04,fornitore_1964,money_neg,0.0,0.0,2,20,#ff7c7f
9,cliente_0,prod_017.PM152,money_pos,30000.610208,0.085659,12,18,#7cff96


In [42]:
sources = df_product['source_id'].to_list()
targets = df_product['target_id'].to_list()
nodes = list(set(sources + targets))
values = df_product['label_percentage'].to_list()
values = [round(x, 2) for x in values]
edge_colors = df_product['color'].to_list()

In [50]:
fig = go.Figure(
    data=
    [
      go.Sankey(
        valueformat = ".2%",
        #valuesuffix = "%",
        #arrangement='snap',
        # Define nodes
        node = dict(
          pad = 15,
          thickness = 15,
          line = dict(color = "black", width = 0.5),
          label =  list(entity_dict.keys()),
          color =  entity_colors,
          #align="left",
        ),
        # Add links
        link = dict(
          arrowlen=10,
          source = sources,
          target =  targets,
          value =  values,
          #label =  df_product['label_percentage'].to_list(),
          color =  edge_colors
        )
      )
    ]
)

fig.update_layout(title_text="Products money flow",
                  font_size=10, height=600, width=1500)
fig.write_html('graph_analysis/sankey_product.html')
fig.show()

In [118]:
work_product_graph = nx.DiGraph()
for i in range(0,15):
    quantity = np.random.randint(125, 350)
    work_product_graph.add_node("p_l0_"+str(i)+"("+str(quantity)+")", label="p_l0_"+str(i)+"("+str(quantity)+")", type='product_l0', quantity=quantity, status='sufficient')
for i in range(0,10):
    quantity = np.random.randint(5, 30)
    work_product_graph.add_node("p_l1_"+str(i)+"("+str(quantity)+")", label="p_l1_"+str(i)+"("+str(quantity)+")", type='product_l1', quantity=quantity, status='sufficient')
for i in range(0,3):
    quantity = np.random.randint(1, 5)
    work_product_graph.add_node("p_l2_"+str(i)+"("+str(quantity)+")", label="p_l2_"+str(i)+"("+str(quantity)+")", type='product_l2', quantity=quantity, status='sufficient')

product_l0_nodes = [node for node in work_product_graph.nodes() if 'p_l0' in node]
len_l0 = len(product_l0_nodes)
product_l1_nodes = [node for node in work_product_graph.nodes() if 'p_l1' in node]
len_l1 = len(product_l1_nodes)
product_l2_nodes = [node for node in work_product_graph.nodes() if 'p_l2' in node]
len_l2 = len(product_l2_nodes)
for i in range(0, 100):
    random_l0 = np.random.randint(0, len_l0)
    random_l1 = np.random.randint(0, len_l1)
    random_l2 = np.random.randint(0, len_l2)
    if np.random.rand() > 0.75:
        w = int(np.random.rand()*10+1)
        node_l0 = product_l0_nodes[random_l0]
        node_l1 = product_l1_nodes[random_l1]
        node_l0_quantity = work_product_graph.nodes[node_l0]['quantity']
        node_l1_quantity = work_product_graph.nodes[node_l1]['quantity']
        work_product_graph.add_edge(node_l0, node_l1, weight=w, label=str(w), status='ok')
        if np.random.rand() > 0.5:
            w = int(np.random.rand()*5+1)
            node_l1 = product_l1_nodes[random_l1]
            node_l2 = product_l2_nodes[random_l2]
            node_l1_quantity = work_product_graph.nodes[node_l1]['quantity']
            node_l2_quantity = work_product_graph.nodes[node_l2]['quantity']
            work_product_graph.add_edge(node_l1, node_l2, weight=w, label=str(w), status='ok')
# for every edge
for edge in work_product_graph.edges(data=True):
    node_1 = edge[0]
    node_1_quantity = work_product_graph.nodes[node_1]['quantity']
    node_2 = edge[1]
    node_2_quantity = work_product_graph.nodes[node_2]['quantity']
    edge_weight = edge[2]['weight']
    if node_1_quantity < edge_weight*node_2_quantity:
        edge[2]['status'] = 'not ok'
        work_product_graph.nodes[node_1]['status'] = 'insufficient'
        work_product_graph.nodes[node_2]['status'] = 'blocked'

#change = True
#while change:
    change = False
    # all the edges that come out from a node with status blocked or insufficient must be not ok
    for node in work_product_graph.nodes(data=True):
        if node[1]['status'] == 'blocked' or node[1]['status'] == 'insufficient':
            for edge in work_product_graph.edges(node[0], data=True):
                edge[2]['status'] = 'not ok'
            change = True
    # all nodes that receive an edge with status not ok must be blocked
    for edge in work_product_graph.edges(data=True):
        if edge[2]['status'] == 'not ok':
            work_product_graph.nodes[edge[1]]['status'] = 'blocked'
            change = True
# remove nodes not connected
nodes_to_remove = [node for node in work_product_graph.nodes() if work_product_graph.degree(node) == 0]
work_product_graph.remove_nodes_from(nodes_to_remove)

In [119]:
# create pyvis network
nt = Network(height="1024px", width="100%",
             notebook=True, directed=True, 
             neighborhood_highlight=True, select_menu=True, 
             filter_menu=True, cdn_resources='in_line', bgcolor="#f4f4f4")
#nt.from_nx(mrg)
node_colors = {'product_l0': '#aad2ff', 'product_l1': '#56a5ff', 'product_l2': '#0579ff'}
for node in work_product_graph.nodes(data=True):
    node_name = node[0]
    node_type = node[1]['type']
    node_quantity = node[1]['quantity']
    node_color = node_colors[node_type]
    node_status = node[1]['status']
    if node_status == "insufficient":
        node_color = "#db2525"
    if node_status == "blocked":
        node_color = "#6b6b6b"
    nt.add_node(node_name, title=node, color=node_color, value=node_quantity)
for edge in work_product_graph.edges(data=True):
    node_1_type = work_product_graph.nodes[edge[0]]['type']
    val = edge[2]['weight']
    val = round(val, 2)
    label = edge[2]['label']
    try :
        label = float(label)
    except:
        label = edge[2]['label']
    if isinstance(label, float):
        label = round(label, 0)
    if edge[2]['status'] == 'not ok':
        edge_color = "#db2525"
    else:
        edge_color = node_colors[node_1_type]
    nt.add_edge(edge[0], edge[1], 
                value=val, 
                weight=val,
                title=str(val),
                label=str(label),
                color=edge_color,
    )
nt.force_atlas_2based(gravity=-50, overlap=1.0, central_gravity=0.005)
nt.show_buttons(filter_=['physics'])#,'nodes'
nt.show("graph_analysis/work_products_graph.html")

graph_analysis/work_products_graph.html


In [139]:
random_1 = np.random.randint(0, 10)
random_2 = np.random.randint(0, 5)
random_3 = np.random.randint(0, 3)

dict_work = {"p1":[], "p2":[], "work":[]}
for i in range(0, 15):
    random_1 = np.random.randint(1, 10)
    random_2 = np.random.randint(10, 15)
    dict_work["p1"].append(random_1)
    dict_work["p2"].append(random_2)
    work = np.random.randint(1, 10)
    dict_work["work"].append(work)
for i in range(0, 5):
    random_2 = np.random.randint(10, 15)
    random_3 = np.random.randint(15, 18)
    dict_work["p1"].append(random_2)
    dict_work["p2"].append(random_3)
    work = np.random.randint(1, 25)
    dict_work["work"].append(work)

df_work = pd.DataFrame(dict_work)
df_work

Unnamed: 0,p1,p2,work
0,9,12,5
1,3,14,2
2,3,11,5
3,4,11,6
4,4,11,9
5,3,14,2
6,7,11,2
7,4,10,9
8,8,14,3
9,3,10,5


In [140]:
entities = list((df_work['p1'].unique().tolist() + df_work['p2'].unique().tolist()))
entities

[9, 3, 4, 7, 8, 6, 1, 5, 12, 13, 11, 12, 14, 11, 10, 13, 17, 16, 15]

In [142]:
fig = go.Figure(
    data=
    [
      go.Sankey(
        #valueformat = ".2%",
        #valuesuffix = "%",
        #arrangement='snap',
        # Define nodes
        node = dict(
          pad = 15,
          thickness = 15,
          line = dict(color = "blue", width = 0.5),
          label = entities,
          #align="left",
        ),
        # Add links
        link = dict(
          arrowlen=10,
          source = df_work['p1'].to_list(),
          target =  df_work['p2'].to_list(),
          value =  df_work['work'].to_list(),
          #label =  df_product['label_percentage'].to_list(),
          color =  "#ddc696"
        )
      )
    ]
)

fig.update_layout(title_text="Products work flow",
                  font_size=10, height=600, width=1500)
fig.write_html('graph_analysis/sankey_work.html')
fig.show()