In [20]:
import pandas as pd
import numpy as np
import networkx as nx
from pyvis.network import Network
import json
import plotly.express as px
import plotly.graph_objects as go

In [21]:
with open('dict_entities.json', 'r') as f:
    db_dict = json.load(f)

In [34]:
db_dict

{'prod_017.PM152': {'cod_articolo': '017.PM152',
  'variante': 0,
  'costo': 10000.0,
  'data_riferimento': '2023-08-28',
  'categoria': 'Vetri',
  'descrizione': 'VETRO STRATIFICATO 33.1'},
 'prod_090.VST33': {'cod_articolo': '090.VST33',
  'variante': 0,
  'costo': 24.5,
  'data_riferimento': '2023-08-28',
  'categoria': 'Vetri',
  'descrizione': 'VETRO TRASPARENTE STRATIFICATO 3+3 F.G. ANTA INTELAIATA'},
 'prod_090.VST55': {'cod_articolo': '090.VST55',
  'variante': 0,
  'costo': 37.0,
  'data_riferimento': '2023-08-28',
  'categoria': 'Vetri',
  'descrizione': 'VETRO STRATIFICATO TRASPARENTE 5+5'},
 'prod_045.OB.P1T': {'cod_articolo': '045.OB.P1T',
  'variante': 0,
  'costo': 75.0,
  'data_riferimento': '2023-08-28',
  'categoria': 'Visive',
  'descrizione': "VETROCAMERA OBLO' PER PORTA 400X700 R100 TRASPARENTE S45"},
 'prod_052.MP.VT.POR.TR.45.04': {'cod_articolo': '052.MP.VT.POR.TR.45.04',
  'variante': 0,
  'costo': 112.0,
  'data_riferimento': '2023-08-28',
  'categoria': 'Visi

In [22]:
graph_db = nx.DiGraph()
for key in db_dict.keys():
    key_start = key.split('_')[0]

    if key_start == 'documento':
        cod_fornitore = db_dict[key]['cod_fornitore']
        azienda = db_dict[key]['azienda']
        importo = db_dict[key]['importo']
        id_documento = db_dict[key]['id_documento']
        fornitore = "fornitore_"+ str(cod_fornitore)
        if not graph_db.has_node(fornitore):
            graph_db.add_node(fornitore, type='fornitore')
        if not graph_db.has_node(azienda):
            graph_db.add_node(azienda, type='azienda')
        for prod in db_dict[key]['venduto']:
            codice_articolo = prod['codice_articolo']
            graph_db.add_edge(fornitore, azienda, type='materia', label= codice_articolo, name= codice_articolo, weight= 1)
        graph_db.add_edge(azienda, fornitore, type='money_neg', label= str(importo), name= str(importo), weight= importo)
        graph_db.add_edge(fornitore, azienda, type='document', label= id_documento, name= id_documento, weight= 1)

    elif key_start == 'preventivo':
        documento = db_dict[key]['id_documento']
        cliente = db_dict[key]['id_cliente']
        azienda = db_dict[key]['id_azienda']
        status = db_dict[key]['status']
        if not graph_db.has_node(cliente):
            graph_db.add_node(cliente, type='cliente')
        if not graph_db.has_node(azienda):
            graph_db.add_node(azienda, type='azienda')
        graph_db.add_edge(azienda, cliente, type='document', label= documento + "(" + status + ")", name= documento + "(" + status + ")", weight= 1)

    elif key_start == 'fattura':
        documento = db_dict[key]['id_documento']
        cliente = db_dict[key]['id_cliente']
        azienda = db_dict[key]['id_azienda']
        importo = db_dict[key]['importo']
        if not graph_db.has_node(cliente):
            graph_db.add_node(cliente, type='cliente')
        if not graph_db.has_node(azienda):
            graph_db.add_node(azienda, type='azienda')
        graph_db.add_edge(azienda, cliente, type='document', label= id_documento, name= id_documento, weight= 1)
        graph_db.add_edge(cliente, azienda, type='money_pos', label= str(importo), name= str(importo), weight= importo)
        for prod in db_dict[key]['venduto']:
            codice_articolo = prod['codice_articolo']
            graph_db.add_edge(azienda, cliente, type='materia', label= codice_articolo, name= codice_articolo, weight= 1)


In [23]:
graph_db_money = graph_db.copy()
for edge in graph_db.edges(data=True):
    if edge[2]['type'] not in ['money_pos','money_neg']:
        graph_db_money.remove_edge(edge[0], edge[1])
        
graph_db_materia = graph_db.copy()
for edge in graph_db.edges(data=True):
    if edge[2]['type'] != 'materia':
        graph_db_materia.remove_edge(edge[0], edge[1])

graph_db_document = graph_db.copy()
for edge in graph_db.edges(data=True):
    if edge[2]['type'] != 'document':
        graph_db_document.remove_edge(edge[0], edge[1])

In [None]:
graph_db_product = nx.DiGraph()
for key in db_dict.keys():
    key_start = key.split('_')[0]

    if key_start == 'documento':
        cod_fornitore = db_dict[key]['cod_fornitore']
        fornitore = "fornitore_"+ str(cod_fornitore)
        importo = db_dict[key]['importo']
        if not graph_db_product.has_node(fornitore):
            graph_db_product.add_node(fornitore, type='fornitore')
        for prod in db_dict[key]['venduto']:
            codice_articolo = prod['codice_articolo']
            articolo = "prod_" + codice_articolo
            if not graph_db_product.has_node(articolo):
                graph_db_product.add_node(articolo, type='materia', label= articolo, name= articolo, weight= 1)
        graph_db_product.add_edge(articolo, fornitore, type='money_neg', label= importo, name= importo, weight= importo)

    # CONTINUARE DA QUI
    elif key_start == 'fattura':
        documento = db_dict[key]['id_documento']
        cliente = db_dict[key]['id_cliente']
        azienda = db_dict[key]['id_azienda']
        importo = db_dict[key]['importo']
        if not graph_db_product.has_node(cliente):
            graph_db_product.add_node(cliente, type='cliente')
        if not graph_db_product.has_node(azienda):
            graph_db_product.add_node(azienda, type='azienda')
        graph_db_product.add_edge(azienda, cliente, type='document', label= id_documento, name= id_documento, weight= 1)
        graph_db_product.add_edge(cliente, azienda, type='money_pos', label= str(importo), name= str(importo), weight= importo)
        for prod in db_dict[key]['venduto']:
            codice_articolo = prod['codice_articolo']
            graph_db_product.add_edge(azienda, cliente, type='materia', label= codice_articolo, name= codice_articolo, weight= 1)

In [24]:
graph_dict = {'all': graph_db, 'money': graph_db_money, 'materia': graph_db_materia, 'document': graph_db_document}

In [25]:
def create_graph(graph, name):
    # create pyvis network
    nt = Network(height="1024px", width="100%",notebook=True, directed=True, neighborhood_highlight=True, select_menu=False, filter_menu=True, cdn_resources='in_line')
    #nt.from_nx(mrg)
    node_colors = {'fornitore': '#e0e098', 'azienda': '#76cc76', 'cliente': '#9d6ec9'}
    edge_colors = {'materia': 'black', 'money_neg': 'red', 'money_pos': 'green', 'document': 'blue'}
    for node in graph.nodes(data=True):
        node_name = node[0]
        node_type = node[1]['type']
        nt.add_node(node_name, title=node, color=node_colors[node_type])
    for edge in graph.edges(data=True):
        val = edge[2]['weight']
        edge_type = edge[2]['type']
        val = round(val, 2)
        label = edge[2]['label']
        try :
            label = float(label)
        except:
            label = edge[2]['label']
        if isinstance(label, float):
            label = round(label, 0)
        nt.add_edge(edge[0], edge[1], 
                    value=val, 
                    weight=val,
                    title=str(val),
                    label=str(label),
                    color=edge_colors[edge_type],
        )
    nt.force_atlas_2based(gravity=-50, overlap=1.0, central_gravity=0.005)
    nt.show_buttons(filter_=['physics'])#,'nodes'
    #nt.set_options("""
    #const options = {
    #  "borderWidth": 3,
    #  "borderWidthSelected": 5,
    #  "nodes": {
    #    "font": {
    #      "size": 42,
    #      "face": "verdana",
    #      "strokeWidth": 2
    #    },
    #    "size": 30
    #  },
    #  "physics": {
    #    "forceAtlas2Based": {
    #      "gravitationalConstant": -100,
    #      "centralGravity": 0.005,
    #      "springLength": 100,
    #      "avoidOverlap": 1
    #    },
    #    "minVelocity": 0.75,
    #    "solver": "forceAtlas2Based",
    #    "timestep": 1
    #  }
    #}
    #                """)
    nt.show("graph_analysis/"+name+".html")

In [26]:
for key in graph_dict.keys():
    create_graph(graph_dict[key], key)

graph_analysis/all.html
graph_analysis/money.html


graph_analysis/materia.html
graph_analysis/document.html


In [27]:
# for graph_money calculate in and out degree, betweenness and closeness, vitaility

def calculate_metrics(graph):
    in_degree = dict(graph.in_degree(weight='weight'))
    out_degree = dict(graph.out_degree(weight='weight'))
    betweenness = nx.betweenness_centrality(graph, weight='weight')
    closeness = nx.closeness_centrality(graph, distance='weight')
    #vitality = nx.closeness_vitality(graph, weight='weight')
    metrics = {}
    for node in graph.nodes():
        metrics[node] = {'in_degree': in_degree[node], 'out_degree': out_degree[node], 'betweenness': betweenness[node], 'closeness': closeness[node]}#, 'vitality': vitality[node]}
    return pd.DataFrame(metrics).T

In [28]:
df_metrics = calculate_metrics(graph_db)
df_metrics.to_csv('graph_analysis/metrics_money.csv')
df_metrics

Unnamed: 0,in_degree,out_degree,betweenness,closeness
fornitore_1964,47.5,2.0,0.073099,4.9e-05
mangini_3,1.0,0.0,0.0,4.9e-05
mangini_1,130055.864647,56.5,0.336257,4.9e-05
fornitore_1571,11040.0,1.0,0.0,1.9e-05
mangini_2,4.0,21762.12,0.035088,0.210526
fornitore_597,8500.0,1.0,0.0,2.5e-05
fornitore_5624,0.0,1.0,0.0,4.9e-05
fornitore_3493,620.77,1.0,0.0,0.000339
fornitore_584,1601.35,1.0,0.0,0.000131
cliente_1,2.0,50000.839623,0.0,6.3e-05


In [32]:
# create a dataframe from graph_money
df_money = pd.DataFrame(graph_db_money.edges(data=True), columns=['source', 'target', 'data'])
df_money['type'] = df_money['data'].apply(lambda x: x['type'])
df_money['label'] = df_money['data'].apply(lambda x: x['label'])
df_money['label'] = df_money['label'].apply(lambda x: round(float(x), 2) if isinstance(x, str) else x)
money_neg_sum = df_money[df_money['type'] == 'money_neg']["label"].sum()
money_pos_sum = df_money[df_money['type'] == 'money_pos']["label"].sum()
df_money["label_percentage"] = df_money.apply(lambda x: x['label']/money_neg_sum if x['type'] == 'money_neg' else x['label']/money_pos_sum, axis=1)
df_money.drop(columns=['data'], inplace=True)
#df_money.to_csv('graph_analysis/df_money.csv')
df_money

Unnamed: 0,source,target,type,label,label_percentage
0,mangini_3,fornitore_1964,money_neg,0.0,0.0
1,mangini_1,fornitore_1964,money_neg,47.5,0.002178
2,mangini_1,fornitore_5624,money_neg,0.0,0.0
3,mangini_2,fornitore_1571,money_neg,11040.0,0.506199
4,mangini_2,fornitore_597,money_neg,8500.0,0.389736
5,mangini_2,fornitore_3493,money_neg,620.77,0.028463
6,mangini_2,fornitore_584,money_neg,1601.35,0.073424
7,cliente_1,mangini_1,money_pos,10000.31,0.037024
8,cliente_1,mangini_0,money_pos,40000.53,0.148092
9,cliente_6,mangini_1,money_pos,10001.59,0.037028


In [None]:
node_dict = {}
for node in graph_db_money.nodes(data=True):
    node_dict[node[0]] = node[1]['type']

In [None]:
fig = go.Figure(data=[go.Sankey(
    valueformat = ".0f",
    valuesuffix = "TWh",
    # Define nodes
    node = dict(
      pad = 15,
      thickness = 15,
      line = dict(color = "black", width = 0.5),
      label =  data['data'][0]['node']['label'],
      color =  data['data'][0]['node']['color']
    ),
    # Add links
    link = dict(
      source =  data['data'][0]['link']['source'],
      target =  data['data'][0]['link']['target'],
      value =  data['data'][0]['link']['value'],
      label =  data['data'][0]['link']['label'],
      color =  data['data'][0]['link']['color']
))])

fig.update_layout(title_text="Energy forecast for 2050<br>Source: Department of Energy & Climate Change, Tom Counsell via <a href='https://bost.ocks.org/mike/sankey/'>Mike Bostock</a>",
                  font_size=10)
fig.show()