In [101]:
import pandas as pd
import networkx as nx
import pyvis as pv

In [102]:
# Directed graph shared by the following cells: the loading loop adds company
# nodes (with 'type' and 'value' attributes) and edges carrying a 'value' weight.
G = nx.DiGraph()

In [103]:
# Read the list of per-company data files: one path per line of aziende.txt.
with open('aziende.txt') as f:
    # Strip the newline/whitespace around every entry and drop blank lines, so
    # that an empty (e.g. trailing) line never becomes an empty path that the
    # later open(file) call cannot open.
    files_names = [name for name in (line.strip() for line in f) if name]

In [104]:
def _read_groups(path):
    """Split one scraped text file into groups of consecutive data lines.

    A separator is an empty line, a line holding a single tab, or the literal
    line "Follow". Every run of lines between separators becomes one group (a
    list of whitespace-stripped strings); empty groups are discarded.

    Returns a list of groups in file order.
    """
    with open(path) as f:
        lines = f.readlines()
    groups = [[]]
    for line in lines:
        # Compare without the newline so that a final line lacking a trailing
        # "\n" (typically a closing "Follow") is still seen as a separator
        # instead of being appended to the last record as an extra field.
        if line.rstrip('\n') in ('', '\t', 'Follow'):
            groups.append([])
            continue
        groups[-1].append(line.strip())
    return [group for group in groups if group]


# True until the root company of the first file has been added to G.
first = True
for file in files_names:
    # One row per group: row 0 is the company the file belongs to, row 1 is a
    # header line describing it, rows 2+ are the listed companies in order.
    groups = _read_groups(file)

    # create dataframe (groups shorter than three lines are padded, then
    # filled with the string '0')
    df = pd.DataFrame.from_dict(dict(enumerate(groups)), orient='index',
                                columns=["Nome", "Settore", "Followers"])
    df = df.reset_index().rename(columns={'index': 'Posizione'}).fillna('0')
    # Shift so that the first listed company (row 2) has position 1.
    df['Posizione'] = df['Posizione'] - 1
    # Edge weight decays geometrically with the position: 1.0, 0.91, 0.829, ...
    df['Weight'] = df['Posizione'].apply(lambda pos: 0.9 ** int(pos) + 0.1)
    # Keep the leading number of e.g. "1,234 followers" and drop the commas.
    df['Followers'] = df['Followers'].apply(
        lambda text: int(text.split(' ')[0].replace(',', '')))

    root = df.iloc[0]["Nome"]

    # create first node of the dataframe
    # NOTE(review): only the root of the FIRST file receives 'type'/'value'
    # attributes. Roots of later files are created implicitly by add_edge
    # (unless already present) and then carry no attributes - confirm intended.
    if first:
        # The header row is split on whitespace: token 0 is used as the type
        # and token 3 as the follower count (commas removed).
        header = df.iloc[1]["Nome"].split()
        G.add_node(root, type=header[0], value=int(header[3].replace(',', '')))
        first = False

    # create nodes and edges; existing nodes/edges are left untouched so the
    # first occurrence of a company or link wins.
    for row in df.iloc[2:].itertuples(index=False):
        if not G.has_node(row.Nome):
            G.add_node(row.Nome,
                       type=row.Settore,
                       value=int(row.Followers))
        if not G.has_edge(root, row.Nome):
            G.add_edge(root, row.Nome, value=float(row.Weight))

In [105]:
#G.nodes(data=True)

In [106]:
#G.edges(data=True)

In [107]:
# Export G as a node-by-node adjacency matrix (pandas DataFrame) whose cells
# hold the 'value' attribute of the corresponding edge.
adjacency_df = nx.to_pandas_adjacency(G, weight='value')
# Persist the matrix next to the notebook as CSV.
adjacency_df.to_csv('adjacency_matrix.csv')
# Last expression of the cell: display the DataFrame as the cell output.
adjacency_df

Unnamed: 0,Colombo Costruzioni S.p.A.,Impresa Percassi,Impresa Pizzarotti & C. S.p.A.,De Sanctis Costruzioni SpA,Techbau S.p.A.,Carron Spa,Gruppo ICM,Webuild,CMB Società cooperativa Muratori e Braccianti di Carpi,csc costruzioni sa,...,Ricciardello Costruzioni S.p.A.,Todini Costruzioni Generali Spa,TOTO HOLDING SPA,InfraEngineering,Renexia Spa,Clough,Terna SpA,RFI Rete Ferroviaria Italiana,Saipem,Eni
Colombo Costruzioni S.p.A.,0.000000,1.000000,0.910,0.829000,0.756100,0.690490,0.631441,0.578297,0.530467,0.487420,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Impresa Percassi,1.000000,0.000000,0.910,0.829000,0.690490,0.487420,0.631441,0.756100,0.382430,0.285302,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Impresa Pizzarotti & C. S.p.A.,0.578297,0.530467,0.000,0.413811,0.000000,0.000000,0.756100,1.000000,0.221577,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
De Sanctis Costruzioni SpA,1.000000,0.829000,0.910,0.000000,0.354187,0.305891,0.756100,0.690490,0.235085,0.382430,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Techbau S.p.A.,1.000000,0.910000,0.829,0.631441,0.000000,0.756100,0.578297,0.487420,0.530467,0.305891,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Clough,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Terna SpA,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RFI Rete Ferroviaria Italiana,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Saipem,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [108]:
# Create a new undirected graph in which each edge value is the average of the
# directed edge values between the same pair of nodes (or the single value
# when only one direction exists in G).
G2 = nx.Graph()
# Copy the nodes together with their attributes first: add_edge alone would
# create bare nodes, losing the 'type'/'value' data stored on G as well as any
# node without edges.
G2.add_nodes_from(G.nodes(data=True))
for u, v, d in G.edges(data=True):
    if G2.has_edge(u, v):
        # The opposite direction was inserted earlier. A DiGraph holds at most
        # two edges per node pair, so this is the mean of both directions.
        G2[u][v]['value'] = (G2[u][v]['value'] + d['value']) / 2
    else:
        G2.add_edge(u, v, value=d['value'])

In [109]:
# Build the spanning tree of G2 that keeps the strongest links.
# minimum_spanning_tree minimises cost, so every edge value is first mapped
# through the decreasing function 1 / (value + 1): a larger value gives a
# smaller cost.
G3 = nx.Graph()
for u, v, d in G2.edges(data=True):
    G3.add_edge(u, v, value=1 / (d['value'] + 1))
# The costs are stored under 'value'. The default weight key is 'weight', which
# these edges do not have, so without weight='value' every edge would count as
# cost 1 and the resulting tree would ignore the values entirely.
MST = nx.minimum_spanning_tree(G3, weight='value')
# Rebuild the tree with the original G2 values. They are read back from G2
# because 1 / (x + 1) is not its own inverse: applying it a second time does
# not return the original value.
MST2 = nx.Graph()
# Carry over whatever attributes G2 stores on the tree's nodes.
MST2.add_nodes_from((n, G2.nodes[n]) for n in MST.nodes)
for u, v in MST.edges():
    MST2.add_edge(u, v, value=G2[u][v]['value'])

In [110]:
# Render the spanning tree MST2 as an undirected pyvis network, with
# neighbourhood highlighting switched on.
nt = pv.network.Network(
    width="100%",
    height="1024px",
    directed=False,
    neighborhood_highlight=True,
    notebook=True,
)
nt.from_nx(MST2)
# NOTE(review): nodes can only be sized by follower count if MST2's nodes carry
# a 'value' attribute - confirm that they do.
# Restrict the configuration panel to the physics options.
nt.show_buttons(filter_=['physics'])
# Generate the HTML page for this network.
nt.show("MST_simili_online.html")

MST_simili_online.html


In [111]:
# Render the averaged graph G2 as an undirected pyvis network, without
# neighbourhood highlighting.
nt = pv.network.Network(
    width="100%",
    height="1024px",
    directed=False,
    neighborhood_highlight=False,
    notebook=True,
)
nt.from_nx(G2)
# NOTE(review): nodes can only be sized by follower count if G2's nodes carry
# a 'value' attribute - confirm that they do.
# Restrict the configuration panel to the physics options.
nt.show_buttons(filter_=['physics'])
# Generate the HTML page for this network.
nt.show("colombo_linkedin_all_undirected.html")

colombo_linkedin_all_undirected.html


In [112]:
# Render the full directed graph G as a pyvis network, without neighbourhood
# highlighting.
nt = pv.network.Network(
    width="100%",
    height="1024px",
    directed=True,
    neighborhood_highlight=False,
    notebook=True,
)
nt.from_nx(G)
# G's nodes store the follower count under 'value'; presumably this is what
# drives the node size in the rendered network - confirm against pyvis.
# Restrict the configuration panel to the physics options.
nt.show_buttons(filter_=['physics'])
# Generate the HTML page for this network.
nt.show("colombo_linkedin_all.html")

colombo_linkedin_all.html
