In [3]:
import igraph as ig
import utils
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [4]:
g22 = ig.Graph.Read('../../data/graphs/april2022_Lspace.graphml')
g21 = ig.Graph.Read('../../data/graphs/gen2021_Lspace.graphml')
g19 = ig.Graph.Read('../../data/graphs/december2019_Lspace.graphml')

### Indegree Centrality

In [5]:
in22 = np.array(g22.degree(g22.vs, mode="in")).astype(int)
in21 = np.array(g21.degree(g21.vs, mode="in")).astype(int)
in19 = np.array(g19.degree(g19.vs, mode="in")).astype(int)

In [6]:
w_in22 = np.array(g22.strength(g22.vs, mode="in", loops=True, weights=g22.es['num_train'])).astype(int)
w_in21 = np.array(g21.strength(g21.vs, mode="in", loops=True, weights=g21.es['num_train'])).astype(int)
w_in19 = np.array(g19.strength(g19.vs, mode="in", loops=True, weights=g19.es['num_train'])).astype(int)

### Outdegree Centrality

In [7]:
out22 = np.array(g22.degree(g22.vs, mode="out")).astype(int)
out21 = np.array(g21.degree(g21.vs, mode="out")).astype(int)
out19 = np.array(g19.degree(g19.vs, mode="out")).astype(int)

In [8]:
w_out22 = np.array(g22.strength(g22.vs, mode="out", loops=True, weights=g22.es['num_train'])).astype(int)
w_out21 = np.array(g21.strength(g21.vs, mode="out", loops=True, weights=g21.es['num_train'])).astype(int)
w_out19 = np.array(g19.strength(g19.vs, mode="out", loops=True, weights=g19.es['num_train'])).astype(int)

### Betweeness

In [9]:
bw22 = np.array(g22.betweenness(directed=True)).astype(int)
bw21 = np.array(g21.betweenness(directed=True)).astype(int)
bw19 = np.array(g19.betweenness(directed=True)).astype(int)

In [10]:
w_bw22 = np.array(g22.betweenness(directed=True, weights=g22.es['num_train'])).astype(int)
w_bw21 = np.array(g21.betweenness(directed=True, weights=g21.es['num_train'])).astype(int)
w_bw19 = np.array(g19.betweenness(directed=True, weights=g19.es['num_train'])).astype(int)

### Closeness

In [11]:
cl22 = np.array(g22.closeness())
cl21 = np.array(g21.closeness())
cl19 = np.array(g19.closeness())

In [12]:
w_cl22 = np.array(g22.closeness(weights=g22.es['num_train']))
w_cl21 = np.array(g21.closeness(weights=g21.es['num_train']))
w_cl19 = np.array(g19.closeness(weights=g19.es['num_train']))

### Pagerank

In [13]:
pr22 = np.array(g22.pagerank(directed=True))
pr21 = np.array(g21.pagerank(directed=True))
pr19 = np.array(g19.pagerank(directed=True))

In [14]:
w_pr22 = np.array(g22.pagerank(directed=True, weights=g22.es["num_train"]))
w_pr21 = np.array(g21.pagerank(directed=True, weights=g21.es["num_train"]))
w_pr19 = np.array(g19.pagerank(directed=True, weights=g19.es["num_train"]))

# Merge analysis

In [96]:
def populate_rank(g, rank: dict, m_values):
    for val in m_values:
        position = 0
        for station in utils.top_n_indices(val, len(val)):
            position_value = (len(val)-position)/len(val)
            key = g.vs[station]['label']
            rank[key] = rank[key] + position_value
            position += 1
    return rank    

In [97]:
rank22 = dict()
rank21 = dict()
rank19 = dict()

In [98]:
for i in g22.vs:
    rank22[i["label"]] = 0 
for i in g21.vs:
    rank21[i["label"]] = 0
for i in g19.vs:
    rank19[i["label"]] = 0 


In [104]:
rank22 = populate_rank(g22, rank22, [in22, out22, bw22, cl22, pr22])
rank21 = populate_rank(g21, rank21, [in21, out21, bw21, cl21, pr21])
rank19 = populate_rank(g19, rank19, [in19, out19, bw19, cl19, pr19])

In [107]:
key_sorted22 = sorted(rank22, key=rank22.get, reverse=True)
key_sorted21 = sorted(rank21, key=rank21.get, reverse=True)
key_sorted19 = sorted(rank19, key=rank19.get, reverse=True)

In [113]:
print("2022")
for i in range(0,10):
    print(key_sorted22[i], " : ", rank22[key_sorted22[i]])


2022
MILANO LAMBRATE  :  9.971962616822431
MONZA  :  9.906542056074768
TREVIGLIO  :  9.883177570093459
MILANO ROGOREDO  :  9.85514018691589
RHO FIERA  :  9.817757009345794
PAVIA  :  9.799065420560748
BRESCIA  :  9.761682242990654
MILANO PORTA GARIBALDI  :  9.757009345794394
LECCO  :  9.7196261682243
GALLARATE  :  9.719626168224298


In [114]:
print("2021")
for i in range(0,10):
    print(key_sorted21[i], " : ", rank22[key_sorted21[i]])


2021
MILANO LAMBRATE  :  9.971962616822431
MONZA  :  9.906542056074768
TREVIGLIO  :  9.883177570093459
LECCO  :  9.7196261682243
MILANO ROGOREDO  :  9.85514018691589
PAVIA  :  9.799065420560748
RHO FIERA  :  9.817757009345794
GALLARATE  :  9.719626168224298
MILANO BOVISA FNM  :  9.710280373831775
SARONNO  :  9.710280373831775


In [115]:
print("2019")
for i in range(0,10):
    print(key_sorted19[i], " : ", rank22[key_sorted19[i]])

2019
MILANO LAMBRATE  :  9.971962616822431
MONZA  :  9.906542056074768
TREVIGLIO  :  9.883177570093459
MILANO BOVISA FNM  :  9.710280373831775
LECCO  :  9.7196261682243
MILANO PORTA GARIBALDI  :  9.757009345794394
RHO FIERA  :  9.817757009345794
MILANO ROGOREDO  :  9.85514018691589
SARONNO  :  9.710280373831775
PAVIA  :  9.799065420560748


## Saving Tables

In [53]:
def station_extraction(g, val):
    stations = []
    for i in range(0,3):
        for station in utils.top_n_indices(val[i], 5):
            stations.append(g[i].vs[station]['label'])
    return list(set(stations))  

In [55]:
def populate_df(station, column, df, l, g):
    index_name=0
    for i in range(0,3):
        list_to_append = []
        for name in station:
            for j in utils.top_n_indices(l[i], len(l[i])):
                if j == g[i].vs.find(label = name).index:
                    if type(l[i][j]) == int:
                        list_to_append.append(l[i][j])
                    else:
                        list_to_append.append(round(l[i][j],4))
        df[column[index_name]] = list_to_append
        index_name = index_name + 1
    return df

In [77]:
def table_write(table, weighted, name):
    if weighted == True:
        full_table_path = f'../../data/tables/{name}_weighted_centrality_table.csv'
        table.to_csv(full_table_path)
    else:
        full_table_path = f'../../data/tables/{name}_centrality_table.csv'
        table.to_csv(full_table_path)

In [79]:
column_names = ['2022', '2021', '2019']

In [78]:
stations = [] 
graphs = [g22, g21, g19]
values = [[in22, in21, in19], [out22, out21, out19], [bw22, bw21, bw19], [cl22, cl21, cl19], [pr22, pr21, pr19], \
    [w_in22, w_in21, w_in19], [w_out22, w_out21, w_out19], [w_bw22, w_bw21, w_bw19], [w_cl22, w_cl21, w_cl19], [w_pr22, w_pr21, w_pr19]]
tables_name = ["in" , "out", "betweeness", "closeness", "pagerank",\
     "weighted_in", "weighted_out", "weighted_betweeness", "weighted_closeness", "weighted_pagerank"]
for r in range(0, len(values)):
    stations = station_extraction(graphs, values[r])
    stations.sort()
    table = pd.DataFrame({'Names' : stations})
    table = populate_df(stations, column_names, table, values[r], graphs)
    if tables_name[r].startswith("w"):
        table_write(table, True, tables_name[r])
    else:
        table_write(table, False, tables_name[r])