In [None]:
import copy
import numpy as np
import scipy as sp
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
import pandas as pd
import seaborn as sns

from NEMtropy import DirectedGraph

In [None]:
# @formatter:off
%store -r df_airports
%store -r airports_dict
%store -r df_merged
# @formatter:on
GG = nx.read_gml('Graphs/airlines.gml')

In [None]:
# build dbcm - directed binary configuration model, samples from distribution
# Dense adjacency matrix of GG.
# NOTE(review): adj_g is not referenced by any other visible cell - confirm
# whether it is still needed before removing it.
adj_g = nx.to_numpy_array(GG)
# Edge list of GG as an array; this is what gets handed to NEMtropy below.
edges = np.array(GG.edges)
graph_d = DirectedGraph(edgelist=edges)
# Solve the model selected by the "dcm_exp" identifier.
graph_d.solve_tool(model="dcm_exp")
# Sample from the solved model into 'dbcm/'. The first argument is presumably
# the number of samples (the next cell reads the single file 'dbcm/0.txt').
graph_d.ensemble_sampler(1, cpu_n=4, output_dir='dbcm/')

In [None]:
# Load the sampled edge list written by the ensemble sampler; node ids are
# read as strings.
edgelist_dbcm = np.loadtxt("dbcm/0.txt", dtype=str)

# Build a directed graph whose edges are the rows of the sampled edge list.
GG_RANDOMIZED = nx.from_edgelist(edgelist_dbcm, create_using=nx.DiGraph())

In [None]:
# Compute four node-centrality measures on the original graph.
# Each call returns a dict keyed by node.
degree_centrality = nx.degree_centrality(GG)
closeness_centrality = nx.closeness_centrality(GG)
betweenness_centrality = nx.betweenness_centrality(GG)
eigenvector_centrality = nx.eigenvector_centrality(GG)

# Collect the measures and their short names for easier plotting.
centralityNames = ['degree', 'closeness', 'betweenness', 'eigenvector']
centralityArr = [degree_centrality, closeness_centrality,
                 betweenness_centrality, eigenvector_centrality]

# Attach every measure to the nodes of GG as a node attribute.
for attr_name, attr_values in (
        ('closeness centrality', closeness_centrality),
        ('degree centrality', degree_centrality),
        ('betweenness centrality', betweenness_centrality),
        ('eigenvector centrality', eigenvector_centrality),
):
    nx.set_node_attributes(GG, attr_values, attr_name)

In [None]:
# Compute the same four centrality measures on the randomized (sampled) graph.
degree_centrality_RAND = nx.degree_centrality(GG_RANDOMIZED)
closeness_centrality_RAND = nx.closeness_centrality(GG_RANDOMIZED)
betweenness_centrality_RAND = nx.betweenness_centrality(GG_RANDOMIZED)
eigenvector_centrality_RAND = nx.eigenvector_centrality(GG_RANDOMIZED)

# Store the measures on the randomized graph's nodes.
# Fix: these calls used to pass the ORIGINAL graph's dicts (closeness_centrality,
# degree_centrality, ...), so GG_RANDOMIZED never received its own centrality
# values. The attributes must come from the *_RAND dicts, which are keyed by
# GG_RANDOMIZED's node ids.
nx.set_node_attributes(GG_RANDOMIZED, closeness_centrality_RAND, 'closeness centrality')
nx.set_node_attributes(GG_RANDOMIZED, degree_centrality_RAND, 'degree centrality')
nx.set_node_attributes(GG_RANDOMIZED, betweenness_centrality_RAND, 'betweenness centrality')
nx.set_node_attributes(GG_RANDOMIZED, eigenvector_centrality_RAND, 'eigenvector centrality')

# build arrays for easier plotting
centralityArrRAND = [degree_centrality_RAND, closeness_centrality_RAND,
                     betweenness_centrality_RAND, eigenvector_centrality_RAND]
centralityNames = ['degree', 'closeness', 'betweenness', 'eigenvector']

In [None]:
# dbcm takes as constraint in & out degree. visualize as reference.


def _plot_degree_kde(degs_original, degs_randomized, title):
    """Overlay the KDEs of two degree sequences on one figure and show it.

    Parameters
    ----------
    degs_original : list
        Degrees of the nodes of the original graph.
    degs_randomized : list
        Degrees of the nodes of the randomized graph.
    title : str
        Title of the figure.
    """
    fig, ax = plt.subplots(dpi=300)
    # Label both curves and draw a legend so they can be told apart, the same
    # way the centrality comparison figure does.
    sns.kdeplot(degs_original, alpha=0.5, ax=ax, label='Original')
    sns.kdeplot(degs_randomized, alpha=0.5, ax=ax, label='Randomized')
    ax.set_title(title)
    ax.legend(loc='upper right')
    plt.show()


# One figure per degree kind. Iterating a degree view yields (node, degree)
# pairs in node order, so no intermediate dict is needed.
degree_views = [
    ('Degree Distribution', GG.degree, GG_RANDOMIZED.degree),
    ('In Degree Distribution', GG.in_degree, GG_RANDOMIZED.in_degree),
    ('Out Degree Distribution', GG.out_degree, GG_RANDOMIZED.out_degree),
]
for title, view_original, view_randomized in degree_views:
    G_DEGS = [deg for _node, deg in view_original]
    G_DEGS_RAND = [deg for _node, deg in view_randomized]
    _plot_degree_kde(G_DEGS, G_DEGS_RAND, title)

In [None]:
# To check individual nodes, we need to remap the node ids:
# the DirectedGraph class uses different node indices than the ones we originally had,
# so they must be mapped back to our airport ids before we can do comparisons.
# degree_centrality_RAND2 = {}
# for k,v in degree_centrality_RAND.items():
#     realkey = graph_d.nodes_dict.get(int(k))
#     degree_centrality_RAND2[realkey] = v
#


# degree_centrality_RAND3 = copy.deepcopy(degree_centrality_RAND2)
# for k,v in degree_centrality.items():
#
#     if not k in degree_centrality_RAND3:
#         degree_centrality_RAND3[k] = 0

# fig = plt.figure(dpi=300)
# plt.scatter(degree_centrality.values(), degree_centrality_RAND3.values(), alpha=0.3)
# plt.title('Degree Centrality')
# plt.show()


In [None]:
# Build the 2x2 centrality comparison figure: one panel per measure, each
# overlaying the KDE of the original graph's values with the randomized graph's.
# Panels are listed in row-major order of the subplot grid.
panels = [
    ('Degree centrality', degree_centrality, degree_centrality_RAND),
    ('Betweenness centrality', betweenness_centrality, betweenness_centrality_RAND),
    ('Eigenvector centrality', eigenvector_centrality, eigenvector_centrality_RAND),
    ('Closeness centrality', closeness_centrality, closeness_centrality_RAND),
]

fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for ax, (title, original, randomized) in zip(axes.flat, panels):
    ax.set_title(title, fontsize=20)
    # Pass plain lists rather than dict views so seaborn receives each input
    # as a single flat vector of values.
    sns.kdeplot(list(original.values()), alpha=0.5, ax=ax, label='Original')
    sns.kdeplot(list(randomized.values()), alpha=0.5, ax=ax, label='Randomized')
    ax.legend(loc='upper right')

# Save before plt.show() so the figure is written while it is still current.
plt.savefig('Figures/centrality_comparison.pdf', dpi=300)

plt.show()

In [None]:
# Print the average closeness centrality of both graphs for reference.
closeness_values = list(closeness_centrality.values())
closeness_values_RAND = list(closeness_centrality_RAND.values())
print(f'Average closeness centrality: {np.average(closeness_values):.4f}')
print(f'Average closeness centrality (randomized): {np.average(closeness_values_RAND):.4f}')

Due to the high average degree, few differences are discernible.
Even though we reshuffle the edges and the nodes essentially lose their meaning
(nodes are the result of random draws from the ensemble, so even though we can reassign their original identity, it no longer means anything),
the centralities don't change that much, because the fundamental degree distribution remains more or less the same.
Applied to the context of airports, this also means that when we just perform a random draw of an air traffic network,
we don't see any significant differences, at least with regard to the centrality measures.
In other words, there is no reason why an airport such as `Charles de Gaulle` should be a hub, and not some provincial airport in Greenland,
other than the underlying infrastructural, historical and cultural context; everything except the airports themselves is important.

Even though the average closeness centrality does not change significantly, the density functions do show some meaningful changes:
above 0.1 the distribution has been shifted by a not insignificant amount, though,
on average, that seems to be cancelled out by the higher density around 0.