In [None]:
import copy
import numpy as np
import scipy as sp
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
import pandas as pd
import seaborn as sns

from NEMtropy import DirectedGraph

In [None]:
# @formatter:off
# Restore variables from IPython's %store database into this kernel.
# NOTE(review): presumably they were stored by an earlier notebook - confirm which one.
%store -r df_airports
%store -r airports_dict
%store -r df_merged
# @formatter:on
# Load the airline network from its GML file.
# NOTE(review): later cells call GG.in_degree / GG.out_degree, so the file is
# presumably read as a directed graph - confirm.
GG = nx.read_gml('Graphs/airlines.gml')

In [None]:
# build dbcm - directed binary configuration model, samples from distribution
# Dense adjacency matrix of GG.
# NOTE(review): adj_g is not referenced again in the cells visible here.
adj_g = nx.to_numpy_array(GG)
# Edges of GG as a numpy array; this is what the model is built from.
edges = np.array(GG.edges)
graph_d = DirectedGraph(edgelist=edges)
# Fit the model parameters.
# NOTE(review): "dcm_exp" is presumably NEMtropy's DBCM variant - confirm against the NEMtropy docs.
graph_d.solve_tool(model="dcm_exp")
# Draw one sample using 4 CPUs; a later cell reads it back from 'dbcm/0.txt'.
graph_d.ensemble_sampler(1, cpu_n=4, output_dir='dbcm/')

In [None]:
# Read the sampled edge list back from disk and rebuild it as a directed graph.
# Node labels are kept as strings (dtype=str); they are the labels written by
# the sampler, not the node labels used in GG.
# ndmin=2 guarantees one (source, target) row per edge even when the sample
# contains a single edge (np.loadtxt would otherwise return a flat 1-D pair,
# which add_edges_from cannot interpret as an edge).
edgelist_dbcm = np.loadtxt("dbcm/0.txt", dtype=str, ndmin=2)
GG_RANDOMIZED = nx.DiGraph()
GG_RANDOMIZED.add_edges_from(edgelist_dbcm)

In [None]:
# Centrality measures of the original graph, each a {node: score} dict.
degree_centrality = nx.degree_centrality(GG)
closeness_centrality = nx.closeness_centrality(GG)
betweenness_centrality = nx.betweenness_centrality(GG)
eigenvector_centrality = nx.eigenvector_centrality(GG)

# Store every score on the graph itself as a node attribute.
for attribute_name, scores in (
    ('closeness centrality', closeness_centrality),
    ('degree centrality', degree_centrality),
    ('betweenness centrality', betweenness_centrality),
    ('eigenvector centrality', eigenvector_centrality),
):
    nx.set_node_attributes(GG, scores, attribute_name)

# Parallel lists (scores / display names) for easier plotting.
centralityNames = ['degree', 'closeness', 'betweenness', 'eigenvector']
centralityArr = [
    degree_centrality,
    closeness_centrality,
    betweenness_centrality,
    eigenvector_centrality,
]

In [None]:
# Centrality measures of the randomized graph, each a {node: score} dict
# keyed by the randomized graph's own node labels.
degree_centrality_RAND = nx.degree_centrality(GG_RANDOMIZED)
closeness_centrality_RAND = nx.closeness_centrality(GG_RANDOMIZED)
betweenness_centrality_RAND = nx.betweenness_centrality(GG_RANDOMIZED)
eigenvector_centrality_RAND = nx.eigenvector_centrality(GG_RANDOMIZED)

# Attach the randomized graph's OWN scores as node attributes. Passing the
# original graph's dicts here would either be silently ignored (keys that are
# not nodes of GG_RANDOMIZED are skipped) or attach scores of unrelated nodes.
nx.set_node_attributes(GG_RANDOMIZED, closeness_centrality_RAND, 'closeness centrality')
nx.set_node_attributes(GG_RANDOMIZED, degree_centrality_RAND, 'degree centrality')
nx.set_node_attributes(GG_RANDOMIZED, betweenness_centrality_RAND, 'betweenness centrality')
nx.set_node_attributes(GG_RANDOMIZED, eigenvector_centrality_RAND, 'eigenvector centrality')

# build arrays for easier plotting
centralityArrRAND = [degree_centrality_RAND, closeness_centrality_RAND,
                     betweenness_centrality_RAND, eigenvector_centrality_RAND]
centralityNames = ['degree', 'closeness', 'betweenness', 'eigenvector']

In [None]:
# The DBCM takes the in- and out-degrees as constraints; plot the degree
# distributions of the original and the randomized graph as a reference.

def _overlay_degree_kde(original_degrees, randomized_degrees, title):
    """Draw both degree KDEs on a fresh 300-dpi figure, title it and show it.

    Returns the figure and the axes that both curves were drawn on.
    """
    figure = plt.figure(dpi=300)
    shared_ax = sns.kdeplot(original_degrees, alpha=0.5)
    sns.kdeplot(randomized_degrees, ax=shared_ax, alpha=0.5)
    plt.title(title)
    plt.show()
    return figure, shared_ax


# Total degree
original_degree_view = GG.degree()
randomized_degree_view = GG_RANDOMIZED.degree()
G_DEGS = [original_degree_view[node] for node in GG.nodes()]
G_DEGS_RAND = [randomized_degree_view[node] for node in GG_RANDOMIZED.nodes()]
fig, ax = _overlay_degree_kde(G_DEGS, G_DEGS_RAND, 'Degree Distribution')

# In-degree
GG_in_degree_dict = dict(GG.in_degree)
G_DEGS = [GG_in_degree_dict[node] for node in GG.nodes()]
GG_in_degree_dict = dict(GG_RANDOMIZED.in_degree)
G_DEGS_RAND = [GG_in_degree_dict[node] for node in GG_RANDOMIZED.nodes()]
fig, ax = _overlay_degree_kde(G_DEGS, G_DEGS_RAND, 'In Degree Distribution')

# Out-degree (the lookup dict keeps its historical name although it now
# holds out-degrees)
GG_in_degree_dict = dict(GG.out_degree)
G_DEGS = [GG_in_degree_dict[node] for node in GG.nodes()]
GG_in_degree_dict = dict(GG_RANDOMIZED.out_degree)
G_DEGS_RAND = [GG_in_degree_dict[node] for node in GG_RANDOMIZED.nodes()]
fig, ax = _overlay_degree_kde(G_DEGS, G_DEGS_RAND, 'Out Degree Distribution')

In [None]:
# Sanity-check the betweenness scores before choosing an axis scale.
myarray = list(betweenness_centrality.values())

has_nan = np.any(np.isnan(myarray))
has_inf = np.any(np.isinf(myarray))
print(has_nan)
print(has_inf)

# Smallest betweenness score.
mm = min(myarray)

# note => the values contain 0's, so a log scale cannot be used here !!

In [None]:
# Centrality comparison: one panel per measure, original and randomized
# histograms overlaid on the same axes.
# With seaborn there is no need to build log-spaced bins by hand =>
# https://stackoverflow.com/questions/69573823/log-scale-true-in-seaborn-histplot
stat = 'density'

fig, axes = plt.subplots(2, 2, figsize=(12, 8), constrained_layout=True)

arr = np.array(list(betweenness_centrality.values()))
arrRand = np.array(list(betweenness_centrality_RAND.values()))

# (panel, title, original scores, randomized scores, bins, (log x, log y))
panel_specs = [
    (axes[0, 0], 'Degree centrality',
     degree_centrality, degree_centrality_RAND, 20, (True, True)),
    (axes[0, 1], 'Betweenness centrality',
     betweenness_centrality, betweenness_centrality_RAND, 20, (False, True)),
    (axes[1, 0], 'Eigenvector centrality',
     eigenvector_centrality, eigenvector_centrality_RAND, 30, (False, True)),
    (axes[1, 1], 'Closeness centrality',
     closeness_centrality, closeness_centrality_RAND, 30, (False, True)),
]

for panel, heading, original_scores, randomized_scores, n_bins, log_axes in panel_specs:
    panel.set_title(heading, fontsize=20)
    ax = sns.histplot(
        original_scores.values(),
        ax=panel,
        alpha=0.5,
        label='Original',
        bins=n_bins,
        log_scale=log_axes,
        stat=stat,
    )
    sns.histplot(
        randomized_scores.values(),
        ax=ax,
        alpha=0.5,
        label='Randomized',
        palette='Accent',
        bins=n_bins,
        log_scale=log_axes,
        stat=stat,
    )
    panel.legend(loc='upper right')

plt.savefig('Figures/centrality_comparison.pdf', dpi=300)

In [None]:
# Print the mean of every centrality measure, for the original and for the
# randomized graph, as a numeric reference next to the histograms.
# Each measure is printed exactly once: the closeness pair used to be printed
# twice while the degree pair was missing.
for measure_name, original_scores, randomized_scores in (
    ('closeness', closeness_centrality, closeness_centrality_RAND),
    ('betweenness', betweenness_centrality, betweenness_centrality_RAND),
    ('eigenvector', eigenvector_centrality, eigenvector_centrality_RAND),
    ('degree', degree_centrality, degree_centrality_RAND),
):
    original_mean = np.average(list(original_scores.values()))
    randomized_mean = np.average(list(randomized_scores.values()))
    print(f'Average {measure_name} centrality: {original_mean:.4f}')
    print(f'Average {measure_name} centrality (randomized): {randomized_mean:.4f}')

Due to the high average degree, few differences are discernible.
Even though we reshuffle the edges and the nodes essentially lose their meaning
(nodes are the result of random draws from the ensemble, so even though we can reassign their original identities, those identities carry no meaning),
the centralities do not change much, because the underlying degree distribution remains more or less the same.
Applied to the context of airports, this also means that when we perform a random draw of an air traffic network,
we do not see any significant differences, at least with regard to the centrality measures.
In other words, there is no reason why an airport such as `Charles de Gaulle` should be a hub, rather than some provincial airport in Greenland,
other than the underlying infrastructural, historical and cultural context; everything except the airports themselves is what matters.

Even though the average closeness centrality does not change significantly, the density functions show some meaningful changes:
above 0.1 the distribution is shifted by a non-negligible amount, though,
on average, this seems to be cancelled out by the higher density around 0.


In [None]:
# To compare individual nodes, the randomized graph's node labels must be
# mapped back to our airport ids: the DirectedGraph class uses its own node
# indices, and graph_d.nodes_dict translates those indices to the original ids.

def remap_to_airport_ids(randomized_scores, index_to_id):
    """Re-key a {node index: score} dict by the original node ids.

    Parameters
    ----------
    randomized_scores : dict
        Scores keyed by the randomized graph's node labels (int-like strings).
    index_to_id : dict
        Mapping from integer node index to original node id.

    Returns
    -------
    dict
        Scores keyed by original node id. An index missing from
        ``index_to_id`` ends up under the key ``None``.
    """
    return {
        index_to_id.get(int(node_index)): score
        for node_index, score in randomized_scores.items()
    }


def fill_missing_with_zero(remapped_scores, original_scores):
    """Return a copy of ``remapped_scores`` with a 0 for every node of
    ``original_scores`` that has no entry (e.g. a node that received no
    edge in the sample and is therefore absent from the randomized graph).
    """
    filled = dict(remapped_scores)
    for node in original_scores:
        filled.setdefault(node, 0)
    return filled


# degree
degree_centrality_RAND2 = remap_to_airport_ids(degree_centrality_RAND, graph_d.nodes_dict)
degree_centrality_RAND3 = fill_missing_with_zero(degree_centrality_RAND2, degree_centrality)

# betweenness
betweenness_centrality_RAND2 = remap_to_airport_ids(betweenness_centrality_RAND, graph_d.nodes_dict)
betweenness_centrality_RAND3 = fill_missing_with_zero(betweenness_centrality_RAND2, betweenness_centrality)

# eigenvector
eigenvector_centrality_RAND2 = remap_to_airport_ids(eigenvector_centrality_RAND, graph_d.nodes_dict)
eigenvector_centrality_RAND3 = fill_missing_with_zero(eigenvector_centrality_RAND2, eigenvector_centrality)

# closeness
closeness_centrality_RAND2 = remap_to_airport_ids(closeness_centrality_RAND, graph_d.nodes_dict)
closeness_centrality_RAND3 = fill_missing_with_zero(closeness_centrality_RAND2, closeness_centrality)

fig = plt.figure(dpi=300)
# Pair the two scores node by node. The two dicts have different insertion
# orders, so plotting their .values() directly would pair unrelated nodes.
plt.scatter(
    [degree_centrality[node] for node in degree_centrality],
    [degree_centrality_RAND3[node] for node in degree_centrality],
    alpha=0.3,
)
plt.title('Degree Centrality')
plt.show()

In [None]:
# Flatten the centrality dicts into parallel lists for correlation and
# scatter plots. The randomized lists are looked up by the ORIGINAL dict's
# keys so that position i refers to the same node in both lists; the two
# dicts have different insertion orders, so taking .values() of each
# independently would pair unrelated nodes.
degreeArr = list(degree_centrality.values())
degreeArrR = [degree_centrality_RAND3[node] for node in degree_centrality]
closenessArr = list(closeness_centrality.values())
closenessArrR = [closeness_centrality_RAND3[node] for node in closeness_centrality]
betweennessArr = list(betweenness_centrality.values())
betweennessArrR = [betweenness_centrality_RAND3[node] for node in betweenness_centrality]
eigenvArr = list(eigenvector_centrality.values())
eigenvArrR = [eigenvector_centrality_RAND3[node] for node in eigenvector_centrality]



In [None]:
# Pearson correlation coefficient between the original and the randomized
# scores, one per centrality measure ([0] selects r, dropping the p-value).
# Each coefficient is computed from the arrays of the measure it is named
# after; degree and betweenness used to be swapped.
peaDegree = sp.stats.pearsonr(degreeArr, degreeArrR)[0]
peaBetweenness = sp.stats.pearsonr(betweennessArr, betweennessArrR)[0]
peaCloseness = sp.stats.pearsonr(closenessArr, closenessArrR)[0]
peaEigenv = sp.stats.pearsonr(eigenvArr, eigenvArrR)[0]

In [None]:
# Scatter plots of randomized vs. original centrality, one panel per measure,
# with the Pearson correlation between the two shown in each legend.

# note: randomization should not lose nodes using dbcm. this result is therefore meaningless.
llim = 0
ulim = 0.5
fig, axs = plt.subplots(2, 2, constrained_layout=True, figsize=(12, 10))

# (panel, measure name, original scores, randomized scores, Pearson r)
scatter_panels = [
    (axs[0, 0], 'Betweenness', betweennessArr, betweennessArrR, peaBetweenness),
    (axs[0, 1], 'Degree', degreeArr, degreeArrR, peaDegree),
    (axs[1, 0], 'Closeness', closenessArr, closenessArrR, peaCloseness),
    (axs[1, 1], 'Eigenvector', eigenvArr, eigenvArrR, peaEigenv),
]

for panel, measure, original_values, randomized_values, pearson_r in scatter_panels:
    panel.scatter(
        original_values,
        randomized_values,
        label=f"$r={pearson_r:.2f}$",
        s=10,
        alpha=0.6,
    )
    panel.set_xlabel(measure, fontsize=14)
    panel.set_ylabel(f'{measure} - Randomized', fontsize=14)
    panel.legend(loc='upper left', frameon=True, fontsize=24)
    # Same fixed window on both axes of every panel.
    panel.set_xlim(llim, ulim)
    panel.set_ylim(llim, ulim)

plt.suptitle('Centrality Comparison Original - Randomized', fontsize=24)
# plt.savefig(f'Figures/centrality_scatters.pdf', dpi=300)

# plt.show()