In [None]:
import copy
import numpy as np
import scipy as sp
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
import pandas as pd
from mpl_toolkits.basemap import Basemap
import seaborn as sns



Read the data stored by the data_preparation notebook (which must have been run beforehand).

In [None]:
# Restore the variables persisted with IPython's %store magic; per the note
# above, the data_preparation notebook must have been run beforehand.
# @formatter:off
%store -r df_airports
%store -r airports_dict
%store -r df_merged
# @formatter:on
# Load the graph saved in GML format; its nodes carry 'Longitude' and
# 'Latitude' attributes that are read further down in this notebook.
GG = nx.read_gml('Graphs/airlines.gml')


Calculate the centrality metrics for the network and set them as node attributes. For easier plotting,
we also collect the centralities in a list.

In [None]:
# Compute each centrality measure once; every result is a dict keyed by node.
degree_centrality = nx.degree_centrality(GG)
closeness_centrality = nx.closeness_centrality(GG)
betweenness_centrality = nx.betweenness_centrality(GG)
eigenvector_centrality = nx.eigenvector_centrality(GG)

# Store every measure on the graph as a node attribute called
# '<measure> centrality' so it can be looked up by name later on.
for attr_name, scores in (
    ('closeness centrality', closeness_centrality),
    ('degree centrality', degree_centrality),
    ('betweenness centrality', betweenness_centrality),
    ('eigenvector centrality', eigenvector_centrality),
):
    nx.set_node_attributes(GG, scores, attr_name)

# Parallel lists (same order) that the plotting cells iterate over with zip().
centralityNames = ['degree', 'closeness', 'betweenness', 'eigenvector']
centralityArr = [
    degree_centrality,
    closeness_centrality,
    betweenness_centrality,
    eigenvector_centrality,
]


We need to convert the airports' longitude/latitude positions into map-projection coordinates.

c.f. https://matplotlib.org/basemap/users/mapcoords.html

In [None]:
# Whole-globe cylindrical ('cyl') projection, used here only to convert
# geographic coordinates into plot coordinates.
# Named `world_map` rather than `map` so the Python builtin is not shadowed.
world_map = Basemap(projection='cyl', resolution=None,
                    llcrnrlat=-90, urcrnrlat=90,
                    llcrnrlon=-180, urcrnrlon=180)

# Latitude / longitude of every airport record.
# NOTE(review): neither list is referenced by any other cell visible here —
# confirm before removing.
lats = [airport['Latitude'] for airport in airports_dict.values()]
longs = [airport['Longitude'] for airport in airports_dict.values()]

# Node -> (x, y) in map-projection coordinates. The Basemap instance is called
# with (longitude, latitude), in that order.
pos = {}
for node, attrs in GG.nodes(data=True):
    x, y = world_map(attrs['Longitude'], attrs['Latitude'])
    pos[node] = (x, y)

## Centralities
Plot each previously computed centrality on the world map.
These plots use the `viridis` colormap, c.f. https://matplotlib.org/stable/tutorials/colors/colormaps.html
where low values are mapped to purple, mid-range values to blue-green, and high values to yellow.

In [None]:
# Draw one world map per centrality measure. Nodes are coloured by their
# min-max normalised score using the 'viridis' colormap, and each figure is
# written to Figures/<name>_centrality_world.pdf.

# plt.get_cmap is used because plt.cm.get_cmap was removed in Matplotlib 3.9.
viridis = plt.get_cmap('viridis')

for centrality, name in zip(centralityArr, centralityNames):
    minCentralityVal = min(centrality.values())
    maxCentralityVal = max(centrality.values())
    print(name, minCentralityVal, maxCentralityVal)

    # Normalise the scores into [0, 1] for the colormap. Values are read back
    # from the node attributes so their order matches the graph's node order.
    ff = np.array(list(nx.get_node_attributes(
        GG, f'{name} centrality').values()))
    span = maxCentralityVal - minCentralityVal
    if span > 0:
        CC = list((ff - minCentralityVal) / span)
    else:
        # All scores are identical: avoid a 0/0 division and use one colour.
        CC = [0.0] * len(ff)

    fig2 = plt.figure(dpi=1000, figsize=(8, 6))
    # Named `world_map` rather than `map` so the Python builtin is not shadowed.
    world_map = Basemap(projection='cyl', resolution=None,
                        llcrnrlat=-90, urcrnrlat=90,
                        llcrnrlon=-180, urcrnrlon=180)
    world_map.bluemarble(alpha=0.9)

    # Keyword arguments shared by both drawing passes.
    draw_kwargs = dict(pos=pos, node_size=1, node_color=CC,
                       vmin=min(CC), vmax=max(CC), cmap=viridis,
                       arrows=False, with_labels=False, edge_color='w',
                       node_shape='.')
    # Pass 1: thin edges (and faint nodes) at alpha 0.1.
    nx.draw_networkx(GG, width=0.005, alpha=0.1, **draw_kwargs)
    # Pass 2: edge width 0, so effectively only the nodes, at alpha 0.8.
    nx.draw_networkx(GG, width=0.0, alpha=0.8, **draw_kwargs)

    plt.title(f'{name.capitalize()} Centrality')
    plt.savefig(f'Figures/{name}_centrality_world.pdf', dpi=1000)
    # Show and release each figure inside the loop so the high-resolution
    # figures do not all stay open until the cell finishes.
    plt.show()
    plt.close(fig2)


## Degree Distribution

In [None]:
# Degree of every node, in the graph's node order. Iterating the degree view
# once avoids rebuilding it for every node.
G_DEGS = [degree for _, degree in GG.degree()]
# 30 logarithmically spaced bin edges covering the observed degrees. The lower
# edge is clamped to 1 because log10(0) is -inf, which would make the edges
# invalid if the graph contained an isolated node.
bins = np.logspace(np.log10(max(min(G_DEGS), 1)), np.log10(max(G_DEGS) + 1), 30)
# take the average value of all degrees
AVG_VAL = np.average(G_DEGS)

In [None]:
# Log-log histogram of the node degrees, with the average degree marked by a
# vertical line; saved to Figures/degree_distribution.pdf.
fig = plt.figure(dpi=300, figsize=(6, 6))
plt.hist(G_DEGS, bins=bins, density=True, alpha=0.8)
# The original call passed two conflicting colours (color='r' and the alias
# c='orange'); a single explicit colour is given instead, keeping orange.
plt.axvline(x=AVG_VAL, color='orange', label='Average Degree')
plt.xlabel('Degree $k$', fontsize=14)
# Raw string: '\m' is not a valid escape sequence in a normal string literal.
plt.ylabel(r'$\mathcal{P}(k)$', fontsize=14)
plt.title('Degree Distribution', fontsize=20)
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.savefig('Figures/degree_distribution.pdf', dpi=1000)
plt.show()

In [None]:
# Scatter all four centrality measures against the airport id on one axis and
# save the result to Figures/centralities.pdf.
fig, ax = plt.subplots(dpi=300, figsize=(8, 4))

# One marker-only series (no connecting line) per measure.
for scores_by_airport, series_label in (
    (degree_centrality, 'Degree Centrality'),
    (closeness_centrality, 'Closeness Centrality'),
    (betweenness_centrality, 'Betweenness Centrality'),
    (eigenvector_centrality, 'Eigenvector Centrality'),
):
    plt.plot(scores_by_airport.keys(), scores_by_airport.values(),
             linewidth=0, marker='.', label=series_label, alpha=0.3)

plt.title('Centralities', fontsize=20)
# Limit the number of x tick labels so they stay readable.
ax.xaxis.set_major_locator(plt.MaxNLocator(3))
plt.xlabel('Airport ID $n$')
plt.ylabel('Centrality value')
plt.legend()
plt.savefig('Figures/centralities.pdf', dpi=1000)
plt.show()

Build a dataframe with airport IDs as rows and the centralities as columns.
This allows us to use seaborn's pair-plotting feature.

In [None]:
# Node -> {measure name -> score}; each node becomes one row of the frame and
# each measure one column.
centralityDict = {
    node: {
        'betweenness_centrality': betweenness_centrality[node],
        'closeness_centrality': closeness_centrality[node],
        'degree_centrality': degree_centrality[node],
        'eigenvector_centrality': eigenvector_centrality[node],
    }
    for node in GG.nodes()
}
df_centralities = pd.DataFrame.from_dict(centralityDict, orient='index')

In [None]:
# Pairwise scatter plots of the centrality measures, with KDE plots on the
# diagonal. sns.pairplot creates its own figure, so no plt.figure() call is
# made beforehand (it would only produce an additional, empty figure).
f = sns.pairplot(
    df_centralities,
    diag_kind='kde',
    plot_kws={'alpha': 0.6},
)
f.fig.suptitle('Centralities', y=1.04, fontsize=24)
# The title sits above the figure area (y > 1); bbox_inches='tight' extends
# the saved region so that it is not cut off in the PDF. Saving through the
# grid's own figure makes the target explicit.
f.fig.savefig('Figures/centrality_pairplot.pdf', dpi=100, bbox_inches='tight')

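Get the degree of each node. We want to find the airports with the highest degrees.
Also, compute the unnormalized betweenness centrality so that it can be compared with the normalized values.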

In [None]:
# Degree view of the graph (node -> degree), obtained via the graph's own
# method instead of the module-level helper.
degrees = GG.degree()

In [None]:
# Betweenness centrality computed once more, this time with normalized=False,
# and kept under a separate name so that its value range can be compared with
# the normalized scores.
betweenness_centrality_unnormalized = nx.betweenness_centrality(GG, normalized=False)

In [None]:
# Print the (min, max) range of the normalized and then of the unnormalized
# betweenness centrality.
for scores in (betweenness_centrality, betweenness_centrality_unnormalized):
    print(min(scores.values()), max(scores.values()))

# Label the index of the centrality frame, then print the column dtypes of
# both frames.
df_centralities.index.name = 'Airport ID'
for frame in (df_centralities, df_airports):
    print(frame.dtypes)

In [None]:
# Cast the index of both frames to int (in place), presumably so that the
# index labels compare equal in the join below.
# NOTE(review): assumes df_airports is already indexed by airport id — confirm.
for frame in (df_airports, df_centralities):
    frame.index = frame.index.astype(int)

for frame in (df_centralities, df_airports):
    print(frame.dtypes)

# Left join on the index: every centrality row is kept and the airport columns
# are added where an airport with the same index exists.
df_1 = df_centralities.join(df_airports, how='left')

In [None]:
# Keep only the ten rows with the largest betweenness centrality.
# NOTE(review): this overwrites the full joined frame under the same name, so
# the join cell has to be re-run to get all rows back.
df_1 = df_1.nlargest(n=10, columns='betweenness_centrality')

In [None]:
# Log-log histogram of the betweenness centrality values.
fig = plt.figure()

# Both axes are logarithmic, so only strictly positive scores can be drawn;
# log10(0) would also make the bin edges invalid.
bc_values = np.array(list(betweenness_centrality.values()), dtype=float)
bc_positive = bc_values[bc_values > 0]
if bc_positive.size:
    # 30 logarithmically spaced bin edges over the observed positive range.
    # The original computed such edges but passed bins=25 instead; a separate
    # name is used so the `bins` of the degree histogram are not overwritten.
    bc_bins = np.logspace(np.log10(bc_positive.min()),
                          np.log10(bc_positive.max()), 30)
    plt.hist(bc_positive, bins=bc_bins, density=True)
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Betweenness centrality')
plt.ylabel('Density')
plt.show()

In [None]:
# G_DEGS = [GG.degree()[i] for i in GG.nodes()]
# sns.kdeplot(G_DEGS)