In [None]:
import pandas as pd
import numpy as np
import networkx as nx


# Read the airport catalogue (ISO-8859-1 encoded CSV) and the flight records (Excel workbook).
airports = pd.read_csv('Airports.csv', encoding="ISO-8859-1")
flights = pd.read_excel('Flight Data.xlsx')

# Row counts of both tables.
n_airports, n_flights = airports.shape[0], flights.shape[0]
print(f'Number of Airports : {n_airports},\nNumber of flights : {n_flights}\n')

# First rows of each table as a quick sanity check.
airports_preview = airports.head()
flights_preview = flights.head()
print('Airports examples : \n', airports_preview)
print('\nFlights examples : \n' , flights_preview)

# Number of missing values per column: airports first, flights second.
missing_in_airports = airports.isna().sum()
missing_in_flights = flights.isna().sum()
print('\nChecking for missing values : \n', missing_in_airports, missing_in_flights)

In [None]:
# Aim : Draw maps of the countries of interest as well as a world map to get a global view

from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt

# Whole-world map in the 'cyl' projection; created before any figure, exactly as before.
m = Basemap(projection='cyl', resolution=None,
            llcrnrlon=-180, llcrnrlat=-90,
            urcrnrlon=180, urcrnrlat=90)


def _render_country_map(figsize, west, south, east, north):
    """Open a new figure and draw a 'merc' Basemap of one longitude/latitude box.

    Parameters
    ----------
    figsize : tuple
        Figure size handed to ``plt.figure``.
    west, south, east, north : number
        Lower-left (west, south) and upper-right (east, north) corners of the map,
        in degrees of longitude / latitude.

    Returns
    -------
    tuple
        ``(figure, basemap)``: the figure that was opened and the Basemap drawn on it.
    """
    figure = plt.figure(figsize=figsize, edgecolor='w')
    country_map = Basemap(projection='merc', resolution='i',
                          llcrnrlon=west, llcrnrlat=south,
                          urcrnrlon=east, urcrnrlat=north,
                          lat_ts=0, suppress_ticks=True)
    # Borders, state boundaries, then the Blue Marble background image.
    country_map.drawcountries()
    country_map.drawstates()
    country_map.bluemarble()
    plt.show()
    return figure, country_map


# One map per country of interest; `fig` ends up bound to the last figure created.
fig, uk_map = _render_country_map((10, 10), west=-15, south=45, east=10, north=60)
fig, usa_map = _render_country_map((100, 200), west=-175, south=10, east=-50, north=70)
fig, china_map = _render_country_map((100, 200), west=70, south=10, east=140, north=50)
fig, aus_map = _render_country_map((100, 200), west=100, south=-45, east=170, north=0)



In [None]:
# Aim : Make one data frame with the countries of interest (Australia, China, USA, UK)

# The four countries whose domestic flight networks are analysed.
Countries = ['USA','United Kingdom', 'China', 'Australia']

# A flight is kept when it departs from one of those countries and lands in that same country.
departs_from_selected = flights['Source Country'].isin(Countries)
lands_in_same_country = flights['Target Country'] == flights['Source Country']
our_flights = flights[departs_from_selected & lands_in_same_country]

n_selected = our_flights.shape[0]
print('Number of flights we are interested in in this assignement :',n_selected)


In [None]:
# Aim : Look up the source and target airport coordinates of each flight and project them onto the maps, while cleaning the data (flights whose airports cannot be matched by city name or id are dropped)

res_df = our_flights.copy()

# Indices of the flights that get discarded; each flight appears at most once.
drop_idx = []

# Projected (x, y) position of every airport, keyed by airport id, one dict per map.
world_pos = {}
uk_pos = {}
usa_pos = {}
aus_pos = {}
china_pos = {}

# Longitude/latitude box both airports of a USA flight must fall in to be kept.
usa_max_lon = -40
usa_max_lat = 70
usa_min_lon = -175
usa_min_lat = 10

# Countries that need no bounds check: country name -> (position dict, map to project with).
_country_projections = {
    'Australia': (aus_pos, aus_map),
    'United Kingdom': (uk_pos, uk_map),
    'China': (china_pos, china_map),
}


def _find_airport(city, airport_id):
    """Return the single `airports` row describing an airport, or None.

    The city name is compared with the 'label' column first. If that does not
    give exactly one row, the 'id' column is compared with `airport_id`.
    None is returned when neither lookup yields exactly one row.
    """
    by_city = airports[airports['label'] == city]
    # Compare the row count with 1. The previous `len(frame == 1)` only counted
    # rows, so any number of matches (not just one) was accepted.
    if len(by_city) == 1:
        return by_city
    by_id = airports[airports['id'] == airport_id]
    if len(by_id) == 1:
        return by_id
    return None


def _inside_usa_box(lon, lat):
    """Tell whether (lon, lat) lies strictly inside the USA bounding box."""
    return usa_min_lon < lon < usa_max_lon and usa_min_lat < lat < usa_max_lat


for idx, flight in our_flights.iterrows():
    source_id = flight['Source']
    target_id = flight['Target']
    country = flight['Source Country']

    airport_source = _find_airport(flight['Source City'], source_id)
    airport_target = _find_airport(flight['Target City'], target_id)

    # A flight with an unidentified end point is discarded. It is recorded once
    # even when both ends fail, so the reported count is not inflated.
    if airport_source is None or airport_target is None:
        drop_idx.append(idx)
        continue

    # The latitude column is addressed as 'Lat ' (trailing space), as elsewhere in this notebook.
    source_lon = airport_source['Lon'].values[0]
    source_lat = airport_source['Lat '].values[0]
    target_lon = airport_target['Lon'].values[0]
    target_lat = airport_target['Lat '].values[0]

    # Project both airports onto the world map.
    world_pos[source_id] = m(source_lon, source_lat)
    world_pos[target_id] = m(target_lon, target_lat)

    # Then project them onto the map of the flight's country.
    if country == 'USA':
        if _inside_usa_box(source_lon, source_lat) and _inside_usa_box(target_lon, target_lat):
            usa_pos[source_id] = usa_map(source_lon, source_lat)
            usa_pos[target_id] = usa_map(target_lon, target_lat)
        else:
            # At least one end lies outside the USA box: discard the flight.
            drop_idx.append(idx)
    elif country in _country_projections:
        country_positions, country_map = _country_projections[country]
        country_positions[source_id] = country_map(source_lon, source_lat)
        country_positions[target_id] = country_map(target_lon, target_lat)

res_df = res_df.drop(drop_idx,axis=0)
print(f'We have dropped {len(drop_idx)} flights')


In [None]:
import pandas as pd
import networkx as nx
from itertools import chain

# Aim : Use Networkx to create graphs and project them onto our maps

def draw_map(m, scale=0.2):
    """Draw a shaded-relief background and a faint white graticule on map `m`.

    Parameters
    ----------
    m : map object providing ``shadedrelief``, ``drawparallels`` and ``drawmeridians``
        (the notebook passes a Basemap instance).
    scale : float
        Forwarded to ``m.shadedrelief`` as its ``scale`` keyword.

    Returns
    -------
    None
    """
    # Background image.
    m.shadedrelief(scale=scale)

    # 13 evenly spaced parallels and meridians; each call returns a dictionary
    # whose values hold a list of line objects as their first element.
    parallels = m.drawparallels(np.linspace(-90, 90, 13))
    meridians = m.drawmeridians(np.linspace(-180, 180, 13))

    # Restyle every line, parallels first and meridians second.
    for graticule in (parallels, meridians):
        for entry in graticule.values():
            for grid_line in entry[0]:
                grid_line.set(linestyle='-', alpha=0.3, color='w')

# World graph: one directed edge per retained flight, carrying the flight's 'Weight'.
world_graph = nx.from_pandas_edgelist(
    res_df,
    source='Source',
    target='Target',
    edge_attr='Weight',
    create_using=nx.DiGraph(),
)

fig = plt.figure(figsize=(50, 100), edgecolor='w')
draw_map(m)

# Node size scales with the summed 'Weight' of every flight that starts or ends at the node.
world_node_sizes = []
for node in world_graph.nodes():
    touches_node = (res_df['Source'] == node) | (res_df['Target'] == node)
    world_node_sizes.append(res_df[touches_node]['Weight'].sum()*0.0005)

nx.draw_networkx_edges(G=world_graph, pos=world_pos, edge_color='g', alpha=0.2, arrows=False)
nx.draw_networkx_nodes(G=world_graph, pos=world_pos, node_color='r', alpha=0.8,
                       node_size=world_node_sizes)

plt.savefig('world_map.png')
plt.show()



In [None]:
# Directed graph of the flights that both depart from and arrive in the United Kingdom.
uk_is_domestic = (res_df['Source Country'] == 'United Kingdom') & (res_df['Target Country'] == 'United Kingdom')
uk_graph = nx.from_pandas_edgelist(
    res_df[uk_is_domestic],
    source='Source',
    target='Target',
    edge_attr='Weight',
    create_using=nx.DiGraph(),
)

fig = plt.figure(figsize=(50, 250), edgecolor='w')

# Map background: borders, state boundaries, Blue Marble image.
uk_map.drawcountries()
uk_map.drawstates()
uk_map.bluemarble()

# Node size scales with the summed 'Weight' of all flights touching the airport.
uk_node_sizes = []
for node in uk_graph.nodes():
    touching = res_df[(res_df['Source'] == node) | (res_df['Target'] == node)]
    uk_node_sizes.append(touching['Weight'].sum()*0.005)

nx.draw_networkx_edges(G=uk_graph, pos=uk_pos, edge_color='b', alpha=1, arrows=False)
nx.draw_networkx_nodes(G=uk_graph, pos=uk_pos, node_color='r', alpha=0.8,
                       node_size=uk_node_sizes)

plt.savefig('uk_graph.png')

plt.show()

In [None]:
# Directed graph of the flights that both depart from and arrive in Australia.
aus_is_domestic = (res_df['Source Country'] == 'Australia') & (res_df['Target Country'] == 'Australia')
aus_graph = nx.from_pandas_edgelist(
    res_df[aus_is_domestic],
    source='Source',
    target='Target',
    edge_attr='Weight',
    create_using=nx.DiGraph(),
)

fig = plt.figure(figsize=(50, 250), edgecolor='w')

# Map background: borders, state boundaries, Blue Marble image.
aus_map.drawcountries()
aus_map.drawstates()
aus_map.bluemarble()

# Node size scales with the summed 'Weight' of all flights touching the airport.
aus_node_sizes = []
for node in aus_graph.nodes():
    touching = res_df[(res_df['Source'] == node) | (res_df['Target'] == node)]
    aus_node_sizes.append(touching['Weight'].sum()*0.005)

nx.draw_networkx_edges(G=aus_graph, pos=aus_pos, edge_color='b', alpha=1, arrows=False)
nx.draw_networkx_nodes(G=aus_graph, pos=aus_pos, node_color='r', alpha=0.8,
                       node_size=aus_node_sizes)

plt.savefig('aus_graph.png')
plt.show()

In [None]:
# Directed graph of the flights that both depart from and arrive in the USA.
usa_is_domestic = (res_df['Source Country'] == 'USA') & (res_df['Target Country'] == 'USA')
usa_graph = nx.from_pandas_edgelist(
    res_df[usa_is_domestic],
    source='Source',
    target='Target',
    edge_attr='Weight',
    create_using=nx.DiGraph(),
)

fig = plt.figure(figsize=(50, 250), edgecolor='w')

# Map background: borders, state boundaries, Blue Marble image.
usa_map.drawcountries()
usa_map.drawstates()
usa_map.bluemarble()

# Node size scales with the summed 'Weight' of all flights touching the airport.
usa_node_sizes = []
for node in usa_graph.nodes():
    touching = res_df[(res_df['Source'] == node) | (res_df['Target'] == node)]
    usa_node_sizes.append(touching['Weight'].sum()*0.005)

nx.draw_networkx_edges(G=usa_graph, pos=usa_pos, edge_color='b', alpha=1, arrows=False)
nx.draw_networkx_nodes(G=usa_graph, pos=usa_pos, node_color='r', alpha=0.8,
                       node_size=usa_node_sizes)

plt.savefig('usa_graph.png')
plt.show()

In [None]:
# Directed graph of the flights that both depart from and arrive in China.
china_is_domestic = (res_df['Source Country'] == 'China') & (res_df['Target Country'] == 'China')
china_graph = nx.from_pandas_edgelist(
    res_df[china_is_domestic],
    source='Source',
    target='Target',
    edge_attr='Weight',
    create_using=nx.DiGraph(),
)

fig = plt.figure(figsize=(50, 250), edgecolor='w')

# Map background: borders, state boundaries, Blue Marble image.
china_map.drawcountries()
china_map.drawstates()
china_map.bluemarble()

# Node size scales with the summed 'Weight' of all flights touching the airport.
china_node_sizes = []
for node in china_graph.nodes():
    touching = res_df[(res_df['Source'] == node) | (res_df['Target'] == node)]
    china_node_sizes.append(touching['Weight'].sum()*0.005)

nx.draw_networkx_edges(G=china_graph, pos=china_pos, edge_color='b', alpha=1, arrows=False)
nx.draw_networkx_nodes(G=china_graph, pos=china_pos, node_color='r', alpha=0.8,
                       node_size=china_node_sizes)

plt.savefig('china_graph.png')
plt.show()

In [None]:
# Parallel lists: the i-th entries of all four describe the same country.
countries = ['USA','United Kingdom', 'China', 'Australia']
graphs = [usa_graph, uk_graph, china_graph, aus_graph]
maps = [usa_map, uk_map, china_map, aus_map]
positions = [usa_pos, uk_pos, china_pos, aus_pos]

# One panel per country on a 2 x 2 grid.
fig, axs = plt.subplots(2, 2, figsize=(12,12))
axs = axs.flatten()

for ax, graph, country in zip(axs, graphs, countries):

    # Weighted degrees (edge attribute 'Weight'), largest first.
    degree_sequence = sorted((d for n, d in graph.degree(weight='Weight')), reverse=True)

    # 1-based rank of each entry of the sorted sequence.
    ranks = range(1, len(degree_sequence)+1)

    # Degree against rank, with a logarithmic y axis.
    ax.semilogy(ranks, degree_sequence, 'b-', marker='o')
    ax.set_xlabel('Rank')
    ax.set_ylabel('Weighted Degree (log scale)')
    title = 'Degree rank distribution for '+ country
    ax.set_title(title)

plt.savefig('Degree_distribution.png')
plt.show()

# Same plot for the graph holding all four countries together.
fig = plt.figure(figsize=(12, 12), edgecolor='w')
degree_sequence = sorted((d for n, d in world_graph.degree(weight='Weight')), reverse=True)
ranks = range(1, len(degree_sequence)+1)

plt.semilogy(ranks, degree_sequence, 'b-', marker='o')
plt.xlabel('Rank')
plt.ylabel('Weighted Degree (log scale)')
title = 'Degree rank distribution for all countries'
plt.title(title)
plt.savefig('degree_distribution_all.png')
plt.show()



In [None]:
# One scatter panel per country on a 2 x 2 grid.
fig, axs = plt.subplots(2, 2, figsize=(12,12))
axs = axs.flatten()

for ax, graph, country in zip(axs, graphs, countries):

    # Betweenness centrality of every node of this country's graph.
    betweenness = nx.betweenness_centrality(graph)

    # Degree and betweenness listed in the same node order so the points pair up.
    nodes = list(graph.nodes())
    degrees = [graph.degree(n) for n in nodes]
    betweennesses = [betweenness[n] for n in nodes]

    # One point per airport: degree on x, betweenness on y.
    ax.scatter(degrees, betweennesses)
    ax.set_xlabel('Degree')
    ax.set_ylabel('Betweenness')
    title = 'Degree vs betweenness distribution for '+ country
    ax.set_title(title)

plt.savefig('Degree_Betweenness.png')
plt.show()

# Same scatter for the graph holding all four countries together.
fig = plt.figure(figsize=(12, 12), edgecolor='w')

# Use world_graph here. The previous code reused the loop variable `graph`, which
# still pointed at the last country graph, so this figure repeated that country's panel.
betweenness = nx.betweenness_centrality(world_graph)

nodes = list(world_graph.nodes())
degrees = [world_graph.degree(n) for n in nodes]
betweennesses = [betweenness[n] for n in nodes]

plt.scatter(degrees, betweennesses)
plt.xlabel('Degree')
plt.ylabel('Betweenness')
title = 'Degree vs betweenness for all countries'
plt.title(title)
plt.savefig('degree_Betweenness_all.png')
plt.show()


In [None]:
# Aim : compute assortativity for each graph and export as a latex table

def _assortativity_triplet(network):
    """Return ``[r, r_in, r_out]`` for `network`.

    ``r`` is the degree assortativity coefficient with networkx's default
    arguments, ``r_in`` the one computed with ``x='in', y='in'`` and ``r_out``
    the one computed with ``x='out', y='out'``.
    """
    return [
        nx.degree_assortativity_coefficient(network),
        nx.degree_assortativity_coefficient(network, x='in', y='in'),
        nx.degree_assortativity_coefficient(network, x='out', y='out'),
    ]


# Result table: one row per graph.
r_df = pd.DataFrame(columns=['Graph','r','r_in','r_out'])

# Country graphs fill rows 0..3.
i = 0
for graph, country in zip(graphs, countries):
    r, r_in, r_out = _assortativity_triplet(graph)
    r_df.loc[i] = [country, r, r_in, r_out]
    i += 1

# The graph holding all countries goes in the last row.
r, r_in, r_out = _assortativity_triplet(world_graph)
r_df.loc[i] = ["Global Network", r, r_in, r_out]

# Emit the table as LaTeX source.
print(r_df.to_latex())

In [None]:
from networkx import core_number

# Aim : Apply graphical method to find core communities

# One panel per country on a 2 x 2 grid.
fig, axs = plt.subplots(2, 2, figsize=(20,20))
axs = axs.flatten()

# Country name -> list of node ids forming its core community (filled in below).
core_communities = {'USA' : [], 'United Kingdom' : [], 'China' : [], 'Australia' : []}

for ax, graph, country in zip(axs, graphs, countries):

    # Weighted degree of each node (edge attribute 'Weight').
    degrees = graph.degree(weight='Weight')

    # (node, degree) pairs, highest degree first.
    sorted_nodes = sorted(degrees, key=lambda pair: pair[1], reverse=True)

    # k+ of a node: number of its neighbours (as returned by graph.neighbors)
    # whose weighted degree is strictly higher than its own.
    k_plus = [
        sum(degrees[neighbor] > node_degree for neighbor in graph.neighbors(node))
        for node, node_degree in sorted_nodes
    ]

    # Smooth with a moving average over 5 consecutive values.
    averaged_k_plus = np.convolve(k_plus, np.ones(5)/5, mode='valid')

    ranks = range(1, len(averaged_k_plus)+1)

    # The rank at which the smoothed k+ peaks is taken as the core size.
    argmax = np.argmax(averaged_k_plus)
    core_size = ranks[argmax]
    print('Core community size with our method for ',country,' : ',core_size)

    # Keep exactly `core_size` top-ranked nodes so the stored core matches the
    # printed size. The previous slice `[:argmax-1]` was two nodes short and,
    # for argmax == 0, became `[:-1]`, i.e. every node but the last.
    core_communities[country] = [node for node, _ in sorted_nodes[:core_size]]

    # Smoothed k+ against rank, with the core boundary marked on the
    # same 1-based rank axis.
    ax.plot(ranks, averaged_k_plus)
    ax.axvline(x=core_size, color = 'red')

    ax.set_xlabel('Rank')
    ax.set_ylabel('Number of connections to higher degree nodes')
    title = 'Core periphery structure for '+ country
    ax.set_title(title)

plt.savefig('CPS.png')
plt.show()


In [None]:
# Plotting graphs generated from above graphical method

for graph, country, basemap, pos, core in zip(graphs, countries, maps, positions, core_communities.values()):
    fig = plt.figure(figsize=(50, 50), edgecolor='w')

    # Core nodes are drawn orange, every other node red, in graph.nodes order.
    colors = []
    for node in graph.nodes:
        colors.append('orange' if node in core else 'red')
    print(core)

    # Map background: borders, state boundaries, Blue Marble image.
    basemap.drawcountries()
    basemap.drawstates()
    basemap.bluemarble()

    # The USA graph uses a smaller node-size factor than the other countries.
    size = 0.001 if country == 'USA' else 0.01

    # Node size scales with the summed 'Weight' of all flights touching the airport.
    node_sizes = []
    for node in graph.nodes():
        touching = res_df[(res_df['Source'] == node) | (res_df['Target'] == node)]
        node_sizes.append(touching['Weight'].sum()*size)

    nx.draw_networkx_edges(G=graph, pos=pos, edge_color='b', alpha=1, arrows=False)
    nx.draw_networkx_nodes(G=graph, pos=pos, node_color=colors, alpha=0.8,
                           node_size=node_sizes)
    plt.savefig(country+'_graphical')


In [None]:
# Aim : Apply Louvain's method to find core communities 

import networkx.algorithms.community as nx_comm


for graph, country, basemap, pos in zip(graphs, countries, maps, positions):

    # Louvain partition of this country's graph (fixed seed, as before).
    partition = nx_comm.louvain_communities(graph, seed=123)

    # Map every node to the index of the community that contains it.
    node_to_community = {}
    for community_index, community in enumerate(partition):
        for node in community:
            node_to_community[node] = community_index

    fig = plt.figure(figsize=(50, 250), edgecolor='w')

    # One colour per node, listed in graph.nodes order so that colour k belongs
    # to the k-th node drawn. The previous list followed the insertion order of
    # node_to_community, which does not match graph.nodes.
    # The denominator is floored at 1 to avoid dividing by zero when the
    # partition has a single community.
    highest_index = max(len(partition) - 1, 1)
    colors = [plt.cm.viridis(node_to_community[node] / highest_index) for node in graph.nodes]

    # NOTE(review): a stray `break` used to sit here and made everything below
    # unreachable. It is removed so the maps are actually drawn. Confirm that
    # the `break` was a debugging leftover rather than a way to disable the cell.
    basemap.drawcountries()
    basemap.drawstates()
    basemap.bluemarble()

    # The USA graph uses a smaller node-size factor than the other countries.
    if country == 'USA':
        size = 0.001
    else:
        size = 0.01

    nx.draw_networkx_edges(G = graph, pos =pos , edge_color='b', alpha=1, arrows = False)
    nx.draw_networkx_nodes(G = graph, pos =pos , node_color = colors, alpha = 0.8,
                       node_size = [res_df[(res_df['Source'] == node) | (res_df['Target'] == node)]['Weight'].sum()*size for node in graph.nodes()])

    fig.savefig(country)


In [None]:
for graph, country, basemap, pos in zip(graphs, countries, maps, positions):

    # Louvain partition of this country's graph (fixed seed, as before).
    partition = nx_comm.louvain_communities(graph, seed=123)

    # The largest community is treated as the core; on ties the first one in the
    # partition is kept. The size is no longer stored in a variable named `m`,
    # because that overwrote the world Basemap `m` used by earlier cells and
    # broke any re-run of them.
    community_core = max(partition, key=len)
    print('Community core size : ',len(community_core))

    # Core nodes are drawn orange, every other node red, in graph.nodes order.
    colors = ['orange' if node in community_core else 'red' for node in graph.nodes]

    fig = plt.figure(figsize=(50, 50), edgecolor='w')

    # Map background: borders, state boundaries, Blue Marble image.
    basemap.drawcountries()
    basemap.drawstates()
    basemap.bluemarble()

    # The USA graph uses a smaller node-size factor than the other countries.
    if country == 'USA':
        size = 0.001
    else:
        size = 0.01

    nx.draw_networkx_edges(G = graph, pos =pos , edge_color='b', alpha=1, arrows = False)
    nx.draw_networkx_nodes(G = graph, pos =pos , node_color = colors, alpha = 0.8,
                       node_size = [res_df[(res_df['Source'] == node) | (res_df['Target'] == node)]['Weight'].sum()*size for node in graph.nodes()])

    # Save before showing so the file is written while the figure is still open.
    fig.savefig(country)
    plt.show()

