In [1]:
import pandas as pd
import numpy as np
import igraph as ig
import pickle
import pycountry
import os
import matplotlib.pyplot as plt

In [2]:
# Read the raw airports table from disk and preview its first rows.
airports_csv_path = './data/airports.csv'
airports_df = pd.read_csv(airports_csv_path, engine='python', sep=',')
airports_df.head()

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone,Type,Source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.44338,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports


In [3]:
# Read the raw routes table from disk and preview its first rows.
routes_csv_path = './data/routes.csv'
routes_df = pd.read_csv(routes_csv_path, engine='python', sep=',')
routes_df.head()

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment
0,2B,410,AER,2965,KZN,2990,,0,CR2
1,2B,410,ASF,2966,KZN,2990,,0,CR2
2,2B,410,ASF,2966,MRV,2962,,0,CR2
3,2B,410,CEK,2968,KZN,2990,,0,CR2
4,2B,410,CEK,2968,OVB,4078,,0,CR2


In [4]:
def is_eu(country_name):
    """Return True when ``country_name`` resolves to one of the listed European countries.

    The name is resolved with ``pycountry.countries.lookup`` and the resulting
    ``alpha_2`` code is tested against a fixed allow-list.

    NOTE(review): despite the function name, the allow-list is wider than the
    European Union (it contains e.g. CH, NO, RU, TR and KZ) — confirm that
    "Europe in a broad sense" is the intended scope.

    Parameters
    ----------
    country_name
        Value to resolve, typically one cell of the ``Country`` column.

    Returns
    -------
    bool
        ``False`` for non-string input (e.g. a NaN cell) and for names that
        pycountry cannot resolve. Any other error is allowed to propagate
        instead of being silently reported as "not European".
    """
    # Missing cells arrive as float NaN; they can never name a country.
    if not isinstance(country_name, str):
        return False

    try:
        country = pycountry.countries.lookup(country_name)
    except LookupError:
        # pycountry signals "no such country" with LookupError.
        return False

    return country.alpha_2 in {
        'AD', 'AL', 'AM', 'AT', 'AZ', 'BA', 'BE', 'BG', 'BY', 'CH', 'CY', 'CZ',
        'DE', 'DK', 'EE', 'ES', 'FI', 'FO', 'FR', 'GB', 'GE', 'GI', 'GR', 'HR',
        'HU', 'IE', 'IS', 'IT', 'KZ', 'LI', 'LT', 'LU', 'LV', 'MC', 'MD', 'ME',
        'MK', 'MT', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'RU', 'SE', 'SI', 'SK',
        'SM', 'TR', 'UA', 'VA',
    }

In [5]:
# Keep only the airports whose country passes is_eu, then renumber the rows from 0.
european_mask = [is_eu(country) for country in airports_df['Country']]
airports_df = airports_df[european_mask].reset_index(drop=True)
airports_df.head()

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone,Type,Source
0,11,Akureyri Airport,Akureyri,Iceland,AEY,BIAR,65.660004,-18.072701,6,0,N,Atlantic/Reykjavik,airport,OurAirports
1,12,Egilsstaðir Airport,Egilsstadir,Iceland,EGS,BIEG,65.283302,-14.4014,76,0,N,Atlantic/Reykjavik,airport,OurAirports
2,13,Hornafjörður Airport,Hofn,Iceland,HFN,BIHN,64.295601,-15.2272,24,0,N,Atlantic/Reykjavik,airport,OurAirports
3,14,Húsavík Airport,Husavik,Iceland,HZK,BIHU,65.952301,-17.426001,48,0,N,Atlantic/Reykjavik,airport,OurAirports
4,15,Ísafjörður Airport,Isafjordur,Iceland,IFJ,BIIS,66.058098,-23.1353,8,0,N,Atlantic/Reykjavik,airport,OurAirports


In [6]:
# Show the row whose 'Airport ID' equals 1382.
airports_df.loc[airports_df['Airport ID'].eq(1382)]

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone,Type,Source
677,1382,Charles de Gaulle International Airport,Paris,France,CDG,LFPG,49.012798,2.55,392,1,E,Europe/Paris,airport,OurAirports


In [7]:
# Directed graph with one vertex per row of airports_df.
graph = ig.Graph(directed=True)

# Plain Python lists, so the vertex attributes hold native Python values.
airport_ids = airports_df['Airport ID'].tolist()

# Add every vertex in a single call; attribute lists are aligned by position.
graph.add_vertices(
    len(airport_ids),
    attributes={
        'name': airports_df['Name'].tolist(),
        'ID': airport_ids,
        'lat': airports_df['Latitude'].tolist(),
        'lon': airports_df['Longitude'].tolist(),
    },
)

# Airport ID -> igraph vertex index. The index is taken from the insertion
# position rather than from the DataFrame's index labels, so the mapping is
# correct whatever index airports_df carries. It is only valid while no
# vertices are deleted from `graph` (igraph renumbers vertices on deletion).
id2index_map = {airport_id: vertex_index for vertex_index, airport_id in enumerate(airport_ids)}

In [8]:
# Translate every usable route into a (source vertex, target vertex) pair.
# A route is skipped when either airport ID is not purely numeric or when
# either airport is absent from id2index_map.
edges = []
route_endpoints = zip(routes_df['Source airport ID'], routes_df['Destination airport ID'])
for raw_source_id, raw_target_id in route_endpoints:
    source_text = raw_source_id.strip()
    target_text = raw_target_id.strip()
    if not (source_text.isdigit() and target_text.isdigit()):
        continue

    # `.get` returns None for unknown airports; compare with `is None`
    # because vertex index 0 is a valid (falsy) result.
    source_vertex = id2index_map.get(int(source_text))
    target_vertex = id2index_map.get(int(target_text))
    if source_vertex is None or target_vertex is None:
        continue

    edges.append((source_vertex, target_vertex))

# One bulk insertion; parallel routes between the same airports stay as
# separate edges.
graph.add_edges(edges)

In [9]:
# Count the edges that have vertex 677 as either endpoint.
sum(1 for source, target in edges if source == 677 or target == 677) # degree of the paris airport

490

In [10]:
# Report the current vertex and edge counts of `graph`.
f'Nodes: {graph.vcount()}, Edges: {graph.ecount()}'

'Nodes: 1544, Edges: 15341'

In [11]:
# Show the vertex index stored for airport ID 1328 next to vertex 623 of `graph`.
mapped_index_1328 = id2index_map[1328]
(mapped_index_1328, graph.vs[623])

(623,
 igraph.Vertex(<igraph.Graph object at 0x00000170004E8140>, 623, {'ID': 1328, 'lat': 47.85020065307617, 'lon': 3.4971098899841317, 'name': 'Auxerre-Branches Airport'}))

In [12]:
# Collect the indices of vertices without any edge, remove them from the
# graph, and display how many were removed.
nodes_with_degree_zero = [vertex.index for vertex in graph.vs if vertex.degree() == 0]
graph.delete_vertices(nodes_with_degree_zero)
len(nodes_with_degree_zero)

1030

In [13]:
# Report the current vertex and edge counts of `graph`.
f'Nodes: {graph.vcount()}, Edges: {graph.ecount()}'

'Nodes: 514, Edges: 15341'

In [14]:
# Mean of the per-vertex degrees returned by graph.degree().
avg_degree = np.mean(graph.degree())
avg_degree

59.69260700389105

In [15]:
# Split the graph into strongly connected components and keep the giant one.
sccs = graph.connected_components(mode='strong')
g = lcc = sccs.giant()

# Vertex and edge counts of the retained component.
f'Nodes: {g.vcount()}, Edges: {g.ecount()}'

'Nodes: 508, Edges: 15333'

In [16]:
# Persist the graph `g` to disk with pickle.
with open("./data/graph.pickle", "wb") as graph_file:
    pickle.dump(g, graph_file)

In [17]:
# Find the vertex whose ID attribute is 1382 and show it with the degree of vertex 271.
vertex_1382 = g.vs.find(ID=1382)
(vertex_1382, g.degree(271))

(igraph.Vertex(<igraph.Graph object at 0x00000170004E8540>, 271, {'ID': 1382, 'lat': 49.012798, 'lon': 2.55, 'name': 'Charles de Gaulle International Airport'}),
 490)

In [18]:
# Largest value among the per-vertex degrees of `g`.
vertex_degrees = g.degree()
max(vertex_degrees) # max degree in the graph

630