In [None]:
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd

In [None]:
# Column names for the airports file. They are supplied via `names=`, so no
# header row is taken from the file itself.
cols = [
    'Airport ID', 'Name', 'City', 'Country', 'IATA', 'ICAO', 'Latitude', 'Longitude',
    'Altitude', 'Timezone', 'DST', 'TZ', 'Type', 'Source',
]
# read_csv splits on commas by default
df_airports = pd.read_csv('Data/airports.dat', names=cols)

# Same approach for the routes file; `cols` is rebound to the routes schema.
cols = [
    'Airline', 'Airline ID', 'Source Airport', 'Source Airport ID', 'Destination Airport',
    'Destination Airport ID', 'Codeshare', 'Stops', 'Equipment',
]
df_routes = pd.read_csv('Data/routes.dat', names=cols)

In [None]:
# Clean the raw route / airport tables and join airport details onto both ends
# of every route.
#
# NOTE: this cell rebinds `df_routes` and `df_airports` to their cleaned versions
# (the `Stops` column it filters on is removed here), so re-run the loading cell
# before re-running this one.


def _attach_airport(routes, airports, side):
    """Inner-join airport details onto ``routes`` for one end of each route.

    Parameters
    ----------
    routes : pd.DataFrame
        Route table containing a ``'<side> Airport ID'`` column.
    airports : pd.DataFrame
        Airport table with ``'Airport ID'``, ``'Name'``, ``'Latitude'``,
        ``'Longitude'`` and ``'Altitude'`` columns.
    side : str
        ``'Source'`` or ``'Destination'``; selects the join key and is used as the
        prefix of the airport columns that are added.

    Returns
    -------
    pd.DataFrame
        ``routes`` plus the airport columns renamed with the ``side`` prefix.
        Routes whose airport ID has no match in ``airports`` are dropped
        (inner join).
    """
    return (
        routes
        .merge(airports, left_on=side + ' Airport ID', right_on='Airport ID')
        # the join key coming from the airport table duplicates '<side> Airport ID'
        .drop(columns=['Airport ID'])
        .rename(columns={
            'Name': side + ' Airport Name',
            'Latitude': side + ' Latitude',
            'Longitude': side + ' Longitude',
            'Altitude': side + ' Altitude',
        })
    )


df_routes = (
    df_routes
    # remove all routes which have an intermediate stop; the negated mask keeps
    # rows whose Stops value is missing, exactly like dropping `Stops >= 1`
    .loc[~(df_routes['Stops'] >= 1)]
    # drop stops since it is constant now, as well as equipment, codeshare & airline
    .drop(columns=['Stops', 'Codeshare', 'Equipment', 'Airline'])
    # replace the '\N' placeholder with NaN
    # (`np.nan`, because the `np.NaN` alias was removed in NumPy 2.0)
    .replace('\\N', np.nan)
    # drop all routes with a missing source / destination airport id
    .dropna(subset=['Source Airport ID', 'Destination Airport ID'])
)

# convert dtypes to numeric for ids so they can be joined on the airport ids
df_routes = df_routes.assign(**{
    'Source Airport ID': pd.to_numeric(df_routes['Source Airport ID']),
    'Destination Airport ID': pd.to_numeric(df_routes['Destination Airport ID']),
})

# drop columns from airports that are not needed
df_airports = df_airports.drop(
    columns=['City', 'Country', 'IATA', 'ICAO', 'Timezone', 'DST', 'TZ', 'Type', 'Source']
)

# attach source airport data; routes without a matching airport get dropped, e.g. 7167
df = _attach_airport(df_routes, df_airports, 'Source')

# attach destination airport data; routes without a matching airport get dropped
df_merged = _attach_airport(df, df_airports, 'Destination')

In [None]:
# Build a directed graph from the merged route table: airport IDs become the
# nodes and every route contributes a source -> destination edge (a DiGraph
# holds at most one edge per ordered pair, so repeated routes collapse).
G = nx.from_pandas_edgelist(
    df_merged,
    source='Source Airport ID',
    target='Destination Airport ID',
    create_using=nx.DiGraph,
)

# sanity check: the graph should report itself as directed
print(G.is_directed())

In [None]:
# Per-airport attribute dicts keyed by airport ID, i.e.
# {airport_id: {column_name: value, ...}}, so they can be assigned as node attributes
airports_dict = df_airports.set_index('Airport ID').T.to_dict()

# assign node attributes
nx.set_node_attributes(G, airports_dict)

# write graph data; create the output directory first so the export does not
# fail with FileNotFoundError when 'Graphs/' is missing (e.g. on a fresh checkout)
gml_path = Path('Graphs/airlines.gml')
gml_path.parent.mkdir(parents=True, exist_ok=True)
nx.write_gml(G, str(gml_path))