In [1]:
# 0. LOAD PACKAGES

import matplotlib.pyplot as plt
import networkx
import numpy as np
import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

In [2]:
# 1. LOAD DATA

# Airport master table: one row per airport (name, ID and coordinates).
# NOTE(review): this file name contains a space ("Unique Airports") while the
# routes file uses underscores throughout - confirm it matches the file on disk.
airports_df = pd.read_csv(filepath_or_buffer='*/Full_Merge_of_All_Unique Airports.csv')

# Airline route table.
routes_df = pd.read_csv(filepath_or_buffer='*/Full_Merge_of_All_Unique_Routes.csv')

In [3]:
# 2. EXPLORE DATA

# Print both raw tables (airports first, then routes), one after the other.
print(airports_df, routes_df, sep='\n')

                                            Label   ID   Latitude   Longitude
0                                  Goroka Airport  GKA  -6.081690  145.391998
1                                  Madang Airport  MAG  -5.207080  145.789001
2                    Mount Hagen Kagamuga Airport  HGU  -5.826790  144.296005
3                                  Nadzab Airport  LAE  -6.569803  146.725977
4     Port Moresby Jacksons International Airport  POM  -9.443380  147.220001
...                                           ...  ...        ...         ...
9070                            Liangping Airport  LIA  30.679399  107.786003
9071                           Öndörkhaan Airport  UNR  47.304860  110.609200
9072         Chengdu Tianfu International Airport  TFU  30.312520  104.441284
9073                     Chongqing Wushan Airport  WSK  31.068960  109.708958
9074                                Jilin Airport  JIL  44.002201  126.396004

[9075 rows x 4 columns]
      Airline ID Departure Destination


Data exploration shows that airport locations are given as latitude and longitude values. However, to determine whether an airport is located in a developed or a developing country, raw coordinates cannot be used directly; they must first be converted into country names.

In [9]:
# 3. DETERMINE COUNTRY NAMES

# Turn dataframe into array in order to speed up the loop and make all entries a string (=necessary for Nominatim API)
airports_ar = np.asarray(airports_df, dtype='str')
routes_ar = np.asarray(routes_df, dtype='str')

# Initialize Nominatim API
# NOTE(review): consider replacing the generic user agent with one that identifies
# this project; a shared/generic agent string may be rejected by the service.
geolocator = Nominatim(user_agent="geoapiExercises")

# Throttle the reverse-geocoding calls instead of firing them back-to-back.
# RateLimiter waits at least `min_delay_seconds` between requests and retries a
# failed request (e.g. a 502 caused by too many requests) after
# `error_wait_seconds`. Once the retries are exhausted it returns None rather
# than raising, so a single bad request no longer aborts the whole run.
reverse_geocode = RateLimiter(
    geolocator.reverse,
    min_delay_seconds=1,
    max_retries=5,
    error_wait_seconds=10,
)

# Use latitude and longitude values for determining the country with the help of Nominatim API.
# This takes a while: one throttled request is made per airport.
# Airports whose country cannot be resolved get the sentinel string 'None'.
countries = []
for row in airports_ar:
    location = reverse_geocode(row[2] + "," + row[3], language='en')
    if location is None:
        country = None
    else:
        # The address may lack a 'country' entry; treat that like "no result".
        country = location.raw.get('address', {}).get('country')
    countries.append('None' if country is None else country)

# Add 'countries' as column 4 of 'airports_ar'.
# np.insert casts the inserted values to the dtype of the target array, which is
# a fixed-width string type here; widen the array first so that a country name
# longer than the longest existing cell is not silently truncated.
countries_ar = np.asarray(countries, dtype='str')
wide_dtype = np.promote_types(airports_ar.dtype, countries_ar.dtype)
airports_ar = np.insert(airports_ar.astype(wide_dtype), 4, countries_ar, axis=1)

In [27]:
# Identify all entries where country = 'None' and delete them.
# A boolean mask does this in a single pass (the previous list-membership test
# was quadratic in the number of airports) and keeps the array two-dimensional
# even when every row is removed, so the column slicing below cannot fail.
unresolved_mask = airports_ar[:, 4] == 'None'
del_inds = np.flatnonzero(unresolved_mask).tolist()
airports_ar = airports_ar[~unresolved_mask]

# Save result in dataframe, omitting latitude and longitude since they are not important anymore
airports_df_final = pd.DataFrame(data = airports_ar[:, [0, 1, 4]],
                                columns = ['Name', 'Airport_ID', 'Country'])

In [33]:
# 4. SAVE DATA

# Write the airport -> country table to CSV with a header row and without the
# dataframe index.
airports_df_final.to_csv(
    path_or_buf='*/Airports_IDs_Countries.csv',
    header=True,
    index=False,
)