In [1]:
import json
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from math import sin, cos, sqrt, atan2, radians
from settings import *

# Project configuration object; `Settings` is provided by the star-import
# from the local `settings` module above.
settings = Settings()

# Reference point used by the distance calculation: Amsterdam Schiphol Airport,
# expressed as (latitude, longitude) in decimal degrees.
schiphol_lat, schiphol_lon = 52.3080392, 4.7621975

In [2]:
# Geocoder answers keyed by query string.  A stored ``None`` means the service
# answered but found no match; timed-out queries are never stored, so a later
# call can retry them.
_GEOCODE_CACHE = {}


def _geocode(city_name):
    """Geocode ``city_name`` with Nominatim, querying at most once per distinct string.

    Returns whatever ``Nominatim.geocode`` returned (a location object or
    ``None`` when nothing matched), or ``None`` if the request timed out.
    """
    # Only plain strings are cached; any other query type is passed straight
    # through to the geocoder, exactly as before.
    cacheable = isinstance(city_name, str)
    if cacheable and city_name in _GEOCODE_CACHE:
        return _GEOCODE_CACHE[city_name]

    try:
        location = Nominatim(user_agent="airport_locator").geocode(city_name)
    except GeocoderTimedOut:
        # Transient failure: report "unknown" but do not remember it.
        return None

    if cacheable:
        _GEOCODE_CACHE[city_name] = location
    return location


def get_latitude(city_name):
    """Return the latitude of ``city_name`` as reported by the geocoder.

    Args:
        city_name: Free-text query passed to ``Nominatim.geocode``.

    Returns:
        ``location.latitude`` of the geocoder result, or ``None`` when the
        query has no match or the request timed out.
    """
    location = _geocode(city_name)
    if location:
        return location.latitude
    return None


def get_longitude(city_name):
    """Return the longitude of ``city_name`` as reported by the geocoder.

    Args:
        city_name: Free-text query passed to ``Nominatim.geocode``.

    Returns:
        ``location.longitude`` of the geocoder result, or ``None`` when the
        query has no match or the request timed out.
    """
    location = _geocode(city_name)
    if location:
        return location.longitude
    return None


# Holds Amsterdam's geocoded latitude under the key "value" once a lookup has
# succeeded, so a row-wise apply does not repeat the same geocoder request for
# every row.  Failed lookups are not stored and are retried on the next call.
_AMSTERDAM_LATITUDE = {}


def is_airport_south_of_amsterdam(row):
    """Tell whether the row's airport lies south of Amsterdam.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with the keys
            ``'destinations'`` and ``'country'``; they are combined into the
            geocoder query ``"<destinations>, <country>"``.

    Returns:
        ``True`` when the airport's latitude is smaller than Amsterdam's,
        ``False`` otherwise, including when either latitude is unavailable.
    """
    amsterdam_latitude = _AMSTERDAM_LATITUDE.get("value")
    if amsterdam_latitude is None:
        amsterdam_latitude = get_latitude("Amsterdam, Netherlands")
        if amsterdam_latitude is None:
            return False
        _AMSTERDAM_LATITUDE["value"] = amsterdam_latitude

    airport_latitude = get_latitude(f"{row['destinations']}, {row['country']}")
    if airport_latitude is None:
        return False

    return airport_latitude < amsterdam_latitude

In [3]:
def calculate_distance(row, schiphol_lat, schiphol_lon):
    """Haversine great-circle distance between a row's coordinates and a reference point.

    Args:
        row: Mapping-like object with ``'latitude'`` and ``'longitude'`` in
            decimal degrees.
        schiphol_lat: Latitude of the reference point, in decimal degrees.
        schiphol_lon: Longitude of the reference point, in decimal degrees.

    Returns:
        The distance as a float, in the unit of ``R`` (an approximate Earth
        radius in kilometres).  NaN is returned when a coordinate is missing
        (``None``, NaN or not convertible to a number).
    """
    R = 6373.0

    # A failed geocoding leaves None/NaN in the row; report "unknown distance"
    # instead of raising TypeError in the middle of a DataFrame.apply.
    try:
        lat2 = radians(float(row['latitude']))
        lon2 = radians(float(row['longitude']))
    except (TypeError, ValueError):
        return float('nan')

    lat1 = radians(schiphol_lat)
    lon1 = radians(schiphol_lon)
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    if a != a:
        # NaN input: propagate it explicitly, because min()/max() below
        # would silently turn it into a valid-looking number.
        return float('nan')

    # Rounding can push `a` a hair outside [0, 1] for (near-)antipodal points,
    # which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))

    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

In [5]:
# Path to the raw flight list (JSON).
# NOTE(review): inputs are read from a hard-coded "data/" directory while the
# CSV outputs at the end of the notebook go to settings.data_dir; confirm the
# two are meant to be the same location.
file_path = "data/flightList.json"

# Decode as UTF-8 explicitly so parsing does not depend on the platform's
# default text encoding, then parse the whole document.
with open(file_path, 'r', encoding='utf-8') as file:
    flight_list = json.load(file)


In [7]:
# One row per entry of the raw 'flights' list.
df_flight = pd.DataFrame(flight_list['flights'])

# Expand every 'route' entry into its own columns.  The new column names are
# paired with the expanded columns by position, so this assumes each route
# yields destinations, eu, visa in that order (TODO confirm against the feed).
route_fields = pd.DataFrame(df_flight['route'].tolist(), index=df_flight.index)
df_flight[['destinations', 'eu', 'visa']] = route_fields

# Filter, reduce 'destinations' to its first element and select the output
# columns in a single chain.  Each step returns a new frame, so no column is
# ever assigned on a filtered slice (which raises SettingWithCopyWarning) and
# nothing is modified in place.
# NOTE(review): serviceType 'J' presumably marks scheduled passenger flights;
# verify against the data source's documentation.
df_flight = (
    df_flight.loc[df_flight['serviceType'] == 'J']
    .assign(destinations=lambda flights: flights['destinations'].str[0])
    .loc[:, ['flightName', 'flightNumber', 'id', 'destinations', 'eu', 'scheduleTime']]
    .reset_index(drop=True)
)

In [6]:
# Path to the raw destination list (JSON).
file_path = "data/destinationList.json"

# Decode as UTF-8 explicitly so parsing does not depend on the platform's
# default text encoding, then parse the whole document.
with open(file_path, 'r', encoding='utf-8') as file:
    destinationList = json.load(file)


In [8]:
# One row per entry of the raw 'destinations' list.
destinations_raw = pd.DataFrame(destinationList['destinations'])

# Expand 'publicName' into two columns, stored as 'dutch' and 'english'
# (paired with the expanded columns by position).
public_names = pd.DataFrame(
    destinations_raw['publicName'].tolist(),
    index=destinations_raw.index,
)
destinations_raw[['dutch', 'english']] = public_names

# Keep only the columns used further down and preview the result.
destination_columns = ['iata', 'country', 'city', 'dutch', 'english']
df_destination = destinations_raw[destination_columns]
df_destination.head()

Unnamed: 0,iata,country,city,dutch,english
0,AAA,French Polynes,,"Anaa, TU","Anaa, TU"
1,AAB,Australia,,"Arrabury, QL","Arrabury, QL"
2,AAC,Egypt,Al-Arish,Al-Arish,Al-Arish
3,AAD,Sudan,Ad-Dabbah,Ad-Dabbah,Ad-Dabbah
4,AAE,Algeria,Annaba,Annaba,Annaba


In [9]:
# Attach destination details to every flight by matching the flight's
# destination code against the destination table's 'iata' column.  A left join
# keeps flights whose code has no match.  The column selection drops the
# redundant 'iata' key and fixes the column order.
combined_columns = [
    'flightName', 'flightNumber', 'id', 'eu', 'scheduleTime',
    'destinations', 'country', 'city', 'dutch', 'english',
]
flights_with_destination = df_flight.merge(
    df_destination,
    how='left',
    left_on='destinations',
    right_on='iata',
)
df_combined = flights_with_destination[combined_columns]


In [10]:
# Work on a copy so that adding columns here does not silently modify
# df_combined and re-running this cell starts from the same input.
df_coordinates = df_combined.copy()

# One geocoder query string per row, in row order.
geocode_queries = [
    f"{destination}, {country}"
    for destination, country in zip(df_coordinates['destinations'], df_coordinates['country'])
]

# Many flights share a destination: geocode each distinct query once instead
# of once per row, then map the results back onto the rows.  dict.fromkeys
# removes duplicates while keeping first-seen order.
distinct_queries = list(dict.fromkeys(geocode_queries))
latitude_by_query = {query: get_latitude(query) for query in distinct_queries}
longitude_by_query = {query: get_longitude(query) for query in distinct_queries}

df_coordinates['latitude'] = [latitude_by_query[query] for query in geocode_queries]
df_coordinates['longitude'] = [longitude_by_query[query] for query in geocode_queries]


In [11]:
# The 'latitude' column already holds each airport's geocoded latitude, so look
# up Amsterdam once and compare the whole column against it, rather than
# issuing two geocoder requests per row.  Rows without a latitude compare as
# False, and so does every row when Amsterdam itself cannot be geocoded.
# Difference from the per-row version: a row whose earlier lookup failed is no
# longer retried here, but it has no distance either and is dropped by the
# final filter.
amsterdam_latitude = get_latitude("Amsterdam, Netherlands")
airport_latitudes = pd.to_numeric(df_coordinates['latitude'], errors='coerce')

if amsterdam_latitude is None:
    df_coordinates['south_of_amsterdam'] = False
else:
    df_coordinates['south_of_amsterdam'] = airport_latitudes < amsterdam_latitude


In [12]:
# df_distance is another name for the same frame as df_coordinates (no copy is
# made), so the new column appears on both.
df_distance = df_coordinates

# Row-wise distance from each airport's coordinates to Schiphol; the reference
# coordinates are forwarded to calculate_distance as extra positional arguments.
df_distance['distance_to_schiphol'] = df_distance.apply(
    calculate_distance,
    axis=1,
    args=(schiphol_lat, schiphol_lon),
)

In [13]:
# Final selection: airports closer than 1900 (in the unit produced by
# calculate_distance) that are flagged as south of Amsterdam, nearest first.
within_range = df_distance['distance_to_schiphol'] < 1900
is_south = df_distance['south_of_amsterdam']

df_final = (
    df_distance[within_range & is_south]
    .sort_values(by='distance_to_schiphol')
    .reset_index(drop=True)
)

In [14]:
# One row per destination code: keep the first (i.e. nearest-sorted) occurrence
# of each code together with its descriptive and coordinate columns.
airport_columns = ['destinations', 'city', 'dutch', 'latitude', 'longitude']
df_airports = df_final.loc[:, airport_columns].drop_duplicates(subset=['destinations'])
df_airports.head(20)


Unnamed: 0,destinations,city,dutch,latitude,longitude
0,LCY,London,Londen City,51.504266,0.054902
2,LTN,London,Londen Luton,51.878102,-0.366425
5,LGW,London,Londen Gatwick,51.154108,-0.182312
8,LHR,London,Londen Heathrow,51.467739,-0.45878
10,BRS,Bristol,Bristol,51.38297,-2.718624
12,GVA,Geneva,Geneve,46.237816,6.108121
13,SZG,Salzburg,Salzburg,47.793455,13.001831
15,MXP,Milan,Milaan Malpensa,45.629627,8.723547
17,BGY,Milan,Milan Bergamo,45.67093,9.698756
18,LIN,Milan,Milaan Linate,45.444205,9.277833


In [16]:
# Write both result tables as CSV files (without the index column) into the
# directory configured in settings.data_dir.
final_output_path = f"{settings.data_dir}/final_output.csv"
airports_path = f"{settings.data_dir}/airports.csv"

df_final.to_csv(final_output_path, index=False)
df_airports.to_csv(airports_path, index=False)