In [235]:
import json
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from math import sin, cos, sqrt, atan2, radians

# Reference point for every distance calculation: Amsterdam Schiphol Airport,
# given as (latitude, longitude) in decimal degrees.
schiphol_lat, schiphol_lon = 52.3080392, 4.7621975

In [236]:
def get_latitude(city_name):
    """Geocode ``city_name`` with Nominatim and return its latitude.

    Args:
        city_name: Free-text query handed unchanged to ``geolocator.geocode``.

    Returns:
        ``location.latitude`` of the geocoding result, or ``None`` when the
        geocoder returns no result or raises ``GeocoderTimedOut``.
    """
    try:
        # A new geolocator is built on every call; only the lookup itself can time out.
        geolocator = Nominatim(user_agent="airport_locator")
        location = geolocator.geocode(city_name)
    except GeocoderTimedOut:
        return None

    # Explicit miss: the original had a bare ``None`` expression here (a no-op
    # statement) and relied on the implicit fall-through return.
    if not location:
        return None
    return location.latitude
    
def get_longitude(city_name):
    """Geocode ``city_name`` with Nominatim and return its longitude.

    Args:
        city_name: Free-text query handed unchanged to ``geolocator.geocode``.

    Returns:
        ``location.longitude`` of the geocoding result, or ``None`` when the
        geocoder returns no result or raises ``GeocoderTimedOut``.
    """
    try:
        # A new geolocator is built on every call; only the lookup itself can time out.
        geolocator = Nominatim(user_agent="airport_locator")
        location = geolocator.geocode(city_name)
    except GeocoderTimedOut:
        return None

    # Explicit miss: the original had a bare ``None`` expression here (a no-op
    # statement) and relied on the implicit fall-through return.
    if not location:
        return None
    return location.longitude


# Successful reference-point lookups, keyed by query string, so a row-wise
# ``DataFrame.apply`` does not repeat the same network request for every row.
_REFERENCE_LATITUDE_CACHE = {}


def _reference_latitude(query):
    """Return the latitude for ``query``, geocoding it only until a lookup succeeds."""
    if query not in _REFERENCE_LATITUDE_CACHE:
        latitude = get_latitude(query)
        if latitude is None:
            # Do not cache failures: a later call may succeed.
            return None
        _REFERENCE_LATITUDE_CACHE[query] = latitude
    return _REFERENCE_LATITUDE_CACHE[query]


def is_airport_south_of_amsterdam(row):
    """Tell whether the row's airport lies south of Amsterdam.

    Args:
        row: Mapping-like record (dict or pandas Series) with 'destinations'
            and 'country' entries. If it also carries a non-null 'latitude'
            entry, that value is used instead of geocoding the airport again.

    Returns:
        The result of ``airport_latitude < amsterdam_latitude``, or ``False``
        when either latitude could not be determined.
    """
    amsterdam_latitude = _reference_latitude("Amsterdam, Netherlands")

    # Prefer a latitude that is already on the row; works for dicts and Series.
    try:
        airport_latitude = row['latitude']
    except KeyError:
        airport_latitude = None
    if airport_latitude is None or pd.isna(airport_latitude):
        airport_latitude = get_latitude(f"{row['destinations']}, {row['country']}")

    if amsterdam_latitude is None or airport_latitude is None:
        return False

    return airport_latitude < amsterdam_latitude

In [237]:
def calculate_distance(row, schiphol_lat, schiphol_lon, earth_radius_km=6373.0):
    """Haversine great-circle distance between a row's coordinates and a reference point.

    Args:
        row: Mapping-like record with 'latitude' and 'longitude' in decimal degrees.
        schiphol_lat: Latitude of the reference point, decimal degrees.
        schiphol_lon: Longitude of the reference point, decimal degrees.
        earth_radius_km: Sphere radius used to scale the central angle;
            defaults to the 6373.0 the notebook has always used.

    Returns:
        The distance in the unit of ``earth_radius_km`` (kilometres by default).
        NaN when either coordinate is ``None`` or NaN.
    """
    latitude = row['latitude']
    longitude = row['longitude']
    # A failed geocode can leave None in the row; radians(None) would raise
    # TypeError, so report "unknown distance" as NaN instead.
    if latitude is None or longitude is None:
        return float('nan')

    lat1 = radians(schiphol_lat)
    lon1 = radians(schiphol_lon)
    lat2 = radians(latitude)
    lon2 = radians(longitude)
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can push `a` marginally outside [0, 1], which would make a
    # sqrt() below fail. Plain comparisons (not min/max) keep NaN propagating.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return earth_radius_km * c

In [254]:
# Path to the JSON file whose 'flights' array feeds the flight table.
file_path = 'flightlist.json'

# Read the file as UTF-8 explicitly: without `encoding`, open() uses the
# platform's locale encoding, which can mis-decode non-ASCII text.
with open(file_path, 'r', encoding='utf-8') as file:
    flight_list = json.load(file)


In [269]:
# One row per entry of the 'flights' array in the loaded JSON.
df_flight = pd.DataFrame(flight_list['flights'])

# Expand every 'route' record into three columns. The assignment is positional,
# so this assumes each record yields exactly destinations / eu / visa in that
# order -- TODO confirm against the JSON schema.
df_flight[['destinations', 'eu', 'visa']] = pd.DataFrame(df_flight['route'].tolist(), index=df_flight.index)

# Keep only serviceType 'J'. The .copy() makes the filtered frame independent,
# so the column assignment below does not trigger SettingWithCopyWarning.
df_flight = df_flight[df_flight['serviceType'] == 'J'].copy()

# Keep the first element of each 'destinations' value.
df_flight['destinations'] = df_flight['destinations'].str[0]

# Final column selection, with a fresh 0..n-1 index (no inplace mutation).
df_flight = df_flight[['flightName', 'flightNumber', 'id','destinations', 'eu', 'scheduleTime']].reset_index(drop=True)

In [264]:
# Path to the JSON file whose 'destinations' array feeds the destination table.
file_path = 'destinationList.json'

# Read the file as UTF-8 explicitly: without `encoding`, open() uses the
# platform's locale encoding, which can mis-decode non-ASCII place names.
with open(file_path, 'r', encoding='utf-8') as file:
    destinationList = json.load(file)


In [265]:
# One row per entry of the 'destinations' array in the loaded JSON.
df_destination = pd.DataFrame(destinationList['destinations'])

# Expand each 'publicName' record into its two name columns (positional
# assignment: first expanded column -> 'dutch', second -> 'english').
public_names = pd.DataFrame(
    df_destination['publicName'].tolist(),
    index=df_destination.index,
)
df_destination[['dutch', 'english']] = public_names

# Keep only the lookup columns and preview the first rows.
destination_columns = ['iata', 'country', 'city', 'dutch', 'english']
df_destination = df_destination[destination_columns]
df_destination.head()

Unnamed: 0,iata,country,city,dutch,english
0,AAA,French Polynes,,"Anaa, TU","Anaa, TU"
1,AAB,Australia,,"Arrabury, QL","Arrabury, QL"
2,AAC,Egypt,Al-Arish,Al-Arish,Al-Arish
3,AAD,Sudan,Ad-Dabbah,Ad-Dabbah,Ad-Dabbah
4,AAE,Algeria,Annaba,Annaba,Annaba


In [267]:
# Attach destination details to every flight by matching the flight's
# 'destinations' code to the destination table's 'iata' column. A left join
# keeps flights that have no matching destination row.
combined_columns = [
    'flightName', 'flightNumber', 'id', 'eu', 'scheduleTime',
    'destinations', 'country', 'city', 'dutch', 'english',
]
df_combined = pd.merge(
    df_flight,
    df_destination,
    how='left',
    left_on='destinations',
    right_on='iata',
)
df_combined = df_combined[combined_columns]


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94 entries, 0 to 93
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   flightName    94 non-null     object
 1   flightNumber  94 non-null     int64 
 2   id            94 non-null     object
 3   eu            94 non-null     object
 4   scheduleTime  94 non-null     object
 5   destinations  94 non-null     object
 6   country       94 non-null     object
 7   city          94 non-null     object
 8   dutch         94 non-null     object
 9   english       94 non-null     object
dtypes: int64(1), object(9)
memory usage: 7.5+ KB


In [270]:
# Work on a copy so adding coordinate columns does not silently mutate df_combined.
df_coordinates = df_combined.copy()

# Same "<destination>, <country>" text the row-wise f-string used to build.
geocode_queries = df_coordinates['destinations'].astype(str) + ", " + df_coordinates['country'].astype(str)

# Geocode every distinct query once per coordinate instead of once per row:
# flights sharing a destination no longer repeat identical network requests.
unique_queries = geocode_queries.unique()
latitude_by_query = {query: get_latitude(query) for query in unique_queries}
longitude_by_query = {query: get_longitude(query) for query in unique_queries}

df_coordinates['latitude'] = geocode_queries.map(latitude_by_query)
df_coordinates['longitude'] = geocode_queries.map(longitude_by_query)


In [271]:
# Row-wise flag: does the destination lie south of Amsterdam?
south_flags = df_coordinates.apply(is_airport_south_of_amsterdam, axis=1)
df_coordinates['south_of_amsterdam'] = south_flags


In [272]:
# NOTE: this binds a second name to the same DataFrame object (no copy), so the
# new column also appears on df_coordinates.
df_distance = df_coordinates

# Distance from each destination to Schiphol; the reference coordinates are
# forwarded to calculate_distance as extra positional arguments.
df_distance['distance_to_schiphol'] = df_distance.apply(
    calculate_distance,
    axis=1,
    args=(schiphol_lat, schiphol_lon),
)

In [280]:
# Final selection: destinations closer than 1900 (distance units of
# calculate_distance) that are also flagged as south of Amsterdam,
# ordered from nearest to farthest with a fresh index.
within_range = df_distance['distance_to_schiphol'] < 1900
flagged_south = df_distance['south_of_amsterdam']

df_final = df_distance[within_range & flagged_south]
df_final = df_final.sort_values(by='distance_to_schiphol')
df_final = df_final.reset_index(drop=True)

In [286]:
# One row per destination code: the first occurrence in df_final wins.
airport_columns = ['destinations', 'city', 'dutch', 'latitude','longitude']
df_airports = df_final[airport_columns]
df_airports = df_airports.drop_duplicates(subset=['destinations'])


In [287]:
# Write both result tables to CSV files in the working directory, without the index column.
for frame, csv_name in ((df_final, "final_output.csv"), (df_airports, "airports.csv")):
    frame.to_csv(csv_name, index=False)