In [1]:
import pandas as pd
import requests

# Airports data

## Calling the API

In [2]:
#The URL for the API endpoint we're accessing, which searches for airports near a specific location
# In this case, the API endpoint expects the latitude, longitude, radius, and other parameters to be part of the URL path, not as query parameters. querystring = {"withFlightInfoOnly":"true"} # A dictionary containing query parameters for the API request
url = "https://aerodatabox.p.rapidapi.com/airports/search/location/52.31/13.24/km/50/10" 

querystring = {"lat":"52.31","lon":"13.24","radiusKm":"50","limit":"10","withFlightInfoOnly":"true"}


headers = {
	"X-RapidAPI-Key": #"Your Key",
	"X-RapidAPI-Host": "aerodatabox.p.rapidapi.com" # A dictionary containing headers for the API request, including the API key and host
}

response = requests.request("GET", url, headers=headers, params=querystring) # Making a GET request to the API endpoint with the specified headers and query parameters

print(response.text) # Printing the text content of the response from the API

{"searchBy":{"lat":52.31,"lon":13.24},"count":1,"items":[{"icao":"EDDB","iata":"BER","name":"Berlin Brandenburg","shortName":"Brandenburg","municipalityName":"Berlin","location":{"lat":52.35139,"lon":13.493889},"countryCode":"DE","timeZone":"Europe/Berlin"}]}


In [3]:
response.json() # Converting JSON response to a python dictionary

{'searchBy': {'lat': 52.31, 'lon': 13.24},
 'count': 1,
 'items': [{'icao': 'EDDB',
   'iata': 'BER',
   'name': 'Berlin Brandenburg',
   'shortName': 'Brandenburg',
   'municipalityName': 'Berlin',
   'location': {'lat': 52.35139, 'lon': 13.493889},
   'countryCode': 'DE',
   'timeZone': 'Europe/Berlin'}]}

In [4]:
pd.json_normalize(response.json()['items']) # Normalizing the JSON data in the 'items' key into a flat table (dataframe)

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,timeZone,location.lat,location.lon
0,EDDB,BER,Berlin Brandenburg,Brandenburg,Berlin,DE,Europe/Berlin,52.35139,13.493889


## Creating a function to work with multiple cities

In [5]:
# Defining a function to retrieve ICAO airport codes given lists of latitudes and longitudes
def icao_airport_codes(latitudes, longitudes):
    
    # Assert len(latitudes) == len(longitudes)
    
    
    list_for_df = []  # Initializing an empty list to store dataframes
    
    for index, value in enumerate(latitudes): # Looping through the latitudes and corresponding longitudes
        url = f"https://aerodatabox.p.rapidapi.com/airports/search/location/{value}/{longitudes[index]}/km/50/5"
        querystring = {"withFlightInfoOnly":"true"}

        headers = {
          "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com",
          "X-RapidAPI-Key": #"Your Key"
        }

        response = requests.request("GET", url, headers=headers, params=querystring)
        
        # Normalizing the JSON response and append the resulting dataframe to the list
        list_for_df.append(pd.json_normalize(response.json()['items']))
        

    return pd.concat(list_for_df, ignore_index=True)  # Concatenating all dataframes in the list into a single dataframe and return it

In [7]:
list(enumerate(latitudes)) # Create an enumerated list of tuples from the latitudes list (index, value)

[(0, 52.52), (1, 53.5511), (2, 48.1351)]

In [6]:
# Entering the coordinates for Berlin, Paris, London (Desired Cities)
latitudes = [52.5200, 53.5511, 48.1351]
longitudes = [13.4050, 9.9937, 11.5820]

icao_airport_codes(latitudes, longitudes) # Calling the function

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,timeZone,location.lat,location.lon
0,EDDB,BER,Berlin Brandenburg,Brandenburg,Berlin,DE,Europe/Berlin,52.35139,13.493889
1,EDDH,HAM,Hamburg,Hamburg,Hamburg,DE,Europe/Berlin,53.6304,9.988229
2,EDDM,MUC,Munich,Munich,Munich,DE,Europe/Berlin,48.3538,11.7861


# Flights data

## Calling the API

In [21]:
import pandas as pd
from datetime import datetime, timedelta
import requests
from pytz import timezone

In [22]:
# Defining the parameters

icao = "EDDB" # For Berlin airport
date = datetime.now().date()
time_1 = "00:00"
time_2 = "11:59"

url =  f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{date}T{time_1}/{date}T{time_2}"

querystring = {"withLeg":"true",
               "direction":"Arrival",
               "withCancelled":"false",
               "withCodeshared":"true",
               "withCargo":"false",
               "withPrivate":"false"}

headers = {
    'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
    'x-rapidapi-key': #"Your Key"
    }

response = requests.request("GET",
                            url,
                            headers = headers,
                            params = querystring)

flights_json = response.json()

flights_json


{'arrivals': [{'departure': {'airport': {'icao': 'EDDK',
     'iata': 'CGN',
     'name': 'Cologne',
     'timeZone': 'Europe/Berlin'},
    'scheduledTime': {'utc': '2024-07-08 02:46Z',
     'local': '2024-07-08 04:46+02:00'},
    'revisedTime': {'utc': '2024-07-08 02:46Z',
     'local': '2024-07-08 04:46+02:00'},
    'runwayTime': {'utc': '2024-07-08 02:46Z',
     'local': '2024-07-08 04:46+02:00'},
    'runway': '06',
    'quality': ['Basic', 'Live']},
   'arrival': {'scheduledTime': {'utc': '2024-07-08 03:25Z',
     'local': '2024-07-08 05:25+02:00'},
    'revisedTime': {'utc': '2024-07-08 03:25Z',
     'local': '2024-07-08 05:25+02:00'},
    'quality': ['Basic', 'Live']},
   'number': 'DJ 6228',
   'callSign': 'SRR6228',
   'status': 'Approaching',
   'codeshareStatus': 'IsOperator',
   'isCargo': False,
   'aircraft': {'reg': 'OY-SRM', 'modeS': '45CE4D', 'model': 'Boeing 767'},
   'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}},
  {'departure': {'airport': {'icao

In [23]:
flights_json.keys()

dict_keys(['arrivals'])

In [24]:
flights_json["arrivals"]

[{'departure': {'airport': {'icao': 'EDDK',
    'iata': 'CGN',
    'name': 'Cologne',
    'timeZone': 'Europe/Berlin'},
   'scheduledTime': {'utc': '2024-07-08 02:46Z',
    'local': '2024-07-08 04:46+02:00'},
   'revisedTime': {'utc': '2024-07-08 02:46Z',
    'local': '2024-07-08 04:46+02:00'},
   'runwayTime': {'utc': '2024-07-08 02:46Z',
    'local': '2024-07-08 04:46+02:00'},
   'runway': '06',
   'quality': ['Basic', 'Live']},
  'arrival': {'scheduledTime': {'utc': '2024-07-08 03:25Z',
    'local': '2024-07-08 05:25+02:00'},
   'revisedTime': {'utc': '2024-07-08 03:25Z',
    'local': '2024-07-08 05:25+02:00'},
   'quality': ['Basic', 'Live']},
  'number': 'DJ 6228',
  'callSign': 'SRR6228',
  'status': 'Approaching',
  'codeshareStatus': 'IsOperator',
  'isCargo': False,
  'aircraft': {'reg': 'OY-SRM', 'modeS': '45CE4D', 'model': 'Boeing 767'},
  'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}},
 {'departure': {'airport': {'icao': 'UGTB',
    'iata': 'TBS',
    'na

In [25]:
flights_json["arrivals"][0]

{'departure': {'airport': {'icao': 'EDDK',
   'iata': 'CGN',
   'name': 'Cologne',
   'timeZone': 'Europe/Berlin'},
  'scheduledTime': {'utc': '2024-07-08 02:46Z',
   'local': '2024-07-08 04:46+02:00'},
  'revisedTime': {'utc': '2024-07-08 02:46Z',
   'local': '2024-07-08 04:46+02:00'},
  'runwayTime': {'utc': '2024-07-08 02:46Z',
   'local': '2024-07-08 04:46+02:00'},
  'runway': '06',
  'quality': ['Basic', 'Live']},
 'arrival': {'scheduledTime': {'utc': '2024-07-08 03:25Z',
   'local': '2024-07-08 05:25+02:00'},
  'revisedTime': {'utc': '2024-07-08 03:25Z',
   'local': '2024-07-08 05:25+02:00'},
  'quality': ['Basic', 'Live']},
 'number': 'DJ 6228',
 'callSign': 'SRR6228',
 'status': 'Approaching',
 'codeshareStatus': 'IsOperator',
 'isCargo': False,
 'aircraft': {'reg': 'OY-SRM', 'modeS': '45CE4D', 'model': 'Boeing 767'},
 'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}}

In [26]:
flights_json["arrivals"][0].keys()

dict_keys(['departure', 'arrival', 'number', 'callSign', 'status', 'codeshareStatus', 'isCargo', 'aircraft', 'airline'])

Useful Info:
* Departure airport icao
* scheduled arrival time, local
* flight number

## Creating the DataFrame

### Using Loops

In [28]:
flight_items = [] # Empty list

for item in flights_json["arrivals"]: # Referring to arrivals dict in flights_json
    flight_item = {
        "arrival airport_icao" : icao, 
        "departure_airport_icao": item["departure"]["airport"].get("icao", None),  #item["Key"]["Value"].get("1st element")
        "scheduled_arrival_time" : item["arrival"]["scheduledTime"].get("local", None),
        "arrival_terminal" : item['arrival'].get('terminal', None),
        "flight_number" : item.get("number", None)
    } # Defining the elements in for loop completed
    
    flight_items.append(flight_item) # The one to which we append comes first followed by one getting appended
    
flights_df = pd.DataFrame(flight_items)

flights_df.head()

Unnamed: 0,arrival airport_icao,departure_airport_icao,scheduled_arrival_time,arrival_terminal,flight_number
0,EDDB,EDDK,2024-07-08 05:25+02:00,,DJ 6228
1,EDDB,UGTB,2024-07-08 05:40+02:00,1.0,EW 8993
2,EDDB,ZBAA,2024-07-08 06:45+02:00,1.0,HU 489
3,EDDB,EVRA,2024-07-08 07:40+02:00,1.0,BT 211
4,EDDB,EVRA,2024-07-08 07:40+02:00,1.0,DL 7313


We can see in the above result under arrival_time column, there is an offset "+2:00".
We need to get rid of it as it will complicate our analysis and will not provide accurate results.

In [29]:
flights_df["scheduled_arrival_time"] = flights_df["scheduled_arrival_time"].str[:-6] # Removing last 6 characters from the string
flights_df.head()

Unnamed: 0,arrival airport_icao,departure_airport_icao,scheduled_arrival_time,arrival_terminal,flight_number
0,EDDB,EDDK,2024-07-08 05:25,,DJ 6228
1,EDDB,UGTB,2024-07-08 05:40,1.0,EW 8993
2,EDDB,ZBAA,2024-07-08 06:45,1.0,HU 489
3,EDDB,EVRA,2024-07-08 07:40,1.0,BT 211
4,EDDB,EVRA,2024-07-08 07:40,1.0,DL 7313


## Creating a function for multiple cities

In [30]:
import pandas as pd
from datetime import datetime, timedelta
import requests
from pytz import timezone

def get_flight_data(icao_list):
  api_key = #"Your Key"

  berlin_timezone = timezone('Europe/Berlin')
  today = datetime.now(berlin_timezone).date()
  tomorrow = (today + timedelta(days=1))

  flight_items = []

  for icao in icao_list:
    # the api can only make 12 hour calls, therefore, two 12 hour calls make a full day
    # using the nested lists below we can make a morning call and extract the data
    # then make an afternoon call and extract the data
    times = [["00:00","11:59"],
             ["12:00","23:59"]]

    for time in times:
      url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{time[0]}/{tomorrow}T{time[1]}"

      querystring = {"withLeg":"true",
                    "direction":"Arrival",
                    "withCancelled":"false",
                    "withCodeshared":"true",
                    "withCargo":"false",
                    "withPrivate":"false"}

      headers = {
          'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
          'x-rapidapi-key': api_key
          }

      response = requests.request("GET",
                                  url,
                                  headers = headers,
                                  params = querystring)

      flights_json = response.json()

      retrieval_time = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")

      for item in flights_json["arrivals"]:
        flight_item = {
            "arrival_airport_icao": icao,
            "departure_airport_icao": item["departure"]["airport"].get("icao", None),
            "scheduled_arrival_time": item["arrival"]["scheduledTime"].get("local", None),
            'arrival_terminal': item['arrival'].get('terminal', None),
            "flight_number": item.get("number", None),
            "data_retrieved_at": retrieval_time
        }

        flight_items.append(flight_item)

  flights_df = pd.DataFrame(flight_items)
  flights_df["scheduled_arrival_time"] = flights_df["scheduled_arrival_time"].str[:-6]
  flights_df["scheduled_arrival_time"] = pd.to_datetime(flights_df["scheduled_arrival_time"])
  flights_df["data_retrieved_at"] = pd.to_datetime(flights_df["data_retrieved_at"])

  return flights_df
    

In [33]:
icao_list = ["EDDB", "EDDH", "EDDM"]

get_flight_data(icao_list)

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,arrival_terminal,flight_number,data_retrieved_at
0,EDDB,BIKF,2024-07-09 06:15:00,1,FI 518,2024-07-08 12:45:35
1,EDDB,LTCC,2024-07-09 06:45:00,1,XQ 1700,2024-07-08 12:45:35
2,EDDB,LTBJ,2024-07-09 06:30:00,1,XQ 966,2024-07-08 12:45:35
3,EDDB,,2024-07-09 07:20:00,1,5F 611,2024-07-08 12:45:35
4,EDDB,EVRA,2024-07-09 07:40:00,1,A3 3219,2024-07-08 12:45:35
...,...,...,...,...,...,...
922,EDDM,LEPA,2024-07-09 23:25:00,1,DE 1131,2024-07-08 12:45:36
923,EDDM,EGLL,2024-07-09 23:15:00,2,LH 2481,2024-07-08 12:45:36
924,EDDM,LPPT,2024-07-09 23:30:00,2,TP 556,2024-07-08 12:45:36
925,EDDM,LPMA,2024-07-09 23:15:00,1,X3 2847,2024-07-08 12:45:36


Using normalize method

In [34]:
def tomorrows_flight_arrivals(icao_list):

    api_key = "14a44098c8mshe4536a007985112p1e3b4bjsn8fd805eb6bd4"

    berlin_timezone = timezone('Europe/Berlin')
    today = datetime.now(berlin_timezone).date()
    tomorrow = (today + timedelta(days=1))

    list_for_arrivals_df = []

    for icao in icao_list:

        times = [["00:00","11:59"],["12:00","23:59"]]

        for time in times:
            url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{time[0]}/{tomorrow}T{time[1]}"

            querystring = {"direction":"Arrival","withCancelled":"false"}

            headers = {
                "X-RapidAPI-Key": #"Your Key",
                "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
                }

            response = requests.request("GET", url, headers=headers, params=querystring)
            flights_resp = response.json()

            arrivals_df = pd.json_normalize(flights_resp["arrivals"])[["number", "airline.name", "movement.scheduledTime.local", "movement.terminal", "movement.airport.name", "movement.airport.icao"]]
            arrivals_df = arrivals_df.rename(columns={"number": "flight_number", "airline.name": "airline", "movement.scheduledTime.local": "arrival_time", "movement.terminal": "arrival_terminal", "movement.airport.name": "departure_city", "movement.airport.icao": "departure_airport_icao"})
            arrivals_df["arrival_airport_icao"] = icao
            arrivals_df["data_retrieved_on"] = datetime.now().date()
            arrivals_df = arrivals_df[["arrival_airport_icao", "flight_number", "airline", "arrival_time", "arrival_terminal", "departure_city", "departure_airport_icao", "data_retrieved_on"]]

            # fixing arrival_time
            arrivals_df["arrival_time"] = arrivals_df["arrival_time"].str.split("+").str[0]

            list_for_arrivals_df.append(arrivals_df)

    return pd.concat(list_for_arrivals_df, ignore_index=True)

In [36]:
icao_list = ["EDDB", "EDDH", "EDDM"]

tomorrows_flight_arrivals(icao_list)

KeyError: 'arrivals'