In [19]:
import pandas as pd
from datetime import datetime, date, timedelta
import requests
from pytz import timezone

In [20]:
# read the airports dataframe to get the icao code of all airports we want flight data for

airport_data = pd.read_csv("airport_data.csv")
airport_data.drop(columns = ["Unnamed: 0"], inplace = True)
#airport_data

In [24]:
icao = airport_data['icao'].to_list()
len(icao)

15

In [25]:
# this code is copied from rapidapi as an example of flight data

import requests

url = "https://aerodatabox.p.rapidapi.com/flights/airports/icao/EDDF/2022-07-08T20:01/2022-07-08T20:06"

querystring = {"withLeg":"true","direction":"Arrival","withCancelled":"false","withCargo":"false","withPrivate":"false"}

headers = {
	"X-RapidAPI-Key": "Use Your API key here",
	"X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

response = requests.request("GET", url, headers=headers, params=querystring)

#print(response.text)

## API Data Structue

this is the structure of the data returned by the API when formatted

{"arrivals":[
	{"departure":{
		"airport":{
			"icao":"LGMK","iata":"JMK","name":"Mykonos Island"
			},
		"quality":[]
			},
		"arrival":{
			"scheduledTimeLocal":"2022-07-08 20:05+02:00",
            "actualTimeLocal":"2022-07-08 20:05+02:00",
            "scheduledTimeUtc":"2022-07-08 18:05Z",
            "actualTimeUtc":"2022-07-08 18:05Z",
            "terminal":"1",
            "quality":["Basic","Live"]
			},
	"number":"A3 1543",
	"status":"Expected",
	"codeshareStatus":"IsCodeshared",
	"isCargo":false,
	"aircraft":{
           "reg":"D-AIJC",
           "modeS":"3C6543",
           "model":"Airbus A320 NEO"
           },
	"airline":{"name":"Aegean"}
	},
	{"departure":{"airport":{"icao": ...... and so on

In [30]:
# function is required to enable auto updating in the cloud once linked to sql

def tomorrows_flight_arrivals(icao_list):
    
    today = datetime.now().astimezone(timezone('Europe/Berlin')).date() # these are the times to retrieve data for next day
    tomorrow = (today + timedelta(days=1))
    
    list_for_df =[]
    
    for icao in icao_list:
        times = [['00:00','11:59'],['12:00','23:59']] # data can be called from API for max of 12hrs so x2 periods defined per day
        
        for t in times:
            url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{t[0]}/{tomorrow}T{t[1]}"
            querystring = {"withLeg":"true","direction":"Arrival","withCancelled":"false","withCargo":"false","withPrivate":"false"}
            headers = {
                "X-RapidAPI-Key": "Use Your API key here",
                "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
            }
        r = requests.request('GET', url, headers=headers, params=querystring)
        flights_json = r.json()
        
        for flight in flights_json['arrivals']:    # arrivals is the 1st item returned by API - this dict contains all other dicts/lists
            flights_dict = {}                     # blank dict created to hold extracted values created below
            flights_dict['arrival_icao'] = icao   # this is the icao value returned from the API list ref'd through the for loop 
            # .get() is another way of ensuring our code doesn't break
            # in the previous 2 notebooks you learnt about 'if' (cities) and 'try/except' (weather)
            # .get() works similar, it will get the text if possible, if there is no text a None value will be inserted instead
            flights_dict['arrival_time_local'] = flight['arrival'].get('scheduledTimeLocal', None)   # various cols formed from the dict & list refs under 'arrivals'
            flights_dict['arrival_terminal'] = flight['arrival'].get('terminal', None)
            flights_dict['status'] = flight.get('status', None)
            flights_dict['departure_city'] = flight['departure']['airport'].get('name', None)
            flights_dict['departure_icao'] = flight['departure']['airport'].get('icao', None)
            flights_dict['departure_time_local'] = flight['departure'].get('scheduledTimeLocal', None)
            flights_dict['airline'] = flight['airline'].get('name', None)
            flights_dict['flight_number'] = flight.get('number', None)    # here number is directly under arrivals 
            flights_dict['data_retrieved_on'] = datetime.now().astimezone(timezone('Europe/Berlin')).date()
            list_for_df.append(flights_dict) # the list_for_df is appended with values created, this enables pandas to convert easily to a dataframe
            
        arrivals_data = pd.DataFrame(list_for_df) # converting list to dataframe
            
    return arrivals_data
        

In [53]:
# test with manual input

icaos = ['EICK','EDDF']
tomorrows_flight_arrivals(icaos).head()

Unnamed: 0,arrival_icao,arrival_time_local,arrival_terminal,status,departure_city,departure_icao,departure_time_local,airline,flight_number,data_retrieved_on
0,EICK,2022-07-11 12:55+01:00,,Unknown,Málaga,LEMG,2022-07-11 11:05+02:00,Ryanair,FR 9902,2022-07-10
1,EICK,2022-07-11 12:55+01:00,,Unknown,Amsterdam,EHAM,2022-07-11 12:05+02:00,KLM,KL 1085,2022-07-10
2,EICK,2022-07-11 13:00+01:00,,Unknown,Palma De Mallorca,LEPA,2022-07-11 11:20+02:00,Ryanair,FR 7888,2022-07-10
3,EICK,2022-07-11 13:10+01:00,,Unknown,Gdańsk,EPGD,2022-07-11 11:10+02:00,Ryanair,FR 8782,2022-07-10
4,EICK,2022-07-11 13:35+01:00,,Unknown,London,EGKK,2022-07-11 12:10+01:00,Ryanair,FR 9853,2022-07-10


In [54]:
# test function using the earlier defined icao list from the imported airport_data

tomorrows_flight_arrivals(icao).head()

Unnamed: 0,arrival_icao,arrival_time_local,arrival_terminal,status,departure_city,departure_icao,departure_time_local,airline,flight_number,data_retrieved_on
0,EDDB,2022-07-11 12:20+02:00,0,Unknown,Barcelona,,,Ryanair,FR 133,2022-07-10
1,EDDB,2022-07-11 12:20+02:00,1,Unknown,Palma De Mallorca,LEPA,2022-07-11 09:25+02:00,easyJet,U2 4502,2022-07-10
2,EDDB,2022-07-11 12:10+02:00,0,Unknown,Istanbul,LTFJ,2022-07-11 10:10+03:00,Pegasus,PC 979,2022-07-10
3,EDDB,2022-07-11 12:25+02:00,0,Unknown,Nottingham,EGNX,2022-07-11 09:30+01:00,Ryanair,FR 1638,2022-07-10
4,EDDB,2022-07-11 12:25+02:00,1,Unknown,Lisbon,LPPT,2022-07-11 07:55+01:00,TAP Air Portugal,TP 538,2022-07-10


In [51]:
arrivals_data["arrival_time_local"] = pd.to_datetime(arrivals_data["arrival_time_local"])

In [56]:
arrivals_data.drop(columns = ["departure_time_local"], inplace = True)

In [57]:
arrivals_data.dtypes

arrival_icao                                         object
arrival_time_local    datetime64[ns, pytz.FixedOffset(120)]
arrival_terminal                                     object
departure_city                                       object
departure_icao                                       object
airline                                              object
flight_number                                        object
data_retrieved_on                                    object
dtype: object

In [59]:
arrivals_data.to_csv("arrivals_data.csv")