In [2]:
# Import the API key from api_config.py
import pandas as pd
from api_config import *
import requests

# Define the API URL and parameters.
# The path carries the search coordinates (lat 52.31 / lon 13.24); the trailing
# "km/50/16" is presumably the search radius in km and the maximum number of
# results -- confirm against the AeroDataBox endpoint documentation.
url = "https://aerodatabox.p.rapidapi.com/airports/search/location/52.31/13.24/km/50/16"
querystring = {"withFlightInfoOnly": "true"}

# Use the API key from the imported config file
headers = {
    "X-RapidAPI-Key": RAPIDAPI_KEY,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

# Make the request. The timeout stops the cell from hanging forever on a stalled
# connection, and raise_for_status() turns an HTTP error (bad key, quota
# exceeded, ...) into an exception instead of printing the error body as data.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
print(response.text)

{"searchBy":{"lat":52.31,"lon":13.24},"count":1,"items":[{"icao":"EDDB","iata":"BER","name":"Berlin Brandenburg","shortName":"Brandenburg","municipalityName":"Berlin","location":{"lat":52.35139,"lon":13.493889},"countryCode":"DE","timeZone":"Europe/Berlin"}]}


In [3]:
response.json()

{'searchBy': {'lat': 52.31, 'lon': 13.24},
 'count': 1,
 'items': [{'icao': 'EDDB',
   'iata': 'BER',
   'name': 'Berlin Brandenburg',
   'shortName': 'Brandenburg',
   'municipalityName': 'Berlin',
   'location': {'lat': 52.35139, 'lon': 13.493889},
   'countryCode': 'DE',
   'timeZone': 'Europe/Berlin'}]}

In [4]:
pd.json_normalize(response.json()['items'])

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,timeZone,location.lat,location.lon
0,EDDB,BER,Berlin Brandenburg,Brandenburg,Berlin,DE,Europe/Berlin,52.35139,13.493889


In [5]:
def icao_airport_codes(latitudes, longitudes):
  """Look up the airports around each (latitude, longitude) pair.

  One request per coordinate pair is sent to the AeroDataBox
  "search airports by location" endpoint, and the ``items`` of every response
  are flattened with ``pd.json_normalize`` and stacked into a single frame.

  Parameters
  ----------
  latitudes, longitudes : sequences of numbers
      Parallel sequences; element ``i`` of each forms one search coordinate.

  Returns
  -------
  pandas.DataFrame
      One row per airport returned by the API, re-indexed from 0.
      An empty frame when no coordinates are given.

  Raises
  ------
  ValueError
      If the two sequences differ in length.
  requests.HTTPError
      If the API answers with an error status.
  """
  # Fail fast: a length mismatch used to surface as an IndexError halfway
  # through the loop, or silently drop the surplus longitudes.
  if len(latitudes) != len(longitudes):
    raise ValueError(
      f"latitudes and longitudes must have the same length "
      f"(got {len(latitudes)} and {len(longitudes)})"
    )

  # pd.concat refuses an empty list, so answer the empty case directly.
  if len(latitudes) == 0:
    return pd.DataFrame()

  # Identical for every request, so build them once.
  querystring = {"withFlightInfoOnly": "true"}
  headers = {
    "X-RapidAPI-Key": RAPIDAPI_KEY,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
  }

  airport_frames = []

  for latitude, longitude in zip(latitudes, longitudes):
    url = f"https://aerodatabox.p.rapidapi.com/airports/search/location/{latitude}/{longitude}/km/100/16"

    response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)
    # Surface HTTP errors here rather than as a KeyError on 'items' below.
    response.raise_for_status()

    airport_frames.append(pd.json_normalize(response.json()['items']))

  return pd.concat(airport_frames, ignore_index=True)

In [6]:
# (latitude, longitude) pairs for Berlin, Paris, London
city_coordinates = [
    (52.5200, 13.4050),
    (48.8567, 2.3522),
    (51.5072, -0.1275),
]

# Split the pairs into the two parallel lists the lookup function expects.
latitudes = [pair[0] for pair in city_coordinates]
longitudes = [pair[1] for pair in city_coordinates]

icao_airport_codes(latitudes, longitudes)

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,timeZone,location.lat,location.lon
0,EDDB,BER,Berlin Brandenburg,Brandenburg,Berlin,DE,Europe/Berlin,52.35139,13.493889
1,LFPB,LBG,Paris -Le Bourget,-Le Bourget,Paris,FR,Europe/Paris,48.9694,2.44139
2,LFPO,ORY,Paris -Orly,-Orly,Paris,FR,Europe/Paris,48.7253,2.35944
3,LFPG,CDG,Paris Charles de Gaulle,Charles de Gaulle,Paris,FR,Europe/Paris,49.0128,2.549999
4,LFOB,BVA,Beauvais/Tillé Paris Beauvais Tillé,Paris Beauvais Tillé,Beauvais/Tillé,FR,Europe/Paris,49.4544,2.11278
5,EGLC,LCY,London City,City,London,GB,Europe/London,51.5053,0.055277
6,EGLL,LHR,London Heathrow,Heathrow,London,GB,Europe/London,51.4706,-0.461941
7,EGKR,KRH,Redhill Aerodrome,Aerodrome,Redhill,GB,Europe/London,51.2136,-0.138611
8,EGKK,LGW,London Gatwick,Gatwick,London,GB,Europe/London,51.1481,-0.190277
9,EGGW,LTN,London Luton,Luton,London,GB,Europe/London,51.8747,-0.368333


In [7]:
import pandas as pd
from datetime import datetime, timedelta
import requests
from pytz import timezone

In [8]:
# Defining the parameters

icao = "EDDB" # For Berlin airport
# Take "today" from the Berlin clock rather than the machine's local clock, so
# the requested day matches the Europe/Berlin date used by the functions
# further down in this notebook.
date = datetime.now(timezone("Europe/Berlin")).date()
time_1 = "00:00"
time_2 = "11:59"

# The f-string renders the date as YYYY-MM-DD, giving e.g. .../2024-09-11T00:00/2024-09-11T11:59
url =  f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{date}T{time_1}/{date}T{time_2}"

querystring = {"withLeg":"true",
               "direction":"Arrival",
               "withCancelled":"false",
               "withCodeshared":"true",
               "withCargo":"false",
               "withPrivate":"false"}

headers = {
     "X-RapidAPI-Key": RAPIDAPI_KEY,
     "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
    }

response = requests.request("GET",
                            url,
                            headers = headers,
                            params = querystring,
                            timeout = 30)
# Stop on an HTTP error instead of trying to parse an error body as flight data.
response.raise_for_status()

flights_json = response.json()

flights_json

{'arrivals': [{'departure': {'airport': {'icao': 'LTFH',
     'iata': 'SZF',
     'name': 'Samsun',
     'timeZone': 'Europe/Istanbul'},
    'scheduledTime': {'utc': '2024-09-11 01:10Z',
     'local': '2024-09-11 04:10+03:00'},
    'terminal': 'NULL',
    'quality': ['Basic']},
   'arrival': {'scheduledTime': {'utc': '2024-09-11 04:45Z',
     'local': '2024-09-11 06:45+02:00'},
    'revisedTime': {'utc': '2024-09-11 04:11Z',
     'local': '2024-09-11 06:11+02:00'},
    'runwayTime': {'utc': '2024-09-11 04:18Z',
     'local': '2024-09-11 06:18+02:00'},
    'terminal': '1',
    'gate': 'N01',
    'baggageBelt': 'B3',
    'quality': ['Basic', 'Live']},
   'number': 'XQ 1768',
   'callSign': 'SXS6JD',
   'status': 'Arrived',
   'codeshareStatus': 'IsOperator',
   'isCargo': False,
   'aircraft': {'reg': 'TC-SPJ', 'modeS': '4BCE0A', 'model': 'Boeing 737'},
   'airline': {'name': 'Sun Express', 'iata': 'XQ', 'icao': 'SXS'}},
  {'departure': {'airport': {'icao': 'KEWR',
     'iata': 'EWR',
  

In [9]:
flights_json.keys()

dict_keys(['arrivals'])

In [10]:
flights_json["arrivals"]

[{'departure': {'airport': {'icao': 'LTFH',
    'iata': 'SZF',
    'name': 'Samsun',
    'timeZone': 'Europe/Istanbul'},
   'scheduledTime': {'utc': '2024-09-11 01:10Z',
    'local': '2024-09-11 04:10+03:00'},
   'terminal': 'NULL',
   'quality': ['Basic']},
  'arrival': {'scheduledTime': {'utc': '2024-09-11 04:45Z',
    'local': '2024-09-11 06:45+02:00'},
   'revisedTime': {'utc': '2024-09-11 04:11Z',
    'local': '2024-09-11 06:11+02:00'},
   'runwayTime': {'utc': '2024-09-11 04:18Z',
    'local': '2024-09-11 06:18+02:00'},
   'terminal': '1',
   'gate': 'N01',
   'baggageBelt': 'B3',
   'quality': ['Basic', 'Live']},
  'number': 'XQ 1768',
  'callSign': 'SXS6JD',
  'status': 'Arrived',
  'codeshareStatus': 'IsOperator',
  'isCargo': False,
  'aircraft': {'reg': 'TC-SPJ', 'modeS': '4BCE0A', 'model': 'Boeing 737'},
  'airline': {'name': 'Sun Express', 'iata': 'XQ', 'icao': 'SXS'}},
 {'departure': {'airport': {'icao': 'KEWR',
    'iata': 'EWR',
    'name': 'Newark',
    'timeZone': 'Am

In [11]:
flights_json["arrivals"][0]

{'departure': {'airport': {'icao': 'LTFH',
   'iata': 'SZF',
   'name': 'Samsun',
   'timeZone': 'Europe/Istanbul'},
  'scheduledTime': {'utc': '2024-09-11 01:10Z',
   'local': '2024-09-11 04:10+03:00'},
  'terminal': 'NULL',
  'quality': ['Basic']},
 'arrival': {'scheduledTime': {'utc': '2024-09-11 04:45Z',
   'local': '2024-09-11 06:45+02:00'},
  'revisedTime': {'utc': '2024-09-11 04:11Z',
   'local': '2024-09-11 06:11+02:00'},
  'runwayTime': {'utc': '2024-09-11 04:18Z',
   'local': '2024-09-11 06:18+02:00'},
  'terminal': '1',
  'gate': 'N01',
  'baggageBelt': 'B3',
  'quality': ['Basic', 'Live']},
 'number': 'XQ 1768',
 'callSign': 'SXS6JD',
 'status': 'Arrived',
 'codeshareStatus': 'IsOperator',
 'isCargo': False,
 'aircraft': {'reg': 'TC-SPJ', 'modeS': '4BCE0A', 'model': 'Boeing 737'},
 'airline': {'name': 'Sun Express', 'iata': 'XQ', 'icao': 'SXS'}}

In [12]:
flights_json["arrivals"][0].keys()

dict_keys(['departure', 'arrival', 'number', 'callSign', 'status', 'codeshareStatus', 'isCargo', 'aircraft', 'airline'])

In [59]:
# Flatten every arrival record into one flat dict (one future DataFrame row).
# The chained .get(..., {}) calls tolerate records with a missing
# "departure" / "airport" / "scheduledTime" section: the field becomes None
# instead of the whole cell failing with a KeyError.
flight_items = [
    {
        # Key fixed: it used to be "arrival airport_icao" (with a space), which did
        # not match the arrival_airport_icao column used in the rest of the notebook.
        "arrival_airport_icao": icao,
        "departure_airport_icao": item.get("departure", {}).get("airport", {}).get("icao"),
        "scheduled_arrival_time": item.get("arrival", {}).get("scheduledTime", {}).get("local"),
        "arrival_terminal": item.get("arrival", {}).get("terminal"),
        "flight_number": item.get("number"),
    }
    for item in flights_json["arrivals"]  # list of arrival dicts in the API response
]

# Build the frame once from the list of records.
flights_df = pd.DataFrame(flight_items)

flights_df.head()

Unnamed: 0,arrival airport_icao,departure_airport_icao,scheduled_arrival_time,arrival_terminal,flight_number
0,EDDB,LTFH,2024-09-11 06:45+02:00,1,XQ 1768
1,EDDB,KEWR,2024-09-11 07:15+02:00,1,UA 962
2,EDDB,OTHH,2024-09-11 07:20+02:00,1,QR 79
3,EDDB,ZBAA,2024-09-11 06:45+02:00,1,HU 489
4,EDDB,LOWW,2024-09-11 07:30+02:00,1,OS 221


In [33]:
# Remove the trailing UTC offset (e.g. "+02:00") from the local arrival time.
# The pattern is anchored to the end of the string, so re-running this cell is a
# no-op; the previous blind slice (.str[:-6]) cut into the timestamp itself on a
# second run. It also covers negative offsets such as "-05:00".
flights_df["scheduled_arrival_time"] = flights_df["scheduled_arrival_time"].str.replace(
    r"[+-]\d{2}:\d{2}$", "", regex=True
)
flights_df.head()

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number
0,EDDB,LTFH,2024-09-11 06:45,XQ 1768
1,EDDB,KEWR,2024-09-11 07:15,UA 962
2,EDDB,OTHH,2024-09-11 07:20,QR 79
3,EDDB,ZBAA,2024-09-11 06:45,HU 489
4,EDDB,LOWW,2024-09-11 07:30,OS 221


In [35]:
import pandas as pd
from datetime import datetime, timedelta
import requests
from pytz import timezone

def get_flight_data(icao_list):
  """Fetch tomorrow's scheduled arrivals for every airport in ``icao_list``.

  "Tomorrow" is the day after the current date in the Europe/Berlin timezone.
  For each airport the day is requested as two windows (00:00-11:59 and
  12:00-23:59) from the AeroDataBox airport-flights endpoint.

  Parameters
  ----------
  icao_list : iterable of str
      ICAO codes of the arrival airports, e.g. ``["EDDB"]``.

  Returns
  -------
  pandas.DataFrame
      Columns ``arrival_airport_icao``, ``departure_airport_icao``,
      ``scheduled_arrival_time`` (the API's local time with the UTC offset
      removed, as datetime), ``flight_number`` and ``data_retrieved_at``
      (Berlin wall-clock time of the request, as datetime).
      An empty frame with these columns if no arrivals were returned.

  Raises
  ------
  requests.HTTPError
      If the API answers with an error status.
  """
  berlin_timezone = timezone('Europe/Berlin')
  tomorrow = datetime.now(berlin_timezone).date() + timedelta(days=1)

  # the api can only make 12 hour calls, therefore, two 12 hour calls make a full day:
  # a morning call followed by an afternoon call
  time_windows = [("00:00", "11:59"),
                  ("12:00", "23:59")]

  # Identical for every request, so build them once.
  querystring = {"withLeg":"true",
                 "direction":"Arrival",
                 "withCancelled":"false",
                 "withCodeshared":"true",
                 "withCargo":"false",
                 "withPrivate":"false"}

  headers = {
      "X-RapidAPI-Key": RAPIDAPI_KEY,
      "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
  }

  flight_items = []

  for icao in icao_list:
    for window_start, window_end in time_windows:
      url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{window_start}/{tomorrow}T{window_end}"

      response = requests.request("GET",
                                  url,
                                  headers = headers,
                                  params = querystring,
                                  timeout = 30)
      # Surface HTTP errors here rather than as a KeyError on 'arrivals' below.
      response.raise_for_status()

      # A response without a body (nothing to report for the window) is
      # treated as "no arrivals" instead of failing in .json().
      flights_json = response.json() if response.content else {}

      retrieval_time = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")

      for item in flights_json.get("arrivals", []):
        # Chained .get() calls: a record without a departure airport or a
        # scheduled time yields None for that field instead of a KeyError.
        flight_items.append({
            "arrival_airport_icao": icao,
            "departure_airport_icao": item.get("departure", {}).get("airport", {}).get("icao"),
            "scheduled_arrival_time": item.get("arrival", {}).get("scheduledTime", {}).get("local"),
            "flight_number": item.get("number"),
            "data_retrieved_at": retrieval_time
        })

  # Passing the columns explicitly keeps the frame well-formed (and the
  # conversions below valid) even when no flight was collected.
  flights_df = pd.DataFrame(flight_items,
                            columns=["arrival_airport_icao",
                                     "departure_airport_icao",
                                     "scheduled_arrival_time",
                                     "flight_number",
                                     "data_retrieved_at"])

  # Strip the trailing UTC offset ("+02:00", "-05:00", ...) and keep the local wall-clock time.
  flights_df["scheduled_arrival_time"] = pd.to_datetime(
      flights_df["scheduled_arrival_time"].str.replace(r"[+-]\d{2}:\d{2}$", "", regex=True)
  )
  flights_df["data_retrieved_at"] = pd.to_datetime(flights_df["data_retrieved_at"])

  return flights_df

In [81]:
icao_list = ["EDDB"]

get_flight_data(icao_list)

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number,data_retrieved_at
0,EDDB,HEMA,2024-09-12 06:20:00,SM 2962,2024-09-11 12:51:05
1,EDDB,LTBJ,2024-09-12 06:30:00,XQ 966,2024-09-11 12:51:05
2,EDDB,LPCV,2024-09-12 06:45:00,XQ 1774,2024-09-11 12:51:05
3,EDDB,ZBAA,2024-09-12 06:45:00,HU 489,2024-09-11 12:51:05
4,EDDB,KEWR,2024-09-12 07:15:00,UA 962,2024-09-11 12:51:05
...,...,...,...,...,...
345,EDDB,LEMD,2024-09-12 23:00:00,I2 3674,2024-09-11 12:51:06
346,EDDB,EGCC,2024-09-12 23:00:00,FR 1155,2024-09-11 12:51:06
347,EDDB,LFPO,2024-09-12 23:10:00,TO 3412,2024-09-11 12:51:06
348,EDDB,LEPA,2024-09-12 23:15:00,EW 8595,2024-09-11 12:51:06


In [83]:
def tomorrows_flight_arrivals(icao_list):
    """Collect tomorrow's arrivals for the given airports via ``pd.json_normalize``.

    "Tomorrow" is the day after the current Europe/Berlin date. Each airport
    is queried in two windows (00:00-11:59 and 12:00-23:59), and the
    ``movement.*`` fields of every arrival are renamed to friendlier column
    names.

    Parameters
    ----------
    icao_list : iterable of str
        ICAO codes of the arrival airports.

    Returns
    -------
    pandas.DataFrame
        Columns ``arrival_airport_icao``, ``flight_number``, ``arrival_time``
        (local time string without UTC offset), ``departure_city``,
        ``departure_airport_icao`` and ``data_retrieved_on`` (Berlin date of
        the request). Empty, with the same columns, when nothing was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # NOTE: the API key literal that used to be assigned here was never used
    # (the header reads RAPIDAPI_KEY) and has been removed; a key that was
    # committed in a notebook should be treated as leaked and rotated.

    berlin_timezone = timezone('Europe/Berlin')
    today = datetime.now(berlin_timezone).date()
    tomorrow = (today + timedelta(days=1))

    # API field -> output column
    renamed_columns = {"number": "flight_number",
                       "movement.scheduledTime.local": "arrival_time",
                       "movement.airport.name": "departure_city",
                       "movement.airport.icao": "departure_airport_icao"}
    output_columns = ["arrival_airport_icao", "flight_number", "arrival_time",
                      "departure_city", "departure_airport_icao", "data_retrieved_on"]

    time_windows = [("00:00", "11:59"), ("12:00", "23:59")]

    # Identical for every request, so build them once.
    querystring = {"direction":"Arrival","withCancelled":"false"}
    headers = {
        "X-RapidAPI-Key": RAPIDAPI_KEY,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
        }

    list_for_arrivals_df = []

    for icao in icao_list:
        for window_start, window_end in time_windows:
            url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{window_start}/{tomorrow}T{window_end}"

            response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)
            # Surface HTTP errors here rather than as a KeyError further down.
            response.raise_for_status()

            flights_resp = response.json() if response.content else {}
            arrivals = flights_resp.get("arrivals", [])
            if not arrivals:
                # Nothing in this window: normalising an empty list yields a
                # frame without the expected columns, so skip it.
                continue

            arrivals_df = pd.json_normalize(arrivals)[list(renamed_columns)].rename(columns=renamed_columns)
            arrivals_df["arrival_airport_icao"] = icao
            # Same Berlin-based date as `today`, not the machine's local date.
            arrivals_df["data_retrieved_on"] = today

            # fixing arrival_time: drop the trailing UTC offset, positive or negative
            arrivals_df["arrival_time"] = arrivals_df["arrival_time"].str.replace(
                r"[+-]\d{2}:\d{2}$", "", regex=True
            )

            list_for_arrivals_df.append(arrivals_df[output_columns])

    # pd.concat refuses an empty list; return an empty, correctly shaped frame.
    if not list_for_arrivals_df:
        return pd.DataFrame(columns=output_columns)

    return pd.concat(list_for_arrivals_df, ignore_index=True)

In [93]:
icao_list = ["EDDB"]

flights_df = get_flight_data(icao_list)
flights_df

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number,data_retrieved_at
0,EDDB,HEMA,2024-09-12 06:20:00,SM 2962,2024-09-11 12:52:35
1,EDDB,LTBJ,2024-09-12 06:30:00,XQ 966,2024-09-11 12:52:35
2,EDDB,LPCV,2024-09-12 06:45:00,XQ 1774,2024-09-11 12:52:35
3,EDDB,ZBAA,2024-09-12 06:45:00,HU 489,2024-09-11 12:52:35
4,EDDB,KEWR,2024-09-12 07:15:00,UA 962,2024-09-11 12:52:35
...,...,...,...,...,...
345,EDDB,LEMD,2024-09-12 23:00:00,IB 3674,2024-09-11 12:52:35
346,EDDB,LEMD,2024-09-12 23:00:00,I2 3674,2024-09-11 12:52:35
347,EDDB,LFPO,2024-09-12 23:10:00,TO 3412,2024-09-11 12:52:35
348,EDDB,LEPA,2024-09-12 23:15:00,EW 8595,2024-09-11 12:52:35


In [95]:
flights_df

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number,data_retrieved_at
0,EDDB,HEMA,2024-09-12 06:20:00,SM 2962,2024-09-11 12:52:35
1,EDDB,LTBJ,2024-09-12 06:30:00,XQ 966,2024-09-11 12:52:35
2,EDDB,LPCV,2024-09-12 06:45:00,XQ 1774,2024-09-11 12:52:35
3,EDDB,ZBAA,2024-09-12 06:45:00,HU 489,2024-09-11 12:52:35
4,EDDB,KEWR,2024-09-12 07:15:00,UA 962,2024-09-11 12:52:35
...,...,...,...,...,...
345,EDDB,LEMD,2024-09-12 23:00:00,IB 3674,2024-09-11 12:52:35
346,EDDB,LEMD,2024-09-12 23:00:00,I2 3674,2024-09-11 12:52:35
347,EDDB,LFPO,2024-09-12 23:10:00,TO 3412,2024-09-11 12:52:35
348,EDDB,LEPA,2024-09-12 23:15:00,EW 8595,2024-09-11 12:52:35


### Push the flight information to SQL

In [69]:
import os
from urllib.parse import quote_plus

schema = "sql_workshop"
host = "34.77.20.25"
user = "root"
# The password must not live in the notebook: export MYSQL_PASSWORD before
# starting Jupyter. With the variable unset the password is empty and the
# server rejects the login. The password that was previously written here
# should be considered leaked and changed.
password = os.environ.get("MYSQL_PASSWORD", "")
port = 3306

# quote_plus() escapes characters such as "@", ":" or "/" that would otherwise
# break the URL when they appear in the password.
connection_string = f'mysql+pymysql://{user}:{quote_plus(password)}@{host}:{port}/{schema}'

In [97]:
# Append the collected flights to the `flights` table (the DataFrame index is not written).
# Because of if_exists='append', running this cell again inserts the same rows a second time.
flights_df.to_sql(
    name='flights',
    con=connection_string,
    if_exists='append',
    index=False,
)

350