In [1]:
import pandas as pd
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup as bs
from my_passwords import my_password,API_key_flights
import my_functions as fn

In [2]:
# search by free text Berlin
url = "https://aerodatabox.p.rapidapi.com/airports/search/term"

querystring = {"q":"berlin","limit":"10","withFlightInfoOnly":"true"}

headers = {
	"X-RapidAPI-Key": API_key_flights,
	"X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=querystring)
flight_json1=response.json()
print(flight_json1)

{'searchBy': 'berlin', 'count': 1, 'items': [{'icao': 'EDDB', 'iata': 'BER', 'name': 'Berlin Brandenburg', 'shortName': 'Brandenburg', 'municipalityName': 'Berlin', 'location': {'lat': 52.35139, 'lon': 13.493889}, 'countryCode': 'DE'}]}


In [3]:
pd.json_normalize(flight_json1["items"])

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,location.lat,location.lon
0,EDDB,BER,Berlin Brandenburg,Brandenburg,Berlin,DE,52.35139,13.493889


In [4]:
# search by location Berlin
url = "https://aerodatabox.p.rapidapi.com/airports/search/location"

querystring = {"lat":"52.31","lon":"13.24","radiusKm":"50","limit":"10","withFlightInfoOnly":"True"}

headers = {
	"X-RapidAPI-Key": API_key_flights,
	"X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=querystring)

flight_json2=response.json()
print(flight_json2)

{'searchBy': {'lat': 52.31, 'lon': 13.24}, 'count': 1, 'items': [{'icao': 'EDDB', 'iata': 'BER', 'name': 'Berlin Brandenburg', 'shortName': 'Brandenburg', 'municipalityName': 'Berlin', 'location': {'lat': 52.35139, 'lon': 13.493889}, 'countryCode': 'DE'}]}


In [5]:
pd.json_normalize(flight_json2["items"])

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,location.lat,location.lon
0,EDDB,BER,Berlin Brandenburg,Brandenburg,Berlin,DE,52.35139,13.493889


In [6]:
# search by free text Munich
url = "https://aerodatabox.p.rapidapi.com/airports/search/term"

querystring = {"q":"Munich","limit":"10","withFlightInfoOnly":"true"}

headers = {
	"X-RapidAPI-Key": API_key_flights,
	"X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=querystring)
flight_json3=response.json()
print(flight_json3)

{'searchBy': 'Munich', 'count': 1, 'items': [{'icao': 'EDDM', 'iata': 'MUC', 'name': 'Munich ', 'shortName': 'Munich', 'municipalityName': 'Munich', 'location': {'lat': 48.3538, 'lon': 11.7861}, 'countryCode': 'DE'}]}


In [7]:
pd.json_normalize(flight_json3["items"])

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,location.lat,location.lon
0,EDDM,MUC,Munich,Munich,Munich,DE,48.3538,11.7861


In [8]:
# search by location Munich
url = "https://aerodatabox.p.rapidapi.com/airports/search/location"

querystring = {"lat":"48.08","lon":"11.34","radiusKm":"50","limit":"10","withFlightInfoOnly":"True"}



headers = {
	"X-RapidAPI-Key": API_key_flights,
	"X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=querystring)

flight_json4=response.json()
print(flight_json4)

{'searchBy': {'lat': 48.08, 'lon': 11.34}, 'count': 1, 'items': [{'icao': 'EDDM', 'iata': 'MUC', 'name': 'Munich ', 'shortName': 'Munich', 'municipalityName': 'Munich', 'location': {'lat': 48.3538, 'lon': 11.7861}, 'countryCode': 'DE'}]}


In [9]:
pd.json_normalize(flight_json4["items"])

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,location.lat,location.lon
0,EDDM,MUC,Munich,Munich,Munich,DE,48.3538,11.7861


Connect to SQL

In [10]:
schema = "gans"
host = "127.0.0.1"
user = "root"
password = my_password
port = 3306

connection_string = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

In [11]:
cities_from_sql_df = pd.read_sql("cities", con=connection_string)
cities_from_sql_df

Unnamed: 0,city_id,city,country_code
0,1,Berlin,DE
1,2,Hamburg,DE
2,3,Munich,DE
3,4,Stuttgart,DE


Airports

In [12]:
# This function takes the cities dataframe and returns the Dataframe that contains all airports data
def get_airpots_info(cities_df):
  
  # Initialising the airport dictionary
  airport_dic = {"city_id": [],
    "airport_icao": [],
    "airport_name": []
    }
  
  for city_name in cities_df["city"]: # For each city in the cities Dataframe
    
    # Define the sections that will together form the url 
    # These sections were generated by the API
    url = "https://aerodatabox.p.rapidapi.com/airports/search/term" # URL of the airport API
    querystring = {"q":city_name,"limit":"10","withFlightInfoOnly":"True"} # We need the city name and only airports with flights
    headers = {
      "X-RapidAPI-Key": API_key_flights, # Change the generated API key to our API Key
      "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
    }

    airport = requests.get(url, headers=headers, params=querystring) # Send the request
    airport_json=airport.json() # Get the response in a JSON format
    
    # We take the city_id and the country code from the cities dataframe
    city_id = cities_df.loc[cities_df["city"] == city_name, "city_id"].values[0]
    country_code = cities_df.loc[cities_df["city"] == city_name, "country_code"].values[0]
    for w in airport_json["items"]: # For each airport in the JSON response
      if(w["countryCode"]==country_code): # Since we are searching for airport by the airport name and not by coordinates,  
                                          # we need to check if the country code is the same as the one of the city
                                          # The same city name can exist in more than one country
        # Adding all the information about the airport to the airport dictionary
        airport_dic["city_id"].append(city_id)
        airport_dic["airport_icao"].append(w["icao"])
        airport_dic["airport_name"].append(w["name"])
    
    # Create the weather Dataframe
    airport_df = pd.DataFrame(airport_dic)

  return airport_df

In [13]:
airport_infos_df=get_airpots_info(cities_from_sql_df)
airport_infos_df

Unnamed: 0,city_id,airport_icao,airport_name
0,1,EDDB,Berlin Brandenburg
1,2,EDDH,Hamburg
2,3,EDDM,Munich
3,4,EDDS,Stuttgart


In [14]:
# Send airports data to the table "airports" and "cities_airports"
airport_infos_df.loc[:,["airport_icao","airport_name"]].to_sql('airports',
                if_exists='append',
                con=connection_string,
                index=False)

airport_infos_df.loc[:,["city_id","airport_icao"]].to_sql('cities_airports',
                if_exists='append',
                con=connection_string,
                index=False)

4

In [15]:
airport_infos_from_sql = pd.read_sql("airports", con=connection_string) # Read the table "airports"
print(airport_infos_from_sql)
cities_airports_from_sql = pd.read_sql("cities_airports", con=connection_string) # Read the table "cities_airports"
print(cities_airports_from_sql)

  airport_icao        airport_name
0         EDDB  Berlin Brandenburg
1         EDDH            Hamburg 
2         EDDM             Munich 
3         EDDS          Stuttgart 
   city_id airport_icao
0        1         EDDB
1        2         EDDH
2        3         EDDM
3        4         EDDS


Flights

In [16]:
today=datetime.now()
tomorrow= (today + timedelta(1)).strftime("%Y-%m-%d")
from_date1=tomorrow+"T"+"00:00"

to_date2=tomorrow+"T"+"11:59"
print(from_date1,to_date2)
from_date3=tomorrow+"T"+"12:00"
to_date4=tomorrow+"T"+"23:59"
print(from_date3,to_date4)

2024-03-19T00:00 2024-03-19T11:59
2024-03-19T12:00 2024-03-19T23:59


In [17]:
icao_list=airport_infos_df["airport_icao"]
icao_list

0    EDDB
1    EDDH
2    EDDM
3    EDDS
Name: airport_icao, dtype: object

In [18]:
import json
import requests

url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/EDDB/{from_date1}/{to_date2}"

querystring = {"direction":"Arrival","withCancelled":"true"}

headers = {
    "X-RapidAPI-Key": API_key_flights,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=querystring)

print(response.json())
pd.json_normalize(response.json()["arrivals"])

{'arrivals': [{'movement': {'airport': {'icao': 'OTHH', 'iata': 'DOH', 'name': 'Doha'}, 'scheduledTime': {'utc': '2024-03-19 05:50Z', 'local': '2024-03-19 06:50+01:00'}, 'revisedTime': {'utc': '2024-03-19 05:50Z', 'local': '2024-03-19 06:50+01:00'}, 'terminal': '1', 'gate': 'X07', 'baggageBelt': 'A4', 'quality': ['Basic', 'Live']}, 'number': 'QR 79', 'status': 'Expected', 'codeshareStatus': 'IsOperator', 'isCargo': False, 'aircraft': {'model': 'Boeing 787-9'}, 'airline': {'name': 'Qatar Airways', 'iata': 'QR', 'icao': 'QTR'}}, {'movement': {'airport': {'icao': 'EVRA', 'iata': 'RIX', 'name': 'Riga'}, 'scheduledTime': {'utc': '2024-03-19 06:35Z', 'local': '2024-03-19 07:35+01:00'}, 'revisedTime': {'utc': '2024-03-19 06:35Z', 'local': '2024-03-19 07:35+01:00'}, 'terminal': '2', 'gate': 'Z34', 'baggageBelt': 'C1', 'quality': ['Basic', 'Live']}, 'number': 'FR 2600', 'status': 'Expected', 'codeshareStatus': 'IsOperator', 'isCargo': False, 'aircraft': {'model': 'Boeing 737-800 (winglets)'}, '

Unnamed: 0,number,status,codeshareStatus,isCargo,movement.airport.icao,movement.airport.iata,movement.airport.name,movement.scheduledTime.utc,movement.scheduledTime.local,movement.revisedTime.utc,...,movement.gate,movement.baggageBelt,movement.quality,aircraft.model,airline.name,airline.iata,airline.icao,aircraft.reg,aircraft.modeS,callSign
0,QR 79,Expected,IsOperator,False,OTHH,DOH,Doha,2024-03-19 05:50Z,2024-03-19 06:50+01:00,2024-03-19 05:50Z,...,X07,A4,"[Basic, Live]",Boeing 787-9,Qatar Airways,QR,QTR,,,
1,FR 2600,Expected,IsOperator,False,EVRA,RIX,Riga,2024-03-19 06:35Z,2024-03-19 07:35+01:00,2024-03-19 06:35Z,...,Z34,C1,"[Basic, Live]",Boeing 737-800 (winglets),Ryanair,FR,RYR,,,
2,DS 1185,Expected,IsCodeshared,False,LFSB,BSL,Bâle/Mulhouse,2024-03-19 06:45Z,2024-03-19 07:45+01:00,2024-03-19 06:45Z,...,A36,A3,"[Basic, Live]",Airbus A320,Easyjet Switzerland,DS,EZS,HB-JXR,4B1A2B,
3,U2 1185,Expected,IsOperator,False,LFSB,BSL,Bâle/Mulhouse,2024-03-19 06:45Z,2024-03-19 07:45+01:00,2024-03-19 06:45Z,...,A36,A3,"[Basic, Live]",Airbus A320,easyJet,U2,EZY,HB-JXR,4B1A2B,
4,W6 2315,Expected,IsOperator,False,LHBP,BUD,Budapest,2024-03-19 06:45Z,2024-03-19 07:45+01:00,2024-03-19 06:45Z,...,Z36,C1,"[Basic, Live]",Airbus A321-100,Wizz Air,W6,WZZ,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,AF 1734,Expected,IsOperator,False,LFPG,CDG,Paris,2024-03-19 10:30Z,2024-03-19 11:30+01:00,2024-03-19 10:30Z,...,B07,B1,"[Basic, Live]",Embraer Lineage 1000,Air France,AF,AFR,,,
72,EC 5148,Expected,IsCodeshared,False,LFPG,CDG,Paris,2024-03-19 10:35Z,2024-03-19 11:35+01:00,2024-03-19 10:35Z,...,A32,A3,"[Basic, Live]",Airbus A320-200,EasyJet Europe,EC,EJU,,,
73,U2 5148,Expected,IsOperator,False,LFPG,CDG,Paris,2024-03-19 10:35Z,2024-03-19 11:35+01:00,2024-03-19 10:35Z,...,A32,A3,"[Basic, Live]",Airbus A320-200,easyJet,U2,EZY,,,
74,FR 133,Expected,IsOperator,False,LEBL,BCN,Barcelona,2024-03-19 10:55Z,2024-03-19 11:55+01:00,2024-03-19 10:55Z,...,Z34,C1,"[Basic, Live]",Boeing 737-800 (winglets),Ryanair,FR,RYR,,,


In [19]:
url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/EDDB/{from_date1}/{to_date2}"

querystring = {"withLeg":"true",
               "direction":"Arrival",
               "withCancelled":"false",
               "withCodeshared":"true",
               "withCargo":"false",
               "withPrivate":"false"}

headers = {
    "X-RapidAPI-Key": API_key_flights,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=querystring)

print(response.json())
pd.json_normalize(response.json()["arrivals"])

{'arrivals': [{'departure': {'airport': {'icao': 'OTHH', 'iata': 'DOH', 'name': 'Doha'}, 'scheduledTime': {'utc': '2024-03-18 23:35Z', 'local': '2024-03-19 02:35+03:00'}, 'quality': ['Basic']}, 'arrival': {'scheduledTime': {'utc': '2024-03-19 05:50Z', 'local': '2024-03-19 06:50+01:00'}, 'revisedTime': {'utc': '2024-03-19 05:50Z', 'local': '2024-03-19 06:50+01:00'}, 'terminal': '1', 'gate': 'X07', 'baggageBelt': 'A4', 'quality': ['Basic', 'Live']}, 'number': 'QR 79', 'status': 'Expected', 'codeshareStatus': 'IsOperator', 'isCargo': False, 'aircraft': {'model': 'Boeing 787-9'}, 'airline': {'name': 'Qatar Airways', 'iata': 'QR', 'icao': 'QTR'}}, {'departure': {'airport': {'icao': 'EVRA', 'iata': 'RIX', 'name': 'Riga'}, 'scheduledTime': {'utc': '2024-03-19 04:45Z', 'local': '2024-03-19 06:45+02:00'}, 'quality': ['Basic']}, 'arrival': {'scheduledTime': {'utc': '2024-03-19 06:35Z', 'local': '2024-03-19 07:35+01:00'}, 'revisedTime': {'utc': '2024-03-19 06:35Z', 'local': '2024-03-19 07:35+01:0

Unnamed: 0,number,status,codeshareStatus,isCargo,departure.airport.icao,departure.airport.iata,departure.airport.name,departure.scheduledTime.utc,departure.scheduledTime.local,departure.quality,...,airline.iata,airline.icao,departure.revisedTime.utc,departure.revisedTime.local,departure.gate,aircraft.reg,aircraft.modeS,departure.terminal,departure.checkInDesk,callSign
0,QR 79,Expected,IsOperator,False,OTHH,DOH,Doha,2024-03-18 23:35Z,2024-03-19 02:35+03:00,[Basic],...,QR,QTR,,,,,,,,
1,FR 2600,Expected,IsOperator,False,EVRA,RIX,Riga,2024-03-19 04:45Z,2024-03-19 06:45+02:00,[Basic],...,FR,RYR,,,,,,,,
2,DS 1185,Expected,IsCodeshared,False,LFSB,BSL,Bâle/Mulhouse,2024-03-19 05:15Z,2024-03-19 06:15+01:00,"[Basic, Live]",...,DS,EZS,2024-03-19 05:15Z,2024-03-19 06:15+01:00,86,HB-JXR,4B1A2B,,,
3,U2 1185,Expected,IsOperator,False,LFSB,BSL,Bâle/Mulhouse,2024-03-19 05:15Z,2024-03-19 06:15+01:00,"[Basic, Live]",...,U2,EZY,2024-03-19 05:15Z,2024-03-19 06:15+01:00,86,HB-JXR,4B1A2B,,,
4,W6 2315,Expected,IsOperator,False,LHBP,BUD,Budapest,2024-03-19 05:10Z,2024-03-19 06:10+01:00,[Basic],...,W6,WZZ,,,,,,2B,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,AF 1734,Expected,IsOperator,False,LFPG,CDG,Paris,2024-03-19 08:45Z,2024-03-19 09:45+01:00,[Basic],...,AF,AFR,,,,,,2G,,
72,EC 5148,Expected,IsCodeshared,False,LFPG,CDG,Paris,,,[],...,EC,EJU,,,,,,,,
73,U2 5148,Expected,IsOperator,False,LFPG,CDG,Paris,2024-03-19 08:45Z,2024-03-19 09:45+01:00,[Basic],...,U2,EZY,,,,,,2B,,
74,FR 133,Expected,IsOperator,False,LEBL,BCN,Barcelona,2024-03-19 08:15Z,2024-03-19 09:15+01:00,"[Basic, Live]",...,FR,RYR,2024-03-19 08:15Z,2024-03-19 09:15+01:00,,,,2,,


In [21]:
from pytz import timezone
from datetime import datetime,timedelta
icao = "EDDB" # Searching for flights for Berlin Brandenburg airport
berlin_timezone = timezone('Europe/Berlin')
today=datetime.now(berlin_timezone).date()
tomorrow= (today + timedelta(1))
times = [["00:00","11:59"],["12:00","23:59"]]
# The AeroDataBox API only returns flight data for 12 hours. 
# To get data for a whole day, we used two requests in a for loop. 
for time in times:
    url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{time[0]}/{tomorrow}T{time[1]}"

    querystring = {"withLeg":"true",
                "direction":"Arrival",
                "withCancelled":"false",
                "withCodeshared":"true",
                "withCargo":"false",
                "withPrivate":"false"}

    headers = {
        'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
        'x-rapidapi-key': "14a44098c8mshe4536a007985112p1e3b4bjsn8fd805eb6bd4" #api_key
        }

    response = requests.request("GET",
                                url,
                                headers = headers,
                                params = querystring)

    flights_json = response.json()

In [22]:
flights_json

{'arrivals': [{'departure': {'airport': {'icao': 'EKCH',
     'iata': 'CPH',
     'name': 'Copenhagen'},
    'scheduledTime': {'utc': '2024-03-19 10:00Z',
     'local': '2024-03-19 11:00+01:00'},
    'revisedTime': {'utc': '2024-03-19 10:00Z',
     'local': '2024-03-19 11:00+01:00'},
    'terminal': 'erminal2',
    'quality': ['Basic', 'Live']},
   'arrival': {'scheduledTime': {'utc': '2024-03-19 11:00Z',
     'local': '2024-03-19 12:00+01:00'},
    'revisedTime': {'utc': '2024-03-19 11:00Z',
     'local': '2024-03-19 12:00+01:00'},
    'terminal': '2',
    'gate': 'Z30',
    'baggageBelt': 'C2',
    'quality': ['Basic', 'Live']},
   'number': 'D8 3302',
   'status': 'Expected',
   'codeshareStatus': 'IsOperator',
   'isCargo': False,
   'aircraft': {'model': 'Boeing 737 MAX 8 '},
   'airline': {'name': 'Norwegian Air Sweden', 'iata': 'D8', 'icao': 'NSZ'}},
  {'departure': {'airport': {'icao': 'BIKF',
     'iata': 'KEF',
     'name': 'Reykjavik'},
    'scheduledTime': {'utc': '2024-03-

In [23]:
pd.json_normalize(flights_json["arrivals"])

Unnamed: 0,number,status,codeshareStatus,isCargo,departure.airport.icao,departure.airport.iata,departure.airport.name,departure.scheduledTime.utc,departure.scheduledTime.local,departure.revisedTime.utc,...,arrival.quality,aircraft.model,airline.name,airline.iata,airline.icao,departure.checkInDesk,departure.gate,aircraft.reg,aircraft.modeS,callSign
0,D8 3302,Expected,IsOperator,False,EKCH,CPH,Copenhagen,2024-03-19 10:00Z,2024-03-19 11:00+01:00,2024-03-19 10:00Z,...,"[Basic, Live]",Boeing 737 MAX 8,Norwegian Air Sweden,D8,NSZ,,,,,
1,FI 528,Expected,IsOperator,False,BIKF,KEF,Reykjavik,2024-03-19 07:30Z,2024-03-19 07:30+00:00,2024-03-19 07:30Z,...,"[Basic, Live]",Boeing 737 MAX 8,Icelandair,FI,ICE,,,,,
2,LH 2206,Expected,IsOperator,False,EDDM,MUC,Munich,2024-03-19 09:50Z,2024-03-19 10:50+01:00,2024-03-19 09:50Z,...,"[Basic, Live]",Airbus A320-200 (sharklets),Lufthansa,LH,DLH,,,,,
3,EC 5064,Expected,IsCodeshared,False,LIML,LIN,Milan,,,,...,"[Basic, Live]",Airbus A319,EasyJet Europe,EC,EJU,,,,,
4,FR 2419,Expected,IsOperator,False,GCTS,TFS,Tenerife Island,2024-03-19 06:00Z,2024-03-19 06:00+00:00,2024-03-19 06:00Z,...,"[Basic, Live]",Boeing 737-800 (winglets),Ryanair,FR,RYR,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,U2 5186,Expected,IsOperator,False,LSGG,GVA,Geneva,2024-03-19 19:55Z,2024-03-19 20:55+01:00,,...,"[Basic, Live]",Airbus A320-200,easyJet,U2,EZY,,,,,
146,FR 9165,Expected,IsOperator,False,LIPH,TSF,Treviso,2024-03-19 20:10Z,2024-03-19 21:10+01:00,,...,"[Basic, Live]",Boeing 737 MAX 8,Ryanair,FR,RYR,,,,,
147,FR 176,Expected,IsOperator,False,EGSS,STN,London,2024-03-19 20:10Z,2024-03-19 20:10+00:00,2024-03-19 20:10Z,...,"[Basic, Live]",Boeing 737 MAX 8,Ryanair,FR,RYR,,,,,
148,I2 3674,Expected,IsOperator,False,LEMD,MAD,Madrid,2024-03-19 19:00Z,2024-03-19 20:00+01:00,2024-03-19 19:00Z,...,"[Basic, Live]",Airbus A320,Iberia Express,I2,IBS,,,,,


In [24]:
def get_arrival_flights_info(list_icao):
    
    # Initializing the Dataframes
    arrivals_df=[]
    all_arrivals_df=[]

    # We need to have data about flights for the next day
    berlin_timezone = timezone('Europe/Berlin')
    today=datetime.now(berlin_timezone).date()
    tomorrow= (today + timedelta(1))
    # The AeroDataBox API only returns flight data for 12 hours. 
    # To get data for a whole day, we used two requests in a for loop. 
    times = [["00:00","11:59"],["12:00","23:59"]]

    for icao in list_icao: # For each airport ICAO in the list
      
      for time in times: # For each 12 hours in the day
        # The sections that form the URL were generated by the flights API
        url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{time[0]}/{tomorrow}T{time[1]}" # URL fo the flights API

        querystring = {"withLeg":"true",
          "direction":"Arrival", # We only need Arrival flights
          "withCancelled":"false",
          "withCodeshared":"true",
          "withCargo":"false",
          "withPrivate":"false"}
        
        headers = {
          "X-RapidAPI-Key": API_key_flights,
          "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
        }
        response = requests.get(url, headers=headers, params=querystring) # Send the request
        # In this function, we used json_normalize rather than using dictionaries as in the previous functions
        # JSON normalize JSON data into a Dataframe
        arrivals_df=pd.json_normalize(response.json()["arrivals"]) # Add information about arrivals to the arrivals_df
        # Add the arrival airport ICAO and the time when data were collected to our dataframe since this information is not included in the JSON response
        arrivals_df["arrival_airport_icao"]=icao
        arrivals_df["data_collected_time"]=datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")
        
      
        # Concat all the generated Dataframe to our flights Dataframe
        all_arrivals_df.append(arrivals_df)
        all_flights_arrivals=pd.concat(all_arrivals_df, ignore_index=True)

        # Extract only the needed columns
        all_flights_arrivals = all_flights_arrivals.loc[:,["number","departure.airport.icao", "arrival.scheduledTime.local","arrival.terminal","arrival_airport_icao","data_collected_time"]]
        # Rename the columns
        all_flights_arrivals = all_flights_arrivals.rename(columns={"number": "flight_number", "departure.airport.icao": "departure_airport_icao", "arrival.scheduledTime.local": "scheduled_arrival_time","arrival.terminal":"arrival_terminal"})
        # Data cleaning for the scheduled_arrival_time
        all_flights_arrivals["scheduled_arrival_time"] = all_flights_arrivals["scheduled_arrival_time"].str[:-6]
        # Change the type of scheduled_arrival_time and data_collected_time
        all_flights_arrivals["scheduled_arrival_time"] = pd.to_datetime(all_flights_arrivals["scheduled_arrival_time"])
        all_flights_arrivals["data_collected_time"] = pd.to_datetime(all_flights_arrivals["data_collected_time"])
        
    return all_flights_arrivals


In [25]:
all_flights_arrivals_df=get_arrival_flights_info(airport_infos_df["airport_icao"])

In [26]:
all_flights_arrivals_df

Unnamed: 0,flight_number,departure_airport_icao,scheduled_arrival_time,arrival_terminal,arrival_airport_icao,data_collected_time
0,QR 79,OTHH,2024-03-19 06:50:00,1,EDDB,2024-03-18 16:05:32
1,FR 2600,EVRA,2024-03-19 07:35:00,2,EDDB,2024-03-18 16:05:32
2,DS 1185,LFSB,2024-03-19 07:45:00,1,EDDB,2024-03-18 16:05:32
3,U2 1185,LFSB,2024-03-19 07:45:00,1,EDDB,2024-03-18 16:05:32
4,W6 2315,LHBP,2024-03-19 07:45:00,2,EDDB,2024-03-18 16:05:32
...,...,...,...,...,...,...
807,EW 2465,EGLL,2024-03-19 21:10:00,1,EDDS,2024-03-18 16:05:34
808,EW 2785,LHBP,2024-03-19 21:35:00,1,EDDS,2024-03-18 16:05:34
809,KL 1879,EHAM,2024-03-19 22:30:00,3,EDDS,2024-03-18 16:05:34
810,LH 136,EDDF,2024-03-19 22:40:00,1,EDDS,2024-03-18 16:05:34


In [27]:
all_flights_arrivals_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 812 entries, 0 to 811
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   flight_number           812 non-null    object        
 1   departure_airport_icao  808 non-null    object        
 2   scheduled_arrival_time  812 non-null    datetime64[ns]
 3   arrival_terminal        809 non-null    object        
 4   arrival_airport_icao    812 non-null    object        
 5   data_collected_time     812 non-null    datetime64[ns]
dtypes: datetime64[ns](2), object(4)
memory usage: 38.2+ KB


In [28]:
all_flights_arrivals_df.to_sql('flights',
                if_exists='append',
                con=connection_string,
                index=False)

812

In [29]:
flight_infos_from_sql = pd.read_sql("flights", con=connection_string)
print(flight_infos_from_sql)

     flight_id flight_number departure_airport_icao scheduled_arrival_time  \
0            1         QR 79                   OTHH    2024-03-19 06:50:00   
1            2       FR 2600                   EVRA    2024-03-19 07:35:00   
2            3       DS 1185                   LFSB    2024-03-19 07:45:00   
3            4       U2 1185                   LFSB    2024-03-19 07:45:00   
4            5       W6 2315                   LHBP    2024-03-19 07:45:00   
..         ...           ...                    ...                    ...   
807        808       EW 2465                   EGLL    2024-03-19 21:10:00   
808        809       EW 2785                   LHBP    2024-03-19 21:35:00   
809        810       KL 1879                   EHAM    2024-03-19 22:30:00   
810        811        LH 136                   EDDF    2024-03-19 22:40:00   
811        812       LH 2152                   EDDM    2024-03-19 22:35:00   

    arrival_terminal arrival_airport_icao data_collected_time  