# Collecting airport and arrival flight data
> **AeroDataBox API**

In [36]:
import os
import time
from datetime import datetime, timedelta
from urllib.parse import quote_plus

import pandas as pd
import requests
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [58]:
# Load the key/value pairs of the project's .env file into the process
# environment so the API keys and database settings read in the next cell
# are available without being hard-coded in the notebook.
load_dotenv()

True

In [59]:
# Secrets and connection settings come from the environment (populated from
# .env by load_dotenv()). Every value is required, so index os.environ
# directly: a missing variable then raises KeyError in this cell instead of
# silently becoming None and surfacing later as a connection string such as
# 'mysql+pymysql://None:None@None:None/gans'.
OpenWeatherAPIKey = os.environ['OpenWeatherAPIKey']
RapidAPIKey = os.environ['RapidAPIKey']
password_mysql = os.environ['mysql_password']
# NOTE(review): on Windows the OS already defines USERNAME and environment
# names are case-insensitive there; load_dotenv() does not override existing
# variables by default, so this may yield the OS login instead of the MySQL
# user. Consider a more specific key name in .env - TODO confirm.
my_user_mysql = os.environ['username']
my_sql_port = os.environ['my_port']
my_localhost = os.environ['localhost']


In [54]:
# Connection settings for the local MySQL "gans" schema.
schema = "gans"
host = my_localhost
user = my_user_mysql
password = password_mysql
port = my_sql_port

# Percent-encode the credentials before placing them in the URL: characters
# such as '@', ':', '/' or '#' in a password would otherwise be parsed as URL
# delimiters and the connection would target the wrong host or fail to parse.
# SQLAlchemy decodes the user and password again when it reads the URL.
connection_string = (
    f'mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}'
    f'@{host}:{port}/{schema}'
)

In [24]:
# Read the complete `additional_data` table into a DataFrame and display it.
additional_data = pd.read_sql(
    sql="additional_data",
    con=connection_string,
)
additional_data

Unnamed: 0,city_id,population,longitude,latitude,year_data_retrieved
0,1,3596999,13.0,53.0,2025-10-15 16:13:37
1,2,1964021,10.0,54.0,2025-10-15 16:13:38
2,3,1505005,12.0,48.0,2025-10-15 16:13:39


## Getting airports data

Using data from the **`cities`** table in Gans Database

In [55]:
# Read the complete `cities` table; its city_id / city_name columns drive the
# airport lookup further down.
cities_df_from_sql = pd.read_sql(
    sql="cities",
    con=connection_string,
)
cities_df_from_sql

Unnamed: 0,city_id,city_name,country
0,1,Berlin,Germany
1,2,Hamburg,Germany
2,3,Munich,Germany


In [65]:
def _geocode_city(city, timeout):
    """Return ``(lat, lon)`` for ``city`` from the OpenWeather geocoding API, or None."""
    try:
        # HTTPS keeps the API key off the wire in clear text; `params` lets
        # requests URL-encode city names containing spaces, '&', accents, etc.
        response = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={"q": city, "limit": 1, "appid": OpenWeatherAPIKey},
            timeout=timeout,
        )
        matches = response.json()
    except (requests.RequestException, ValueError) as error:
        # Only the exception type is printed: the full message can contain the
        # request URL, which includes the API key.
        print(f"Geocoding request failed for {city}: {type(error).__name__}")
        return None

    # On failure the API answers with a JSON object (e.g. an error message)
    # rather than a list, so indexing it with [0] would raise.
    if response.status_code != 200 or not isinstance(matches, list) or not matches:
        print(f"No coordinates found for {city}.")
        return None

    return matches[0]['lat'], matches[0]['lon']


def _airports_near(city, lat, lon, radius_km, limit, timeout):
    """Return the AeroDataBox airport items around a coordinate ([] if none), or None on failure."""
    url = (
        "https://aerodatabox.p.rapidapi.com/airports/search/location/"
        f"{lat}/{lon}/km/{radius_km}/{limit}"
    )
    headers = {
        "X-RapidAPI-Key": RapidAPIKey,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
    }
    querystring = {"withFlightInfoOnly": "true"}

    try:
        response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
    except requests.RequestException as error:
        print(f"Error fetching airports for {city}: {type(error).__name__}")
        return None

    if response.status_code != 200:
        print(f"Error fetching airports for {city}: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError:
        print(f"Error fetching airports for {city}: invalid JSON response")
        return None

    items = data.get("items") if isinstance(data, dict) else None
    return items or []


def airports(cities_df, radius_km=50, limit=16, timeout=10):
    """Look up the airports around every city in ``cities_df``.

    Each city name is geocoded with the OpenWeather geocoding API and the
    resulting coordinates are sent to the AeroDataBox "search by location"
    endpoint. Cities that cannot be geocoded, whose airport request fails, or
    that have no airport in range are reported with ``print`` and skipped.

    Parameters
    ----------
    cities_df : pandas.DataFrame
        Must contain the columns ``city_name`` and ``city_id``.
    radius_km : int, default 50
        Search radius around the city coordinates, in kilometres.
    limit : int, default 16
        Maximum number of airports requested per city.
    timeout : float, default 10
        Seconds to wait for each HTTP response, so a stalled connection cannot
        hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per airport with the columns ``city_id``, ``airport_name``,
        ``icao``, ``iata``, ``latitude`` and ``longitude``. When nothing is
        found the frame is empty but still carries these columns.
    """
    columns = ["city_id", "airport_name", "icao", "iata", "latitude", "longitude"]
    records = []

    for _, row in cities_df.iterrows():
        city = row["city_name"]
        city_id = row["city_id"]

        coordinates = _geocode_city(city, timeout)
        if coordinates is None:
            continue  # already reported by the helper
        lat, lon = coordinates

        items = _airports_near(city, lat, lon, radius_km, limit, timeout)
        if items is None:
            continue  # request failed; already reported by the helper
        if not items:
            print(f"No airports found near {city}.")
            continue

        for airport in items:
            # `or {}` also covers an explicit null "location" in the payload.
            location = airport.get("location") or {}
            records.append({
                "city_id": city_id,
                "airport_name": airport.get("name"),
                "icao": airport.get("icao"),
                "iata": airport.get("iata"),
                "latitude": location.get("lat"),
                "longitude": location.get("lon"),
            })

    # Passing `columns` keeps the schema stable even when `records` is empty.
    return pd.DataFrame(records, columns=columns)

In [66]:
# Look up the airports for every city loaded from the database and display them.
airports_df = airports(cities_df=cities_df_from_sql)
airports_df

Unnamed: 0,city_id,airport_name,icao,iata,latitude,longitude
0,1,Berlin -Tegel,EDDT,TXL,52.5597,13.287699
1,1,Berlin Brandenburg,EDDB,BER,52.35139,13.493889
2,2,Hamburg,EDDH,HAM,53.6304,9.988229
3,3,Munich,EDDM,MUC,48.3538,11.7861


In [40]:
# Append the airport rows to the `airports` table (the DataFrame index is not
# written). Because the rows are appended, running this cell again inserts the
# same airports a second time.
airports_df.to_sql(
    name='airports',
    con=connection_string,
    if_exists='append',
    index=False,
)

4

In [41]:
# Read the table back to verify the insert.
# NOTE(review): the result shown below has latitude/longitude as whole numbers
# (e.g. 52.0, 13.0) while airports_df held decimals (52.35139, 13.493889) -
# presumably the table's column types drop the fraction; confirm the schema.
pd.read_sql("airports", con=connection_string)

Unnamed: 0,city_id,airport_name,icao,iata,latitude,longitude
0,1,Berlin Brandenburg,EDDB,BER,52.0,13.0
1,2,Hamburg,EDDH,HAM,54.0,10.0
2,3,Munich,EDDM,MUC,48.0,12.0
3,1,Berlin -Tegel,EDDT,TXL,53.0,13.0


## Getting flights data 

Using data from the recently created **`airports`** table in Gans Database

In [16]:
# Reload the airports from the database; this replaces the in-memory
# `airports_df` built earlier with the stored copy of the table.
airports_df = pd.read_sql(
    sql="airports",
    con=connection_string,
)
airports_df

Unnamed: 0,city_id,airport_name,icao,iata,latitude,longitude
0,1,Berlin Brandenburg,EDDB,BER,52.0,13.0
1,2,Hamburg,EDDH,HAM,54.0,10.0
2,3,Munich,EDDM,MUC,48.0,12.0
3,1,Berlin -Tegel,EDDT,TXL,53.0,13.0


In [67]:
def _fetch_arrivals(icao_code, day, start, end, timeout):
    """Return the arrivals for one airport and time window ([] if none), or None on failure."""
    url = (
        "https://aerodatabox.p.rapidapi.com/flights/airports/icao/"
        f"{icao_code}/{day}T{start}/{day}T{end}"
    )
    # Only arrivals are used, so do not ask the API for departures as well.
    querystring = {"withLeg": "false",
                   "direction": "Arrival",
                   "withCancelled": "false",
                   "withCodeshared": "false",
                   "withCargo": "false",
                   "withPrivate": "false",
                   "withLocation": "false"}
    headers = {
        "x-rapidapi-key": RapidAPIKey,
        "x-rapidapi-host": "aerodatabox.p.rapidapi.com"
    }

    try:
        response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
    except requests.RequestException as error:
        print(f"Error {type(error).__name__} for airport {icao_code}")
        return None

    if response.status_code == 204:
        # 204 is "No Content": a successful answer with no flights in the
        # window, not a failure.
        return []
    if response.status_code != 200:
        print(f"Error {response.status_code} for airport {icao_code}")
        return None

    try:
        payload = response.json()
    except ValueError:
        print(f"Error invalid JSON for airport {icao_code}")
        return None

    arrivals = payload.get("arrivals") if isinstance(payload, dict) else None
    return arrivals or []


def arrival_flight_info(airports_df, timeout=10, pause_seconds=2):
    """Collect tomorrow's scheduled arrivals for every airport in ``airports_df``.

    "Tomorrow" is derived from the local clock of the machine running the
    notebook. The day is requested in two windows (00:00-11:59 and
    12:00-23:59) per airport. Failed requests and empty windows are reported
    with ``print`` and followed by a short pause before the next request.

    Parameters
    ----------
    airports_df : pandas.DataFrame
        Must contain the column ``icao``.
    timeout : float, default 10
        Seconds to wait for each HTTP response.
    pause_seconds : float, default 2
        Pause after a failed or empty request.

    Returns
    -------
    pandas.DataFrame
        One row per arrival with the columns ``icao``,
        ``scheduled_arrival_time`` (local time with the trailing UTC offset
        removed), ``arrival_gate``, ``arrival_terminal``,
        ``icao_departure_airport`` and ``departure_airport_name``. When no
        flight is found the frame is empty but still carries these columns.
    """
    columns = [
        "icao",
        "scheduled_arrival_time",
        "arrival_gate",
        "arrival_terminal",
        "icao_departure_airport",
        "departure_airport_name",
    ]
    tomorrow = (datetime.now() + timedelta(days=1)).strftime('%Y-%m-%d')
    timeslots = [('00:00', '11:59'), ('12:00', '23:59')]
    records = []

    for _, airport in airports_df.iterrows():
        icao_code = airport["icao"]

        for start, end in timeslots:
            arrivals = _fetch_arrivals(icao_code, tomorrow, start, end, timeout)
            if not arrivals:
                if arrivals is not None:  # None means the failure was already reported
                    print(f"No arrivals found for {icao_code}")
                time.sleep(pause_seconds)
                continue

            for flight in arrivals:
                # `or {}` also covers nested objects that are present but null.
                movement = flight.get("movement") or {}
                scheduled = movement.get("scheduledTime") or {}
                origin = movement.get("airport") or {}
                records.append({
                    "icao": icao_code,
                    "scheduled_arrival_time": scheduled.get("local"),
                    "arrival_gate": movement.get("gate"),
                    "arrival_terminal": movement.get("terminal"),
                    "icao_departure_airport": origin.get("icao"),
                    "departure_airport_name": origin.get("name"),
                })

    # Passing `columns` keeps the schema stable when nothing was collected;
    # without it the column access below raised KeyError on an empty result.
    all_flights = pd.DataFrame(records, columns=columns)
    if not all_flights.empty:
        # Remove a trailing UTC offset such as "+02:00"; values without an
        # offset are left untouched instead of losing their last six characters.
        all_flights['scheduled_arrival_time'] = all_flights['scheduled_arrival_time'].str.replace(
            r"[+-]\d{2}:\d{2}$", "", regex=True
        )
    return all_flights



In [68]:
# Preview the result. The return value is only displayed, not stored, so the
# cell that assigns `flights_df` sends the same API requests a second time.
arrival_flight_info(airports_df)

Error 204 for airport EDDT
Error 204 for airport EDDT


Unnamed: 0,icao,scheduled_arrival_time,arrival_gate,arrival_terminal,icao_departure_airport,departure_airport_name
0,EDDB,2025-10-16 06:30,N01,1,LTBJ,İzmir
1,EDDB,2025-10-16 06:45,Y07,1,LPCV,Cukurova Mersin
2,EDDB,2025-10-16 06:45,X03,1,ZBAA,Beijing
3,EDDB,2025-10-16 07:10,A30,1,LOWW,Vienna
4,EDDB,2025-10-16 07:15,Y17,1,KEWR,Newark
...,...,...,...,...,...,...
969,EDDM,2025-10-16 23:20,,1,GCLP,Gran Canaria Island
970,EDDM,2025-10-16 23:20,,2,EGLL,London
971,EDDM,2025-10-16 23:20,,2,LPPT,Lisbon
972,EDDM,2025-10-16 23:20,,1,GCTS,Tenerife Island


In [46]:
# Keep the arrivals in `flights_df` so they can be written to the database.
flights_df = arrival_flight_info(airports_df)

Error 204 for airport EDDT
Error 204 for airport EDDT


In [45]:
# Append the collected arrivals to the `arrival_flights` table (the DataFrame
# index is not written). Because the rows are appended, running this cell
# again inserts the same flights a second time.
flights_df.to_sql(
    name='arrival_flights',
    con=connection_string,
    if_exists='append',
    index=False,
)

974