## Collecting flight data

In [None]:
# load the libraries for making HTTP requests and handling JSON
# import the API key from a separate file
import requests
import json
from IPython.display import JSON
from keys import *

In [None]:
# Request the arrivals for airport EGPD from the AeroDataBox API.
# The RapidAPI key authenticates the call; the raw response is stored in
# `response` for later use.
url = "https://aerodatabox.p.rapidapi.com/flights/airports/icao/EGPD/2025-09-01T08:00/2025-09-01T20:00"

querystring = {"withLeg":"true","direction":"Arrival","withCancelled":"true","withCodeshared":"true","withCargo":"true","withPrivate":"true","withLocation":"false"}

headers = {
    'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
    'x-rapidapi-key': flights_key
    }

# Without a timeout, requests waits indefinitely on a stalled connection and
# the cell never finishes; with one, a requests.exceptions.Timeout is raised.
REQUEST_TIMEOUT_SECONDS = 30

response = requests.get(url, headers=headers, params=querystring, timeout=REQUEST_TIMEOUT_SECONDS)

In [None]:
# If the request succeeded, append the parsed payload to data.json.
# When the file does not exist yet (or cannot be parsed) it starts from an
# empty list, so the file is created on the first successful run.

if response.status_code == 200:
    new_data = response.json()

    try:
        with open("data.json", "r") as json_file:
            existing_data = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        existing_data = []

    # A file holding a single JSON value (e.g. one saved response object)
    # has no .append(); wrap it so the earlier data is kept instead of crashing.
    if not isinstance(existing_data, list):
        existing_data = [existing_data]

    existing_data.append(new_data)

    with open("data.json", "w") as json_file:
        json.dump(existing_data, json_file, indent=4)
    # Report success only after the file has been closed.
    print("Data appended to data.json file.")
else:
    print("Failed to retrieve data from the API. Status code:", response.status_code)

Data appended to data.json file.


In [None]:
# Walk through every arrival in the response and read its main fields:
# scheduled time, flight number, origin airport, aircraft and airline.
flight_arrivals = response.json()

# Read from the loop variable: indexing ['arrivals'][0] on every pass would
# only ever look at the first flight. The .get() calls keep a payload that
# lacks 'arrivals' (or a flight that lacks a field) from raising; a missing
# field simply reads as None. Nothing is displayed here; the values are only
# bound to names for inspection.
for flight in flight_arrivals.get('arrivals', []):
    scheduled_time = flight.get('arrival', {}).get('scheduledTime')
    flight_number = flight.get('number')
    origin_airport = flight.get('departure', {}).get('airport', {}).get('name')
    aircraft_model = flight.get('aircraft', {}).get('model')
    airline_name = flight.get('airline', {}).get('name')

In [None]:
# import pandas to work with tables and numpy's nan in case there are missing values
import pandas as pd
from numpy import nan

# Parse the API response body as JSON and keep the result in flight_arrivals
# so the code below can work with it (it is expected to be a dict with an
# 'arrivals' key; that key is checked before use further down).

flight_arrivals = response.json()

def flight_extraction(flight):
    """Flatten one arrival record into a flat dict of display columns.

    Parameters
    ----------
    flight : dict
        One element of the response's 'arrivals' list.

    Returns
    -------
    dict
        Keys 'scheduled_arrival_time_utc', 'scheduled_arrival_time_local',
        'flight_number', 'from', 'airline' and 'aircraft'. Any value the
        record does not provide is reported as 'N/A' rather than raising,
        so one incomplete flight cannot abort the whole extraction.
    """
    def lookup(*path):
        # Follow `path` through nested dicts; a missing key or a non-dict
        # level (e.g. a null object) yields 'N/A' instead of an exception.
        node = flight
        for key in path:
            if not isinstance(node, dict) or key not in node:
                return 'N/A'
            node = node[key]
        return node

    return {
        # the time is nested inside another object called scheduledTime
        'scheduled_arrival_time_utc': lookup('arrival', 'scheduledTime', 'utc'),
        'scheduled_arrival_time_local': lookup('arrival', 'scheduledTime', 'local'),
        'flight_number': lookup('number'),
        'from': lookup('departure', 'airport', 'name'),
        'airline': lookup('airline', 'name'),
        'aircraft': lookup('aircraft', 'model')
    }

if 'arrivals' not in flight_arrivals:
    # Nothing to tabulate: the payload carries no arrivals list.
    print("No flight arrivals data found in the response")
else:
    # One flattened record per arrival.
    flights_data = list(map(flight_extraction, flight_arrivals['arrivals']))

    # Build the table from the records in a single step.
    flight_arrivals_df = pd.DataFrame(flights_data)

    # Report the table size, then preview the first rows.
    print("Shape of DataFrame:", flight_arrivals_df.shape)
    print("\nFirst few rows:")
    display(flight_arrivals_df.head())

    

Shape of DataFrame: (42, 6)

First few rows:


Unnamed: 0,scheduled_arrival_time_utc,scheduled_arrival_time_local,flight_number,from,airline,aircraft
0,2025-09-01 07:30Z,2025-09-01 08:30+01:00,U2 615,London,easyJet,Airbus A319
1,2025-09-01 07:20Z,2025-09-01 08:20+01:00,WF 392,Bergen,Wideroe,Bombardier Dash 8 Q400 / DHC-8-400
2,2025-09-01 07:30Z,2025-09-01 08:30+01:00,BA 1304,London,British,Airbus A320
3,2025-09-01 07:20Z,2025-09-01 08:20+01:00,LM 31,Orkney,Loganair,ATR 42-300
4,2025-09-01 07:40Z,2025-09-01 08:40+01:00,KL 911,Amsterdam,KLM,Embraer 190
