In [11]:
from download_day import download_day
from load_saved_fd4 import load_saved_fd4
from scrape_aircraft_list import scrape_aircraft_list
from get_perf_model_typecodes import get_perf_model_typecodes 
from match_icao_model import match_icao_model
from process_airport_list import process_airport_list
import pandas as pd
from time import sleep

In [12]:
# ---- User inputs (edit these, then re-run the notebook from the top) ----

# Start and end of the query window, as ISO-8601 timestamps with a 'Z' (UTC) suffix.
start_time_str = "2024-01-01T00:00:00Z"
stop_time_str = "2024-12-31T23:59:59Z"

# Limit handed to the download/load/match helpers.
# NOTE(review): 30e4 is the float 300000.0, not an int — confirm the helpers expect that.
query_limit = 30e4

# Flags forwarded unchanged to download_day.
send_notification = True
make_plot = False

# Directory passed to download_day and load_saved_fd4.
output_dir = "/scratch/omg28/Data/no_track_2024"

Section 1. Download flight information from OpenSky

In [None]:
# Pull flight data from the OpenSky history database for the configured window.
# Every argument comes from the user-inputs cell and is passed positionally.
download_day(
    start_time_str,
    stop_time_str,
    query_limit,
    send_notification,
    make_plot,
    output_dir,
)

In [None]:
# Read the previously saved .pkl file back into a pandas dataframe.
# NOTE(review): only stop_time_str (not the start time) is passed — confirm that is how the file is located.
loaded_day = load_saved_fd4(
    stop_time_str,
    output_dir,
    query_limit,
)

Section 2. Scrape the EUROCONTROL database for aircraft flight performance information

In [None]:
# The scraper is opt-in. It used to be disabled by wrapping the code in a bare
# triple-quoted string, which hid it from syntax checking and made the cell echo
# the string as its output. An explicit flag keeps "Run All" safe by default
# while leaving the code runnable on purpose.
run_scraper = False

if run_scraper:
    # scrape the list of aircraft with performance models from the EUROCONTROL website
    perf_model_typecodes = get_perf_model_typecodes()
    ## WARNING: THIS CAN GET YOUR IP ADDRESS BLOCKED IF YOU RUN IT REPEATEDLY. IT IS A WEB SCRAPER.
    aircraft_list = scrape_aircraft_list(perf_model_typecodes)

TypeError: scrape_aircraft_list() missing 1 required positional argument: 'typecodes'

Section 3. Process the aircraft performance and flight data, retaining only flights for which we have takeoff, landing, aircraft type, and aircraft performance model information. Further, drop all flights with airport codes that do not correspond to a code in the OurAirports registry.

In [None]:
# Process the loaded time period of data. Per the notes kept with this cell, match_icao_model:
# - pairs each 24-bit transponder code with an ICAO typecode, using the Feb 2025 aircraft metadata database
# - discards flights whose transponder code has no corresponding ICAO typecode
# - discards flights whose takeoff OR landing airport is unknown
all_2024_flights_with_typecode = match_icao_model(
    start_time_str,
    stop_time_str,
    query_limit,
)


NameError: name 'start_time_str' is not defined

In [5]:
# Load the list of all 2024 flights that have a typecode and a takeoff/landing airport
all_2024_flights_with_typecode = pd.read_pickle('/scratch/omg28/Data/aircraftdb/2024-01-01_to_2024-12-31_300000.pkl')
# Load the list of ICAO typecodes that have a performance model
performance_model_typecodes = pd.read_pickle('/scratch/omg28/Data/aircraftdb/performance_models_typecodes.pkl')

# get info before the merge
print("Before merging:")
# DataFrame.info() writes its report itself and returns None, so it is called
# bare; wrapping it in print() would add a stray "None" line to the output.
all_2024_flights_with_typecode.info()

# inner join the loaded year of data with the scraped aircraft list
# - this will remove all flights for which the ICAO typecode does not have a corresponding performance model
# - the typecode list is de-duplicated first: the join is meant as a pure filter, and a
#   typecode appearing twice on the right side would otherwise duplicate every matching flight row
all_2024_flights_with_perf_model_and_typecode = pd.merge(
    all_2024_flights_with_typecode,
    performance_model_typecodes.drop_duplicates(subset='typecode'),
    how='inner',
    on='typecode',
)

print("After merging:")
print(all_2024_flights_with_perf_model_and_typecode.head())


Before merging:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24461374 entries, 0 to 24461373
Data columns (total 4 columns):
 #   Column               Dtype          
---  ------               -----          
 0   icao24               object         
 1   estdepartureairport  string[pyarrow]
 2   estarrivalairport    string[pyarrow]
 3   typecode             object         
dtypes: object(2), string[pyarrow](2)
memory usage: 746.5+ MB
None
After merging:
   icao24 estdepartureairport estarrivalairport typecode
0  a01f01                KPIE              KPTK     C680
1  a08174                GA20              KCRG     C182
2  a08174                KCRG              K09J     C182
3  a08174                KCRG              KCRG     C182
4  a08174                KCRG              KCRG     C182


In [8]:

# Report the flights (and distinct typecodes) in all_2024_flights_with_typecode
# whose typecode does not appear in the performance-model list.
has_perf_model = all_2024_flights_with_typecode['typecode'].isin(performance_model_typecodes['typecode'])
missing_perf_models = all_2024_flights_with_typecode[~has_perf_model]

num_typecodes_without_model = len(missing_perf_models['typecode'].unique())
num_flights_without_model = len(missing_perf_models)
print(f"# of planes with observed flights that exist, but no performance model:{num_typecodes_without_model}")
print(f"total flights in 2024 with TOLD and typecode, but no performance model: {num_flights_without_model}")


NameError: name 'all_2024_flights_with_typecode' is not defined

In [7]:

# Report the performance-model typecodes that never occur in the flight table,
# followed by the size of the merged (flights with performance model) table.
typecodes_seen_in_flights = all_2024_flights_with_typecode['typecode']
has_flight = performance_model_typecodes['typecode'].isin(typecodes_seen_in_flights)
missing_flights = performance_model_typecodes[~has_flight]

num_models_without_flights = len(missing_flights['typecode'].unique())
num_flights_with_model = len(all_2024_flights_with_perf_model_and_typecode)
print(f"# of performance models with no flights:{num_models_without_flights}")
print(f"total flights in 2024 with TOLD, typecode, and performance model: {num_flights_with_model}")


NameError: name 'performance_model_typecodes' is not defined

In [6]:
# Tally the flights whose departure and arrival airports are identical
departure_airport = all_2024_flights_with_perf_model_and_typecode['estdepartureairport']
arrival_airport = all_2024_flights_with_perf_model_and_typecode['estarrivalairport']

same_airport_flights = all_2024_flights_with_perf_model_and_typecode[departure_airport == arrival_airport]
num_flights_no_dist = len(same_airport_flights)
print(f"Number of flights with no distance: {num_flights_no_dist}")

# Keep only the flights that start and end at different airports.
# Note: this rebinds the same name, so re-running the cell reports 0 such flights.
all_2024_flights_with_perf_model_and_typecode = all_2024_flights_with_perf_model_and_typecode[departure_airport != arrival_airport]


NameError: name 'all_2024_flights_with_perf_model_and_typecode' is not defined

In [5]:
# load the takeoff and landing airport location information to the dataframe.
all_airports = process_airport_list()

# DataFrame.info() prints its own report and returns None, so it is called bare;
# print(all_airports.info()) would append a stray "None" line to the output.
all_airports.info()

# add the estimated departure airport location information to the dataframe
# - the inner join drops every flight whose departure airport has no matching 'ident' in all_airports
# - the airport coordinate/elevation columns are renamed so they are marked as departure values
all_2024_flights_filtered = pd.merge(
    all_2024_flights_with_perf_model_and_typecode,
    all_airports,
    how='inner',
    left_on='estdepartureairport',
    right_on='ident',
).rename(
    columns={
        'latitude_deg': 'estdeparturelat',
        'longitude_deg': 'estdeparturelong',
        'elevation_ft': 'estdeparturealt_ft',
    }
)
all_2024_flights_filtered.info()

# add the estimated arrival airport location information to the dataframe
# NOTE(review): no arrival-airport merge follows in this cell — confirm it is performed in a later cell.

Saved processed airports to /scratch/omg28/Data/airportdb/processed_airports.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82907 entries, 0 to 82906
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             82907 non-null  int64  
 1   ident          82907 non-null  object 
 2   latitude_deg   82907 non-null  float64
 3   longitude_deg  82907 non-null  float64
 4   elevation_ft   68440 non-null  float64
 5   gps_code       43202 non-null  object 
dtypes: float64(3), int64(1), object(2)
memory usage: 3.8+ MB
None


NameError: name 'all_2024_flights_with_perf_model_and_typecode' is not defined