# Which airports experience the highest, the lowest, and most consistent delay rates?

In [2]:
import pandas as pd

In [3]:
url = "https://raw.githubusercontent.com/Bas-82/TIL6022-Research-project/main/datasets/AA_ATFM_Delay.xlsx"
df = pd.read_excel(url)

In [4]:

#We only focus on the years 2023 and 2024
df = df[df['YEAR'].isin([2023, 2024])]

#We create a top 10 of airports with the highest and the lowest number of delayed flights below
df['delayed'] = df['FLT_ARR_1_DLY'] > 0

delays_per_airport = df.groupby(['APT_ICAO', 'YEAR', 'APT_NAME'])['delayed'].sum().reset_index()
delays_per_airport = delays_per_airport.sort_values('delayed', ascending=False)

print("The top 10 airports with most delayed flights are:")
print(delays_per_airport.head(10).to_string(index=False))

print("\nThe top 10 airports with fewest delayed flights are:")
print(delays_per_airport.tail(10).to_string(index=False))

The top 10 airports with most delayed flights are:
APT_ICAO  YEAR                            APT_NAME  delayed
    LPPT  2023                              Lisbon      361
    EHAM  2024                Amsterdam - Schiphol      358
    LLBG  2023 Tel Aviv - Ben Gurion International      353
    LPPT  2024                              Lisbon      349
    EGLL  2024                   London - Heathrow      334
    LSZH  2024                              Zürich      312
    EHAM  2023                Amsterdam - Schiphol      302
    LEMD  2023                    Madrid - Barajas      284
    LGAV  2024                              Athens      275
    LFPO  2023                          Paris-Orly      270

The top 10 airports with fewest delayed flights are:
APT_ICAO  YEAR             APT_NAME  delayed
    GCHI  2024               Hierro        0
    GCLA  2023             La Palma        0
    GCLA  2024             La Palma        0
    GEML  2023              Melilla        0
    GEML  

Same dataframe but with average minutes

In [5]:
#We only focus on the years 2023 and 2024
df = df[df['YEAR'].isin([2023, 2024])]

#Now we create a top 10 of airports with the highest and the lowest amount of delay minnutes
df['delayed'] = df['FLT_ARR_1_DLY'] > 0

delay_minutes_per_airport = df.groupby(['APT_ICAO', 'APT_NAME'])['FLT_ARR_1_DLY'].sum().reset_index()
delay_minutes_per_airport = delay_minutes_per_airport.sort_values('FLT_ARR_1_DLY', ascending=False)

print("The top 10 airports with larrgest total amount of delay minutes:")
print(delay_minutes_per_airport.head(10))

print("\nAThe top 10 airports with smallest total amount of delay minutes:")
print(delay_minutes_per_airport.tail(10))


The top 10 airports with larrgest total amount of delay minutes:
    APT_ICAO                             APT_NAME  FLT_ARR_1_DLY
61      EHAM                 Amsterdam - Schiphol        93661.0
47      EGLL                    London - Heathrow        65314.0
355     LPPT                               Lisbon        58685.0
263     LGAV                               Athens        50419.0
44      EGKK                     London - Gatwick        38960.0
379     LTAI                              Antalya        27453.0
9       EDDF                            Frankfurt        27388.0
374     LSZH                               Zürich        26796.0
337     LLBG  Tel Aviv - Ben Gurion International        22209.0
167     LEMD                     Madrid - Barajas        18858.0

AThe top 10 airports with smallest total amount of delay minutes:
    APT_ICAO               APT_NAME  FLT_ARR_1_DLY
130     GMAD             Al Massira            0.0
282     LIBF     Foggia - Gino Lisa            0.0


In [6]:
#We calculate the standard deviation of the montly delays in each airport. This informs us about the consistency in delay rates. The lower the standard deviation, the more consistent the delay rates are.
monthly_delays = df.groupby(['APT_ICAO', 'APT_NAME', 'MONTH_NUM'])['delayed'].sum().reset_index()

stats_per_airport = monthly_delays.groupby(['APT_ICAO', 'APT_NAME'])['delayed'].agg(['mean', 'std']).reset_index()
most_consistent_airports = stats_per_airport.sort_values('std')

print("The top 10 airports with most consistent delayed flight rates are:")
print(most_consistent_airports.head(10))

print("\nThe top 10 airports with least consistent delayed flight rates are:")
print(most_consistent_airports.tail(10))

The top 10 airports with most consistent delayed flight rates are:
    APT_ICAO            APT_NAME  mean  std
400     UKLL  Lviv International   0.0  0.0
122     GCGM           La Gomera   0.0  0.0
123     GCHI              Hierro   0.0  0.0
124     GCLA            La Palma   0.0  0.0
295     LIEA    Alghero-Fertilia   0.0  0.0
292     LICR     Reggio Calabria   0.0  0.0
290     LICG         Pantelleria   0.0  0.0
129     GEML             Melilla   0.0  0.0
130     GMAD          Al Massira   0.0  0.0
298     LIMA    Torino-Aeritalia   0.0  0.0

The top 10 airports with least consistent delayed flight rates are:
    APT_ICAO              APT_NAME       mean        std
51      EGNT             Newcastle  29.083333  20.531388
263     LGAV                Athens  41.416667  22.932543
269     LGKR                 Corfu  17.000000  24.866919
379     LTAI               Antalya  31.333333  27.463474
264     LGIR             Heraklion  26.333333  27.818677
265     LGKF            Kefallinia  21

In [7]:
#We calculate the standard deviation of the montly minutes of delay in each airport. It is the same approach as in the previous code, but now with the minutes of delay as input data.
monthly_delay_minutes = df.groupby(['APT_ICAO', 'APT_NAME', 'MONTH_NUM'])['FLT_ARR_1_DLY'].sum().reset_index()

stats_per_airport_minutes = monthly_delay_minutes.groupby(['APT_ICAO', 'APT_NAME'])['FLT_ARR_1_DLY'].agg(['mean', 'std']).reset_index()
most_consistent_minutes = stats_per_airport_minutes.sort_values('std', ascending=False)

print("The top 10 airports with most consistent monthly delay minutes are:")
print(most_consistent_minutes.head(10).to_string(index=False))

print("\nThe top 10 airports with least consistent monthly delay minutes are:")
print(most_consistent_minutes.tail(10).to_string(index=False))

The top 10 airports with most consistent monthly delay minutes are:
APT_ICAO                            APT_NAME        mean         std
    LGAV                              Athens 4201.583333 3613.106884
    EGKK                    London - Gatwick 3246.666667 2469.632447
    LTAI                             Antalya 2287.750000 2340.475364
    EHAM                Amsterdam - Schiphol 7805.083333 2181.953064
    EKCH                Copenhagen - Kastrup 1549.500000 1896.159444
    EGLL                   London - Heathrow 5442.833333 1860.643722
    EDDF                           Frankfurt 2282.333333 1763.901890
    LEPA                   Palma de Mallorca 1284.833333 1190.161015
    LLBG Tel Aviv - Ben Gurion International 1850.750000 1183.080734
    LPPT                              Lisbon 4890.416667 1164.003238

The top 10 airports with least consistent monthly delay minutes are:
APT_ICAO                 APT_NAME  mean  std
    LICG              Pantelleria   0.0  0.0
    LBWN     

Suggestions on how to interpret "efficiency":

1. Based on delay rates: the smaller the delay rate, the higher efficiency
a. Proportion of flights
b. Proportion of time 

2. Based on capacity: the higher the amount of flights handled compared to capacity of the airport, the higher efficiency

3. Based on consistency: the more consistent teh delay rates are, the higher efficiency.

4. Based on the context/external factors: consider factors such as weather, infrastructure quality, airspace congestion, political choices, etc. 