In [5]:

import pandas as pd
import plotly as p
import plotly.express as px 
import plotly.io as pio
import plotly.graph_objects as go
import numpy as np
pio.templates.default = "plotly_dark"

In [60]:
delays = pd.read_csv('..\\raw_data\\Airline_Delay_Cause_2003_2022.csv')

delays = delays[delays['airport'] == 'SEA']

delays['carrier_cause'] = round((delays['carrier_ct']/delays['arr_del15'])*100, 4)
delays['weather_cause'] = round((delays['weather_ct']/delays['arr_del15'])*100, 4)
delays['nas_cause'] = round((delays['nas_ct']/delays['arr_del15'])*100, 4)
delays['security_cause'] = round((delays['security_ct']/delays['arr_del15'])*100, 4)
delays['late_aircraft_cause'] = round((delays['late_aircraft_ct']/delays['arr_del15'])*100, 4)
delays['perc_total_delays'] = round((delays['arr_del15']/delays['arr_flights']),4)



delays.head(10)

Unnamed: 0,year,month,carrier,carrier_name,airport,airport_name,arr_flights,arr_del15,carrier_ct,weather_ct,...,weather_delay,nas_delay,security_delay,late_aircraft_delay,carrier_cause,weather_cause,nas_cause,security_cause,late_aircraft_cause,perc_total_delays
192,2022,11,AA,American Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",452.0,100.0,33.06,2.81,...,181.0,1364.0,41.0,1650.0,33.06,2.81,38.24,0.91,24.98,0.2212
280,2022,11,AS,Alaska Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",5816.0,1062.0,234.86,24.55,...,2668.0,19985.0,170.0,28663.0,22.1149,2.3117,43.6073,0.2731,31.693,0.1826
345,2022,11,B6,JetBlue Airways,SEA,"Seattle, WA: Seattle/Tacoma International",64.0,21.0,11.66,0.0,...,0.0,157.0,0.0,456.0,55.5238,0.0,21.5238,0.0,22.9048,0.3281
471,2022,11,DL,Delta Air Lines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",2603.0,326.0,104.33,7.26,...,860.0,4145.0,0.0,10697.0,32.0031,2.227,37.3742,0.0,28.3957,0.1252
564,2022,11,F9,Frontier Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",60.0,22.0,7.88,0.0,...,0.0,460.0,0.0,211.0,35.8182,0.0,47.0455,0.0,17.1364,0.3667
719,2022,11,HA,Hawaiian Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",63.0,27.0,20.8,0.0,...,0.0,0.0,0.0,387.0,77.037,0.0,0.0,0.0,22.963,0.4286
912,2022,11,NK,Spirit Air Lines,SEA,"Seattle, WA: Seattle/Tacoma International",89.0,25.0,8.88,0.0,...,0.0,900.0,19.0,37.0,35.52,0.0,56.48,1.8,6.2,0.2809
1213,2022,11,OO,SkyWest Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",1646.0,276.0,163.0,85.0,...,10561.0,1137.0,142.0,1246.0,59.058,30.7971,3.9855,1.087,5.0725,0.1677
1283,2022,11,QX,Horizon Air,SEA,"Seattle, WA: Seattle/Tacoma International",1915.0,454.0,87.96,34.96,...,3538.0,8234.0,0.0,9308.0,19.3744,7.7004,42.7709,0.0,30.1542,0.2371
1389,2022,11,UA,United Air Lines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",671.0,115.0,32.43,0.0,...,0.0,2365.0,0.0,3488.0,28.2,0.0,39.1043,0.0,32.6957,0.1714


In [61]:
# First, we'll create a pie chart breaking down the delays by cause

# Since we're only looking at delays, we'll ignore cancellations and diverted flights
# we'll also drop the columns that denote the cause of delay in minutes

pie_delays = delays.drop(['arr_cancelled', 'arr_diverted',
                     'arr_delay', 'carrier_delay', 'weather_delay',
                     'nas_delay', 'security_delay', 'late_aircraft_delay',
                     'carrier_cause', 'weather_cause', 'nas_cause',
                     'security_cause', 'late_aircraft_cause', 'perc_total_delays'], axis=1)

# We'll rename our columns to make them more interpretable

pie_delays = pie_delays.rename(columns = {'carrier_ct' : 'Air Carrier Delay', 
                                  'weather_ct' : 'Weather Delay',
                                 'nas_ct' : 'National Aviation System Delay',
                                 'security_ct' : 'Security Delay',
                                 'late_aircraft_ct' : 'Aircraft Arriving Late'})


pie_delays.head()

Unnamed: 0,year,month,carrier,carrier_name,airport,airport_name,arr_flights,arr_del15,Air Carrier Delay,Weather Delay,National Aviation System Delay,Security Delay,Aircraft Arriving Late
192,2022,11,AA,American Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",452.0,100.0,33.06,2.81,38.24,0.91,24.98
280,2022,11,AS,Alaska Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",5816.0,1062.0,234.86,24.55,463.11,2.9,336.58
345,2022,11,B6,JetBlue Airways,SEA,"Seattle, WA: Seattle/Tacoma International",64.0,21.0,11.66,0.0,4.52,0.0,4.81
471,2022,11,DL,Delta Air Lines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",2603.0,326.0,104.33,7.26,121.84,0.0,92.57
564,2022,11,F9,Frontier Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",60.0,22.0,7.88,0.0,10.35,0.0,3.77


In [64]:
# Creating tables for ranking of carrier
sea_delays = delays[delays['airport'] == 'SEA']

test = sea_delays.sort_values(['carrier_name', 
                               'perc_total_delays']).groupby('carrier_name').head()

test = test.groupby(['carrier_name']).mean().reset_index()

test['Delay Rank'] = test['perc_total_delays'].rank(ascending=False)
test = test.loc[:, ['carrier_name', 'perc_total_delays', 
                    'Delay Rank']].sort_values('Delay Rank', ascending = True)

test['perc_total_delays'] = round(test['perc_total_delays']*100,
                                 4)

test = test.rename(columns = {'carrier_name' : 'Carrier', 
                              'perc_total_delays' : 'Percent of Total Delays (%)',
                             'Delay Rank' : 'Rank'})

fig = go.Figure(data=[go.Table(
    header=dict(values=list(test.columns),
                #fill_color='paleturquoise',
                align='left'),
    cells=dict(values=[test.Carrier, test['Percent of Total Delays (%)'], test.Rank],
               #fill_color='lavender',
               align='left')),
])

fig.update_layout(title='Ranking of Airline Carriers by Delay (2003 - 2022)')
fig.show()

In [13]:

delays_reshape = pd.melt(pie_delays, 
                                  id_vars = ['year'], 
                                  value_vars=['Air Carrier Delay', 'Weather Delay', 'National Aviation System Delay',
       'Security Delay', 'Aircraft Arriving Late'])


df = delays_reshape[['year', 'variable','value']].set_index(['year', 'variable'])['value']

# create a figure for each category
figs = {
    c: px.pie(df.loc[c].reset_index(), values="value", names="variable", 
              labels = {
                 'value':'Total Passengers', 
                 'variable':'Delay Cause'
             },
        title='Cause of Delay by Year',
        color_discrete_sequence=px.colors.qualitative.Dark24).update_traces(
        name=c, visible=False, 
    )
    for c in df.index.get_level_values("year").unique()
}

# integrate figures per category into one figure
defaultcat = df.index.get_level_values("year").unique()[0]
fig = figs[defaultcat].update_traces(visible=True)
for k in figs.keys():
    if k != defaultcat:
        fig.add_traces(figs[k].data)

# finally build dropdown menu
fig.update_layout(
    updatemenus=[
        {
            "buttons": [
                {
                    "label": k,
                    "method": "update",
                    # list comprehension for which traces are visible
                    "args": [{"visible": [kk == k for kk in figs.keys()]},
                             {"title":go.layout.xaxis.Title(
            text=f"Denied Boardings by Airline Carrier <br><sup>{k} </sup>"
            )}],
                }
                for k in figs.keys()
            ]
        }
    ]
)


In [11]:
# We'll create a separate pie chart containing 
# the breakdown of the causes of the delays 


pie_delays = delays.drop(['arr_cancelled', 'arr_diverted',
                     'arr_del15', 'carrier_ct', 'weather_ct',
                     'nas_ct', 'security_ct', 'late_aircraft_ct',
                     'carrier_cause', 'weather_cause', 'nas_cause',
                     'security_cause', 'late_aircraft_cause', 'perc_total_delays'], axis=1)

# We'll rename our columns to make them more interpretable

pie_delays = pie_delays.rename(columns = {'carrier_delay' : 'Air Carrier Delay', 
                                  'weather_delay' : 'Weather Delay',
                                 'nas_delay' : 'National Aviation System Delay',
                                 'security_delay' : 'Security Delay',
                                 'late_aircraft_delay' : 'Aircraft Arriving Late'})

delays_reshape = pd.melt(pie_delays, 
                                  id_vars = ['year'], 
                                  value_vars=['Air Carrier Delay', 'Weather Delay', 'National Aviation System Delay',
       'Security Delay', 'Aircraft Arriving Late'])


df = delays_reshape[['year', 'variable','value']].set_index(['year', 'variable'])['value']

# create a figure for each category
figs = {
    c: px.pie(df.loc[c].reset_index(), values="value", names="variable", 
              labels = {
                 'value':'Total Passengers', 
                 'variable':'Delay Cause'
             },
        title='Cause of Delay by Yea (in minutes)',
        color_discrete_sequence=px.colors.qualitative.Dark24).update_traces(
        name=c, visible=False, 
    )
    for c in df.index.get_level_values("year").unique()
}

# integrate figures per category into one figure
defaultcat = df.index.get_level_values("year").unique()[0]
fig = figs[defaultcat].update_traces(visible=True)
for k in figs.keys():
    if k != defaultcat:
        fig.add_traces(figs[k].data)

# finally build dropdown menu
fig.update_layout(
    updatemenus=[
        {
            "buttons": [
                {
                    "label": k,
                    "method": "update",
                    # list comprehension for which traces are visible
                    "args": [{"visible": [kk == k for kk in figs.keys()]},
                             {"title":go.layout.xaxis.Title(
            text=f"Denied Boardings by Airline Carrier <br><sup>{k} </sup>"
            )}],
                }
                for k in figs.keys()
            ]
        }
    ]
)


In [42]:
test = delays.sort_values('perc_total_delays', ascending=False)
test.head(10)

Unnamed: 0,year,month,carrier,carrier_name,airport,airport_name,arr_flights,arr_del15,carrier_ct,weather_ct,...,weather_delay,nas_delay,security_delay,late_aircraft_delay,carrier_cause,weather_cause,nas_cause,security_cause,late_aircraft_cause,perc_total_delays
272090,2006,10,YV,Mesa Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",4.0,3.0,1.0,0.0,...,0.0,34.0,0.0,0.0,33.3333,0.0,66.6667,0.0,0.0,0.75
301474,2005,1,B6,JetBlue Airways,SEA,"Seattle, WA: Seattle/Tacoma International",31.0,21.0,4.26,0.0,...,0.0,350.0,0.0,505.0,20.2857,0.0,40.4286,0.0,39.2857,0.6774
2412,2022,10,HA,Hawaiian Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",63.0,40.0,30.08,0.0,...,0.0,0.0,0.0,338.0,75.2,0.0,0.0,0.0,24.8,0.6349
93668,2018,2,HA,Hawaiian Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",56.0,35.0,29.13,0.0,...,0.0,21.0,0.0,300.0,83.2286,0.0,1.7143,0.0,15.0286,0.625
161942,2013,2,HA,Hawaiian Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",56.0,35.0,32.92,0.06,...,2.0,9.0,0.0,163.0,94.0571,0.1714,0.8,0.0,4.9429,0.625
145986,2014,2,F9,Frontier Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",90.0,55.0,8.3,0.0,...,0.0,1256.0,0.0,902.0,15.0909,0.0,63.2364,0.0,21.6727,0.6111
248153,2007,12,F9,Frontier Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",119.0,72.0,24.08,0.37,...,33.0,1623.0,0.0,635.0,33.4444,0.5139,51.7083,0.0,14.3333,0.605
103536,2017,5,HA,Hawaiian Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",62.0,37.0,27.41,0.0,...,0.0,35.0,0.0,454.0,74.0811,0.0,2.0541,0.0,23.8649,0.5968
92488,2018,3,HA,Hawaiian Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",62.0,37.0,24.4,0.0,...,0.0,8.0,19.0,459.0,65.9459,0.0,0.9459,3.4324,29.7027,0.5968
249017,2007,12,XE,ExpressJet Airlines Inc.,SEA,"Seattle, WA: Seattle/Tacoma International",88.0,51.0,14.64,0.0,...,0.0,693.0,0.0,956.0,28.7059,0.0,33.2941,0.0,38.0,0.5795


In [58]:
# Next, we'll breakdown the cause of delay by carrier for each year
# dir(px.colors.qualitative)

fig = px.bar(delays.sort_values('perc_total_delays', ascending=False), 
             x="carrier_name", y="perc_total_delays", 
                labels={
                    "carrier_name": "Airline Carrier",
                     "perc_total_delays":"Percent of Delays", 
                 },
                title="Percentage of Delays by Carrier", 
                color_discrete_sequence=px.colors.qualitative.Alphabet_r)
fig.update_xaxes(tickangle = 45)
fig.show()

In [64]:
# This next visualization will serve to provide the user 
# with the severity of each cause of delay over time by carrier

minute_delays = delays.drop(['arr_cancelled', 'arr_diverted',
                     'arr_del15', 'carrier_ct', 'weather_ct',
                     'nas_ct', 'security_ct', 'late_aircraft_ct',
                     'carrier_cause', 'weather_cause', 'nas_cause',
                     'security_cause', 'late_aircraft_cause', 'perc_total_delays'], axis=1)

minute_delays = minute_delays.rename(columns = {'carrier_delay' : 'Air Carrier Delay', 
                                  'weather_delay' : 'Weather Delay',
                                 'nas_delay' : 'National Aviation System Delay',
                                 'security_delay' : 'Security Delay',
                                 'late_aircraft_delay' : 'Aircraft Arriving Late'})

minute_reshape = pd.melt(minute_delays, 
                                  id_vars = ['year', 'carrier_name'], 
                                  value_vars=['Air Carrier Delay', 'Weather Delay', 'National Aviation System Delay',
       'Security Delay', 'Aircraft Arriving Late'])


df = minute_reshape[['year', 'carrier_name', 'variable','value']].set_index(['year', 'carrier_name', 'variable'])['value']

df.head(10)

year  carrier_name            variable         
2022  American Airlines Inc.  Air Carrier Delay     2663.0
      Alaska Airlines Inc.    Air Carrier Delay    16159.0
      JetBlue Airways         Air Carrier Delay      933.0
      Delta Air Lines Inc.    Air Carrier Delay    13111.0
      Frontier Airlines Inc.  Air Carrier Delay      416.0
      Hawaiian Airlines Inc.  Air Carrier Delay      702.0
      Spirit Air Lines        Air Carrier Delay      549.0
      SkyWest Airlines Inc.   Air Carrier Delay     9283.0
      Horizon Air             Air Carrier Delay     5251.0
      United Air Lines Inc.   Air Carrier Delay     2043.0
Name: value, dtype: float64

In [69]:
# create a figure for each category
figs = {
    c: px.bar(df.loc[c].reset_index(), x="carrier_name", y="value", 
              labels = {
                 'value':'Delay in minutes', 
                 'carrier_name':'Aircraft Carriers'
             },
        title='Severity of Delay by Carrier',
        color_discrete_sequence=px.colors.qualitative.Alphabet_r).update_traces(
        name=c, visible=False, 
    )
    for c in df.index.get_level_values("year").unique()
}

# integrate figures per category into one figure - sanity test
defaultcat = df.index.get_level_values("variable").unique()[0]
fig = figs[defaultcat].update_traces(visible=True)
for k in figs.keys():
    if k != defaultcat:
        fig.add_traces(figs[k].data)

# finally build dropdown menu
fig.update_layout(
    updatemenus=[
        {
            "buttons": [
                {
                    "label": k,
                    "method": "update",
                    # list comprehension for which traces are visible
                    "args": [{"visible": [kk == k for kk in figs.keys()]},
                             {"title":go.layout.xaxis.Title(
            text=f"Length of Delay by Cause <br><sup>{k} </sup>"
            )}],
                }
                for k in figs.keys()
            ]
        }
    ]
)


KeyError: 'Air Carrier Delay'

In [67]:
defaultcat = df.index.get_level_values("variable").unique()
defaultcat

Index(['Air Carrier Delay', 'Weather Delay', 'National Aviation System Delay',
       'Security Delay', 'Aircraft Arriving Late'],
      dtype='object', name='variable')

In [None]:
# Create two visualizations: pie chart for minutes & line/bar plot on minutes for each carrier

