Project by:

- Jack Chen 4427737
- Joost Litjes 4540700
- Felicia Hung 7568479

In [1]:
import os

import pandas as pd
import numpy as np

import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

import dash
from dash import dcc, html
from dash.dependencies import Input, Output

In [2]:
# Read the flights CSV into a DataFrame; the trailing expression makes the
# notebook render a preview of the first rows as this cell's output.
DATA_FILE = "airlinedelaycauses_DelayedFlights_Filtered.csv"
db = pd.read_csv(DATA_FILE)
db.head()

Unnamed: 0,Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,...,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
0,2008,1,3,4,1829.0,1755,1959.0,1925,WN,3920,...,3.0,10.0,0,N,0,2.0,0.0,0.0,0.0,32.0
1,2008,1,3,4,1937.0,1830,2037.0,1940,WN,509,...,3.0,7.0,0,N,0,10.0,0.0,0.0,0.0,47.0
2,2008,1,3,4,1644.0,1510,1845.0,1725,WN,1333,...,6.0,8.0,0,N,0,8.0,0.0,0.0,0.0,72.0
3,2008,1,3,4,1452.0,1425,1640.0,1625,WN,675,...,7.0,8.0,0,N,0,3.0,0.0,0.0,0.0,12.0
4,2008,1,3,4,1323.0,1255,1526.0,1510,WN,4,...,4.0,9.0,0,N,0,0.0,0.0,0.0,0.0,16.0


In [3]:
# Per-carrier mean of every delay-cause column; the click callback reads this
# table to draw the drill-down chart.
avg_delays = (
    db.groupby(['UniqueCarrier'])[
        ['CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']
    ]
    .mean()
    .reset_index()
)

# Per-carrier mean arrival delay, largest first, so the overview chart reads
# from the most-delayed carrier to the least-delayed one.
delayed_carriers = (
    db.groupby('UniqueCarrier')[['ArrDelay']]
    .mean()
    .reset_index()
    .sort_values(by='ArrDelay', ascending=False)
)

# Overview bar chart: one bar per carrier.
fig1 = px.bar(
    delayed_carriers,
    x='UniqueCarrier',
    y='ArrDelay',
    title='Carriers with Highest Average Arrival Delay'
)
fig1.update_xaxes(title='Carrier')
fig1.update_yaxes(title='Average Arrival Delay (minutes)')

# Create the Dash app exactly once; layout and callbacks attach to this object.
app = dash.Dash(__name__)

# Layout: the overview chart on top, the (initially empty) drill-down chart below.
app.layout = html.Div([
    dcc.Graph(id='bar-chart', figure=fig1),
    dcc.Graph(id='delay-causes-chart')
])

# Drill-down callback: a click on the carrier chart redraws the causes chart.
@app.callback(
    Output('delay-causes-chart', 'figure'),
    [Input('bar-chart', 'clickData')]
)
def update_delay_causes_chart(clickData):
    """Return the delay-causes figure for the carrier clicked in 'bar-chart'.

    Args:
        clickData: The 'clickData' property of the 'bar-chart' graph, or
            None when no bar has been clicked yet.

    Returns:
        A bar figure. Without a click it is an empty figure whose title asks
        the user to click a carrier; otherwise it has one bar per delay-cause
        column, taken from the clicked carrier's row(s) of ``avg_delays``.
    """
    # Nothing selected yet: show the prompt and stop.
    if clickData is None:
        return px.bar(title='Click on a carrier to see delay causes')

    cause_columns = ['CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']

    # The x value of the first clicked point is the carrier code.
    selected_carrier = clickData['points'][0]['x']
    is_selected = avg_delays['UniqueCarrier'] == selected_carrier

    # Collapse the matching row(s) into a two-column (cause, value) table.
    cause_means = avg_delays.loc[is_selected, cause_columns].mean().reset_index()
    cause_means.columns = ['Cause', 'Average Delay']

    fig2 = px.bar(
        cause_means,
        x='Cause',
        y='Average Delay',
        title=f'Delay Causes for Carrier: {selected_carrier}'
    )
    fig2.update_xaxes(title='Delay Cause')
    fig2.update_yaxes(title='Average Delay (minutes)')
    return fig2

if __name__ == '__main__':
    # `run_server` is the legacy way to start the app; newer Dash releases
    # expose `run` instead. Prefer `run` when the installed version has it and
    # fall back to `run_server` otherwise, so the cell starts on either.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)


In [4]:
def _hour_label(hhmm):
    """Convert hhmm clock values into zero-padded hour labels.

    The hour is obtained arithmetically (value // 100), so the result does not
    depend on how the number happens to be formatted as text.

    Args:
        hhmm: A pandas Series of clock values such as 1829.0 or 37.0.
            Non-numeric entries are treated as missing.

    Returns:
        A Series on the same index holding labels such as '18' or '00'.
        Missing inputs stay missing (NaN) instead of being labelled '00', so a
        later groupby leaves them out rather than counting them as midnight.
    """
    hours = pd.to_numeric(hhmm, errors='coerce') // 100
    # fillna(0) only makes the integer cast possible; those rows are blanked
    # again by the `where` below.
    labels = hours.fillna(0).astype(int).astype(str).str.zfill(2)
    return labels.where(hours.notna())


# Bucket every flight by the hour of its departure and arrival time.
db['DepHour'] = _hour_label(db['DepTime'])
db['ArrHour'] = _hour_label(db['ArrTime'])

delay_cause_columns = ['CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']

# Mean of every delay-cause column per departure hour and per arrival hour;
# these tables feed the drill-down chart.
avg_delays_dep = db.groupby(['DepHour'])[delay_cause_columns].mean().reset_index()
avg_delays_arr = db.groupby(['ArrHour'])[delay_cause_columns].mean().reset_index()

# Mean arrival delay per departure hour and per arrival hour for the overview
# chart (the variable names are kept because the callback refers to them).
delayed_carriers_dep = db.groupby('DepHour')[['ArrDelay']].mean().reset_index()
delayed_carriers_arr = db.groupby('ArrHour')[['ArrDelay']].mean().reset_index()

def _hourly_delay_bar(frame, hour_column, heading, axis_label):
    """Build one overview bar chart of mean arrival delay per hour bucket.

    Args:
        frame: Table with `hour_column` and an 'ArrDelay' column.
        hour_column: Name of the column plotted on the x axis.
        heading: Figure title.
        axis_label: Title shown on the x axis.

    Returns:
        The configured Plotly figure.
    """
    chart = px.bar(frame, x=hour_column, y='ArrDelay', title=heading)
    chart.update_xaxes(title=axis_label)
    chart.update_yaxes(title='Average Arrival Delay (minutes)')
    return chart


# Overview figures, one keyed by departure hour and one by arrival hour.
fig1_dep = _hourly_delay_bar(
    delayed_carriers_dep,
    'DepHour',
    'Departure Hours and their Average Arrival Delay',
    'Departure Hour',
)
fig1_arr = _hourly_delay_bar(
    delayed_carriers_arr,
    'ArrHour',
    'Arrival Hours and their Average Arrival Delay',
    'Arrival Hour',
)

# The Dash app that the layout and the callback attach to.
app = dash.Dash(__name__)

# Choices offered by the hour-type dropdown; each value is a column name.
hour_type_options = [
    {'label': 'Departure Hour', 'value': 'DepHour'},
    {'label': 'Arrival Hour', 'value': 'ArrHour'}
]

# Layout: overview chart, hour-type selector, then the drill-down chart.
app.layout = html.Div([
    # Starts on the departure-hour view, matching the dropdown default.
    dcc.Graph(id='bar-chart', figure=fig1_dep),
    dcc.Dropdown(
        id='hour-dropdown',
        options=hour_type_options,
        value='DepHour',
        style={'width': '50%'}
    ),
    dcc.Graph(id='delay-causes-chart')
])

def _hour_key(clicked_x):
    """Normalise a clicked x value to a zero-padded two-digit hour label.

    The hour columns hold strings such as '08'.
    NOTE(review): Plotly appears to report numeric-looking labels as numbers
    (8 rather than '08') in click events - confirm. Both forms are mapped to
    '08' here so the lookup matches either way.

    Args:
        clicked_x: The 'x' entry of a clicked point (string or number).

    Returns:
        The two-digit label, or ``str(clicked_x)`` when the value cannot be
        read as a number.
    """
    try:
        return f"{int(float(clicked_x)):02d}"
    except (TypeError, ValueError, OverflowError):
        return str(clicked_x)


# Define callback to update both charts
@app.callback(
    [Output('bar-chart', 'figure'), Output('delay-causes-chart', 'figure')],
    [Input('bar-chart', 'clickData'),
     Input('hour-dropdown', 'value')]
)
def update_charts(clickData, selected_hour_type):
    """Refresh the overview chart and the delay-causes chart.

    Args:
        clickData: The 'clickData' property of the 'bar-chart' graph, or
            None when no bar has been clicked yet.
        selected_hour_type: Dropdown value, 'DepHour' or 'ArrHour'.

    Returns:
        A pair ``(overview_figure, causes_figure)``.

        * No click yet, or the dropdown was what fired the callback: the
          overview is rebuilt for the selected hour type and the causes chart
          is an empty figure whose title prompts for a click.
        * A bar was clicked: the overview is left as it is
          (``dash.no_update``) and the causes chart shows the delay causes
          for the clicked hour.
    """
    by_departure = selected_hour_type == 'DepHour'

    # clickData keeps its last value after a click, so it cannot tell us
    # whether this call came from a new click or from the dropdown; the
    # callback context can.
    fired = [entry['prop_id'] for entry in dash.callback_context.triggered]
    hour_type_changed = 'hour-dropdown.value' in fired

    if clickData is None or hour_type_changed:
        if by_departure:
            delayed_carriers = delayed_carriers_dep
            title1 = 'Departure Hours and their Average Arrival Delay'
            xaxis_title1 = 'Departure Hour'
            title2 = 'Click on a Departure Hour to see delay causes'
        else:
            delayed_carriers = delayed_carriers_arr
            title1 = 'Arrival Hours and their Average Arrival Delay'
            xaxis_title1 = 'Arrival Hour'
            title2 = 'Click on an Arrival Hour to see delay causes'

        fig1 = px.bar(
            delayed_carriers,
            x=selected_hour_type,
            y='ArrDelay',
            title=title1
        )
        fig1.update_xaxes(title=xaxis_title1)
        fig1.update_yaxes(title='Average Arrival Delay (minutes)')

        fig2 = px.bar(title=title2)
        return fig1, fig2

    # A bar was clicked: look up the delay causes for that hour.
    selected_hour = _hour_key(clickData['points'][0]['x'])
    if by_departure:
        hour_data = avg_delays_dep[avg_delays_dep['DepHour'] == selected_hour]
        title2 = f'Delay Causes during Departure Hour: {selected_hour}'
    else:
        hour_data = avg_delays_arr[avg_delays_arr['ArrHour'] == selected_hour]
        title2 = f'Delay Causes during Arrival Hour: {selected_hour}'

    # Collapse the matching row(s) into a two-column (cause, value) table.
    hour_data = hour_data[['CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']]
    hour_data = hour_data.mean().reset_index()
    hour_data.columns = ['Cause', 'Average Delay']

    fig2 = px.bar(
        hour_data,
        x='Cause',
        y='Average Delay',
        title=title2
    )
    fig2.update_xaxes(title='Delay Cause')
    fig2.update_yaxes(title='Average Delay (minutes)')

    # Returning None here would blank the overview chart; no_update keeps the
    # figure that is already displayed.
    return dash.no_update, fig2

if __name__ == '__main__':
    # `run_server` is the legacy way to start the app; newer Dash releases
    # expose `run` instead. Prefer `run` when the installed version has it and
    # fall back to `run_server` otherwise, so the cell starts on either.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)
