## Final Assignment
* Create a dashboard to monitor and report US domestic airline flights performance.
* The key report items:
  - Yearly airline performance report
      - For the chosen year provide:
        - Number of flights (each Month) under different cancellation categories using bar chart.
        - Average flight time by reporting airline using line chart.
        - Percentage of diverted airport landings per reporting airline using pie chart.
        - Number of flights flying from each state using choropleth map.
        - Number of flights flying to each state from each reporting airline using treemap chart.
        
  - Yearly average flight delay statistics
      - For the chosen year provide,
        - Monthly average carrier delay by reporting airline for the given year.
        - Monthly average weather delay by reporting airline for the given year.
        - Monthly average national air system delay by reporting airline for the given year.
        - Monthly average security delay by reporting airline for the given year.
        - Monthly average late aircraft delay by reporting airline for the given year.
  
  - NOTE: You have already created the same dashboard components in the Flight Delay Time Statistics Dashboard section. We will be reusing them.
  - NOTE: Year range is between 2005 and 2020

In [1]:
# 1. import necessary libraries/packages
import pandas as pd 
import plotly.express as px

from jupyter_dash import JupyterDash
import dash
from dash import dcc, html
from dash.dependencies import Output, Input, State

In [2]:
# 2. Load the airline dataset into a DataFrame

path = r'C:\Users\Wilson Wei\Downloads\Data analysis\8. Data Visualisation with Python\airline_data.csv'

# Columns that are read as strings instead of letting pandas infer their type
# (presumably to avoid mixed-type inference on these sparse columns — confirm).
string_columns = {
    'Div1Airport': str,
    'Div1TailNum': str,
    'Div2Airport': str,
    'Div2TailNum': str,
}

airline_data = pd.read_csv(path, encoding="ISO-8859-1", dtype=string_columns)
#airline_data.drop('Unnamed: 0', axis=1, inplace=True)

# Years from 2005 to 2020 inclusive
year_list = list(range(2005, 2021))

In [3]:
# Preview the first 100 rows of the loaded data
airline_data.head(100)

Unnamed: 0.1,Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,FlightDate,Reporting_Airline,DOT_ID_Reporting_Airline,IATA_CODE_Reporting_Airline,...,Div4WheelsOff,Div4TailNum,Div5Airport,Div5AirportID,Div5AirportSeqID,Div5WheelsOn,Div5TotalGTime,Div5LongestGTime,Div5WheelsOff,Div5TailNum
0,1295781,1998,2,4,2,4,1998-04-02,AS,19930,AS,...,,,,,,,,,,
1,1125375,2013,2,5,13,1,2013-05-13,EV,20366,EV,...,,,,,,,,,,
2,118824,1993,3,9,25,6,1993-09-25,UA,19977,UA,...,,,,,,,,,,
3,634825,1994,4,11,12,6,1994-11-12,HP,19991,HP,...,,,,,,,,,,
4,1888125,2017,3,8,17,4,2017-08-17,UA,19977,UA,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,435399,2016,4,10,7,5,2016-10-07,AA,19805,AA,...,,,,,,,,,,
96,1512179,2005,4,11,8,2,2005-11-08,NW,19386,NW,...,,,,,,,,,,
97,105654,1995,4,11,22,3,1995-11-22,US,20355,US,...,,,,,,,,,,
98,1434824,1988,1,2,22,1,1988-02-22,WN,19393,WN,...,,,,,,,,,,


In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) of the 'DivAirportLandings' column
airline_data['DivAirportLandings'].describe()

count    10108.000000
mean         0.003067
std          0.100904
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          9.000000
Name: DivAirportLandings, dtype: float64

In [5]:
# Lift the display limit on the number of columns, then list every column name
pd.set_option('display.max_columns', None)
airline_data.columns.tolist()

['Unnamed: 0',
 'Year',
 'Quarter',
 'Month',
 'DayofMonth',
 'DayOfWeek',
 'FlightDate',
 'Reporting_Airline',
 'DOT_ID_Reporting_Airline',
 'IATA_CODE_Reporting_Airline',
 'Tail_Number',
 'Flight_Number_Reporting_Airline',
 'OriginAirportID',
 'OriginAirportSeqID',
 'OriginCityMarketID',
 'Origin',
 'OriginCityName',
 'OriginState',
 'OriginStateFips',
 'OriginStateName',
 'OriginWac',
 'DestAirportID',
 'DestAirportSeqID',
 'DestCityMarketID',
 'Dest',
 'DestCityName',
 'DestState',
 'DestStateFips',
 'DestStateName',
 'DestWac',
 'CRSDepTime',
 'DepTime',
 'DepDelay',
 'DepDelayMinutes',
 'DepDel15',
 'DepartureDelayGroups',
 'DepTimeBlk',
 'TaxiOut',
 'WheelsOff',
 'WheelsOn',
 'TaxiIn',
 'CRSArrTime',
 'ArrTime',
 'ArrDelay',
 'ArrDelayMinutes',
 'ArrDel15',
 'ArrivalDelayGroups',
 'ArrTimeBlk',
 'Cancelled',
 'CancellationCode',
 'Diverted',
 'CRSElapsedTime',
 'ActualElapsedTime',
 'AirTime',
 'Flights',
 'Distance',
 'DistanceGroup',
 'CarrierDelay',
 'WeatherDelay',
 'NASDela

In [10]:
# 3. Instantiate the Dash application

app = JupyterDash(__name__)

# 4. Assemble the page layout from named pieces, then attach it to the app

# Page heading
page_title = html.H1('US Domestic Airline Flights Performance',
                     style={'textAlign':'center', 'color':'#503D36', 'font-size':24})

# Row 1: "Report Type:" heading next to the report-type dropdown
report_heading = html.Div([html.H2('Report Type:', style={'margin-right': '2em'})])
report_dropdown = dcc.Dropdown(
    id='report-options',
    placeholder='Select a report type',
    options=[
        {'label':'Yearly Airline Performance Report', 'value':'Yearly Airline Performance Report'},
        {'label':'Yearly Airline Delay Report', 'value':'Yearly Airline Delay Report'},
    ],
    value=None,
    style={'width':'80%', 'padding':'3px', 'font-size': '20px', 'text-align-last' : 'center'},
)
report_row = html.Div([report_heading, report_dropdown], style={'display': 'flex'})

# Row 2: "Choose Year:" heading next to the year dropdown (one option per entry of year_list)
year_heading = html.Div([html.H2('Choose Year:', style={'margin-right': '2em'})])
year_dropdown = dcc.Dropdown(
    id='year-options',
    placeholder='Select a year',
    options=[{'label': year, 'value': year} for year in year_list],
    value=None,
    style={'width':'80%', 'padding':'3px', 'font-size': '20px', 'text-align-last' : 'center'},
)
year_row = html.Div([year_heading, year_dropdown], style={'display': 'flex'})

# Row 3: a single full-width container for the first chart
plot1_container = html.Div([], id='plot1',
                           style={'width': '100%', 'display': 'inline-block', 'vertical-align': 'middle'})

# Row 4: two chart containers side by side
plots_2_3_row = html.Div([html.Div([], id='plot2'),
                          html.Div([], id='plot3')],
                         style={'display':'flex'})

# Row 5: two more chart containers side by side
plots_4_5_row = html.Div([html.Div([], id='plot4'),
                          html.Div([], id='plot5')],
                         style={'display':'flex'})

# The plot containers start empty; their 'children' are the outputs of the callback
app.layout = html.Div([page_title,
                       html.Br(),
                       report_row,
                       year_row,
                       plot1_container,
                       plots_2_3_row,
                       plots_4_5_row])

# Figure builder for the 'Yearly Airline Performance Report'.
# It is called by the callback with the data already filtered to the selected year.

def performance(df, selected_year):
    """Build the five figures of the 'Yearly Airline Performance Report'.

    Args:
        df: Airline records for the chosen year. The columns read here are
            'Month', 'CancellationCode', 'Flights', 'Reporting_Airline',
            'AirTime', 'DivAirportLandings', 'OriginState' and 'DestState'.
        selected_year: The chosen year; it is only used in the chart titles.

    Returns:
        A list of five plotly figures in display order, which intentionally
        differs from the plot numbering used in the comments below:
        [treemap (plot5), pie (plot3), choropleth (plot4), bar (plot1),
        line (plot2)].
    """
    year_label = str(selected_year)

    # plot1 - Number of flights (each Month) under different cancellation categories using bar chart.
    # groupby skips rows whose 'CancellationCode' is missing by default, so
    # only rows that carry a cancellation code are counted.
    bar_data = (df[['Month', 'CancellationCode', 'Flights']]
                .groupby(['Month', 'CancellationCode'], as_index=False)
                .sum())
    bar_plot1 = px.bar(bar_data, x='Month', y='Flights', color='CancellationCode',
                       title='Number of Flight Cancellation in Year ' + year_label)

    # plot2 - Average flight time by reporting airline using line chart.
    line_data = (df[['Month', 'Reporting_Airline', 'AirTime']]
                 .groupby(['Month', 'Reporting_Airline'], as_index=False)
                 .mean())
    line_plot2 = px.line(line_data, x='Month', y='AirTime', color='Reporting_Airline',
                         title='Monthly Average Flight Time (mins) by Airline in Year ' + year_label)

    # plot3 - Percentage of diverted airport landings per reporting airline using pie chart.
    # Use `> 0` rather than `!= 0`: a missing value (NaN) compares unequal to 0,
    # so `!= 0` would count every flight without diversion data as diverted.
    pie_data = df[df['DivAirportLandings'] > 0]
    pie_plot3 = px.pie(pie_data, values='Flights', names='Reporting_Airline',
                       title='Percentage of Diverted Airport Landings by Airline in Year ' + year_label)

    # plot4 - Number of flights (Yearly) flying from each state using choropleth map.
    choropleth_data = df[['OriginState', 'Flights']].groupby(['OriginState'], as_index=False).sum()
    choropleth_plot4 = px.choropleth(choropleth_data,
                                     locations='OriginState',
                                     color='Flights',
                                     hover_data=['OriginState', 'Flights'],
                                     locationmode='USA-states',  # locations are two-letter state abbreviations
                                     color_continuous_scale='GnBu',
                                     # Pin the colour scale to start at 0 instead of auto-scaling
                                     range_color=[0, choropleth_data['Flights'].max()]
                                     )
    choropleth_plot4.update_layout(title_text='Number of flights from origin state in Year ' + year_label,
                                   geo_scope='usa')  # Limit map scope to USA only

    # plot5 - Number of flights 'flying to each state' from each reporting airline using treemap chart.
    tree_data = (df[['DestState', 'Reporting_Airline', 'Flights']]
                 .groupby(['DestState', 'Reporting_Airline'], as_index=False)
                 .sum())
    tree_plot5 = px.treemap(tree_data,
                            path=['DestState', 'Reporting_Airline'],
                            values='Flights',  # sector sizes
                            color='Flights',   # sector colours
                            color_continuous_scale='RdBu',
                            title='Number of Flights by Airline to each Destination State in Year ' + year_label)

    # Display order requested by the assignment (not the plot1..plot5 numbering)
    return [tree_plot5, pie_plot3, choropleth_plot4, bar_plot1, line_plot2]

# Figure builder for the 'Yearly Airline Delay Report'.

def delay(df, selected_year):
    """Build the five line charts of the 'Yearly Airline Delay Report'.

    For each delay column the records in ``df`` are averaged per
    ('Month', 'Reporting_Airline') pair and drawn as one line per airline.

    Args:
        df: Airline records for the chosen year.
        selected_year: The chosen year; it is only used in the chart titles.

    Returns:
        A list of five plotly figures in this order: carrier, weather,
        national air system, security and late aircraft delay.
    """
    group_keys = ['Month', 'Reporting_Airline']

    # (delay column, title prefix) for plot1..plot5, in return order
    chart_specs = (
        ('CarrierDelay', 'Mean Carrier delay time (mins) by Airline in Year '),
        ('WeatherDelay', 'Mean Weather delay time (mins) by Airline in Year '),
        ('NASDelay', 'Mean National Air System delay time (mins) by Airline in Year '),
        ('SecurityDelay', 'Mean Security delay time (mins) by Airline in Year '),
        ('LateAircraftDelay', 'Mean Late Aircraft delay time (mins) by Airline in Year '),
    )

    figures = []
    for delay_column, title_prefix in chart_specs:
        monthly_mean = df[group_keys + [delay_column]].groupby(group_keys, as_index=False).mean()
        figure = px.line(monthly_mean,
                         x='Month',
                         y=delay_column,
                         color='Reporting_Airline',
                         title=title_prefix + str(selected_year))
        figures.append(figure)

    return figures

# 5. Register the callback that fills the five plot containers
@app.callback([Output('plot1', 'children'),
               Output('plot2', 'children'),
               Output('plot3', 'children'),
               Output('plot4', 'children'),
               Output('plot5', 'children')],
              [Input('report-options', 'value'),
               Input('year-options', 'value')],
              # The current children of the plot containers are passed in as State.
              # The callback body does not read them; they are kept so the
              # registered signature stays unchanged.
              [State('plot1', 'children'),
               State('plot2', 'children'),
               State('plot3', 'children'),
               State('plot4', 'children'),
               State('plot5', 'children')])
def get_graph(selected_report, selected_year, children1, children2, children3, children4, children5):
    """Return the five charts for the selected report type and year.

    Args:
        selected_report: Value of the 'report-options' dropdown, or None when
            nothing is selected.
        selected_year: Value of the 'year-options' dropdown, or None when
            nothing is selected.
        children1..children5: Current children of the plot containers (unused).

    Returns:
        A list of five ``dcc.Graph`` components, one per plot container, or
        ``dash.no_update`` when a selection is missing or the report type is
        not recognised, which leaves the containers as they are.
    """
    # Both dropdowns start at None and this callback also fires on page load,
    # so bail out before int(selected_year) can raise a TypeError.
    if selected_report is None or selected_year is None:
        return dash.no_update

    df = airline_data[airline_data['Year'] == int(selected_year)]

    if selected_report == 'Yearly Airline Performance Report':
        figures = performance(df, selected_year)
    elif selected_report == 'Yearly Airline Delay Report':
        figures = delay(df, selected_year)
    else:
        return dash.no_update

    # Both builders return five figures already in display order (plot1..plot5)
    return [dcc.Graph(figure=figure) for figure in figures]
    
# 6. Run the application
if __name__ == '__main__':
    # mode='external': the app is served at a local URL (printed in the cell output)
    # rather than being displayed inside the notebook
    app.run_server(mode='external')

Dash app running on http://127.0.0.1:8050/


# Test
- Use a while loop to hold outputs until both report type and year are selected.
- Set prevent_initial_call=True to prevent the callback function from being triggered at the start of the application.

In [14]:
# 3. Instantiate the Dash application

app = JupyterDash(__name__)

# 4. Assemble the page layout from named pieces, then attach it to the app

# Page heading
page_title = html.H1('US Domestic Airline Flights Performance',
                     style={'textAlign':'center', 'color':'#503D36', 'font-size':24})

# Row 1: "Report Type:" heading next to the report-type dropdown
report_heading = html.Div([html.H2('Report Type:', style={'margin-right': '2em'})])
report_dropdown = dcc.Dropdown(
    id='report-options',
    placeholder='Select a report type',
    options=[
        {'label':'Yearly Airline Performance Report', 'value':'Yearly Airline Performance Report'},
        {'label':'Yearly Airline Delay Report', 'value':'Yearly Airline Delay Report'},
    ],
    value=None,
    style={'width':'80%', 'padding':'3px', 'font-size': '20px', 'text-align-last' : 'center'},
)
report_row = html.Div([report_heading, report_dropdown], style={'display': 'flex'})

# Row 2: "Choose Year:" heading next to the year dropdown (one option per entry of year_list)
year_heading = html.Div([html.H2('Choose Year:', style={'margin-right': '2em'})])
year_dropdown = dcc.Dropdown(
    id='year-options',
    placeholder='Select a year',
    options=[{'label': year, 'value': year} for year in year_list],
    value=None,
    style={'width':'80%', 'padding':'3px', 'font-size': '20px', 'text-align-last' : 'center'},
)
year_row = html.Div([year_heading, year_dropdown], style={'display': 'flex'})

# Row 3: a single full-width container for the first chart
plot1_container = html.Div([], id='plot1',
                           style={'width': '100%', 'display': 'inline-block', 'vertical-align': 'middle'})

# Row 4: two chart containers side by side
plots_2_3_row = html.Div([html.Div([], id='plot2'),
                          html.Div([], id='plot3')],
                         style={'display':'flex'})

# Row 5: two more chart containers side by side
plots_4_5_row = html.Div([html.Div([], id='plot4'),
                          html.Div([], id='plot5')],
                         style={'display':'flex'})

# The plot containers start empty; their 'children' are the outputs of the callback
app.layout = html.Div([page_title,
                       html.Br(),
                       report_row,
                       year_row,
                       plot1_container,
                       plots_2_3_row,
                       plots_4_5_row])

# Figure builder for the 'Yearly Airline Performance Report'.
# It is called by the callback with the data already filtered to the selected year.

def performance(df, selected_year):
    """Build the five figures of the 'Yearly Airline Performance Report'.

    Args:
        df: Airline records for the chosen year. The columns read here are
            'Month', 'CancellationCode', 'Flights', 'Reporting_Airline',
            'AirTime', 'DivAirportLandings', 'OriginState' and 'DestState'.
        selected_year: The chosen year; it is only used in the chart titles.

    Returns:
        A list of five plotly figures in display order, which intentionally
        differs from the plot numbering used in the comments below:
        [treemap (plot5), pie (plot3), choropleth (plot4), bar (plot1),
        line (plot2)].
    """
    year_label = str(selected_year)

    # plot1 - Number of flights (each Month) under different cancellation categories using bar chart.
    # groupby skips rows whose 'CancellationCode' is missing by default, so
    # only rows that carry a cancellation code are counted.
    bar_data = (df[['Month', 'CancellationCode', 'Flights']]
                .groupby(['Month', 'CancellationCode'], as_index=False)
                .sum())
    bar_plot1 = px.bar(bar_data, x='Month', y='Flights', color='CancellationCode',
                       title='Number of Flight Cancellation in Year ' + year_label)

    # plot2 - Average flight time by reporting airline using line chart.
    line_data = (df[['Month', 'Reporting_Airline', 'AirTime']]
                 .groupby(['Month', 'Reporting_Airline'], as_index=False)
                 .mean())
    line_plot2 = px.line(line_data, x='Month', y='AirTime', color='Reporting_Airline',
                         title='Monthly Average Flight Time (mins) by Airline in Year ' + year_label)

    # plot3 - Percentage of diverted airport landings per reporting airline using pie chart.
    # Use `> 0` rather than `!= 0`: a missing value (NaN) compares unequal to 0,
    # so `!= 0` would count every flight without diversion data as diverted.
    pie_data = df[df['DivAirportLandings'] > 0]
    pie_plot3 = px.pie(pie_data, values='Flights', names='Reporting_Airline',
                       title='Percentage of Diverted Airport Landings by Airline in Year ' + year_label)

    # plot4 - Number of flights (Yearly) flying from each state using choropleth map.
    choropleth_data = df[['OriginState', 'Flights']].groupby(['OriginState'], as_index=False).sum()
    choropleth_plot4 = px.choropleth(choropleth_data,
                                     locations='OriginState',
                                     color='Flights',
                                     hover_data=['OriginState', 'Flights'],
                                     locationmode='USA-states',  # locations are two-letter state abbreviations
                                     color_continuous_scale='GnBu',
                                     # Pin the colour scale to start at 0 instead of auto-scaling
                                     range_color=[0, choropleth_data['Flights'].max()]
                                     )
    choropleth_plot4.update_layout(title_text='Number of flights from origin state in Year ' + year_label,
                                   geo_scope='usa')  # Limit map scope to USA only

    # plot5 - Number of flights 'flying to each state' from each reporting airline using treemap chart.
    tree_data = (df[['DestState', 'Reporting_Airline', 'Flights']]
                 .groupby(['DestState', 'Reporting_Airline'], as_index=False)
                 .sum())
    tree_plot5 = px.treemap(tree_data,
                            path=['DestState', 'Reporting_Airline'],
                            values='Flights',  # sector sizes
                            color='Flights',   # sector colours
                            color_continuous_scale='RdBu',
                            title='Number of Flights by Airline to each Destination State in Year ' + year_label)

    # Display order requested by the assignment (not the plot1..plot5 numbering)
    return [tree_plot5, pie_plot3, choropleth_plot4, bar_plot1, line_plot2]

# Figure builder for the 'Yearly Airline Delay Report'.

def delay(df, selected_year):
    """Build the five line charts of the 'Yearly Airline Delay Report'.

    For each delay column the records in ``df`` are averaged per
    ('Month', 'Reporting_Airline') pair and drawn as one line per airline.

    Args:
        df: Airline records for the chosen year.
        selected_year: The chosen year; it is only used in the chart titles.

    Returns:
        A list of five plotly figures in this order: carrier, weather,
        national air system, security and late aircraft delay.
    """
    group_keys = ['Month', 'Reporting_Airline']

    # (delay column, title prefix) for plot1..plot5, in return order
    chart_specs = (
        ('CarrierDelay', 'Mean Carrier delay time (mins) by Airline in Year '),
        ('WeatherDelay', 'Mean Weather delay time (mins) by Airline in Year '),
        ('NASDelay', 'Mean National Air System delay time (mins) by Airline in Year '),
        ('SecurityDelay', 'Mean Security delay time (mins) by Airline in Year '),
        ('LateAircraftDelay', 'Mean Late Aircraft delay time (mins) by Airline in Year '),
    )

    figures = []
    for delay_column, title_prefix in chart_specs:
        monthly_mean = df[group_keys + [delay_column]].groupby(group_keys, as_index=False).mean()
        figure = px.line(monthly_mean,
                         x='Month',
                         y=delay_column,
                         color='Reporting_Airline',
                         title=title_prefix + str(selected_year))
        figures.append(figure)

    return figures

# 5. Register the callback that fills the five plot containers.
# prevent_initial_call=True stops the callback from firing when the app first loads.
@app.callback([Output('plot1', 'children'),
               Output('plot2', 'children'),
               Output('plot3', 'children'),
               Output('plot4', 'children'),
               Output('plot5', 'children')],
              [Input('report-options', 'value'),
               Input('year-options', 'value')],
              prevent_initial_call=True)
def get_graph(selected_report, selected_year):
    """Return the five charts for the selected report type and year.

    Args:
        selected_report: Value of the 'report-options' dropdown, or None when
            nothing is selected.
        selected_year: Value of the 'year-options' dropdown, or None when
            nothing is selected.

    Returns:
        A list of five ``dcc.Graph`` components, one per plot container, or
        ``dash.no_update`` when a selection is missing or the report type is
        not recognised, which leaves the containers as they are.
    """
    # The callback still fires as soon as ONE dropdown changes. Return
    # no_update explicitly: falling off the end would return None, which is
    # not a valid value for a callback with several outputs.
    if selected_report is None or selected_year is None:
        return dash.no_update

    df = airline_data[airline_data['Year'] == int(selected_year)]

    if selected_report == 'Yearly Airline Performance Report':
        figures = performance(df, selected_year)
    elif selected_report == 'Yearly Airline Delay Report':
        figures = delay(df, selected_year)
    else:
        return dash.no_update

    # Both builders return five figures already in display order (plot1..plot5)
    return [dcc.Graph(figure=figure) for figure in figures]
    
# 6. Run the application
if __name__ == '__main__':
    # dev_tools_ui=False and dev_tools_props_check=False were added to stop error
    # messages from appearing before the callback function has been called.
    # NOTE(review): this looks like it hides the errors rather than removing
    # their cause — confirm whether these flags are still needed.
    app.run_server(mode='jupyterlab', debug=False, dev_tools_ui=False, dev_tools_props_check=False)