In [1]:
import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import plotly.express as px
import webbrowser

import webbrowser
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

# Updated imports for Dash 2.x
import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
from IPython.display import display, Javascript

import webbrowser
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px


In [2]:
df1 = pd.read_csv('airlines_group1.csv')
df2 = pd.read_csv('airlines_group2.csv')

In [3]:
df1.rename(columns={'origin': 'airport_origin'}, inplace=True)
df2.rename(columns={'origin': 'airport_origin'}, inplace=True)

In [4]:
# Clean column names by stripping any extra spaces
df1.columns = df1.columns.str.strip()
df2.columns = df2.columns.str.strip()

# Convert 'date' column to datetime format
df1['date'] = pd.to_datetime(df1['date'])
df2['date'] = pd.to_datetime(df2['date'])

# Filter data based on date range for all airlines (2020-10-19 to 2023-04-23)
df1_filtered = df1[(df1['date'] >= '2020-10-19') & (df1['date'] <= '2023-04-23')]
df2_filtered = df2[(df2['date'] >= '2020-10-19') & (df2['date'] <= '2023-04-23')]

In [5]:
# Group by date and airline to sum arrival and departure delays
delays_df1 = df1_filtered.groupby(['date', 'airline'])[['arr_delay', 'dep_delay', 'airport_origin']].sum().reset_index()
delays_df2 = df2_filtered.groupby(['date', 'airline'])[['arr_delay', 'dep_delay', 'airport_origin']].sum().reset_index()

In [6]:
delays_df1 

Unnamed: 0,date,airline,arr_delay,dep_delay,airport_origin
0,2020-10-19,American Airlines,-7043.0,2292.0,"El Paso, TXHouston, TXDallas/Fort Worth, TXMon..."
1,2020-10-19,Frontier Airlines,-1314.0,-25.0,"Denver, COFresno, CADenver, COFort Myers, FLDe..."
2,2020-10-19,Hawaiian Airlines,-375.0,-248.0,"Honolulu, HILos Angeles, CALas Vegas, NVSan Fr..."
3,2020-10-19,JetBlue Airways,-2238.0,-451.0,"Boston, MAOrlando, FLWashington, DCBoston, MAR..."
4,2020-10-19,United Airlines,-16466.0,984.0,"Portland, MEHouston, TXCharlotte, NCIndianapol..."
...,...,...,...,...,...
4580,2023-04-23,American Airlines,27882.0,54128.0,"Phoenix, AZDallas/Fort Worth, TXRoswell, NMLar..."
4581,2023-04-23,Frontier Airlines,4050.0,5278.0,"Denver, COSan Francisco, CAOrlando, FLCincinna..."
4582,2023-04-23,Hawaiian Airlines,6768.0,6723.0,"Los Angeles, CAHonolulu, HILos Angeles, CAHono..."
4583,2023-04-23,JetBlue Airways,14590.0,18495.0,"New York, NYJacksonville, FLJacksonville, FLFo..."


In [7]:
# Start the app
app = dash.Dash(__name__, suppress_callback_exceptions=True)

In [8]:
# Update airline and airport dropdowns based on selected group
@app.callback(
    [Output("airline-dropdown", "options"),
     Output("airport-dropdown", "options")],
    [Input("group-select", "value"),
     Input("airline-dropdown", "value")]  # Added Input for the airline dropdown
)
def update_dropdowns(group, selected_airline):
    # Select the appropriate dataset based on the group
    df = df1_filtered if group == "group1" else df2_filtered

    # Update airline options based on the selected group
    airline_options = [{"label": airline, "value": airline} for airline in sorted(df["airline"].unique())]

    # If an airline is selected, filter airports based on the selected airline
    if selected_airline:
        filtered_airports = df[df['airline'] == selected_airline]['airport_origin'].unique()
    else:
        filtered_airports = df['airport_origin'].unique()

    # Update airport options based on the filtered airports
    airport_options = [{"label": airport, "value": airport} for airport in sorted(filtered_airports)]

    return airline_options, airport_options


In [9]:
@app.callback(
    Output('arrival-departure-output', 'children'),
    [Input('airline-dropdown', 'value'),
     Input('airport-dropdown', 'value'),
     Input('date-range', 'start_date'),
     Input('date-range', 'end_date'),
     Input('trend-line', 'value'),
     Input('flight-type', 'value'),
     Input('group-select', 'value')]  # Group selection added here
)
def update_arrival_departure_graph(selected_airline, selected_airport, start_date, end_date, trend_line_type, flight_type, selected_group):
    if not selected_airline or not start_date or not end_date:
        return html.Div("Please select all filters (Airline, Airport, Date Range).")
    
    # Select the appropriate dataset based on the group
    if selected_group == "group1":
        df_filtered = delays_df1
    else:
        df_filtered = delays_df2
    
    # Filter the data based on selected values
    df_filtered = df_filtered[
        (df_filtered['date'] >= start_date) &
        (df_filtered['date'] <= end_date) &
        (df_filtered['airline'] == selected_airline)
    ]
    
    # If an airport is selected, filter based on airport as well
    if selected_airport:
        # Check if selected_airport is part of the 'airport_origin' string
        df_filtered = df_filtered[df_filtered['airport_origin'].str.contains(selected_airport, case=False, na=False)]

    # Check if the filtered DataFrame is empty
    if df_filtered.empty:
        return html.Div("No data available for the selected filters.")

    # Create the plotly figure
    fig = go.Figure()

    # Plot based on flight type (arrival or departure)
    if flight_type == "arrivals":
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=df_filtered['arr_delay'],
                                 mode='lines', name='Arrival Delay', line=dict(color='blue')))
    else:
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=df_filtered['dep_delay'],
                                 mode='lines', name='Departure Delay', line=dict(color='orange')))
    
    # Create trend lines based on selected trend type (linear or polynomial)
    if trend_line_type == "linear":
        x = np.arange(len(df_filtered)).reshape(-1, 1)
        # Linear regression for trend line
        trend_model = LinearRegression().fit(x, df_filtered['arr_delay'])
        trend_line = trend_model.predict(x)
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=trend_line, mode='lines', name='Linear Trend Line', line=dict(color='red')))
    else:  # Polynomial regression for trend line
        x = np.arange(len(df_filtered)).reshape(-1, 1)
        poly_features = PolynomialFeatures(degree=2)
        X_poly = poly_features.fit_transform(x)
        trend_model = LinearRegression().fit(X_poly, df_filtered['arr_delay'])
        trend_line = trend_model.predict(X_poly)
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=trend_line, mode='lines', name='Polynomial Trend Line', line=dict(color='purple')))
    
    fig.update_layout(
        title=f"Arrival and Departure Delays for {selected_airline} {f'at {selected_airport}' if selected_airport else ''}",
        xaxis_title="Date",
        yaxis_title="Delay (minutes)",
        template="plotly"  # Light background, not "plotly_dark"
    )

    return dcc.Graph(figure=fig)


In [10]:
@app.callback(
    Output('trends-graph', 'children'),
    [Input('airline-dropdown', 'value'),
     Input('airport-dropdown', 'value'),
     Input('date-range', 'start_date'),
     Input('date-range', 'end_date'),
     Input('trend-line', 'value'),
     Input('flight-type', 'value'),
     Input('group-select', 'value')]  # Group selection added here
)
def update_trends_graph(selected_airline, selected_airport, start_date, end_date, trend_line_type, flight_type, selected_group):
    if not selected_airline or not start_date or not end_date:
        return html.Div("Please select all filters (Airline, Airport, Date Range).")
    
    # Select the appropriate dataset based on the group
    if selected_group == "group1":
        df_filtered = delays_df1
    else:
        df_filtered = delays_df2
    
    # Filter the data based on selected values
    df_filtered = df_filtered[
        (df_filtered['date'] >= start_date) &
        (df_filtered['date'] <= end_date) &
        (df_filtered['airline'] == selected_airline)
    ]
    
    # If an airport is selected, filter based on airport as well
    if selected_airport:
        df_filtered = df_filtered[df_filtered['airport_origin'].str.contains(selected_airport, case=False, na=False)]

    # Check if the filtered DataFrame is empty
    if df_filtered.empty:
        return html.Div("No data available for the selected filters.")

    # Create the plotly figure
    fig = go.Figure()

    # Plot based on flight type (arrival or departure)
    if flight_type == "arrivals":
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=df_filtered['arr_delay'],
                                 mode='lines', name='Actual Arrival Delay', line=dict(color='blue')))
    else:
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=df_filtered['dep_delay'],
                                 mode='lines', name='Actual Departure Delay', line=dict(color='orange')))
    
    # Create trend lines based on selected trend type (linear or polynomial)
    if trend_line_type == "linear":
        x = np.arange(len(df_filtered)).reshape(-1, 1)
        # Linear regression for trend line
        trend_model = LinearRegression().fit(x, df_filtered['arr_delay'] if flight_type == "arrivals" else df_filtered['dep_delay'])
        trend_line = trend_model.predict(x)
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=trend_line, mode='lines', name='Linear Trend Line', line=dict(color='red' if flight_type == "arrivals" else 'purple')))

        # Confidence Interval for Linear (Larger scale and more transparency)
        confidence_interval = 1.96 * np.std(df_filtered['arr_delay'] if flight_type == "arrivals" else df_filtered['dep_delay']) / np.sqrt(len(df_filtered))  # 95% Confidence
        upper_bound = trend_line + confidence_interval
        lower_bound = trend_line - confidence_interval
        
        # Choose colors based on flight type
        if flight_type == "arrivals":
            confidence_color = 'rgba(255, 255, 0, 0.2)'  # Yellow Transparent
        else:
            confidence_color = 'rgba(169, 169, 169, 0.2)'  # Grey Transparent
        
        # Adding a single confidence interval as a shaded area with thicker line width
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=upper_bound,
                                 fill='tonexty', line=dict(color=confidence_color, width=2), name="Confidence Interval"))

    else:  # Polynomial regression for trend line
        x = np.arange(len(df_filtered)).reshape(-1, 1)
        poly_features = PolynomialFeatures(degree=3)
        X_poly = poly_features.fit_transform(x)
        trend_model = LinearRegression().fit(X_poly, df_filtered['arr_delay'] if flight_type == "arrivals" else df_filtered['dep_delay'])
        trend_line = trend_model.predict(X_poly)
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=trend_line, mode='lines', name='Polynomial Trend Line', line=dict(color='purple')))

        # Confidence Interval for Polynomial Regression
        # Using the same approach, compute the confidence interval for the polynomial model
        poly_confidence_interval = 1.96 * np.std(df_filtered['arr_delay'] if flight_type == "arrivals" else df_filtered['dep_delay']) / np.sqrt(len(df_filtered))
        upper_bound_poly = trend_line + poly_confidence_interval
        lower_bound_poly = trend_line - poly_confidence_interval

        # Choose colors based on flight type
        if flight_type == "arrivals":
            confidence_color = 'rgba(255, 255, 0, 0.2)'  # Yellow Transparent
        else:
            confidence_color = 'rgba(169, 169, 169, 0.2)'  # Grey Transparent
        
        # Adding a single confidence interval as a shaded area with thicker line width
        fig.add_trace(go.Scatter(x=df_filtered['date'], y=upper_bound_poly,
                                 fill='tonexty', line=dict(color=confidence_color, width=8), name="Polynomial Confidence Interval"))

    # Add 28-day forecast (Linear Regression model to predict future)
    forecast_days = 28
    future_dates = pd.date_range(df_filtered['date'].max(), periods=forecast_days+1, freq='D')[1:]
    future_x = np.arange(len(df_filtered), len(df_filtered) + forecast_days).reshape(-1, 1)
    forecast_values = trend_model.predict(poly_features.transform(future_x)) if trend_line_type == "polynomial" else trend_model.predict(future_x)

    # Adding the forecast line as a solid line (not dotted)
    fig.add_trace(go.Scatter(x=future_dates, y=forecast_values, mode='lines', name='28-Day Forecast', line=dict(color='green')))
    
    fig.update_layout(
        title=f"Arrival and Departure Delays for {selected_airline} {f'at {selected_airport}' if selected_airport else ''}",
        xaxis_title="Date",
        yaxis_title="Delay (minutes)",
        template="plotly",  # Light background
    )

    return dcc.Graph(figure=fig)


In [11]:


# Start the app
#app = dash.Dash(__name__, suppress_callback_exceptions=True)
app.title = "Airline Dashboard"

app.layout = html.Div([
    html.H1("Airline Delay Dashboard", style={"textAlign": "center"}),

    dcc.Tabs(id="tabs", value='tab1', children=[
        dcc.Tab(label='Arrival and Departures Delay', value='tab1'),
        dcc.Tab(label='Trends', value='tab2'),
    ]),

    html.Div(id='tab-content')
])


# Common layout options for controls
def common_controls():
    return html.Div([
        html.Div([
            html.Label("Select Group:"),
            dcc.RadioItems(
                id="group-select",
                options=[
                    {"label": "Group 1", "value": "group1"},
                    {"label": "Group 2", "value": "group2"}
                ],
                value="group1",
                labelStyle={"display": "inline-block", "margin-right": "15px"}
            )
        ], style={"margin-bottom": "15px"}),

        html.Div([
            html.Label("Select Flight Type:"),
            dcc.RadioItems(
                id="flight-type",
                options=[
                    {"label": "Arrivals", "value": "arrivals"},
                    {"label": "Departures", "value": "departures"}
                ],
                value="arrivals",
                labelStyle={"display": "inline-block", "margin-right": "15px"}
            )
        ], style={"margin-bottom": "15px"}),

        html.Label("Select Airline:"),
        dcc.Dropdown(id="airline-dropdown"),

        html.Label("Select Original Airport:"),
        dcc.Dropdown(id="airport-dropdown"),

        html.Label("Select Date Range:"),
        dcc.DatePickerRange(
            id="date-range",
            display_format="YYYY-MM-DD"
        ),

        html.Br(), html.Br(),

        html.Div([
            html.Label("Select Trend Line Type:"),
            dcc.RadioItems(
                id="trend-line",
                options=[
                    {"label": "Linear", "value": "linear"},
                    {"label": "Polynomial", "value": "polynomial"}
                ],
                value="linear",
                labelStyle={"display": "inline-block", "margin-right": "15px"}
            )
        ])
    ])



@app.callback(Output('tab-content', 'children'), Input('tabs', 'value'))
def render_content(tab):
    if tab == 'tab1':
        return html.Div([
            common_controls(),
            html.Div(id='arrival-departure-output')  # Placeholder for Tab 1 graph
        ])
    elif tab == 'tab2':
        return html.Div([
            common_controls(),
            html.Div(id='trends-graph')  # Placeholder for Tab 2 graph
        ])


# Dynamic date range update
@app.callback(
    Output("date-range", "min_date_allowed"),
    Output("date-range", "max_date_allowed"),
    Output("date-range", "start_date"),
    Output("date-range", "end_date"),
    Input("group-select", "value")
)
def update_date_range(group):
    df = df1_filtered if group == "group1" else df2_filtered
    min_date = df["date"].min()
    max_date = df["date"].max()
    return min_date, max_date, min_date, max_date


In [12]:
# Launch the browser and run the app
if __name__ == '__main__':
    webbrowser.open_new_tab("http://127.0.0.1:8050/")
    app.run_server(debug=True, use_reloader=False)