In [1]:
import pandas as pd
import plotly as p
import plotly.express as px 
import plotly.io as pio
import plotly.graph_objects as go
import numpy as np
pio.templates.default = "plotly_dark"

In [7]:
consumer_fares = pd.read_csv("..\\raw_data\\Consumer_Airfare_Report__Table_7_-_Fare_Premiums_for_Select_Cities_with_More_Than_20_Passengers_per_Day.csv")
consumer_fares = consumer_fares.drop(['tbl', 'tbl7pk'], axis = 1) # dropping unncessary columns from data source

# We'll subset the dataframe to look only at flights leaving Seattle
consumer_fares = consumer_fares[consumer_fares['cityname'] == 'Seattle, WA']

# We'll subset this dataframe further to only look at the year 2021
# consumer_fares = consumer_fares[consumer_fares['year'] == 2021]

# Renaming columns for more practical use and interpretation 
consumer_fares = consumer_fares.rename(columns={'SHAvgHubFare' : 'Short_haul_avg_fare', 
                                               'LHAvgHubFare' : 'Long_haul_avg_fare',
                                               'TotalAvgHubFare' : 'Total_avg_hub_fare',
                                               'TotalPerPrem' : 'Total_percent_premium',
                                               'SHPerPrem' : 'Short_haul_percent_premium',
                                               'LHPerPrem' : 'Long_haul_percent_premium'})

consumer_fares['perc_sh_passengers'] = round((consumer_fares['SHPax']/consumer_fares['TotalFaredPax']), 4)
consumer_fares['perc_lh_passengers'] = round((consumer_fares['LHPax']/consumer_fares['TotalFaredPax']), 4)


#list(consumer_fares.columns)
consumer_fares.head(10)

Unnamed: 0,year,quarter,citymarketid,cityname,airportid,apt,TotalMkts,TotalFaredPax,TotalPerLFMkts,Total_avg_hub_fare,...,SHPerLFMkts,Short_haul_avg_fare,Short_haul_percent_premium,LHMkts,LHPax,LHPerLFMkts,Long_haul_avg_fare,Long_haul_percent_premium,perc_sh_passengers,perc_lh_passengers
6,2021,4,30559,"Seattle, WA",14747,SEA,149,49198600,0.4446,205.67,...,0.3204,154.68,-0.0836,120.0,39096600.0,0.4767,218.84,-0.0822,0.2053,0.7947
293,2022,1,30559,"Seattle, WA",14747,SEA,134,38896100,0.4265,201.32,...,0.2919,161.23,-0.0664,104.0,30743700.0,0.4622,211.96,-0.0996,0.2096,0.7904
474,2018,3,30559,"Seattle, WA",14747,SEA,162,69610000,0.6915,213.08,...,0.3851,164.73,-0.1363,132.0,54017600.0,0.7799,227.04,-0.0581,0.224,0.776
865,2003,3,30559,"Seattle, WA",14747,SEA,152,41996700,0.6481,171.32,...,0.7259,116.97,-0.2108,121.0,31428900.0,0.6219,189.59,-0.0386,0.2516,0.7484
942,2012,3,30559,"Seattle, WA",14747,SEA,165,47197400,0.822,227.69,...,0.707,159.54,-0.1218,136.0,36779900.0,0.8546,246.99,-0.0189,0.2207,0.7793
1077,2021,1,30559,"Seattle, WA",14747,SEA,107,17364300,0.4496,154.59,...,0.2709,136.69,-0.0889,82.0,13760800.0,0.4964,159.28,-0.1284,0.2075,0.7925
1316,2014,2,30559,"Seattle, WA",14747,SEA,151,48443800,0.7806,220.1,...,0.6942,153.41,-0.2422,123.0,36838600.0,0.8078,241.11,-0.1082,0.2396,0.7604
1490,1997,1,30559,"Seattle, WA",14747,SEA,120,24479300,0.5438,157.2,...,0.6087,89.54,-0.3564,91.0,16878500.0,0.5145,187.68,-0.1207,0.3105,0.6895
1649,2003,4,30559,"Seattle, WA",14747,SEA,147,37014900,0.6241,168.53,...,0.6908,116.5,-0.1945,115.0,27039500.0,0.5995,187.72,-0.0504,0.2695,0.7305
1670,2001,4,30559,"Seattle, WA",14747,SEA,138,33590000,0.6355,157.28,...,0.6816,103.48,-0.2717,107.0,24108700.0,0.6173,178.43,-0.0739,0.2823,0.7177


In [6]:
# First we'll visualize the average fare for long vs. short haul flights
fig = px.bar(consumer_fares.rename(columns = { "Short_haul_avg_fare":"Short Haul Average Fare", 'Long_haul_avg_fare':"Long Haul Average Fare"}), 
             x="year", y=['Short Haul Average Fare', "Long Haul Average Fare"], 
                labels={
                    "year": "Year",
                     "value":"Average Fare", 
                     "variable":"Flight Fares"
                 },
                title="Average Fare for Flights from Seattle by Year", 
                color_discrete_sequence=px.colors.qualitative.Dark24)
fig.update_xaxes(tickangle = 45)
fig.show()

In [13]:
# This visualization will serve to observe the s
# percent of short and long haul passenger based on the average fare price

consumer_fares_reshape = pd.melt(consumer_fares.rename(columns = {
    'perc_sh_passengers':'Percent of Short Haul Passengers', 
    'perc_lh_passengers':'Percent of Long Haul Passengers', 
    'Total_avg_hub_fare':'Average Fare Across all Flights'
    }), 
                                  id_vars = ['year', 'quarter'], 
                                  value_vars=['Percent of Short Haul Passengers', 
                                              'Percent of Long Haul Passengers',
                                              'Average Fare Across all Flights'
                                              ])

df = consumer_fares_reshape[['year', 'quarter','value']].set_index(['year', 'quarter'])['value']

figs = {
    c: px.bar(df.loc[c].reset_index(), x="Average Fare Across all Flights", 
              y=["Percent of Short Haul Passengers", "Percent of Long Haul Passengers"], 
              labels = {
                 'value':'Percent of Passengers Flying', 
                 'variable':'Type of Flight'
             }, 
             title="Percent of Passengers Flying based on the Average Fare Price",
        color_discrete_sequence=px.colors.qualitative.Dark24).update_traces(
        name=c, visible=False, 
    )
    for c in df.index.get_level_values("year").unique()
}


ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['quarter', 'value'] but received: Average Fare Across all Flights