# Exploratory Data Analysis for BTS Baggage Revenue Data

In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob
import os

In [33]:
# Initialize path for all csv files.
# NOTE(review): absolute, machine-specific path — consider a configurable data
# directory so the notebook runs on other machines.
bag_revenues_path = r'C:\Users\Nav\Documents\BaggageRevenueModel\BaggageRevenueModel\data\bag-rev-2015-2024'

# glob.glob returns matches in arbitrary, filesystem-dependent order. Sort the
# paths so the files are loaded (and their quarter columns merged) in the same
# order on every run; this is chronological only if the file names differ
# solely by year — TODO confirm.
all_files = sorted(glob.glob(os.path.join(bag_revenues_path, "*.csv")))

In [None]:
# Load each CSV into a frame holding 'Airline' plus that file's quarter columns
# (renamed to '<year>-Q<n>'), and collect the frames in `dfs`.
dfs = []

for f in all_files:
    # Extract year from filename. Every digit in the base name is concatenated,
    # so this assumes the year is the only number in the file name — TODO confirm.
    year = ''.join([c for c in os.path.basename(f) if c.isdigit()])

    # Read CSV; thousands=',' makes pandas parse comma-grouped numbers as numeric
    df = pd.read_csv(f, thousands=',')

    # Clean column names (strip surrounding whitespace from the headers)
    df.columns = df.columns.str.strip()

    # Drop total year column (assumed to be the last column — TODO confirm)
    df = df.drop(df.columns[-1], axis=1)

    # Drop "All" total row if present
    df = df[df['Airline'].str.lower() != 'all']

    # Rename quarter columns to include the year
    rename_map = {
        '1Q': f'{year}-Q1',
        '2Q': f'{year}-Q2',
        '3Q': f'{year}-Q3',
        '4Q': f'{year}-Q4'
    }
    df = df.rename(columns=rename_map)

    # Keep only airline + the quarter columns this file actually contains.
    # The previous filter tested membership in rename_map itself, which is
    # always true, so a file missing a quarter raised KeyError on selection.
    keep_cols = ['Airline'] + [col for col in rename_map.values() if col in df.columns]
    df = df[keep_cols]

    dfs.append(df)


In [None]:
# Merge all yearly data on Airline. The outer join keeps airlines that appear
# in only some of the frames; their missing quarters become NaN.
if not dfs:
    # Fail with a clear message instead of an IndexError on dfs[0]
    raise ValueError("No yearly CSV files were loaded; check bag_revenues_path.")

combined_df = dfs[0]
for df in dfs[1:]:
    combined_df = pd.merge(combined_df, df, on='Airline', how='outer')

# Rename the key column before sorting: the sort below looks it up as
# 'airline', and sorting by a label that does not exist raises KeyError.
combined_df = combined_df.rename(columns={'Airline': 'airline'})
combined_df = combined_df.sort_values(by='airline').reset_index(drop=True)

# Drop rows with any missing values (keeps only airlines with a value in
# every quarter column)
combined_df = combined_df.dropna()

print(combined_df.head())

          airline   2015-Q1   2015-Q2   2015-Q3   2015-Q4   2016-Q1   2016-Q2  \
0  AA/US Combined  271449.0  295015.0  292089.0  267293.0       NaN       NaN   
1          Alaska   23686.0   30215.0   34127.0   24787.0   30484.0   35022.0   
2       Allegiant   40444.0   42463.0   39934.0   38523.0   43759.0   45934.0   
3        American  151758.0  165660.0  292089.0  267293.0  262464.0  288836.0   
4          Breeze       NaN       NaN       NaN       NaN       NaN       NaN   

    2016-Q3   2016-Q4   2017-Q1  ...   2022-Q3   2022-Q4   2023-Q1   2023-Q2  \
0       NaN       NaN       NaN  ...       NaN       NaN       NaN       NaN   
1   39297.0   30811.0   30602.0  ...   92981.0   80917.0   74482.0   97943.0   
2   45168.0   42468.0   46556.0  ...   83634.0   80586.0   88685.0   96696.0   
3  288124.0  278048.0  272805.0  ...  351356.0  348944.0  325296.0  352279.0   
4       NaN       NaN       NaN  ...    3278.0    5318.0    6068.0   10288.0   

    2023-Q3   2023-Q4   2024-Q1 