# Tourism Volumes

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# First look at the 2013 Louisiana dataset

In [None]:
# First look at the raw 2013 Louisiana file.
# A bare `filepath =` followed only by a comment is a SyntaxError, so a relative
# default is supplied; edit it if the CSV is not in the working directory.
filepath = "Louisiana_2013.csv"  # insert here your path to Louisiana_2013.csv

df_louisiana_2013 = pd.read_csv(filepath)
# Column names, dtypes and non-null counts of the raw table.
df_louisiana_2013.info()

In [None]:
# Preview the first five rows of the raw 2013 table.
df_louisiana_2013.head(n=5)

In [None]:
# Summary statistics of the PASSENGERS column in the raw 2013 table.
df_louisiana_2013.loc[:, "PASSENGERS"].describe()

In [None]:
# Histogram of the PASSENGERS column in the raw 2013 table.
df_louisiana_2013.loc[:, "PASSENGERS"].plot.hist()

# Inbound MSY 2013

In [None]:
# Keep only rows whose DEST is MSY; the copy keeps later column edits off the raw frame.
inbound_MSY_2013 = df_louisiana_2013.loc[df_louisiana_2013['DEST'].eq('MSY')].copy()

In [None]:
# Number of MSY-bound rows in 2013.
inbound_MSY_2013.shape[0]

In [None]:
# One row per (YEAR, MONTH): total passengers, departures performed and seats into MSY.
monthly_passengers_MSY_2013 = (
    inbound_MSY_2013
    .groupby(['YEAR', 'MONTH'], as_index=False)
    .agg(
        PASSENGERS=('PASSENGERS', 'sum'),
        DEPARTURES_PERFORMED=('DEPARTURES_PERFORMED', 'sum'),
        SEATS=('SEATS', 'sum'),
    )
)

In [None]:
# Show up to 20 rows of the monthly MSY 2013 summary.
monthly_passengers_MSY_2013.head(n=20)

In [None]:
# First-of-month timestamp per row, parsed from a "<YEAR>-<MONTH>-01" string.
year_text = monthly_passengers_MSY_2013['YEAR'].astype(str)
month_text = monthly_passengers_MSY_2013['MONTH'].astype(str)
monthly_passengers_MSY_2013['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Check the dtype and non-null count of the new DATE column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
monthly_passengers_MSY_2013['DATE'].info()

In [None]:
# Load factor (occupancy rate): PASSENGERS as a percentage of SEATS for each month.
monthly_passengers_MSY_2013['LOAD_FACTOR'] = (
    monthly_passengers_MSY_2013['PASSENGERS']
    .div(monthly_passengers_MSY_2013['SEATS'])
    .mul(100)
)

In [None]:
# Check the dtype and non-null count of the new LOAD_FACTOR column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
monthly_passengers_MSY_2013['LOAD_FACTOR'].info()

In [None]:
# Summary statistics of the monthly MSY 2013 totals and load factor.
monthly_passengers_MSY_2013.loc[:, ['PASSENGERS', 'DEPARTURES_PERFORMED', 'LOAD_FACTOR']].describe()

In [None]:
# Two stacked panels for MSY 2013: monthly passengers (top) and load factor (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 12))
ax_passengers, ax_load = axes
title_font = dict(fontsize=14, fontweight='bold')

# Panel 1: monthly passenger totals.
ax_passengers.plot(
    monthly_passengers_MSY_2013['DATE'],
    monthly_passengers_MSY_2013['PASSENGERS'],
    marker='o', linewidth=2, markersize=4,
)
ax_passengers.set_title('Monthly Inbound Passengers to MSY New Orleans 2013', **title_font)
ax_passengers.set(xlabel='Date', ylabel='Total Passengers')

# Panel 2: monthly load factor with a dashed reference line at the mean of the monthly values.
mean_load_factor = monthly_passengers_MSY_2013['LOAD_FACTOR'].mean()
ax_load.plot(
    monthly_passengers_MSY_2013['DATE'],
    monthly_passengers_MSY_2013['LOAD_FACTOR'],
    color='green', marker='o', linewidth=2, markersize=4,
)
ax_load.set_title('Aircraft Load Factor (Occupancy Rate) 2013', **title_font)
ax_load.set(xlabel='Date', ylabel='Load Factor (%)')
ax_load.axhline(y=mean_load_factor, color='red', linestyle='--',
                label=f"Average: {mean_load_factor:.1f}%")
ax_load.legend()

# Shared cosmetics: faint grid and rotated date labels on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Final schema check of the monthly MSY 2013 table (columns, dtypes, non-null counts).
monthly_passengers_MSY_2013.info()

# Inbound MSY 2016

In [None]:
# Load the raw 2016 Louisiana file.
# A bare `filepath_2016 =` followed only by a comment is a SyntaxError, so a relative
# default is supplied; edit it if the CSV is not in the working directory.
filepath_2016 = "Louisiana_2016.csv"  # insert here your path to Louisiana_2016.csv

df_louisiana_2016 = pd.read_csv(filepath_2016)

In [None]:
# Summary statistics of the PASSENGERS column in the raw 2016 table.
df_louisiana_2016.loc[:, "PASSENGERS"].describe()

In [None]:
# Keep only rows whose DEST is MSY; the copy keeps later column edits off the raw frame.
inbound_MSY_2016 = df_louisiana_2016.loc[df_louisiana_2016['DEST'].eq('MSY')].copy()

In [None]:
# Number of MSY-bound rows in 2016.
inbound_MSY_2016.shape[0]

In [None]:
# One row per (YEAR, MONTH): total passengers, departures performed and seats into MSY.
monthly_passengers_MSY_2016 = (
    inbound_MSY_2016
    .groupby(['YEAR', 'MONTH'], as_index=False)
    .agg(
        PASSENGERS=('PASSENGERS', 'sum'),
        DEPARTURES_PERFORMED=('DEPARTURES_PERFORMED', 'sum'),
        SEATS=('SEATS', 'sum'),
    )
)

In [None]:
# First-of-month timestamp per row, parsed from a "<YEAR>-<MONTH>-01" string.
year_text = monthly_passengers_MSY_2016['YEAR'].astype(str)
month_text = monthly_passengers_MSY_2016['MONTH'].astype(str)
monthly_passengers_MSY_2016['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Check the dtype and non-null count of the new DATE column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
monthly_passengers_MSY_2016['DATE'].info()

In [None]:
# Load factor (occupancy rate): PASSENGERS as a percentage of SEATS for each month.
monthly_passengers_MSY_2016['LOAD_FACTOR'] = (
    monthly_passengers_MSY_2016['PASSENGERS']
    .div(monthly_passengers_MSY_2016['SEATS'])
    .mul(100)
)

In [None]:
# Schema check of the monthly MSY 2016 table (columns, dtypes, non-null counts).
monthly_passengers_MSY_2016.info()

In [None]:
# Two stacked panels for MSY 2016: monthly passengers (top) and load factor (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 12))
ax_passengers, ax_load = axes
title_font = dict(fontsize=14, fontweight='bold')

# Panel 1: monthly passenger totals.
ax_passengers.plot(
    monthly_passengers_MSY_2016['DATE'],
    monthly_passengers_MSY_2016['PASSENGERS'],
    marker='o', linewidth=2, markersize=4,
)
ax_passengers.set_title('Monthly Inbound Passengers to MSY New Orleans 2016', **title_font)
ax_passengers.set(xlabel='Date', ylabel='Total Passengers')

# Panel 2: monthly load factor with a dashed reference line at the mean of the monthly values.
mean_load_factor = monthly_passengers_MSY_2016['LOAD_FACTOR'].mean()
ax_load.plot(
    monthly_passengers_MSY_2016['DATE'],
    monthly_passengers_MSY_2016['LOAD_FACTOR'],
    color='green', marker='o', linewidth=2, markersize=4,
)
ax_load.set_title('Aircraft Load Factor (Occupancy Rate) 2016', **title_font)
ax_load.set(xlabel='Date', ylabel='Load Factor (%)')
ax_load.axhline(y=mean_load_factor, color='red', linestyle='--',
                label=f"Average: {mean_load_factor:.1f}%")
ax_load.legend()

# Shared cosmetics: faint grid and rotated date labels on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Inbound MSY 2018

In [None]:
# Load the raw 2018 Louisiana file.
# A bare `filepath_2018 =` followed only by a comment is a SyntaxError, so a relative
# default is supplied; edit it if the CSV is not in the working directory.
filepath_2018 = "Louisiana_2018.csv"  # insert here your path to Louisiana_2018.csv

df_louisiana_2018 = pd.read_csv(filepath_2018)

In [None]:
# Summary statistics of the PASSENGERS column in the raw 2018 table.
df_louisiana_2018.loc[:, "PASSENGERS"].describe()

In [None]:
# Keep only rows whose DEST is MSY; the copy keeps later column edits off the raw frame.
inbound_MSY_2018 = df_louisiana_2018.loc[df_louisiana_2018['DEST'].eq('MSY')].copy()

In [None]:
# Number of MSY-bound rows in 2018.
inbound_MSY_2018.shape[0]

In [None]:
# One row per (YEAR, MONTH): total passengers, departures performed and seats into MSY.
monthly_passengers_MSY_2018 = (
    inbound_MSY_2018
    .groupby(['YEAR', 'MONTH'], as_index=False)
    .agg(
        PASSENGERS=('PASSENGERS', 'sum'),
        DEPARTURES_PERFORMED=('DEPARTURES_PERFORMED', 'sum'),
        SEATS=('SEATS', 'sum'),
    )
)

In [None]:
# First-of-month timestamp per row, parsed from a "<YEAR>-<MONTH>-01" string.
year_text = monthly_passengers_MSY_2018['YEAR'].astype(str)
month_text = monthly_passengers_MSY_2018['MONTH'].astype(str)
monthly_passengers_MSY_2018['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Check the dtype and non-null count of the new DATE column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
monthly_passengers_MSY_2018['DATE'].info()

In [None]:
# Load factor (occupancy rate): PASSENGERS as a percentage of SEATS for each month.
monthly_passengers_MSY_2018['LOAD_FACTOR'] = (
    monthly_passengers_MSY_2018['PASSENGERS']
    .div(monthly_passengers_MSY_2018['SEATS'])
    .mul(100)
)

In [None]:
# Schema check of the monthly MSY 2018 table (columns, dtypes, non-null counts).
monthly_passengers_MSY_2018.info()

In [None]:
# Two stacked panels for MSY 2018: monthly passengers (top) and load factor (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 12))
ax_passengers, ax_load = axes
title_font = dict(fontsize=14, fontweight='bold')

# Panel 1: monthly passenger totals.
ax_passengers.plot(
    monthly_passengers_MSY_2018['DATE'],
    monthly_passengers_MSY_2018['PASSENGERS'],
    marker='o', linewidth=2, markersize=4,
)
ax_passengers.set_title('Monthly Inbound Passengers to MSY New Orleans 2018', **title_font)
ax_passengers.set(xlabel='Date', ylabel='Total Passengers')

# Panel 2: monthly load factor with a dashed reference line at the mean of the monthly values.
mean_load_factor = monthly_passengers_MSY_2018['LOAD_FACTOR'].mean()
ax_load.plot(
    monthly_passengers_MSY_2018['DATE'],
    monthly_passengers_MSY_2018['LOAD_FACTOR'],
    color='green', marker='o', linewidth=2, markersize=4,
)
ax_load.set_title('Aircraft Load Factor (Occupancy Rate) 2018', **title_font)
ax_load.set(xlabel='Date', ylabel='Load Factor (%)')
ax_load.axhline(y=mean_load_factor, color='red', linestyle='--',
                label=f"Average: {mean_load_factor:.1f}%")
ax_load.legend()

# Shared cosmetics: faint grid and rotated date labels on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Creating a combined dataset for New Orleans (MSY - 2013, 2016, 2018)

In [None]:
# Build one MSY table covering 2013, 2016 and 2018 from the three monthly summaries,
# print a few sanity checks, and save the result as a CSV.
df_2013 = monthly_passengers_MSY_2013
df_2016 = monthly_passengers_MSY_2016
df_2018 = monthly_passengers_MSY_2018


def _print_banner(title):
    """Print a blank line, then `title` framed by two 50-character '=' rules."""
    print("\n" + "="*50)
    print(title)
    print("="*50)


# Size of each yearly input.
print("Dataset shapes:")
print(f"2013: {df_2013.shape}")
print(f"2016: {df_2016.shape}")
print(f"2018: {df_2018.shape}")

# Stack the yearly frames row-wise with a fresh 0..n-1 index.
msy_combined = pd.concat([df_2013, df_2016, df_2018], ignore_index=True)

print(f"\nCombined dataset shape: {msy_combined.shape}")
print(f"Total months: {len(msy_combined)}")

# Put the rows in chronological order and renumber them.
msy_combined = msy_combined.sort_values('DATE').reset_index(drop=True)

# Eyeball the stacked rows (up to 36 of them).
_print_banner("COMBINED DATASET SUMMARY")
print(msy_combined.head(36))

# Earliest/latest month and the distinct years present.
_print_banner("DATA RANGE")
combined_dates = msy_combined['DATE']
print(f"Start: {combined_dates.min()}")
print(f"End: {combined_dates.max()}")
print(f"Years included: {sorted(msy_combined['YEAR'].unique())}")

# A year with fewer rows than expected indicates missing months.
_print_banner("CHECK FOR GAPS")
print("Months per year:")
print(msy_combined.groupby('YEAR')['MONTH'].count())

# Statistics pooled across all included years.
_print_banner("OVERALL STATISTICS")
print(msy_combined[['PASSENGERS', 'LOAD_FACTOR']].describe())

# Persist the combined table to the working directory.
msy_combined.to_csv('New_Orleans_MSY_combined_2013_2016_2018.csv', index=False)
print("\n✅ Combined dataset saved as 'New_Orleans_MSY_combined_2013_2016_2018.csv'")

In [None]:
# Seasonal comparison for MSY: one line per year, month of year on the x-axis.
# `plt` comes from the notebook's import cell, so it is not re-imported here.

# Month abbreviations used as x tick labels (positions 1..12).
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

fig, ax = plt.subplots(figsize=(14, 6))

# Per-year line styling, keyed by the year rendered as a string.
colors = {'2013': 'steelblue', '2016': 'green', '2018': 'orange'}
markers = {'2013': 'o', '2016': 's', '2018': '^'}

for year in sorted(msy_combined['YEAR'].unique()):
    # Rows of this year in calendar order.
    year_data = msy_combined[msy_combined['YEAR'] == year].sort_values('MONTH')

    ax.plot(year_data['MONTH'], year_data['PASSENGERS'],
            marker=markers[str(year)],
            linewidth=2.5,
            markersize=8,
            color=colors[str(year)],
            label=f'{year}',
            alpha=0.8)

ax.set_title('MSY Inbound Passengers: Seasonal Patterns by Year',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Total Passengers', fontsize=12)
ax.set_xticks(range(1, 13))
ax.set_xticklabels(month_names)
ax.legend(title='Year', fontsize=11, title_fontsize=12)
ax.grid(True, alpha=0.3)
plt.tight_layout()
# Save before show() so the written file contains the rendered figure.
plt.savefig('MSY_seasonal_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Inbound Chicago (ORD airport) 2022

In [None]:
# Load the raw 2022 Illinois file.
# A bare `filepath =` followed only by a comment is a SyntaxError, so a relative
# default is supplied; edit it if the CSV is not in the working directory.
filepath = "Illinois_2022.csv"  # insert here your path to Illinois_2022.csv

Illinois_2022 = pd.read_csv(filepath)

In [None]:
# Number of rows in the raw 2022 Illinois table.
Illinois_2022.shape[0]

In [None]:
# Keep only rows whose DEST is ORD; the copy keeps later column edits off the raw frame.
inbound_ORD_2022 = Illinois_2022.loc[Illinois_2022['DEST'].eq('ORD')].copy()

In [None]:
# Number of ORD-bound rows in 2022.
inbound_ORD_2022.shape[0]

In [None]:
# One row per (YEAR, MONTH): total passengers, departures performed and seats into ORD.
monthly_passengers_ORD_2022 = (
    inbound_ORD_2022
    .groupby(['YEAR', 'MONTH'], as_index=False)
    .agg(
        PASSENGERS=('PASSENGERS', 'sum'),
        DEPARTURES_PERFORMED=('DEPARTURES_PERFORMED', 'sum'),
        SEATS=('SEATS', 'sum'),
    )
)

In [None]:
# First-of-month timestamp per row, parsed from a "<YEAR>-<MONTH>-01" string.
year_text = monthly_passengers_ORD_2022['YEAR'].astype(str)
month_text = monthly_passengers_ORD_2022['MONTH'].astype(str)
monthly_passengers_ORD_2022['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Check the dtype and non-null count of the new DATE column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
monthly_passengers_ORD_2022['DATE'].info()

In [None]:
# Smallest and largest monthly passenger totals for ORD in 2022.
ord_2022_passengers = monthly_passengers_ORD_2022['PASSENGERS']
print(f"Min: {ord_2022_passengers.min()}")
print(f"Max: {ord_2022_passengers.max()}")

In [None]:
# Load factor (occupancy rate): PASSENGERS as a percentage of SEATS for each month.
monthly_passengers_ORD_2022['LOAD_FACTOR'] = (
    monthly_passengers_ORD_2022['PASSENGERS']
    .div(monthly_passengers_ORD_2022['SEATS'])
    .mul(100)
)

In [None]:
# Two stacked panels for ORD 2022: monthly passengers (top) and load factor (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 12))
ax_passengers, ax_load = axes
title_font = dict(fontsize=14, fontweight='bold')

# Panel 1: monthly passenger totals.
ax_passengers.plot(
    monthly_passengers_ORD_2022['DATE'],
    monthly_passengers_ORD_2022['PASSENGERS'],
    marker='o', linewidth=2, markersize=4,
)
ax_passengers.set_title('Monthly Inbound Passengers to ORD Chicago 2022', **title_font)
ax_passengers.set(xlabel='Date', ylabel='Total Passengers')

# Panel 2: monthly load factor with a dashed reference line at the mean of the monthly values.
mean_load_factor = monthly_passengers_ORD_2022['LOAD_FACTOR'].mean()
ax_load.plot(
    monthly_passengers_ORD_2022['DATE'],
    monthly_passengers_ORD_2022['LOAD_FACTOR'],
    color='green', marker='o', linewidth=2, markersize=4,
)
ax_load.set_title('Aircraft Load Factor (Occupancy Rate) ORD Chicago 2022', **title_font)
ax_load.set(xlabel='Date', ylabel='Load Factor (%)')
ax_load.axhline(y=mean_load_factor, color='red', linestyle='--',
                label=f"Average: {mean_load_factor:.1f}%")
ax_load.legend()

# Shared cosmetics: faint grid and rotated date labels on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Inbound Chicago (ORD airport) 2024

In [None]:
# Load the raw 2024 Illinois file.
# A bare `filepath =` followed only by a comment is a SyntaxError, so a relative
# default is supplied; edit it if the CSV is not in the working directory.
filepath = "Illinois_2024.csv"  # insert here your path to Illinois_2024.csv

Illinois_2024 = pd.read_csv(filepath)

In [None]:
# Number of rows in the raw 2024 Illinois table.
Illinois_2024.shape[0]

In [None]:
# Keep only rows whose DEST is ORD; the copy keeps later column edits off the raw frame.
inbound_ORD_2024 = Illinois_2024.loc[Illinois_2024['DEST'].eq('ORD')].copy()

In [None]:
# Number of ORD-bound rows in 2024.
inbound_ORD_2024.shape[0]

In [None]:
# One row per (YEAR, MONTH): total passengers, departures performed and seats into ORD.
monthly_passengers_ORD_2024 = (
    inbound_ORD_2024
    .groupby(['YEAR', 'MONTH'], as_index=False)
    .agg(
        PASSENGERS=('PASSENGERS', 'sum'),
        DEPARTURES_PERFORMED=('DEPARTURES_PERFORMED', 'sum'),
        SEATS=('SEATS', 'sum'),
    )
)

In [None]:
# First-of-month timestamp per row, parsed from a "<YEAR>-<MONTH>-01" string.
year_text = monthly_passengers_ORD_2024['YEAR'].astype(str)
month_text = monthly_passengers_ORD_2024['MONTH'].astype(str)
monthly_passengers_ORD_2024['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Check the dtype and non-null count of the new DATE column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
monthly_passengers_ORD_2024['DATE'].info()

In [None]:
# Load factor (occupancy rate): PASSENGERS as a percentage of SEATS for each month.
monthly_passengers_ORD_2024['LOAD_FACTOR'] = (
    monthly_passengers_ORD_2024['PASSENGERS']
    .div(monthly_passengers_ORD_2024['SEATS'])
    .mul(100)
)

In [None]:
# Two stacked panels for ORD 2024: monthly passengers (top) and load factor (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 12))
ax_passengers, ax_load = axes
title_font = dict(fontsize=14, fontweight='bold')

# Panel 1: monthly passenger totals.
ax_passengers.plot(
    monthly_passengers_ORD_2024['DATE'],
    monthly_passengers_ORD_2024['PASSENGERS'],
    marker='o', linewidth=2, markersize=4,
)
ax_passengers.set_title('Monthly Inbound Passengers to ORD Chicago 2024', **title_font)
ax_passengers.set(xlabel='Date', ylabel='Total Passengers')

# Panel 2: monthly load factor with a dashed reference line at the mean of the monthly values.
mean_load_factor = monthly_passengers_ORD_2024['LOAD_FACTOR'].mean()
ax_load.plot(
    monthly_passengers_ORD_2024['DATE'],
    monthly_passengers_ORD_2024['LOAD_FACTOR'],
    color='green', marker='o', linewidth=2, markersize=4,
)
ax_load.set_title('Aircraft Load Factor (Occupancy Rate) ORD Chicago 2024', **title_font)
ax_load.set(xlabel='Date', ylabel='Load Factor (%)')
ax_load.axhline(y=mean_load_factor, color='red', linestyle='--',
                label=f"Average: {mean_load_factor:.1f}%")
ax_load.legend()

# Shared cosmetics: faint grid and rotated date labels on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Creating a combined dataset for Chicago (ORD - 2022, 2024)

In [None]:
# Build one ORD table covering 2022 and 2024 from the two monthly summaries,
# print a few sanity checks, and save the result as a CSV.

# ============================================
# STEP 1: Gather the yearly monthly summaries
# ============================================

df_ORD_2022 = monthly_passengers_ORD_2022
df_ORD_2024 = monthly_passengers_ORD_2024


print("Dataset shapes:")
print(f"2022: {df_ORD_2022.shape}")
print(f"2024: {df_ORD_2024.shape}")

# ============================================
# STEP 2: Combine them vertically (stack rows)
# ============================================

# pd.concat stacks dataframes on top of each other; ignore_index gives a fresh 0..n-1 index
Chicago_ORD_combined = pd.concat([df_ORD_2022, df_ORD_2024], ignore_index=True)

print(f"\nCombined dataset shape: {Chicago_ORD_combined.shape}")
print(f"Total months: {len(Chicago_ORD_combined)}")

# ============================================
# STEP 3: Sort by date for proper time series
# ============================================

Chicago_ORD_combined = Chicago_ORD_combined.sort_values('DATE').reset_index(drop=True)

# ============================================
# STEP 4: Verify the data
# ============================================

print("\n" + "="*50)
print("COMBINED DATASET SUMMARY")
print("="*50)
print(Chicago_ORD_combined.head(36))

print("\n" + "="*50)
print("DATA RANGE")
print("="*50)
print(f"Start: {Chicago_ORD_combined['DATE'].min()}")
print(f"End: {Chicago_ORD_combined['DATE'].max()}")
print(f"Years included: {sorted(Chicago_ORD_combined['YEAR'].unique())}")

# A year with fewer rows than expected indicates missing months.
print("\n" + "="*50)
print("CHECK FOR GAPS")
print("="*50)
print("Months per year:")
print(Chicago_ORD_combined.groupby('YEAR')['MONTH'].count())

# ============================================
# STEP 5: Basic statistics across all years
# ============================================

print("\n" + "="*50)
print("OVERALL STATISTICS")
print("="*50)
print(Chicago_ORD_combined[['PASSENGERS', 'LOAD_FACTOR']].describe())

# ============================================
# STEP 6: Save combined dataset
# ============================================

Chicago_ORD_combined.to_csv("Chicago_ORD_combined_2022_2024.csv", index=False)
# The filename is now wrapped in a matching pair of single quotes.
print("\n✅ Combined dataset saved as 'Chicago_ORD_combined_2022_2024.csv'")

In [None]:
# Seasonal overlay for Chicago (ORD): one line per year against month of year.

# Month abbreviations used as x tick labels (positions 1..12).
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Per-year line styling, keyed by the year rendered as a string.
colors = {'2022': 'steelblue', '2024':'orange'}
markers = {'2022': 'o', '2024': 's'}

fig, ax = plt.subplots(figsize=(14, 6))

for year in sorted(Chicago_ORD_combined['YEAR'].unique()):
    year_key = str(year)
    # Rows of this year in calendar order.
    season = (
        Chicago_ORD_combined
        .loc[Chicago_ORD_combined['YEAR'].eq(year)]
        .sort_values('MONTH')
    )
    ax.plot(
        season['MONTH'], season['PASSENGERS'],
        label=year_key,
        color=colors[year_key],
        marker=markers[year_key],
        markersize=8,
        linewidth=2.5,
        alpha=0.8,
    )

ax.set_title('Chicago (ORD) Inbound Passengers: Seasonal Patterns by Year',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Total Passengers', fontsize=12)
ax.set_xticks(range(1, 13))
ax.set_xticklabels(month_names)
ax.legend(title='Year', fontsize=11, title_fontsize=12)
ax.grid(True, alpha=0.3)
plt.tight_layout()
# Write the PNG before show().
plt.savefig('Chicago_ORD_seasonal_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Inbound NY 2022 - JFK, LGA, EWR

In [None]:
# Load the raw 2022 New York file.
# A bare `filepath =` followed only by a comment is a SyntaxError, so a relative
# default is supplied; edit it if the CSV is not in the working directory.
filepath = "NY_2022.csv"  # insert here your path to NY_2022.csv

NY_2022 = pd.read_csv(filepath)

In [None]:
# Number of rows in the raw 2022 New York table.
NY_2022.shape[0]

In [None]:
# Keep only rows whose DEST is one of the three airports; copy to detach from the raw frame.
nyc_airports = ['JFK', 'LGA', 'EWR']
inbound_JFK_LGA_EWR_2022 = NY_2022.loc[NY_2022['DEST'].isin(nyc_airports)].copy()

In [None]:
# Number of rows bound for JFK, LGA or EWR in 2022.
inbound_JFK_LGA_EWR_2022.shape[0]

In [None]:
# One row per (YEAR, MONTH): total passengers, departures performed and seats,
# pooled over the three airports.
# NOTE: this rebinds the name from the record-level rows to the monthly summary.
inbound_JFK_LGA_EWR_2022 = (
    inbound_JFK_LGA_EWR_2022
    .groupby(['YEAR', 'MONTH'], as_index=False)
    .agg(
        PASSENGERS=('PASSENGERS', 'sum'),
        DEPARTURES_PERFORMED=('DEPARTURES_PERFORMED', 'sum'),
        SEATS=('SEATS', 'sum'),
    )
)

In [None]:
# First-of-month timestamp per row, parsed from a "<YEAR>-<MONTH>-01" string.
year_text = inbound_JFK_LGA_EWR_2022['YEAR'].astype(str)
month_text = inbound_JFK_LGA_EWR_2022['MONTH'].astype(str)
inbound_JFK_LGA_EWR_2022['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Check the dtype and non-null count of the new DATE column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
inbound_JFK_LGA_EWR_2022['DATE'].info()

In [None]:
# Load factor (occupancy rate): PASSENGERS as a percentage of SEATS for each month.
inbound_JFK_LGA_EWR_2022['LOAD_FACTOR'] = (
    inbound_JFK_LGA_EWR_2022['PASSENGERS']
    .div(inbound_JFK_LGA_EWR_2022['SEATS'])
    .mul(100)
)

In [None]:
# Two stacked panels for NY (JFK, LGA, EWR) 2022: monthly passengers (top) and load factor (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 12))
ax_passengers, ax_load = axes
title_font = dict(fontsize=14, fontweight='bold')

# Panel 1: monthly passenger totals.
ax_passengers.plot(
    inbound_JFK_LGA_EWR_2022['DATE'],
    inbound_JFK_LGA_EWR_2022['PASSENGERS'],
    marker='o', linewidth=2, markersize=4,
)
ax_passengers.set_title('Monthly Inbound Passengers to NY (JFK, LGA, EWR) 2022', **title_font)
ax_passengers.set(xlabel='Date', ylabel='Total Passengers')

# Panel 2: monthly load factor with a dashed reference line at the mean of the monthly values.
mean_load_factor = inbound_JFK_LGA_EWR_2022['LOAD_FACTOR'].mean()
ax_load.plot(
    inbound_JFK_LGA_EWR_2022['DATE'],
    inbound_JFK_LGA_EWR_2022['LOAD_FACTOR'],
    color='green', marker='o', linewidth=2, markersize=4,
)
ax_load.set_title('Aircraft Load Factor (Occupancy Rate) to NY (JFK, LGA, EWR) 2022', **title_font)
ax_load.set(xlabel='Date', ylabel='Load Factor (%)')
ax_load.axhline(y=mean_load_factor, color='red', linestyle='--',
                label=f"Average: {mean_load_factor:.1f}%")
ax_load.legend()

# Shared cosmetics: faint grid and rotated date labels on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
# Write the PNG before show().
plt.savefig('Inbound Passengers to NY (JFK, LGA, EWR) 2022.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Save the monthly NY (JFK, LGA, EWR) 2022 table to the working directory.
inbound_JFK_LGA_EWR_2022.to_csv("New_York_inbound_JFK_LGA_EWR_2022.csv", index=False)
# The filename is now wrapped in a matching pair of single quotes.
print("\n✅ Combined dataset saved as 'New_York_inbound_JFK_LGA_EWR_2022.csv'")

# Inbound LA 2022

In [None]:
# Load the raw 2022 California file.
# A bare `filepath =` followed only by a comment is a SyntaxError, so a relative
# default is supplied; edit it if the CSV is not in the working directory.
filepath = "California_2022.csv"  # insert here your path to California_2022.csv

California_2022 = pd.read_csv(filepath)

In [None]:
# Number of rows in the raw 2022 California table.
California_2022.shape[0]

In [None]:
# Keep only rows whose DEST is one of the five listed airports; copy to detach from the raw frame.
la_airports = ['LAX', 'BUR', 'LGB', 'SNA', 'ONT']
inbound_LA_2022 = California_2022.loc[California_2022['DEST'].isin(la_airports)].copy()

In [None]:
# Number of LA-area-bound rows in 2022.
inbound_LA_2022.shape[0]

In [None]:
# One row per (YEAR, MONTH): total passengers, departures performed and seats,
# pooled over the selected LA-area airports.
monthly_passengers_LA_2022 = (
    inbound_LA_2022
    .groupby(['YEAR', 'MONTH'], as_index=False)
    .agg(
        PASSENGERS=('PASSENGERS', 'sum'),
        DEPARTURES_PERFORMED=('DEPARTURES_PERFORMED', 'sum'),
        SEATS=('SEATS', 'sum'),
    )
)

In [None]:
# First-of-month timestamp per row, parsed from a "<YEAR>-<MONTH>-01" string.
year_text = monthly_passengers_LA_2022['YEAR'].astype(str)
month_text = monthly_passengers_LA_2022['MONTH'].astype(str)
monthly_passengers_LA_2022['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Check the dtype and non-null count of the new DATE column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
monthly_passengers_LA_2022['DATE'].info()

In [None]:
# Load factor (occupancy rate): PASSENGERS as a percentage of SEATS for each month.
monthly_passengers_LA_2022['LOAD_FACTOR'] = (
    monthly_passengers_LA_2022['PASSENGERS']
    .div(monthly_passengers_LA_2022['SEATS'])
    .mul(100)
)

In [None]:
# Two stacked panels for Los Angeles 2022: monthly passengers (top) and load factor (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 12))
ax_passengers, ax_load = axes
title_font = dict(fontsize=14, fontweight='bold')

# Panel 1: monthly passenger totals.
ax_passengers.plot(
    monthly_passengers_LA_2022['DATE'],
    monthly_passengers_LA_2022['PASSENGERS'],
    marker='o', linewidth=2, markersize=4,
)
ax_passengers.set_title('Monthly Inbound Passengers to Los Angeles 2022', **title_font)
ax_passengers.set(xlabel='Date', ylabel='Total Passengers')

# Panel 2: monthly load factor with a dashed reference line at the mean of the monthly values.
mean_load_factor = monthly_passengers_LA_2022['LOAD_FACTOR'].mean()
ax_load.plot(
    monthly_passengers_LA_2022['DATE'],
    monthly_passengers_LA_2022['LOAD_FACTOR'],
    color='green', marker='o', linewidth=2, markersize=4,
)
ax_load.set_title('Aircraft Load Factor (Occupancy Rate) Inbound LA 2022', **title_font)
ax_load.set(xlabel='Date', ylabel='Load Factor (%)')
ax_load.axhline(y=mean_load_factor, color='red', linestyle='--',
                label=f"Average: {mean_load_factor:.1f}%")
ax_load.legend()

# Shared cosmetics: faint grid and rotated date labels on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Inbound LA 2024

In [None]:
# Load the raw 2024 California file.
# A bare `filepath =` followed only by a comment is a SyntaxError, so a relative
# default is supplied; edit it if the CSV is not in the working directory.
filepath = "California_2024.csv"  # insert here your path to California_2024.csv

California_2024 = pd.read_csv(filepath)

In [None]:
# Number of rows in the raw 2024 California table.
California_2024.shape[0]

In [None]:
# Keep only rows whose DEST is one of the five listed airports; copy to detach from the raw frame.
la_airports = ['LAX', 'BUR', 'LGB', 'SNA', 'ONT']
inbound_LA_2024 = California_2024.loc[California_2024['DEST'].isin(la_airports)].copy()

In [None]:
# Number of LA-area-bound rows in 2024.
inbound_LA_2024.shape[0]

In [None]:
# One row per (YEAR, MONTH): total passengers, departures performed and seats,
# pooled over the selected LA-area airports.
monthly_passengers_LA_2024 = (
    inbound_LA_2024
    .groupby(['YEAR', 'MONTH'], as_index=False)
    .agg(
        PASSENGERS=('PASSENGERS', 'sum'),
        DEPARTURES_PERFORMED=('DEPARTURES_PERFORMED', 'sum'),
        SEATS=('SEATS', 'sum'),
    )
)

In [None]:
# First-of-month timestamp per row, parsed from a "<YEAR>-<MONTH>-01" string.
year_text = monthly_passengers_LA_2024['YEAR'].astype(str)
month_text = monthly_passengers_LA_2024['MONTH'].astype(str)
monthly_passengers_LA_2024['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Check the dtype and non-null count of the new DATE column.
# `.info` must be called; without the parentheses the cell only shows a bound-method repr.
monthly_passengers_LA_2024['DATE'].info()

In [None]:
# Load factor (occupancy rate): PASSENGERS as a percentage of SEATS for each month.
monthly_passengers_LA_2024['LOAD_FACTOR'] = (
    monthly_passengers_LA_2024['PASSENGERS']
    .div(monthly_passengers_LA_2024['SEATS'])
    .mul(100)
)

In [None]:
# Two stacked panels for Los Angeles 2024: monthly passengers (top) and load factor (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 12))
ax_passengers, ax_load = axes
title_font = dict(fontsize=14, fontweight='bold')

# Panel 1: monthly passenger totals.
ax_passengers.plot(
    monthly_passengers_LA_2024['DATE'],
    monthly_passengers_LA_2024['PASSENGERS'],
    marker='o', linewidth=2, markersize=4,
)
ax_passengers.set_title('Monthly Inbound Passengers to Los Angeles 2024', **title_font)
ax_passengers.set(xlabel='Date', ylabel='Total Passengers')

# Panel 2: monthly load factor with a dashed reference line at the mean of the monthly values.
mean_load_factor = monthly_passengers_LA_2024['LOAD_FACTOR'].mean()
ax_load.plot(
    monthly_passengers_LA_2024['DATE'],
    monthly_passengers_LA_2024['LOAD_FACTOR'],
    color='green', marker='o', linewidth=2, markersize=4,
)
ax_load.set_title('Aircraft Load Factor (Occupancy Rate) Inbound LA 2024', **title_font)
ax_load.set(xlabel='Date', ylabel='Load Factor (%)')
ax_load.axhline(y=mean_load_factor, color='red', linestyle='--',
                label=f"Average: {mean_load_factor:.1f}%")
ax_load.legend()

# Shared cosmetics: faint grid and rotated date labels on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Combined dataset for LA - 2022, 2024

In [None]:
# Build one LA table covering 2022 and 2024 from the two monthly summaries,
# print a few sanity checks, and save the result as a CSV.

# ============================================
# STEP 1: Gather the yearly monthly summaries
# ============================================

df_LA_2022 = monthly_passengers_LA_2022
df_LA_2024 = monthly_passengers_LA_2024


print("Dataset shapes:")
print(f"2022: {df_LA_2022.shape}")
print(f"2024: {df_LA_2024.shape}")

# ============================================
# STEP 2: Combine them vertically (stack rows)
# ============================================

# pd.concat stacks dataframes on top of each other; ignore_index gives a fresh 0..n-1 index
LA_combined = pd.concat([df_LA_2022, df_LA_2024], ignore_index=True)

print(f"\nCombined dataset shape: {LA_combined.shape}")
print(f"Total months: {len(LA_combined)}")

# ============================================
# STEP 3: Sort by date for proper time series
# ============================================

LA_combined = LA_combined.sort_values('DATE').reset_index(drop=True)

# ============================================
# STEP 4: Verify the data
# ============================================

print("\n" + "="*50)
print("COMBINED DATASET SUMMARY")
print("="*50)
print(LA_combined.head(36))

print("\n" + "="*50)
print("DATA RANGE")
print("="*50)
print(f"Start: {LA_combined['DATE'].min()}")
print(f"End: {LA_combined['DATE'].max()}")
print(f"Years included: {sorted(LA_combined['YEAR'].unique())}")

# A year with fewer rows than expected indicates missing months.
print("\n" + "="*50)
print("CHECK FOR GAPS")
print("="*50)
print("Months per year:")
print(LA_combined.groupby('YEAR')['MONTH'].count())

# ============================================
# STEP 5: Basic statistics across all years
# ============================================

print("\n" + "="*50)
print("OVERALL STATISTICS")
print("="*50)
print(LA_combined[['PASSENGERS', 'LOAD_FACTOR']].describe())

# ============================================
# STEP 6: Save combined dataset
# ============================================

LA_combined.to_csv("LA_combined_2022_2024.csv", index=False)
# The filename is now wrapped in a matching pair of single quotes.
print("\n✅ Combined dataset saved as 'LA_combined_2022_2024.csv'")

In [None]:
# Seasonal overlay for LA: one line per year against month of year.

# Month abbreviations used as x tick labels (positions 1..12).
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Per-year line styling, keyed by the year rendered as a string.
colors = {'2022': 'steelblue', '2024':'orange'}
markers = {'2022': 'o', '2024': 's'}

fig, ax = plt.subplots(figsize=(14, 6))

for year in sorted(LA_combined['YEAR'].unique()):
    year_key = str(year)
    # Rows of this year in calendar order.
    season = (
        LA_combined
        .loc[LA_combined['YEAR'].eq(year)]
        .sort_values('MONTH')
    )
    ax.plot(
        season['MONTH'], season['PASSENGERS'],
        label=year_key,
        color=colors[year_key],
        marker=markers[year_key],
        markersize=8,
        linewidth=2.5,
        alpha=0.8,
    )

ax.set_title('LA Inbound Passengers: Seasonal Patterns by Year',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Total Passengers', fontsize=12)
ax.set_xticks(range(1, 13))
ax.set_xticklabels(month_names)
ax.legend(title='Year', fontsize=11, title_fontsize=12)
ax.grid(True, alpha=0.3)
plt.tight_layout()
# Write the PNG before show().
plt.savefig('LA_inbound_seasonal_comparison.png', dpi=300, bbox_inches='tight')
plt.show()