#### Flights 

In [1]:
import pandas as pd
import glob
import os

In [2]:
# Build a per-state, per-month flight count from the T-100 segment CSV exports
# and write it to "US_state_monthly_flights.csv".

# The pattern has no "**" component, so it matches exactly one directory level
# (recursive=True would have no effect). Sorting makes the concatenation order
# reproducible, because glob returns matches in arbitrary order.
files = sorted(glob.glob("T_T100_SEGMENT_ALL_CARRIER_*/T_T100_SEGMENT_ALL_CARRIER*.csv"))
print(f"Found {len(files)} files")
if not files:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" from pd.concat([]).
    raise FileNotFoundError(
        "No T-100 segment CSV files matched "
        "'T_T100_SEGMENT_ALL_CARRIER_*/T_T100_SEGMENT_ALL_CARRIER*.csv' "
        "relative to the current working directory"
    )

# combine files (everything is read as text; numeric columns are converted below)
df_list = [pd.read_csv(f, dtype=str) for f in files]
t100 = pd.concat(df_list, ignore_index=True)

# convert columns to numeric values; unparseable entries become NaN
for column in ('DEPARTURES_PERFORMED', 'YEAR', 'MONTH'):
    t100[column] = pd.to_numeric(t100[column], errors='coerce')

# remove rows with missing year, month, state
# NOTE(review): a row missing *either* state name is dropped entirely, so such
# a segment contributes to neither the origin nor the destination tally --
# confirm this is the intended scope.
t100 = t100.dropna(subset=['YEAR', 'MONTH', 'ORIGIN_STATE_NM', 'DEST_STATE_NM'])

# Coercion leaves YEAR/MONTH as float64 whenever any value was NaN; with those
# rows gone, cast back to integers so the output shows 2020 rather than 2020.0.
t100 = t100.astype({'YEAR': 'int64', 'MONTH': 'int64'})

# departures by origin state
origin = (
    t100.groupby(['YEAR', 'MONTH', 'ORIGIN_STATE_NM'], as_index=False)['DEPARTURES_PERFORMED']
    .sum()
    .rename(columns={'ORIGIN_STATE_NM': 'STATE', 'DEPARTURES_PERFORMED': 'FLIGHTS_ORIGIN'})
)

# arrivals by destination state (a departure performed on a segment is counted
# as one arrival in the segment's destination state)
dest = (
    t100.groupby(['YEAR', 'MONTH', 'DEST_STATE_NM'], as_index=False)['DEPARTURES_PERFORMED']
    .sum()
    .rename(columns={'DEST_STATE_NM': 'STATE', 'DEPARTURES_PERFORMED': 'FLIGHTS_DEST'})
)

# merge & sum both directions; the outer join keeps states that appear on only
# one side, and the resulting missing counts are treated as 0
merged = pd.merge(origin, dest, on=['YEAR', 'MONTH', 'STATE'], how='outer').fillna(0)
merged['FLIGHTS_TOTAL'] = merged['FLIGHTS_ORIGIN'] + merged['FLIGHTS_DEST']

# clean dataset: keep only the key columns and the total, in a stable order
clean = (
    merged[['YEAR', 'MONTH', 'STATE', 'FLIGHTS_TOTAL']]
    .sort_values(['YEAR', 'MONTH', 'STATE'])
)

# save as csv
clean.to_csv("US_state_monthly_flights.csv", index=False)


Found 8 files
