In [None]:
import pandas as pd

In [3]:
# Build the per-airline / per-month / per-region passenger table consumed by
# the D3 heatmap and write it to disk.
#
# Names left in the kernel namespace for later cells:
#   sfo_df       - raw table plus the derived "Date" and "Year-Month" columns
#   top_airlines - airline names kept in the export, largest total first
#   df           - the exported table (one row per airline / month / region)

# --- Configuration -----------------------------------------------------------
RAW_CSV_PATH = "Air_Traffic_Passenger_Statistics.csv"  # input read below
D3_EXPORT_PATH = "d3_data.csv"                         # output written below
TOP_N_AIRLINES = 10                                    # airlines kept in the export

# --- Load --------------------------------------------------------------------
sfo_df = pd.read_csv(RAW_CSV_PATH)

# "Activity Period" is parsed with a year+month format; "Year-Month" is the
# string form of the resulting monthly period.
sfo_df["Date"] = pd.to_datetime(sfo_df["Activity Period"], format="%Y%m")
sfo_df["Year-Month"] = sfo_df["Date"].dt.to_period("M").astype(str)

# --- Aggregate ---------------------------------------------------------------
# Keep only the columns needed for the export, drop incomplete rows, then sum
# passengers for each (airline, month, region) combination.
group_keys = ["Operating Airline", "Year-Month", "GEO Region"]
monthly_counts = (
    sfo_df[["Year-Month", "Operating Airline", "GEO Region", "Passenger Count"]]
    .dropna()
    .groupby(group_keys, as_index=False)
    .agg({"Passenger Count": "sum"})
)

# --- Limit to the top airlines by total passenger volume ----------------------
# Filtering happens before normalisation: the normalisation is strictly
# per-airline, so the result is the same and no work is spent on rows that
# are about to be discarded.
top_airlines = (
    monthly_counts.groupby("Operating Airline")["Passenger Count"]
    .sum()
    .nlargest(TOP_N_AIRLINES)
    .index.tolist()
)
top_counts = monthly_counts[monthly_counts["Operating Airline"].isin(top_airlines)]

# --- Normalise per airline for the heatmap colour scale -----------------------
# Each row is divided by its own airline's largest aggregated count.
# A non-positive peak is masked to NaN before dividing so that an airline with
# no passengers yields 0.0 instead of NaN/inf cells in the exported CSV.
airline_peak = top_counts.groupby("Operating Airline")["Passenger Count"].transform("max")
normalised = (top_counts["Passenger Count"] / airline_peak.where(airline_peak > 0)).fillna(0.0)

# .assign returns a new frame, so the filtered slice is never written to in place.
df = top_counts.assign(**{"Normalised Count": normalised})

# --- Export for D3 usage ------------------------------------------------------
df.to_csv(D3_EXPORT_PATH, index=False)
print(f"Exported: {D3_EXPORT_PATH}")


Exported: d3_data.csv
