# 0) Imports & Paths

In [1]:
from pathlib import Path
import re
import pandas as pd
pd.set_option("display.max_columns", 200)

# Locate the project root: the notebook may be started either from the
# project root itself or from its "notebooks" sub-folder.
_here = Path.cwd()
if _here.name == "notebooks":
    BASE = _here.parent
else:
    BASE = _here

RAW = BASE.joinpath("data", "raw")
CLEAN = BASE.joinpath("data", "clean")
# The clean folder is an output location, so create it on first use.
CLEAN.mkdir(parents=True, exist_ok=True)

# Short key -> raw CSV file name (all files live in RAW).
_RAW_FILE_NAMES = (
    ("hist_t1", "International_airline_activity_Table1_2009toCurrent_0525_Data - Sheet1.csv"),
    ("t1_may", "International_airline_activity_0525_Table1.csv"),
    ("t2_may", "International_airline_activity_0525_Table2.csv"),
    ("t3_may", "International_airline_activity_0525_Table3.csv"),
    ("t4_may", "International_airline_activity_0525_Table4.csv"),
    ("t5_may", "International_airline_activity_0525_Table5.csv"),
)
FILES = {key: RAW / file_name for key, file_name in _RAW_FILE_NAMES}

# Report which of the expected inputs are actually present on disk.
for key, path in FILES.items():
    print(f"{key:8s} -> {path.name} | exists={path.exists()}")

hist_t1  -> International_airline_activity_Table1_2009toCurrent_0525_Data - Sheet1.csv | exists=True
t1_may   -> International_airline_activity_0525_Table1.csv | exists=True
t2_may   -> International_airline_activity_0525_Table2.csv | exists=True
t3_may   -> International_airline_activity_0525_Table3.csv | exists=True
t4_may   -> International_airline_activity_0525_Table4.csv | exists=True
t5_may   -> International_airline_activity_0525_Table5.csv | exists=True


# 1) Helpers: parsing & cleaning

In [2]:
# Tokens treated as "no value" after a cell has been cast to text and trimmed.
NULL_TOKENS = {"..", "-", "", None, "nan", "NaN", "None"}


def to_number(s: pd.Series) -> pd.Series:
    """Coerce text such as '1,234', '1 234' or '..' to numbers.

    Commas and spaces (including non-breaking and narrow non-breaking spaces)
    are treated as thousands separators and removed.  Null tokens and anything
    that still fails to parse become NaN.

    Args:
        s: Series of raw cell values (any dtype; cast to text internally).

    Returns:
        Numeric Series aligned with ``s``.
    """
    x = s.astype(str).str.strip()
    x = x.where(~x.isin(NULL_TOKENS), None)
    # Literal replacements only: no regex needed and no locale assumptions.
    for separator in (",", " ", "\u00a0", "\u202f"):
        x = x.str.replace(separator, "", regex=False)
    return pd.to_numeric(x, errors="coerce")


def to_percent_decimal(s: pd.Series) -> pd.Series:
    """Convert percentage text to decimal fractions.

    * Values suffixed with ``%`` are divided by 100: '12.3%' -> 0.123.
    * A lone comma is read as a decimal comma: '12,3%' -> 0.123.
    * When both separators appear in a grouped number, the grouping separator
      is dropped: '1,234.5%' -> 12.345 and '1.234,5%' -> 12.345.
    * Values without ``%`` are returned unchanged ('12.3' -> 12.3).  If a
      column is known to hold percentages without the sign, divide by 100
      yourself after calling this.

    Args:
        s: Series of raw cell values (any dtype; cast to text internally).

    Returns:
        float64 Series aligned with ``s``; NaN where the value is a null token
        or cannot be parsed.
    """
    x = s.astype(str).str.strip()
    x = x.where(~x.isin(NULL_TOKENS), None)

    pct_mask = x.str.endswith("%", na=False)
    # Second strip copes with a space before the sign, e.g. '12.3 %'.
    x = x.str.rstrip("%").str.strip()

    # Drop thousands separators first, otherwise '1,234.5' would turn into the
    # unparseable '1.234.5' when commas are mapped to dots below.
    comma_grouped = x.str.match(r"[+-]?\d{1,3}(?:,\d{3})+\.\d*$", na=False)
    dot_grouped = x.str.match(r"[+-]?\d{1,3}(?:\.\d{3})+,\d*$", na=False)
    x = x.where(~comma_grouped, x.str.replace(",", "", regex=False))
    x = x.where(~dot_grouped, x.str.replace(".", "", regex=False))

    # Whatever comma is left is a decimal comma.
    x = x.str.replace(",", ".", regex=False)

    # Force float so that whole-number percentages ('12%') are not parsed as
    # int64 and then asked to hold 0.12.
    x = pd.to_numeric(x, errors="coerce").astype("float64")
    return x.where(~pct_mask, x / 100.0)

def clean_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with tidied column labels.

    Each label is cast to text and trimmed, every run of whitespace becomes a
    single underscore, and round brackets are removed.  The input frame is
    left untouched.
    """
    labels = df.columns.astype(str).str.strip()
    labels = labels.str.replace(r"\s+", "_", regex=True)
    labels = labels.str.replace(r"[()]", "", regex=True)

    tidy = df.copy()
    tidy.columns = labels
    return tidy

# 2) Clean HISTORICAL (Table 1, 2009→current)


In [3]:
def _strip_keep_missing(s: pd.Series) -> pd.Series:
    """Trim text values while leaving missing values missing.

    A plain ``astype(str)`` would turn NaN/None into the literal strings
    'nan'/'None', which a later ``dropna`` can no longer detect.
    """
    stripped = s.where(s.isna(), s.astype(str).str.strip())
    # Whitespace-only cells carry no information either.
    return stripped.where(stripped != "", None)


def clean_table1_hist(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Reshape the historical Table 1 extract into one row per direction.

    Expected columns after ``clean_cols`` (exact spelling, case-sensitive):
    Month, Year, Scheduled_Operator, Country_to/from, Passengers_In/Out,
    Freight_In/Out, Mail_In/Out.

    Args:
        df_raw: Raw frame as read from the CSV.

    Returns:
        Frame with columns Year, Month, Airline_Name, Country, Direction,
        Passengers, Freight_tonnes, Mail_tonnes, Is_Total_AllServices, sorted
        by Year, Month, Airline_Name, Country, Direction.  Rows without an
        airline name are dropped.

    Raises:
        KeyError: if any of the six In/Out measure columns is absent.
    """
    df = clean_cols(df_raw).rename(columns={
        "Scheduled_Operator": "Airline_Name",
        "Country_to/from": "Country",
    })

    # Month text looks like 'Jan-09'; only its first three letters are used,
    # the year comes from the separate Year column.
    month_map = {
        "jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
        "jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12,
    }
    if "Month" in df:
        abbrev = df["Month"].astype(str).str.strip().str[:3].str.lower()
        df["Month_num"] = abbrev.map(month_map)
    else:
        df["Month_num"] = None

    # Numbers
    for c in ["Passengers_In", "Passengers_Out", "Freight_In", "Freight_Out", "Mail_In", "Mail_Out"]:
        if c in df:
            df[c] = to_number(df[c])

    # Identifier columns are created empty when absent so the reshape below
    # still produces the full output schema.
    cols_keep = ["Year", "Month_num", "Airline_Name", "Country"]
    for c in cols_keep:
        if c not in df:
            df[c] = None

    # Melt inbound/outbound: two output rows per input row.
    frames = []
    for direction, suffix in (("Inbound", "In"), ("Outbound", "Out")):
        part = df[cols_keep + [f"Passengers_{suffix}", f"Freight_{suffix}", f"Mail_{suffix}"]].copy()
        part["Direction"] = direction
        part = part.rename(columns={
            f"Passengers_{suffix}": "Passengers",
            f"Freight_{suffix}": "Freight_tonnes",
            f"Mail_{suffix}": "Mail_tonnes",
            "Month_num": "Month",
        })
        frames.append(part)
    out = pd.concat(frames, ignore_index=True)

    # Clean airline names & country strings without stringifying missing
    # values, so the dropna below can actually remove junk rows.
    out["Airline_Name"] = _strip_keep_missing(out["Airline_Name"])
    out["Country"] = _strip_keep_missing(out["Country"])

    # Drop rows with no airline or no direction (junk)
    out = out.dropna(subset=["Airline_Name", "Direction"])

    # Ensure nullable ints for Year/Month
    out["Year"] = pd.to_numeric(out["Year"], errors="coerce").astype("Int64")
    out["Month"] = pd.to_numeric(out["Month"], errors="coerce").astype("Int64")

    # Flag 'ALL SERVICES' totals if present (missing country -> False)
    out["Is_Total_AllServices"] = out["Country"].astype(str).str.upper().eq("ALL SERVICES")

    # Final sort & columns
    return out[[
        "Year", "Month", "Airline_Name", "Country", "Direction",
        "Passengers", "Freight_tonnes", "Mail_tonnes", "Is_Total_AllServices"
    ]].sort_values(["Year", "Month", "Airline_Name", "Country", "Direction"]).reset_index(drop=True)

# 3) Clean May 2025 Table 1 (per-airline country inbound/outbound PAX/Freight/Mail)

In [5]:
def clean_table1_may(df_raw: pd.DataFrame, year=2025, month=5) -> pd.DataFrame:
    """Reshape a single-month Table 1 into one row per airline/country/direction.

    Expected columns after ``clean_cols``: Scheduled_Operator, Country_to/from,
    Inbound_Passengers, Inbound_Freight, Inbound_Mail, Outbound_Passengers,
    Outbound_Freight, Outbound_Mail.

    Args:
        df_raw: Raw frame as read from the CSV.
        year: Reporting year stamped on every output row.
        month: Reporting month (1-12) stamped on every output row.

    Returns:
        Frame with columns Year, Month, Airline_Name, Country, Direction,
        Passengers, Freight_tonnes, Mail_tonnes, Is_Total_AllServices, sorted
        by Airline_Name, Country, Direction.

    Raises:
        KeyError: if an expected identifier or measure column is absent.
    """
    df = clean_cols(df_raw).rename(columns={
        "Scheduled_Operator": "Airline_Name",
        "Country_to/from": "Country",
        "Inbound_Freight": "Inbound_Freight_tonnes",
        "Inbound_Mail": "Inbound_Mail_tonnes",
        "Outbound_Freight": "Outbound_Freight_tonnes",
        "Outbound_Mail": "Outbound_Mail_tonnes",
    })

    measures = ["Passengers", "Freight_tonnes", "Mail_tonnes"]
    directions = ("Inbound", "Outbound")

    # Numbers
    for direction in directions:
        for measure in measures:
            c = f"{direction}_{measure}"
            if c in df:
                df[c] = to_number(df[c])

    # Build long form (two rows per operator-country: inbound/outbound)
    keep = ["Airline_Name", "Country"]
    frames = []
    for direction in directions:
        part = df[keep + [f"{direction}_{m}" for m in measures]].copy()
        part.columns = keep + measures
        part["Direction"] = direction
        frames.append(part)
    out = pd.concat(frames, ignore_index=True)

    # Trim identifier text the same way the historical cleaner does, so that
    # padded values such as 'ALL SERVICES ' are flagged and keys match across
    # both facts.  Missing values stay missing rather than becoming 'nan'.
    for col in keep:
        text = out[col]
        out[col] = text.where(text.isna(), text.astype(str).str.strip())

    out["Year"] = year
    out["Month"] = month
    out["Is_Total_AllServices"] = out["Country"].astype(str).str.upper().eq("ALL SERVICES")

    return out[[
        "Year", "Month", "Airline_Name", "Country", "Direction",
        "Passengers", "Freight_tonnes", "Mail_tonnes", "Is_Total_AllServices"
    ]].sort_values(["Airline_Name", "Country", "Direction"]).reset_index(drop=True)

# 4) Clean May 2025 Table 2 (market share & YoY)

In [6]:
def clean_table2_may(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Tidy Table 2: rename columns, then parse counts and percentages.

    Count columns go through ``to_number`` and share / change columns through
    ``to_percent_decimal``; columns that are not present are skipped.  The
    result is sorted by Airline_Name and Country with a fresh index.
    """
    # Header examples after clean_cols: Scheduled_Operators, Country_to/from,
    # Total_Passengers_2024, Total_Passengers_2025,
    # Total_Passengers_Perc_TOTAL, Total_Passengers_Perc_Change
    new_names = {
        "Scheduled_Operators": "Airline_Name",
        "Country_to/from": "Country",
        "Total_Passengers_2024": "Passengers_May_LY",
        "Total_Passengers_2025": "Passengers_May",
        "Total_Passengers_Perc_TOTAL": "MarketShare_Passengers",
        "Total_Passengers_Perc_Change": "Passengers_YoY",
        "Total_Freight_tonnes_2024": "Freight_2024_t",
        "Total_Freight_tonnes_2025": "Freight_2025_t",
        "Total_Freight_tonnes_Perc_TOTAL": "MarketShare_Freight",
        "Total_Freight_tonnes_Perc_Change": "Freight_YoY",
        "Total_Mail_tonnes_2024": "Mail_2024_t",
        "Total_Mail_tonnes_2025": "Mail_2025_t",
        "Total_Mail_tonnes_Perc_TOTAL": "MarketShare_Mail",
        "Total_Mail_tonnes_Perc_Change": "Mail_YoY",
    }
    table = clean_cols(df_raw).rename(columns=new_names)

    count_cols = (
        "Passengers_May_LY", "Passengers_May",
        "Freight_2024_t", "Freight_2025_t",
        "Mail_2024_t", "Mail_2025_t",
    )
    percent_cols = (
        "MarketShare_Passengers", "Passengers_YoY",
        "MarketShare_Freight", "Freight_YoY",
        "MarketShare_Mail", "Mail_YoY",
    )

    # Apply each parser to the columns it is responsible for.
    for parser, names in ((to_number, count_cols), (to_percent_decimal, percent_cols)):
        for name in names:
            if name in table.columns:
                table[name] = parser(table[name])

    ordered = table.sort_values(["Airline_Name", "Country"])
    return ordered.reset_index(drop=True)

# 5) Clean May 2025 Table 3 (flights, seats, utilisation)

In [7]:
def clean_table3_may(df_raw: pd.DataFrame, year=2025, month=5) -> pd.DataFrame:
    """Reshape Table 3 (flights, seats, utilisation) to one row per direction.

    Args:
        df_raw: Raw frame as read from the CSV.
        year: Reporting year stamped on every output row (default 2025).
        month: Reporting month (1-12) stamped on every output row (default 5).

    Returns:
        Frame with columns Year, Month, Airline_Name, Service_Region,
        Direction, Flights, Passengers, Seats_Available, Seat_Utilisation.
        Inbound rows come first, followed by outbound rows, each in input order.

    Raises:
        KeyError: if an expected identifier or measure column is absent.
    """
    df = clean_cols(df_raw).rename(columns={
        "Scheduled_Operator": "Airline_Name",
        "Service_to/from": "Service_Region",
        "Inbound_No._of_Flights": "Inbound_Flights",
        "Inbound_Pax_Carried": "Inbound_Passengers",
        "Inbound_Seat_Utilisation_%": "Inbound_Seat_Utilisation",
        "Outbound_No._of_Flights": "Outbound_Flights",
        "Outbound_Pax_Carried": "Outbound_Passengers",
        "Outbound_Seat_Utilisation_%": "Outbound_Seat_Utilisation",
    })

    directions = ("Inbound", "Outbound")
    count_measures = ["Flights", "Passengers", "Seats_Available"]

    for direction in directions:
        for measure in count_measures:
            c = f"{direction}_{measure}"
            if c in df:
                df[c] = to_number(df[c])

        # NOTE(review): to_percent_decimal only divides by 100 when the cell
        # itself ends in '%'.  The raw header carries the '%', so the cells may
        # be plain numbers and stay on a 0-100 scale -- confirm against the data.
        c = f"{direction}_Seat_Utilisation"
        if c in df:
            df[c] = to_percent_decimal(df[c])

    # Long form: two rows (inbound/outbound) per operator-region.
    keep = ["Airline_Name", "Service_Region"]
    measures = count_measures + ["Seat_Utilisation"]
    frames = []
    for direction in directions:
        part = df[keep + [f"{direction}_{m}" for m in measures]].copy()
        part.columns = keep + measures
        part["Direction"] = direction
        frames.append(part)

    out = pd.concat(frames, ignore_index=True)
    out["Year"] = year
    out["Month"] = month

    return out[["Year", "Month", "Airline_Name", "Service_Region", "Direction",
                "Flights", "Passengers", "Seats_Available", "Seat_Utilisation"]].reset_index(drop=True)

# 6) Clean May 2025 Table 4 (airport totals)

In [8]:
def clean_table4_may(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Reshape Table 4 (airport totals) to one row per city/metric/direction/year.

    Expected columns after ``clean_cols``: City, Indicator and value columns
    named ``Inbound_<YYYY>`` / ``Outbound_<YYYY>``.  The years are read from
    the headers, so next year's release works without code changes.
    ``*_Perc_Change`` columns are ignored because the output carries values
    only.

    Args:
        df_raw: Raw frame as read from the CSV.

    Returns:
        Frame with columns Year, City, Metric, Direction, Value, sorted by
        City, Metric, Direction, Year.

    Raises:
        KeyError: if City or Indicator is absent, or no
            ``Inbound_<YYYY>`` / ``Outbound_<YYYY>`` column is found.
    """
    df = clean_cols(df_raw)

    # Map (direction, year) -> column name for every value column present.
    value_pattern = re.compile(r"(Inbound|Outbound)_(\d{4})")
    value_cols = {}
    for col in df.columns:
        hit = value_pattern.fullmatch(col)
        if hit:
            value_cols[(hit.group(1), int(hit.group(2)))] = col
            df[col] = to_number(df[col])
    if not value_cols:
        raise KeyError("no 'Inbound_<YYYY>' / 'Outbound_<YYYY>' columns found")

    metric = df["Indicator"].str.replace(" ", "_", regex=False)

    # One long block per (year, direction); useful for Power BI.
    frames = []
    for yr in sorted({y for _, y in value_cols}):
        for direction in ("Inbound", "Outbound"):
            col = value_cols.get((direction, yr))
            if col is None:
                continue
            frames.append(pd.DataFrame({
                "Year": yr,
                "City": df["City"],
                "Metric": metric,
                "Direction": direction,
                "Value": df[col],
            }))

    out = pd.concat(frames, ignore_index=True)
    return out[["Year", "City", "Metric", "Direction", "Value"]].sort_values(
        ["City", "Metric", "Direction", "Year"]
    ).reset_index(drop=True)

# 7) Clean May 2025 Table 5 (city pairs)

In [9]:
def clean_table5_may(df_raw: pd.DataFrame, years=(2024, 2025)) -> pd.DataFrame:
    """Reshape Table 5 (city pairs) to long passenger rows.

    Expected columns after ``clean_cols``: Foreign_Port, Australian_Port and
    ``Passengers_<YYYY>_<Inbound|Outbound>``.  Only passenger figures are
    kept; freight columns are ignored.

    Args:
        df_raw: Raw frame as read from the CSV.
        years: Years to extract, in output order (default ``(2024, 2025)``).

    Returns:
        Frame with columns Year, Australian_City, Foreign_City, Direction,
        Metric, Value.  Rows whose Australian_City starts with 'total'
        (case-insensitive) are removed.  A year/direction column that is
        absent yields NaN values for that slice.

    Raises:
        KeyError: if Australian_Port or Foreign_Port is absent.
    """
    df = clean_cols(df_raw)

    def pull(year, direction):
        c = f"Passengers_{year}_{direction}"
        if c in df:
            return to_number(df[c])
        # Share df's index so the placeholder lines up row-for-row with the
        # port columns, and stay float so Value keeps a numeric dtype.
        return pd.Series(float("nan"), index=df.index, dtype="float64")

    out_rows = [
        pd.DataFrame({
            "Year": yr,
            "Australian_City": df["Australian_Port"],
            "Foreign_City": df["Foreign_Port"],
            "Direction": d,
            "Metric": "Passengers",
            "Value": pull(yr, d),
        })
        for yr in years
        for d in ("Inbound", "Outbound")
    ]
    out = pd.concat(out_rows, ignore_index=True)

    # Drop "Total, Broome"-like summary rows if present
    bad = out["Australian_City"].astype(str).str.lower().str.startswith("total")
    return out[~bad].reset_index(drop=True)

# 8) RUN CLEANING PIPELINE

In [10]:
def _load_raw(key):
    """Read one raw CSV entirely as text; the cleaners do the type parsing."""
    return pd.read_csv(FILES[key], dtype=str)


def _save_clean(frame, stem):
    """Write ``frame`` to CLEAN/<stem>.csv and print its shape."""
    frame.to_csv(CLEAN / f"{stem}.csv", index=False)
    print(f"✅ {stem}:", frame.shape)


# Historical Table 1 (2009 -> current)
hist_raw = _load_raw("hist_t1")
t1_hist = clean_table1_hist(hist_raw)
_save_clean(t1_hist, "fact_airline_monthly_hist")

# May 2025, Table 1
t1_may_raw = _load_raw("t1_may")
t1_may = clean_table1_may(t1_may_raw, year=2025, month=5)
_save_clean(t1_may, "Table1_May2025_clean")

# May 2025, Table 2
t2_may_raw = _load_raw("t2_may")
t2_may = clean_table2_may(t2_may_raw)
_save_clean(t2_may, "Table2_May2025_clean")

# May 2025, Table 3
t3_may_raw = _load_raw("t3_may")
t3_may = clean_table3_may(t3_may_raw)
_save_clean(t3_may, "Table3_May2025_clean")

# May 2025, Table 4
t4_may_raw = _load_raw("t4_may")
t4_long = clean_table4_may(t4_may_raw)
_save_clean(t4_long, "Table4_Cities_long")

# May 2025, Table 5
t5_may_raw = _load_raw("t5_may")
t5_long = clean_table5_may(t5_may_raw)
_save_clean(t5_long, "Table5_pairs_long")

✅ fact_airline_monthly_hist: (41664, 9)
✅ Table1_May2025_clean: (270, 9)
✅ Table2_May2025_clean: (144, 14)
✅ Table3_May2025_clean: (244, 9)
✅ Table4_Cities_long: (144, 5)
✅ Table5_pairs_long: (1028, 6)


# 9) QUICK SANITY CHECKS

In [11]:
def check_not_null(df, cols, name):
    """Print how many missing values each listed column of ``df`` holds.

    Columns from ``cols`` that are not in ``df`` are skipped; ``name`` only
    labels the printed line.
    """
    missing = {}
    for col in cols:
        if col not in df:
            continue
        missing[col] = df[col].isna().sum()
    print(f"🔎 NA check [{name}]:", missing)

def check_ranges(df, col, name):
    """Print the minimum and maximum of ``df[col]`` after numeric coercion.

    Nothing is printed when ``col`` is not in ``df``; values that cannot be
    parsed as numbers are ignored.
    """
    if col not in df:
        return
    values = pd.to_numeric(df[col], errors="coerce")
    print(f"🔎 Range [{name}.{col}]: min={values.min()}  max={values.max()}")

# One entry per cleaned table:
# (heading, frame, label, columns to NA-check, column to range-check or None)
_SANITY_PLAN = [
    ("Historical", t1_hist, "fact_airline_monthly_hist",
     ["Year", "Month", "Airline_Name", "Direction", "Passengers"], "Passengers"),
    ("May T1", t1_may, "Table1_May2025_clean",
     ["Year", "Month", "Airline_Name", "Country", "Direction", "Passengers"], "Passengers"),
    ("May T2", t2_may, "Table2_May2025_clean",
     ["Airline_Name", "Passengers_May", "Passengers_YoY", "MarketShare_Passengers"], None),
    ("May T3", t3_may, "Table3_May2025_clean",
     ["Year", "Month", "Airline_Name", "Service_Region", "Direction", "Flights", "Passengers"], None),
    ("May T4", t4_long, "Table4_Cities_long",
     ["Year", "City", "Metric", "Direction", "Value"], None),
    ("May T5", t5_long, "Table5_pairs_long",
     ["Year", "Australian_City", "Foreign_City", "Direction", "Metric", "Value"], None),
]

for heading, frame, label, na_cols, range_col in _SANITY_PLAN:
    print(f"\n--- SANITY: {heading} ---")
    check_not_null(frame, na_cols, label)
    if range_col is not None:
        check_ranges(frame, range_col, label)

print("\n✅ Done. Clean CSVs written to:", CLEAN)


--- SANITY: Historical ---
🔎 NA check [fact_airline_monthly_hist]: {'Year': 0, 'Month': 0, 'Airline_Name': 0, 'Direction': 0, 'Passengers': 7859}
🔎 Range [fact_airline_monthly_hist.Passengers]: min=0.0  max=187429.0

--- SANITY: May T1 ---
🔎 NA check [Table1_May2025_clean]: {'Year': 0, 'Month': 0, 'Airline_Name': 0, 'Country': 0, 'Direction': 0, 'Passengers': 36}
🔎 Range [Table1_May2025_clean.Passengers]: min=214.0  max=286281.0

--- SANITY: May T2 ---
🔎 NA check [Table2_May2025_clean]: {'Airline_Name': 0, 'Passengers_May': 27, 'Passengers_YoY': 28, 'MarketShare_Passengers': 27}

--- SANITY: May T3 ---
🔎 NA check [Table3_May2025_clean]: {'Year': 0, 'Month': 0, 'Airline_Name': 0, 'Service_Region': 0, 'Direction': 0, 'Flights': 7, 'Passengers': 28}

--- SANITY: May T4 ---
🔎 NA check [Table4_Cities_long]: {'Year': 0, 'City': 0, 'Metric': 0, 'Direction': 0, 'Value': 21}

--- SANITY: May T5 ---
🔎 NA check [Table5_pairs_long]: {'Year': 0, 'Australian_City': 0, 'Foreign_City': 0, 'Direction'

# 10) Finalisation / Renaming (Power BI exports)

In [12]:
import pandas as pd
from pathlib import Path

# Resolve folders from the project root with the same rule as the first cell,
# so this cell reads what the cleaning step wrote whether the notebook is run
# from the project root or from its "notebooks" sub-folder.
_cwd = Path.cwd()
BASE = _cwd.parent if _cwd.name == "notebooks" else _cwd
CLEAN = BASE / "data" / "clean"
PBI   = BASE / "data" / "powerbi"
PBI.mkdir(parents=True, exist_ok=True)

# --- Load cleaned intermediates ---
t1_hist  = pd.read_csv(CLEAN / "fact_airline_monthly_hist.csv")
t1_may   = pd.read_csv(CLEAN / "Table1_May2025_clean.csv")
t2_may   = pd.read_csv(CLEAN / "Table2_May2025_clean.csv")
t3_may   = pd.read_csv(CLEAN / "Table3_May2025_clean.csv")
t4_long  = pd.read_csv(CLEAN / "Table4_Cities_long.csv")
t5_long  = pd.read_csv(CLEAN / "Table5_pairs_long.csv")

# ---------- Passenger-focused "final" slices ----------
# Historical passenger fact: keep only rows that have a passenger figure
fact_airline_country_monthly_hist = t1_hist.dropna(subset=["Passengers"])
fact_airline_country_monthly_hist.to_csv(PBI / "fact_airline_country_monthly_hist.csv", index=False)

# May Table 1 passenger slice
fact_airline_country_monthly_may2025 = t1_may.dropna(subset=["Passengers"])
fact_airline_country_monthly_may2025.to_csv(PBI / "fact_airline_country_monthly_may2025.csv", index=False)

# May Table 2 (with fill0 for dashboards if needed)
# NOTE(review): this also turns a missing Passengers_YoY / market share into
# 0, which reads as "no change" / "no share" on a chart -- confirm intended.
t2_fill0 = t2_may.copy()
for c in ["Passengers_May", "Passengers_May_LY", "MarketShare_Passengers", "Passengers_YoY"]:
    if c in t2_fill0:
        t2_fill0[c] = t2_fill0[c].fillna(0)
t2_fill0.to_csv(PBI / "fact_airline_marketshare_monthly_may2025.csv", index=False)

# May Table 3 (ops: flights, seats, passengers by region)
fact_airline_region_ops_monthly_may2025 = t3_may.dropna(subset=["Flights"])
fact_airline_region_ops_monthly_may2025.to_csv(PBI / "fact_airline_region_ops_monthly_may2025.csv", index=False)

# May Table 4 (airport totals)
fact_airport_totals_monthly_may2025 = t4_long.dropna(subset=["Value"])
fact_airport_totals_monthly_may2025.to_csv(PBI / "fact_airport_totals_monthly_may2025.csv", index=False)

# May Table 5 (city pairs): exported as-is, rows with a missing Value included
fact_citypair_monthly_may2025 = t5_long.copy()
fact_citypair_monthly_may2025.to_csv(PBI / "fact_citypair_monthly_may2025.csv", index=False)

# -------- Confirmation --------
print("✅ fact_airline_country_monthly_hist:", fact_airline_country_monthly_hist.shape)
print("✅ fact_airline_country_monthly_may2025:", fact_airline_country_monthly_may2025.shape)
print("✅ fact_airline_marketshare_monthly_may2025:", t2_fill0.shape)
print("✅ fact_airline_region_ops_monthly_may2025:", fact_airline_region_ops_monthly_may2025.shape)
print("✅ fact_airport_totals_monthly_may2025:", fact_airport_totals_monthly_may2025.shape)
print("✅ fact_citypair_monthly_may2025:", fact_citypair_monthly_may2025.shape)

✅ fact_airline_country_monthly_hist: (33805, 9)
✅ fact_airline_country_monthly_may2025: (234, 9)
✅ fact_airline_marketshare_monthly_may2025: (144, 14)
✅ fact_airline_region_ops_monthly_may2025: (237, 9)
✅ fact_airport_totals_monthly_may2025: (123, 5)
✅ fact_citypair_monthly_may2025: (1028, 6)
