In [2]:
import pandas as pd
import holidays
from typing import Iterable, Optional, Tuple, List, Dict, Any

def _discover_subdivisions(country: str) -> list[Optional[str]]:
    """Return the subdivision codes to iterate over for ``country``.

    The result always starts with ``None``, which callers use to mean
    "no subdivision" (the country-wide calendar), followed by whatever
    subdivision codes could be discovered from the ``holidays`` package.

    Discovery is best-effort: a ``holidays.utils.country_subdivisions``
    helper is tried first, and on any failure the ``subdivisions``
    attribute of the ``holidays.<country>`` entity is used instead.
    If both attempts fail, only ``[None]`` is returned.

    NOTE(review): confirm that ``holidays.utils.country_subdivisions``
    exists in the installed ``holidays`` version; when that import fails
    the attribute-based fallback is the path that actually runs.
    """
    try:
        from holidays.utils import country_subdivisions

        codes = list(country_subdivisions(country) or [])
    except Exception:
        try:
            entity = getattr(holidays, country, None)  # e.g., holidays.US
            codes = list(getattr(entity, "subdivisions", [])) if entity else []
        except Exception:
            # Nothing could be discovered; fall back to the national entry only.
            return [None]
    # An empty ``codes`` list naturally collapses to ``[None]``.
    return [None, *codes]

def build_holiday_df(
    countries: Iterable[str] = ("US", "CA"),
    years: Iterable[int] = (2025,),
    observed: bool = True,
    force_subdivs: Optional[Dict[str, list[Optional[str]]]] = None,
) -> pd.DataFrame:
    """Build a tidy holidays table with national and provincial/state flags.

    For every country, one calendar is built without a subdivision and one
    per subdivision; each holiday in each calendar becomes one row.

    Args:
        countries: Country codes understood by the ``holidays`` package.
            A single bare string (e.g. ``"US"``) is treated as one country
            rather than being iterated character by character.
        years: Years to generate holidays for.
        observed: Forwarded to ``holidays.country_holidays``.
        force_subdivs: Optional mapping ``country -> subdivision list`` that
            overrides automatic discovery for that country. Use ``None`` as
            a list element to request the calendar without a subdivision.
            A missing or empty entry falls back to discovery.

    Returns:
        A DataFrame with the columns ``Date``, ``Country``,
        ``Province/State``, ``provincial_state_holiday_flag``,
        ``national_holiday_flag`` and ``holiday_description``, sorted by
        ``Date``, ``Country``, ``Province/State`` with a fresh index. The
        columns are present even when no rows were produced.

    Notes:
        The two flags record only which calendar a row came from
        (no subdivision -> national, otherwise provincial/state). A holiday
        listed under a subdivision is therefore flagged as provincial/state
        even if the same holiday also appears in the national calendar.

        Country/subdivision combinations whose calendar cannot be built are
        skipped, and a warning is emitted for each so the gap is visible.
    """
    import warnings  # local import keeps this block self-contained

    columns = [
        "Date",
        "Country",
        "Province/State",
        "provincial_state_holiday_flag",
        "national_holiday_flag",
        "holiday_description",
    ]

    # A bare string would otherwise be walked one character at a time
    # ("US" -> "U", "S"), silently producing an empty table.
    if isinstance(countries, str):
        countries = (countries,)
    years_list = list(years)
    force_subdivs = force_subdivs or {}

    rows: list[dict[str, Any]] = []
    for country in countries:
        subdivs = force_subdivs.get(country) or _discover_subdivisions(country)
        for subdiv in subdivs:
            try:
                # country_holidays is the current name of the deprecated
                # CountryHoliday factory.
                calendar = holidays.country_holidays(
                    country, subdiv=subdiv, years=years_list, observed=observed
                )
            except Exception as exc:
                # Best-effort: keep going, but do not hide the failure.
                warnings.warn(
                    f"Skipping holidays for country={country!r}, "
                    f"subdiv={subdiv!r}: {exc!r}",
                    stacklevel=2,
                )
                continue

            is_national = subdiv is None
            for dt, name in calendar.items():
                rows.append({
                    "Date": pd.to_datetime(dt),
                    "Country": country,
                    "Province/State": "NATIONAL_HOLIDAY" if is_national else subdiv,
                    "provincial_state_holiday_flag": 0 if is_national else 1,
                    "national_holiday_flag": 1 if is_national else 0,
                    "holiday_description": name,
                })

    df = pd.DataFrame(rows, columns=columns)
    if not df.empty:
        df = df.sort_values(["Date", "Country", "Province/State"]).reset_index(drop=True)
    return df

# ---- Example ----
# A) US + Canada for a single year (2025); preview the first rows.
# NOTE(review): `display` is not imported in this file — presumably the
# IPython/Jupyter built-in; confirm before running outside a notebook.
holidays_2025 = build_holiday_df(countries=("US","CA"), years=(2025,))
display(holidays_2025.head())

# B) US + Canada for multiple years (e.g., 2024–2026)
# range(2024, 2027) excludes its stop value, so it yields 2024, 2025 and 2026.
holidays_multi = build_holiday_df(countries=("US","CA"), years=range(2024, 2027))
display(holidays_multi.head())


Unnamed: 0,Date,Country,Province/State,provincial_state_holiday_flag,national_holiday_flag,holiday_description
0,2025-01-01,CA,AB,1,0,New Year's Day
1,2025-01-01,CA,BC,1,0,New Year's Day
2,2025-01-01,CA,MB,1,0,New Year's Day
3,2025-01-01,CA,NATIONAL_HOLIDAY,0,1,New Year's Day
4,2025-01-01,CA,NB,1,0,New Year's Day


Unnamed: 0,Date,Country,Province/State,provincial_state_holiday_flag,national_holiday_flag,holiday_description
0,2024-01-01,CA,AB,1,0,New Year's Day
1,2024-01-01,CA,BC,1,0,New Year's Day
2,2024-01-01,CA,MB,1,0,New Year's Day
3,2024-01-01,CA,NATIONAL_HOLIDAY,0,1,New Year's Day
4,2024-01-01,CA,NB,1,0,New Year's Day


In [3]:
# Write the 2025 table to CSV without the DataFrame index column.
# NOTE(review): the destination is a hard-coded, user-specific Windows path;
# adjust it before running on another machine.
holidays_2025.to_csv(r"C:\Users\jverc\Downloads\holidays_dim_US_CA_2025.csv", index=False)