# HDD-CDD
- HDD/CDD (heating/cooling degree days) do not directly drive energy stock prices
- They do influence demand, which can move prices
- Stocks respond only when weather alters expectations or cash flows
- The signal is real but subtle
- Poor modeling makes it look useless; good modeling makes it valuable



HDD/CDD data for several cities are collected using the Meteostat library.
#### Source: Meteostat – public data aggregation service.
- Meteostat collects and harmonizes data from:
  - NOAA’s Global Historical Climatology Network (GHCN),
  - Environment and Climate Change Canada,
  - Deutscher Wetterdienst (DWD, Germany), and
  - MET Norway, among others

Accessed via the official meteostat Python library.

In [3]:
# ===================== CONFIG: HDD & CDD (Population-weighted CONUS) =====================
from pathlib import Path
import requests
import pandas as pd

# First calendar date kept in the final dataset (ISO yyyy-mm-dd string).
START_DATE = "2010-01-01"
# Calendar year at the moment this cell is executed.
CURRENT_YEAR = pd.Timestamp.today().year

# Local folder layout: everything lives under BASE_DIR.
BASE_DIR = Path(r"D:\MS_Data_Science_Thesis\Data_Extraction")
RAW_DIR = BASE_DIR.joinpath("Downloaded_datasets")
CLEAN_DIR = BASE_DIR.joinpath("Raw_Data_Folder")

# Per-year raw file names; "{year}" is a str.format placeholder, not a literal.
RAW_POP_HDD_TEMPLATE = RAW_DIR.joinpath("cpc_population_heating_{year}.txt")
RAW_POP_CDD_TEMPLATE = RAW_DIR.joinpath("cpc_population_cooling_{year}.txt")

# Destination of the merged daily HDD/CDD table.
CLEAN_HDDCDD_CSV = CLEAN_DIR.joinpath("hddcdd_daily.csv")

# Root URL of the CPC daily degree-day files; a year sub-directory is appended per request.
CPC_BASE = "https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data"
# ===============================================================================

In [5]:
# ---------- make sure the output directories are present ----------
# parents=True builds any missing ancestors; exist_ok=True makes re-runs harmless.
for _folder in (RAW_DIR, CLEAN_DIR):
    _folder.mkdir(parents=True, exist_ok=True)


def download_population_file(year: int, kind: str, *, timeout: float = 60.0) -> Path:
    """
    Download the CPC Population.<kind>.txt file for one year and save it locally.

    year:    calendar year; used as the directory component of the URL and
             substituted into the local file name.
    kind:    'Heating' or 'Cooling' (case-sensitive, it is part of the remote
             file name).
    timeout: seconds to wait on the server before giving up (passed to
             requests.get). Keyword-only.

    Returns the path of the raw file that was written.

    Raises ValueError for an unsupported kind, and whatever requests raises
    for connection problems, timeouts, or a non-success HTTP status
    (via raise_for_status).
    """
    # Explicit check instead of `assert`: asserts are removed under `python -O`,
    # which would let a bad kind build a bogus URL and pick the wrong template.
    if kind not in {"Heating", "Cooling"}:
        raise ValueError(f"kind must be 'Heating' or 'Cooling', got {kind!r}")

    url = f"{CPC_BASE}/{year}/Population.{kind}.txt"

    local_template = RAW_POP_HDD_TEMPLATE if kind == "Heating" else RAW_POP_CDD_TEMPLATE
    # The template's file name contains "{year}"; fill it in and keep the folder.
    local_path = local_template.with_name(local_template.name.format(year=year))

    print(f"[pop-{kind}] {year} → GET {url}")
    # Without a timeout requests waits indefinitely on a stalled connection.
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    # Do not rely on another cell having created the download folder.
    local_path.parent.mkdir(parents=True, exist_ok=True)
    local_path.write_bytes(r.content)
    print(f"[pop-{kind}] Saved raw file to {local_path}")
    return local_path


In [7]:

def parse_population_file(path: Path, kind: str) -> pd.DataFrame:
    """
    Parse a Population.Heating/Cooling file into a daily CONUS series.

    path: file to read. It is read as pipe-delimited text with three preamble
          lines skipped, followed by a header row whose first column names
          the region and whose date columns are labelled YYYYMMDD.
    kind: a value starting with 'h'/'H' names the output column hdd_us_pop;
          anything else names it cdd_us_pop.

    Returns a DataFrame with columns: date, hdd_us_pop OR cdd_us_pop,
    sorted by date with a fresh 0..n-1 index. Rows whose date or value
    cannot be parsed, and rows with a negative value, are dropped.

    Raises ValueError if the file has no row whose region is 'CONUS'.
    """
    # Files are pipe-delimited, with 3-line header
    df = pd.read_csv(
        path,
        sep=r"\|",
        skiprows=3,
        engine="python",
        dtype=str,
    )

    # Strip whitespace from column names and values.
    # dtype=str makes every column text, so strip unconditionally: gating on
    # `dtype == object` skips the strip when pandas returns its dedicated
    # string dtype, and a padded " CONUS " cell would then never match.
    df.columns = [str(c).strip() for c in df.columns]
    for c in df.columns:
        df[c] = df[c].str.strip()

    # First column should be Region; last column is often 'Total' (sum of year)
    region_col = df.columns[0]

    # Select only the CONUS row (national population-weighted)
    conus = df[df[region_col] == "CONUS"]
    if conus.empty:
        raise ValueError(f"No CONUS row found in {path}")

    # The date columns look like YYYYMMDD; ignore non-date columns like 'Total'
    date_cols = [c for c in conus.columns if c.isdigit() and len(c) == 8]

    # Melt to long format: Region, date_str (YYYYMMDD), value.
    # value_vars already restricts the result to the date columns.
    long_df = conus.melt(
        id_vars=[region_col],
        value_vars=date_cols,
        var_name="date_str",
        value_name="value",
    )

    # Parse date and numeric value; anything unparseable becomes NaT/NaN
    long_df["date"] = pd.to_datetime(long_df["date_str"], format="%Y%m%d", errors="coerce")
    long_df["value"] = pd.to_numeric(long_df["value"], errors="coerce")

    # Degree days cannot be negative, so a negative number is a missing-data
    # flag rather than a measurement; blank it so it is dropped below.
    # NOTE(review): confirm the exact sentinel against the file's preamble.
    long_df["value"] = long_df["value"].where(long_df["value"] >= 0)

    long_df = long_df.dropna(subset=["date", "value"]).sort_values("date")

    col_name = "hdd_us_pop" if kind.lower().startswith("h") else "cdd_us_pop"
    long_df = long_df[["date", "value"]].rename(columns={"value": col_name})

    return long_df.reset_index(drop=True)

In [9]:


def build_daily_hddcdd_pop(start_year: int, end_year: int) -> pd.DataFrame:
    """
    Assemble the daily population-weighted CONUS HDD/CDD table for a span of years.

    For every year from start_year through end_year (inclusive) the Heating
    and Cooling files are downloaded and parsed. A failure for one year/kind
    is printed as a warning and that piece is skipped. The collected frames
    are concatenated per kind, outer-joined on date, sorted by date, and rows
    dated before START_DATE are removed.

    Raises RuntimeError when no heating frame or no cooling frame was obtained.
    """
    heating_frames = []
    cooling_frames = []

    for year in range(start_year, end_year + 1):
        # ---- heating degree days ----
        try:
            raw_heating = download_population_file(year, "Heating")
            parsed_heating = parse_population_file(raw_heating, "Heating")
        except Exception as e:
            print(f"[HDD] WARNING: failed for {year}: {e!r}")
        else:
            heating_frames.append(parsed_heating)

        # ---- cooling degree days ----
        try:
            raw_cooling = download_population_file(year, "Cooling")
            parsed_cooling = parse_population_file(raw_cooling, "Cooling")
        except Exception as e:
            print(f"[CDD] WARNING: failed for {year}: {e!r}")
        else:
            cooling_frames.append(parsed_cooling)

    # Both kinds are required; bail out if either list stayed empty.
    if not (heating_frames and cooling_frames):
        raise RuntimeError("No HDD or CDD data were successfully parsed from Population.* files.")

    heating_all = pd.concat(heating_frames, ignore_index=True)
    cooling_all = pd.concat(cooling_frames, ignore_index=True)

    # Outer join keeps dates present in only one of the two series.
    merged = heating_all.merge(cooling_all, on="date", how="outer")
    merged = merged.sort_values("date")

    # Discard everything dated before the configured start.
    cutoff = pd.to_datetime(START_DATE)
    return merged[merged["date"] >= cutoff]

In [11]:
# ---------- execute the full download → parse → save sequence ----------
# Any failure is reported on stdout instead of propagating out of the cell.
try:
    end_year = CURRENT_YEAR
    start_year = pd.Timestamp(START_DATE).year

    hddcdd_daily = build_daily_hddcdd_pop(start_year, end_year)

    # Persist the merged table without the index column
    hddcdd_daily.to_csv(CLEAN_HDDCDD_CSV, index=False)
    print(f"[hddcdd] Saved clean daily HDD/CDD to {CLEAN_HDDCDD_CSV}")

    # Sanity report: row count, covered date span, column names
    if hddcdd_daily.empty:
        print("[hddcdd] WARNING: dataset is empty after filtering/cleaning.")
    else:
        dmin = hddcdd_daily["date"].min().date()
        dmax = hddcdd_daily["date"].max().date()
        print(f"[hddcdd] rows={len(hddcdd_daily)}  range={dmin} → {dmax}")
        print("[hddcdd] columns:", list(hddcdd_daily.columns))

except Exception as e:
    print("[hddcdd] ERROR while building HDD/CDD dataset:", repr(e))


[pop-Heating] 2010 → GET https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/2010/Population.Heating.txt
[pop-Heating] Saved raw file to D:\MS_Data_Science_Thesis\Data_Extraction\Downloaded_datasets\cpc_population_heating_2010.txt
[pop-Cooling] 2010 → GET https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/2010/Population.Cooling.txt
[pop-Cooling] Saved raw file to D:\MS_Data_Science_Thesis\Data_Extraction\Downloaded_datasets\cpc_population_cooling_2010.txt
[pop-Heating] 2011 → GET https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/2011/Population.Heating.txt
[pop-Heating] Saved raw file to D:\MS_Data_Science_Thesis\Data_Extraction\Downloaded_datasets\cpc_population_heating_2011.txt
[pop-Cooling] 2011 → GET https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/2011/Population.Cooling.txt
[pop-Cooling] Saved raw file to D:\MS_Data_Science_Thesis\Data_Extraction\Downloaded_datasets\cpc_population_cooling_2011.txt
[pop-Hea