In [None]:
"""
Maryland Counties FRED Data Aggregator
======================================

This script pulls monthly and annual FRED and BLS metrics for all 24 Maryland
counties using the `fredapi` package. It creates:
1. A master dataset containing all counties.
2. One CSV per county, cleaned and date-indexed for easy Tableau ingestion.

Author: Joshua Kwan
Date: 11/5/25 
"""

from fredapi import Fred
import pandas as pd
import time
from functools import reduce
from tqdm import tqdm
import os

# ====================== CONFIGURATION ======================

API_KEY = "Fred API key"  # <-- replace with your own FRED key
fred = Fred(api_key=API_KEY)
SLEEP_TIME = 0.25  # To avoid FRED API rate limits

# Folder paths (created if they don't exist)
COUNTY_EXPORT_PATH = "data/counties/"
MASTER_EXPORT_PATH = "data/master/"
os.makedirs(COUNTY_EXPORT_PATH, exist_ok=True)
os.makedirs(MASTER_EXPORT_PATH, exist_ok=True)

# ==============================
# Series Map by County
# freq: "M" = monthly, "A" = annual
# ==============================

COUNTIES = {
    # Code -> name + series IDs
    # Employment Count is LAUS "Employment Level": LAUCN + state(24) + county(FIPS3) + area(0000000) + 05
    # Example: Allegany (FIPS 001) -> LAUCN240010000000005
    "AG": {
        "County": "Allegany",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24001A", "A"),
            "Civilian_Labor_Force": ("MDALLE0LFN", "M"),
            "Employment_Count": ("LAUCN240010000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24001A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24001", "M"),
            "Median_Listing_Price": ("MEDLISPRI24001", "M"),
            "Permits_Annual": ("BPPRIV024001", "A"),
            "Real_GDP": ("REALGDPALL24001", "A"),
            "Population_Annual": ("MDALLE0POP", "A"),
            "Unemployed_Persons": ("LAUCN240010000000004", "M"),
            "Unemployment_Rate": ("MDALLE0URN", "M"),
        },
    },
    "AA": {
        "County": "Anne Arundel",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24003A", "A"),
            "Civilian_Labor_Force": ("MDANNE5LFN", "M"),
            "Employment_Count": ("LAUCN240030000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24003A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24003", "M"),
            "Median_Listing_Price": ("MEDLISPRI24003", "M"),
            "Permits_Annual": ("BPPRIV024003", "A"),
            "Real_GDP": ("REALGDPALL24003", "A"),
            "Population_Annual": ("MDANNE5POP", "A"),
            "Unemployed_Persons": ("LAUCN240030000000004", "M"),
            "Unemployment_Rate": ("MDANNE5URN", "M"),
        },
    },
    "BALT": {
        "County": "Baltimore County",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24005A", "A"),
            "Civilian_Labor_Force": ("MDBALT0LFN", "M"),
            "Employment_Count": ("LAUCN240050000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24005A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24005", "M"),
            "Median_Listing_Price": ("MEDLISPRI24005", "M"),
            "Permits_Annual": ("BPPRIV024005", "A"),
            "Real_GDP": ("REALGDPALL24005", "A"),
            "Population_Annual": ("MDBALT0POP", "A"),
            "Unemployed_Persons": ("LAUCN240050000000004", "M"),
            "Unemployment_Rate": ("MDBALT0URN", "M"),
        },
    },
    "BALT_CITY": {
        "County": "Baltimore City",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24510A", "A"),
            "Civilian_Labor_Force": ("MDBALT5LFN", "M"),
            # client alt option was CES "SMS24925810000000001"
            "Employment_Count": ("SMS24925810000000001", "M"),
            "Poverty_All_Ages": ("PPAAMD24510A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24510", "M"),
            "Median_Listing_Price": ("MEDLISPRI24510", "M"),
            "Permits_Annual": ("BPPRIV024510", "A"),
            "Real_GDP": ("REALGDPALL24510", "A"),
            "Population_Annual": ("MDBALT5POP", "A"),
            "Unemployed_Persons": ("LAUCN245100000000004", "M"),
            "Unemployment_Rate": ("MDBALT5URN", "M"),
        },
    },
    "CAL": {
        "County": "Calvert",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24009A", "A"),
            "Civilian_Labor_Force": ("MDCALV9LFN", "M"),
            "Employment_Count": ("LAUCN240090000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24009A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24009", "M"),
            "Median_Listing_Price": ("MEDLISPRI24009", "M"),
            "Permits_Annual": ("BPPRIV024009", "A"),
            "Real_GDP": ("REALGDPALL24009", "A"),
            "Population_Annual": ("MDCALV9POP", "A"),
            "Unemployed_Persons": ("LAUCN240090000000004", "M"),
            "Unemployment_Rate": ("MDCALV9URN", "M"),
        },
    },
    "CAR": {
        "County": "Caroline",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24011A", "A"),
            "Civilian_Labor_Force": ("MDCARO1LFN", "M"),
            "Employment_Count": ("LAUCN240110000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24011A156NCEN", "A"),
            # No active listings / median listing price for Caroline
            "Permits_Annual": ("BPPRIV024011", "A"),
            "Real_GDP": ("REALGDPALL24011", "A"),
            "Population_Annual": ("MDCARO1POP", "A"),
            "Unemployed_Persons": ("LAUCN240110000000004", "M"),
            "Unemployment_Rate": ("MDCARO1URN", "M"),
        },
    },
    "CARR": {
        "County": "Carroll",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24013A", "A"),
            "Civilian_Labor_Force": ("MDCARR5LFN", "M"),
            "Employment_Count": ("LAUCN240130000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24013A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24013", "M"),
            "Median_Listing_Price": ("MEDLISPRI24013", "M"),
            "Permits_Annual": ("BPPRIV024013", "A"),
            "Real_GDP": ("REALGDPALL24013", "A"),
            "Population_Annual": ("MDCARR5POP", "A"),
            "Unemployed_Persons": ("LAUCN240130000000004", "M"),
            "Unemployment_Rate": ("MDCARR5URN", "M"),
        },
    },
    "CEC": {
        "County": "Cecil",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24015A", "A"),
            "Civilian_Labor_Force": ("MDCECI0LFN", "M"),
            "Employment_Count": ("LAUCN240150000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24015A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24015", "M"),
            "Median_Listing_Price": ("MEDLISPRI24015", "M"),
            "Permits_Annual": ("BPPRIV024015", "A"),
            "Real_GDP": ("REALGDPALL24015", "A"),
            "Population_Annual": ("MDCECI0POP", "A"),
            "Unemployed_Persons": ("LAUCN240150000000004", "M"),
            "Unemployment_Rate": ("MDCECI0URN", "M"),
        },
    },
    "CHA": {
        "County": "Charles",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24017A", "A"),
            "Civilian_Labor_Force": ("MDCHAR0LFN", "M"),
            "Employment_Count": ("LAUCN240170000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24017A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24017", "M"),
            "Median_Listing_Price": ("MEDLISPRI24017", "M"),
            "Permits_Annual": ("BPPRIV024017", "A"),
            "Real_GDP": ("REALGDPALL24017", "A"),
            "Population_Annual": ("MDCHAR0POP", "A"),
            "Unemployed_Persons": ("LAUCN240170000000004", "M"),
            "Unemployment_Rate": ("MDCHAR0URN", "M"),
        },
    },
    "DOR": {
        "County": "Dorchester",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24019A", "A"),
            "Civilian_Labor_Force": ("MDDORC9LFN", "M"),
            "Employment_Count": ("LAUCN240190000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24019A156NCEN", "A"),
            # No active listings / median listing price for Dorchester
            "Permits_Annual": ("BPPRIV024019", "A"),
            "Real_GDP": ("REALGDPALL24019", "A"),
            "Population_Annual": ("MDDORC9POP", "A"),
            "Unemployed_Persons": ("LAUCN240190000000004", "M"),
            "Unemployment_Rate": ("MDDORC9URN", "M"),
        },
    },
    "FRE": {
        "County": "Frederick",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24021A", "A"),
            "Civilian_Labor_Force": ("MDFRED5LFN", "M"),
            "Employment_Count": ("LAUCN240210000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24021A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24021", "M"),
            "Median_Listing_Price": ("MEDLISPRI24021", "M"),
            "Permits_Annual": ("BPPRIV024021", "A"),
            "Real_GDP": ("REALGDPALL24021", "A"),
            "Population_Annual": ("MDFRED5POP", "A"),
            "Unemployed_Persons": ("LAUCN240210000000004", "M"),
            "Unemployment_Rate": ("MDFRED5URN", "M"),
        },
    },
    "GAR": {
        "County": "Garrett",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24023A", "A"),
            "Civilian_Labor_Force": ("MDGARR3LFN", "M"),
            "Employment_Count": ("LAUCN240230000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24023A156NCEN", "A"),
            # No active listings / median listing price for Garrett
            "Permits_Annual": ("BPPRIV024023", "A"),
            "Real_GDP": ("REALGDPALL24023", "A"),
            "Population_Annual": ("MDGARR3POP", "A"),
            "Unemployed_Persons": ("LAUCN240230000000004", "M"),
            "Unemployment_Rate": ("MDGARR3URN", "M"),
        },
    },
    "HAR": {
        "County": "Harford",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24025A", "A"),
            "Civilian_Labor_Force": ("MDHARF0LFN", "M"),
            "Employment_Count": ("LAUCN240250000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24025A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24025", "M"),
            "Median_Listing_Price": ("MEDLISPRI24025", "M"),
            "Permits_Annual": ("BPPRIV024025", "A"),
            "Real_GDP": ("REALGDPALL24025", "A"),
            "Population_Annual": ("MDHARF0POP", "A"),
            "Unemployed_Persons": ("LAUCN240250000000004", "M"),
            "Unemployment_Rate": ("MDHARF0URN", "M"),
        },
    },
    "HOW": {
        "County": "Howard",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24027A", "A"),
            "Civilian_Labor_Force": ("MDHOWA0LFN", "M"),
            "Employment_Count": ("LAUCN240270000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24027A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24027", "M"),
            "Median_Listing_Price": ("MEDLISPRI24027", "M"),
            "Permits_Annual": ("BPPRIV024027", "A"),
            "Real_GDP": ("REALGDPALL24027", "A"),
            "Population_Annual": ("MDHOWA0POP", "A"),
            "Unemployed_Persons": ("LAUCN240270000000004", "M"),
            "Unemployment_Rate": ("MDHOWA0URN", "M"),
        },
    },
    "KENT": {
        "County": "Kent",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24029A", "A"),
            "Civilian_Labor_Force": ("MDKENT9LFN", "M"),
            "Employment_Count": ("LAUCN240290000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24029A156NCEN", "A"),
            # No active listings / median listing price for Kent
            "Permits_Annual": ("BPPRIV024029", "A"),
            "Real_GDP": ("REALGDPALL24029", "A"),
            "Population_Annual": ("MDKENT9POP", "A"),
            "Unemployed_Persons": ("LAUCN240290000000004", "M"),
            "Unemployment_Rate": ("MDKENT9URN", "M"),
        },
    },
    "MON": {
        "County": "Montgomery",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24031A", "A"),
            "Civilian_Labor_Force": ("MDMONT0LFN", "M"),
            "Employment_Count": ("LAUCN240310000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24031A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24031", "M"),
            "Median_Listing_Price": ("MEDLISPRI24031", "M"),
            "Permits_Annual": ("BPPRIV024031", "A"),
            "Real_GDP": ("REALGDPALL24031", "A"),
            "Population_Annual": ("MDMONT0POP", "A"),
            "Unemployed_Persons": ("LAUCN240310000000004", "M"),
            "Unemployment_Rate": ("MDMONT0URN", "M"),
        },
    },
    "PG": {
        "County": "Prince George's",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24033A", "A"),
            "Civilian_Labor_Force": ("MDPRIN5LFN", "M"),
            "Employment_Count": ("LAUCN240330000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24033A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24033", "M"),
            "Median_Listing_Price": ("MEDLISPRI24033", "M"),
            "Permits_Annual": ("BPPRIV024033", "A"),
            "Real_GDP": ("REALGDPALL24033", "A"),
            "Population_Annual": ("MDPRIN5POP", "A"),
            "Unemployed_Persons": ("LAUCN240330000000004", "M"),
            "Unemployment_Rate": ("MDPRIN5URN", "M"),
        },
    },
    "QA": {
        "County": "Queen Anne's",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24035A", "A"),
            "Civilian_Labor_Force": ("MDQUEE5LFN", "M"),
            "Employment_Count": ("LAUCN240350000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24035A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24035", "M"),
            "Median_Listing_Price": ("MEDLISPRI24035", "M"),
            "Permits_Annual": ("BPPRIV024035", "A"),
            "Real_GDP": ("REALGDPALL24035", "A"),
            "Population_Annual": ("MDQUEE5POP", "A"),
            "Unemployed_Persons": ("LAUCN240350000000004", "M"),
            "Unemployment_Rate": ("MDQUEE5URN", "M"),
        },
    },
    "SOM": {
        "County": "Somerset",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24039A", "A"),
            "Civilian_Labor_Force": ("MDSOME9LFN", "M"),
            "Employment_Count": ("LAUCN240390000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24039A156NCEN", "A"),
            # No active listings / median listing price for Somerset
            "Permits_Annual": ("BPPRIV024039", "A"),
            "Real_GDP": ("REALGDPALL24039", "A"),
            "Population_Annual": ("MDSOME9POP", "A"),
            "Unemployed_Persons": ("LAUCN240390000000004", "M"),
            "Unemployment_Rate": ("MDSOME9URN", "M"),
        },
    },
    "STM": {
        "County": "St. Mary's",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24037A", "A"),
            "Civilian_Labor_Force": ("MDSTMA5LFN", "M"),
            "Employment_Count": ("LAUCN240370000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24037A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24037", "M"),
            "Median_Listing_Price": ("MEDLISPRI24037", "M"),
            "Permits_Annual": ("BPPRIV024037", "A"),
            "Real_GDP": ("REALGDPALL24037", "A"),
            "Population_Annual": ("MDSTMA5POP", "A"),
            "Unemployed_Persons": ("LAUCN240370000000004", "M"),
            "Unemployment_Rate": ("MDSTMA5URN", "M"),
        },
    },
    "TAL": {
        "County": "Talbot",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24041A", "A"),
            "Civilian_Labor_Force": ("MDTALB1LFN", "M"),
            "Employment_Count": ("LAUCN240410000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24041A156NCEN", "A"),
            # No active listings / median listing price for Talbot
            "Permits_Annual": ("BPPRIV024041", "A"),
            "Real_GDP": ("REALGDPALL24041", "A"),
            "Population_Annual": ("MDTALB1POP", "A"),
            "Unemployed_Persons": ("LAUCN240410000000004", "M"),
            "Unemployment_Rate": ("MDTALB1URN", "M"),
        },
    },
    "WAS": {
        "County": "Washington",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24043A", "A"),
            "Civilian_Labor_Force": ("MDWASH5LFN", "M"),
            "Employment_Count": ("LAUCN240430000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24043A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24043", "M"),
            "Median_Listing_Price": ("MEDLISPRI24043", "M"),
            "Permits_Annual": ("BPPRIV024043", "A"),
            "Real_GDP": ("REALGDPALL24043", "A"),
            "Population_Annual": ("MDWASH5POP", "A"),
            "Unemployed_Persons": ("LAUCN240430000000004", "M"),
            "Unemployment_Rate": ("MDWASH5URN", "M"),
        },
    },
    "WIC": {
        "County": "Wicomico",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24045A", "A"),
            "Civilian_Labor_Force": ("MDWICO5LFN", "M"),
            "Employment_Count": ("LAUCN240450000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24045A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24045", "M"),  # confirmed Active Listings
            "Median_Listing_Price": ("MEDLISPRI24045", "M"),
            "Permits_Annual": ("BPPRIV024045", "A"),
            "Real_GDP": ("REALGDPALL24045", "A"),
            "Population_Annual": ("MDWICO5POP", "A"),
            "Unemployed_Persons": ("LAUCN240450000000004", "M"),
            "Unemployment_Rate": ("MDWICO5URN", "M"),
        },
    },
    "WOR": {
        "County": "Worcester",
        "series": {
            "HPI_AllTransactions": ("ATNHPIUS24047A", "A"),
            "Civilian_Labor_Force": ("MDWORC7LFN", "M"),
            "Employment_Count": ("LAUCN240470000000005", "M"),
            "Poverty_All_Ages": ("PPAAMD24047A156NCEN", "A"),
            "Active_Listings": ("ACTLISCOU24047", "M"),
            "Median_Listing_Price": ("MEDLISPRI24047", "M"),
            "Permits_Annual": ("BPPRIV024047", "A"),
            "Real_GDP": ("REALGDPALL24047", "A"),
            "Population_Annual": ("MDWORC7POP", "A"),
            "Unemployed_Persons": ("LAUCN240470000000004", "M"),
            "Unemployment_Rate": ("MDWORC7URN", "M"),
        },
    },
}


# ==============================
#            HELPERS
# ==============================

def period_to_month_end(series: pd.Series, freq: str) -> pd.DataFrame:
    """
    Converts a FRED series to a time-indexed DataFrame with month-end timestamps.

    Args:
        series (pd.Series): Raw FRED time-series object
        freq (str): 'M' for monthly or 'A' for annual data

    Returns:
        pd.DataFrame: DataFrame with columns ['Date', 'Value'] indexed by period
    """
    df = pd.DataFrame({"Date": pd.to_datetime(series.index, errors="coerce"), "Value": series.values})
    df = df.dropna(subset=["Date"])

    if freq == "M":
        df["Date"] = df["Date"].dt.to_period("M").dt.to_timestamp("M")
    else:
        df["Date"] = df["Date"].dt.to_period("Y").dt.to_timestamp("M")
        df = df.set_index("Date").resample("ME").ffill().reset_index()

    return df


def build_county_df(code: str, county_data: dict) -> pd.DataFrame:
    """
    Pulls and assembles all FRED series for one county into a single DataFrame.

    Args:
        code (str): 2-5 letter county code (e.g., 'HOW')
        county_data (dict): Metadata block from COUNTIES mapping

    Returns:
        pd.DataFrame: Combined DataFrame for county with all metrics
    """
    frames = []

    for col_name, (series_id, freq) in tqdm(county_data["series"].items(), desc=f"Loading {code}", leave=False):
        try:
            time.sleep(SLEEP_TIME)
            series = fred.get_series(series_id)
            if series is None or series.empty:
                print(f"‚ö†Ô∏è {code} {col_name}: empty or invalid series: {series_id}")
                continue

            df = period_to_month_end(series, freq).rename(columns={"Value": col_name})
            frames.append(df)

        except Exception as err:
            print(f"‚ö†Ô∏è {code} {col_name}: failed to load {series_id} -> {err}")

    if not frames:
        return pd.DataFrame()

    merged = reduce(lambda left, right: pd.merge(left, right, on="Date", how="outer"), frames)
    merged.insert(1, "County", county_data["County"])
    merged.insert(2, "County_Code", code)
    
    # Rearrange columns for readability: Date, County, County_Code, <metrics>
    metric_cols = sorted([col for col in merged.columns if col not in ["Date", "County", "County_Code"]])
    merged = merged[["Date", "County", "County_Code"] + metric_cols]

    return merged.sort_values("Date").reset_index(drop=True)


# ==============================
#            MAIN
# ==============================

def main():
    """
    Main driver function. Loops over all counties, exports per-county CSVs, and
    builds a global master dataset. Also logs total runtime.
    """
    start_time = time.time()
    print("üìä Fetching data for all Maryland counties...\n")
    all_dfs = []

    # Loop over all counties
    for code, meta in COUNTIES.items():
        df = build_county_df(code, meta)

        if df.empty:
            print(f"‚ùó Skipped {meta['County']} ({code}) - no data collected.\n")
            continue

        # Check for missing critical fields
        required_cols = ["Employment_Count", "Active_Listings", "Median_Listing_Price"]
        missing = [col for col in required_cols if col not in df.columns]
        if missing:
            print(f"‚ö†Ô∏è {meta['County']} missing: {missing}. Verify if unavailable in FRED.")

        # Export each county individually
        file_name = f"{meta['County'].replace(' ', '_')}.csv"
        csv_path = os.path.join(COUNTY_EXPORT_PATH, file_name)
        df.to_csv(csv_path, index=False)
        print(f"‚úÖ Exported {meta['County']} to: {csv_path}")

        all_dfs.append(df)
        print(f"‚úÖ Completed: {meta['County']} ({code})\n")

    # Build and export master dataset
    if all_dfs:
        master_df = pd.concat(all_dfs, ignore_index=True).sort_values(["County", "Date"])
        master_path = os.path.join(MASTER_EXPORT_PATH, "maryland_master.csv")
        master_df.to_csv(master_path, index=False)
        print(f"\nüéâ Master dataset created: {master_df.shape[0]} rows, {master_df.shape[1]} columns")
        print(f"üìÅ Saved at: {master_path}")
    else:
        print("\n‚ùó No data collected for any county. Check API or series IDs.")

    print(f"\n‚è±Ô∏è Total run time: {time.time() - start_time:.2f} seconds")


# ==============================
#         ENTRY POINT
# ==============================

if __name__ == "__main__":
    main()

üìä Fetching data for all Maryland counties...



                                                                                                                       

‚úÖ Exported Allegany to: data/counties/Allegany.csv
‚úÖ Completed: Allegany (AG)



                                                                                                                       

‚úÖ Exported Anne Arundel to: data/counties/Anne_Arundel.csv
‚úÖ Completed: Anne Arundel (AA)



                                                                                                                       

‚úÖ Exported Baltimore County to: data/counties/Baltimore_County.csv
‚úÖ Completed: Baltimore County (BALT)



                                                                                                                       

‚úÖ Exported Baltimore City to: data/counties/Baltimore_City.csv
‚úÖ Completed: Baltimore City (BALT_CITY)



                                                                                                                       

‚úÖ Exported Calvert to: data/counties/Calvert.csv
‚úÖ Completed: Calvert (CAL)



                                                                                                                       

‚ö†Ô∏è Caroline missing: ['Active_Listings', 'Median_Listing_Price']. Verify if unavailable in FRED.
‚úÖ Exported Caroline to: data/counties/Caroline.csv
‚úÖ Completed: Caroline (CAR)



                                                                                                                       

‚úÖ Exported Carroll to: data/counties/Carroll.csv
‚úÖ Completed: Carroll (CARR)



                                                                                                                       

‚úÖ Exported Cecil to: data/counties/Cecil.csv
‚úÖ Completed: Cecil (CEC)



                                                                                                                       

‚úÖ Exported Charles to: data/counties/Charles.csv
‚úÖ Completed: Charles (CHA)



                                                                                                                       

‚ö†Ô∏è Dorchester missing: ['Active_Listings', 'Median_Listing_Price']. Verify if unavailable in FRED.
‚úÖ Exported Dorchester to: data/counties/Dorchester.csv
‚úÖ Completed: Dorchester (DOR)



                                                                                                                       

‚úÖ Exported Frederick to: data/counties/Frederick.csv
‚úÖ Completed: Frederick (FRE)



                                                                                                                       

‚ö†Ô∏è Garrett missing: ['Active_Listings', 'Median_Listing_Price']. Verify if unavailable in FRED.
‚úÖ Exported Garrett to: data/counties/Garrett.csv
‚úÖ Completed: Garrett (GAR)



                                                                                                                       

‚úÖ Exported Harford to: data/counties/Harford.csv
‚úÖ Completed: Harford (HAR)



                                                                                                                       

‚úÖ Exported Howard to: data/counties/Howard.csv
‚úÖ Completed: Howard (HOW)



                                                                                                                       

‚ö†Ô∏è Kent missing: ['Active_Listings', 'Median_Listing_Price']. Verify if unavailable in FRED.
‚úÖ Exported Kent to: data/counties/Kent.csv
‚úÖ Completed: Kent (KENT)



Loading MON:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé            | 9/11 [00:04<00:00,  2.17it/s]

‚ö†Ô∏è MON Population_Annual: failed to load MDMONT0POP -> Too Many Requests.  Exceeded Rate Limit


Loading MON:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã      | 10/11 [00:04<00:00,  2.14it/s]

‚ö†Ô∏è MON Unemployed_Persons: failed to load LAUCN240310000000004 -> Too Many Requests.  Exceeded Rate Limit


                                                                                                                       

‚ö†Ô∏è MON Unemployment_Rate: failed to load MDMONT0URN -> Too Many Requests.  Exceeded Rate Limit
‚úÖ Exported Montgomery to: data/counties/Montgomery.csv
‚úÖ Completed: Montgomery (MON)



Loading PG:   9%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                                                                | 1/11 [00:00<00:04,  2.37it/s]

‚ö†Ô∏è PG HPI_AllTransactions: failed to load ATNHPIUS24033A -> Too Many Requests.  Exceeded Rate Limit


Loading PG:  18%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 2/11 [00:00<00:03,  2.27it/s]

‚ö†Ô∏è PG Civilian_Labor_Force: failed to load MDPRIN5LFN -> Too Many Requests.  Exceeded Rate Limit


Loading PG:  27%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                   | 3/11 [00:01<00:03,  2.31it/s]

‚ö†Ô∏è PG Employment_Count: failed to load LAUCN240330000000005 -> Too Many Requests.  Exceeded Rate Limit


Loading PG:  36%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                             | 4/11 [00:01<00:03,  2.33it/s]

‚ö†Ô∏è PG Poverty_All_Ages: failed to load PPAAMD24033A156NCEN -> Too Many Requests.  Exceeded Rate Limit


                                                                                                                       

‚ö†Ô∏è Prince George's missing: ['Employment_Count']. Verify if unavailable in FRED.
‚úÖ Exported Prince George's to: data/counties/Prince_George's.csv
‚úÖ Completed: Prince George's (PG)



                                                                                                                       

‚úÖ Exported Queen Anne's to: data/counties/Queen_Anne's.csv
‚úÖ Completed: Queen Anne's (QA)



                                                                                                                       

‚ö†Ô∏è Somerset missing: ['Active_Listings', 'Median_Listing_Price']. Verify if unavailable in FRED.
‚úÖ Exported Somerset to: data/counties/Somerset.csv
‚úÖ Completed: Somerset (SOM)



                                                                                                                       

‚úÖ Exported St. Mary's to: data/counties/St._Mary's.csv
‚úÖ Completed: St. Mary's (STM)



                                                                                                                       

‚ö†Ô∏è Talbot missing: ['Active_Listings', 'Median_Listing_Price']. Verify if unavailable in FRED.
‚úÖ Exported Talbot to: data/counties/Talbot.csv
‚úÖ Completed: Talbot (TAL)



                                                                                                                       

‚úÖ Exported Washington to: data/counties/Washington.csv
‚úÖ Completed: Washington (WAS)



                                                                                                                       

‚úÖ Exported Wicomico to: data/counties/Wicomico.csv
‚úÖ Completed: Wicomico (WIC)



                                                                                                                       

‚úÖ Exported Worcester to: data/counties/Worcester.csv
‚úÖ Completed: Worcester (WOR)


üéâ Master dataset created: 16008 rows, 14 columns
üìÅ Saved at: data/master/maryland_master.csv

‚è±Ô∏è Total run time: 119.47 seconds




In [12]:
from fredapi import Fred
import pandas as pd
import time
from tqdm import tqdm

API_KEY = "2ccf5b794d310f8cde1d30c463f8d2d4"
fred = Fred(api_key=API_KEY)

def validate_series_ids(counties_dict):
    """
    Validates all series IDs in the COUNTIES dataset by trying to pull the first data point.
    Handles FRED API rate limits with retry logic and exponential backoff.
    """
    invalid_series = []
    rate_limit_issues = []

    total_series_count = sum(len(data["series"]) for data in counties_dict.values())
    print(f"===== STARTING VALIDATION =====\nTotal series to check: {total_series_count}\n")

    # Loop through each county and each series within it
    for county_code, meta in tqdm(counties_dict.items(), desc="Validating Counties"):
        for series_name, (series_id, _) in meta["series"].items():
            retries = 3
            success = False

            for attempt in range(retries):
                try:
                    time.sleep(1)  # Slow down to avoid rate limit
                    data = fred.get_series(series_id)
                    if data is not None and not data.empty:
                        success = True
                        break
                    else:
                        print(f"‚ùå {county_code} / {series_name}: EMPTY data for series '{series_id}'")
                        invalid_series.append((county_code, series_name, series_id))
                        break

                except Exception as e:
                    error_msg = str(e)

                    # If rate limit error, retry with exponential backoff
                    if "Too Many Requests" in error_msg and attempt < retries - 1:
                        wait = 2 ** attempt
                        print(f"‚ö†Ô∏è Rate limit hit for {series_id}. Retrying in {wait}s...")
                        time.sleep(wait)
                    else:
                        print(f"‚ùå {county_code} / {series_name}: ERROR for '{series_id}' -> {error_msg}")
                        if "Too Many Requests" in error_msg:
                            rate_limit_issues.append((county_code, series_name, series_id))
                        else:
                            invalid_series.append((county_code, series_name, series_id))
                        break

            if not success:
                continue

    # Summary
    print("\n===== VALIDATION SUMMARY =====")
    print(f"Total series IDs checked: {total_series_count}")
    print(f"Invalid/missing series IDs: {len(invalid_series)}")
    for item in invalid_series:
        print(f"- {item[0]} / {item[1]}: {item[2]}")

    print(f"\nSeries with rate-limit issues (re-run with slower pacing): {len(rate_limit_issues)}")
    for item in rate_limit_issues:
        print(f"- {item[0]} / {item[1]}: {item[2]}")

    return invalid_series, rate_limit_issues

if __name__ == "__main__":
    invalid, rate_limited = validate_series_ids(COUNTIES)

===== STARTING VALIDATION =====
Total series to check: 252



Validating Counties: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 24/24 [05:17<00:00, 13.23s/it]


===== VALIDATION SUMMARY =====
Total series IDs checked: 252
Invalid/missing series IDs: 0

Series with rate-limit issues (re-run with slower pacing): 0



