In [7]:
"""
fetch_and_store.py

Usage:
    python fetch_and_store.py

Description:
    - Fetches total nonfarm (seasonally adjusted) data from BLS for both national
      (CES0000000001) and the specified MSAs.
    - Inserts the raw data into a PostgreSQL table named 'raw_nonfarm_jobs'.
"""

import requests
import json
import psycopg2
import pandas as pd

# ------------------------------
# 1) BLS API & Series Config
# ------------------------------
BLS_API_KEY = "f232fdbb532d456b8ed8ca7ce2a1cbb2"

# NATIONAL series (seasonally adjusted, total nonfarm)
NATIONAL_SERIES_ID = "CES0000000001"

# MSA Series IDs (seasonally adjusted, total nonfarm)
MSA_SERIES_IDS = [
    # 1) New York-Newark-Jersey City, NY-NJ-PA
    "SMS36356200000000001",
    # 2) Los Angeles-Long Beach-Anaheim, CA
    "SMS06310800000000001",
    # 3) Chicago-Naperville-Elgin, IL-IN-WI
    "SMS17169800000000001",
    # 4) Dallas-Fort Worth-Arlington, TX
    "SMS48191000000000001",
    # 5) Houston-The Woodlands-Sugar Land, TX
    "SMS48264200000000001",
    # 6) Washington-Arlington-Alexandria, DC-VA-MD-WV
    "SMS11479000000000001",
    # 7) Philadelphia-Camden-Wilmington, PA-NJ-DE-MD
    "SMS42979610000000001",
    # 8) Miami-Fort Lauderdale-West Palm Beach, FL
    "SMS12331000000000001",
    # 9) Atlanta-Sandy Springs-Roswell, GA
    "SMS13120600000000001",
    # 10) Phoenix-Mesa-Scottsdale, AZ
    "SMS04380600000000001",
    # 11) Boston–Cambridge–Newton, MA–NH
    "SMS25716540000000026",
    # 12) San Francisco–Oakland–Berkeley, CA
    "SMS06418840000000001",
    # 13) Riverside–San Bernardino–Ontario, CA
    "SMS06401400000000001",
    # 14) Detroit–Warren–Dearborn, MI
    "SMS26198200000000001",
    # 15) Seattle–Tacoma–Bellevue, WA
    "SMS53426600000000001",
    # 16) Minneapolis–St. Paul–Bloomington, MN–WI
    "SMS27334600000000001",
    # 17) San Diego–Chula Vista–Carlsbad, CA
    "SMS06417400000000001",
    # 18) Tampa–St. Petersburg–Clearwater, FL
    "SMS12453000000000001",
    # 19) Denver–Aurora–Lakewood, CO
    "SMS08197400000000001",
    # 20) St. Louis, MO–IL
    "SMS29411800000000001",
    # 21) Baltimore–Columbia–Towson, MD
    "SMS24925810000000001",
    # 22) Charlotte–Concord–Gastonia, NC–SC
    "SMS37167400000000001",
    # 23) Orlando–Kissimmee–Sanford, FL
    "SMS12367400000000001",
    # 24) San Antonio–New Braunfels, TX
    "SMS48417000000000001",
    # 25) Portland–Vancouver–Hillsboro, OR–WA
    "SMS41389000000000001",
    # 26) Pittsburgh, PA
    "SMS42383000000000001",
    # 27) Sacramento–Roseville–Arden-Arcade, CA
    "SMS06409000000000001",
    # 28) Las Vegas–Henderson–Paradise, NV
    "SMS32298200000000001",
    # 29) Cincinnati, OH–KY–IN
    "SMS39171400000000001",
    # 30) Kansas City, MO–KS
    "SMS20928120000000001",
    # 31) Columbus, OH
    "SMS18180200000000001",
    # 32) Indianapolis–Carmel–Anderson, IN
    "SMS18269000000000001",
    # 33) Cleveland–Elyria, OH
    "SMS39174600000000001",
    # 34) San Jose–Sunnyvale–Santa Clara, CA
    "SMS06419400000000001",
    # 35) Nashville–Davidson–Murfreesboro–Franklin, TN
    "SMS47349800000000001",
    # 36) Virginia Beach–Norfolk–Newport News, VA–NC
    "SMS51472600000000001",
    # 37) Providence–Warwick, RI–MA
    "SMS44772000000000001",
    # 38) Milwaukee–Waukesha-West Allis, WI
    "SMS55333400000000001",
    # 39) Jacksonville, FL
    "SMS12272600000000001",
    # 40) Memphis, TN–MS–AR
    "SMS47328200000000001",
    # 41) Richmond, VA
    "SMS51400600000000001",
    # 42) Oklahoma City, OK
    "SMS40364200000000001",
    # 43) Hartford–East Hartford–West Hartford, CT
    "SMU04380600000000001",  # Suspect ID for Hartford, double-check
    # 44) New Orleans–Metairie, LA
    "SMS22353800000000001",
    # 45) Buffalo–Cheektowaga–Niagara Falls, NY
    "SMS36153800000000001",
    # 46) Raleigh, NC
    "SMS37395800000000001",
    # 47) Birmingham–Hoover, AL
    "SMS01138200000000001",
    # 48) Salt Lake City, UT
    "SMS49416200000000001",
    # 49) Rochester, NY
    "SMS36403800000000001",
    # 50) Louisville/Jefferson County, KY–IN
    "SMS21311400000000001",
]

# We'll combine the national series with the MSA list
ALL_SERIES_IDS = [NATIONAL_SERIES_ID] + MSA_SERIES_IDS

START_YEAR = "1990"
END_YEAR   = "2024"

# ------------------------------
# 2) Postgres Connection
# ------------------------------
DB_HOST = "localhost"
DB_PORT = "5432"
DB_NAME = "inquire_DB"
DB_USER = "postgres"
DB_PASS = "givedata"  # Replace with your actual password

# ------------------------------
# 3) Fetch Data from BLS
# ------------------------------
def fetch_bls_data():
    """
    Calls the BLS Public Data API for the specified series IDs and date range.
    Returns a DataFrame of [series_id, obs_date, value].
    """
    url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
    payload = {
        "seriesid": ALL_SERIES_IDS,
        "startyear": START_YEAR,
        "endyear": END_YEAR,
        "registrationkey": BLS_API_KEY
    }
    headers = {"Content-type": "application/json"}

    response = requests.post(url, json=payload, headers=headers)
    response.raise_for_status()
    data_json = response.json()

    all_rows = []
    series_list = data_json.get("Results", {}).get("series", [])

    all_rows = []  # define this BEFORE your loop
for series_item in series_list:
    sid = series_item["seriesID"]
    for item in series_item.get("data", []):
        ...
        all_rows.append({...})

df = pd.DataFrame(all_rows)  # Now it works, since all_rows is defined
df["obs_date"] = pd.to_datetime(df["obs_date"])

# NEW lines to debug:
print("Earliest date in df:", df["obs_date"].min())
print("Latest date in df:",   df["obs_date"].max())
print(df.tail(10))

for series_item in series_list:
        sid = series_item["seriesID"]
        for item in series_item.get("data", []):
            period = item["period"]   # e.g. "M01".."M12" or "M13"
            if period.startswith("M") and period != "M13":
                year = int(item["year"])
                month = int(period[1:])
                value = float(item["value"])

                all_rows.append({
                    "series_id": sid,
                    "obs_date": f"{year}-{month:02d}-01",
                    "value": value
                })
df = pd.DataFrame(all_rows)  # existing line
df["obs_date"] = pd.to_datetime(df["obs_date"])

# NEW lines to see what was actually fetched:
print("Total row count from BLS API =", len(df))
print("Sample of last few rows:\n", df.tail(5))  # peek at the end
print("Earliest date in df:", df["obs_date"].min())
print("Latest date in df:",   df["obs_date"].max())

df = pd.DataFrame(all_rows)
df["obs_date"] = pd.to_datetime(df["obs_date"])
return df

# ------------------------------
# 4) Store in Postgres
# ------------------------------
def store_in_postgres(df):
    """
    Inserts rows into 'raw_nonfarm_jobs' table.
    Table schema recommended (SQL):
    
   
    );
    """
    conn = psycopg2.connect(
        host="localhost",
        port="5433",
        dbname="inquire_DB",
        user="postgres",
        password="givedata"
    )
    cur = conn.cursor()

    insert_query = """
        INSERT INTO raw_nonfarm_jobs (series_id, obs_date, value)
        VALUES (%s, %s, %s)
        ON CONFLICT DO NOTHING;
    """

    rows_inserted = 0
    for _, row in df.iterrows():
        cur.execute(insert_query, (row["series_id"], row["obs_date"], row["value"]))
        rows_inserted += cur.rowcount

    conn.commit()
    cur.close()
    conn.close()

    return rows_inserted

def main():
    print("Fetching data from BLS (National + 50 MSAs, 1990–2024)...")
    df = fetch_bls_data()
    print(f"Fetched {len(df)} total rows from BLS.")

    print("Storing into Postgres table 'raw_nonfarm_jobs'...")
    inserted_count = store_in_postgres(df)
    print(f"Inserted {inserted_count} new rows (or fewer if duplicates existed).")

if __name__ == "__main__":
    main()


NameError: name 'series_list' is not defined