In [2]:
import os
import requests
import pandas as pd
import numpy as np

In [3]:
api_key = os.getenv('FRED_API_KEY')
base = 'https://api.stlouisfed.org/fred'

In [4]:
params = {
    'api_key': api_key,
    'series_id': 'PAYEMS',
    'file_type': 'json',
    'observation_start': "2022-01-01"
}

In [5]:
r = requests.get(f'{base}/series/observations', params=params, timeout=30)
r.raise_for_status

<bound method Response.raise_for_status of <Response [200]>>

In [6]:
data = r.json()
print("Top-level keys:", data.keys())

Top-level keys: dict_keys(['realtime_start', 'realtime_end', 'observation_start', 'observation_end', 'units', 'output_type', 'file_type', 'order_by', 'sort_order', 'count', 'offset', 'limit', 'observations'])


In [7]:
obs = data["observations"]
df = pd.DataFrame(obs)

In [8]:
df.head(10)


Unnamed: 0,realtime_start,realtime_end,date,value
0,2025-09-21,2025-09-21,2022-01-01,150006
1,2025-09-21,2025-09-21,2022-02-01,150875
2,2025-09-21,2025-09-21,2022-03-01,151346
3,2025-09-21,2025-09-21,2022-04-01,151651
4,2025-09-21,2025-09-21,2022-05-01,151892
5,2025-09-21,2025-09-21,2022-06-01,152353
6,2025-09-21,2025-09-21,2022-07-01,153049
7,2025-09-21,2025-09-21,2022-08-01,153286
8,2025-09-21,2025-09-21,2022-09-01,153513
9,2025-09-21,2025-09-21,2022-10-01,153913


In [9]:
import os, time, requests, pandas as pd
from pathlib import Path

API_KEY = os.getenv("FRED_API_KEY") or "PASTE_YOUR_KEY"
BASE = "https://api.stlouisfed.org/fred"
OUTDIR = Path("data/revisions"); OUTDIR.mkdir(parents=True, exist_ok=True)

SERIES = "PAYEMS"      # total nonfarm, SA (monthly)
USREC_SERIES = "USREC" # NBER recession indicator (0/1)

def fred(endpoint, **params):
    p = {"api_key": API_KEY, "file_type": "json"}; p.update(params)
    r = requests.get(f"{BASE}/{endpoint}", params=p, timeout=60)
    r.raise_for_status()
    return r.json()

# 1) get all release (vintage) dates for PAYEMS (≈ monthly, since 1939)
vints = fred("series/vintagedates", series_id=SERIES)["vintage_dates"]
print(f"{SERIES} vintages: {len(vints)} (first={vints[0]}, last={vints[-1]})")

# 2) fetch observations for many vintages in batches → tidy “panel”
def fetch_obs_for_vintages(series_id, vintage_dates, batch=80, sleep=0.15):
    frames = []
    for i in range(0, len(vintage_dates), batch):
        vd = ",".join(vintage_dates[i:i+batch])
        obs = fred("series/observations", series_id=series_id, vintage_dates=vd)["observations"]
        df = pd.DataFrame(obs)
        frames.append(df)
        time.sleep(sleep)
    panel = pd.concat(frames, ignore_index=True)
    # types
    panel["ref_month"] = pd.to_datetime(panel["date"])
    panel["vintage"]   = pd.to_datetime(panel["realtime_start"])
    panel["value"]     = pd.to_numeric(panel["value"].replace(".", pd.NA))
    return panel[["ref_month","vintage","value"]].sort_values(["ref_month","vintage"])

panel = fetch_obs_for_vintages(SERIES, vints)
print("panel rows:", len(panel), "| months:", panel["ref_month"].nunique())

# 3) for each ref_month, take earliest 3 distinct vintages → first / second / third
def first_second_third(g):
    g = g.dropna(subset=["value"]).drop_duplicates(subset=["vintage"]).sort_values("vintage")
    vals = g["value"].tolist()
    return pd.Series({
        "first":  vals[0] if len(vals)>=1 else pd.NA,
        "second": vals[1] if len(vals)>=2 else pd.NA,
        "third":  vals[2] if len(vals)>=3 else pd.NA,
        "n_vintages": len(vals)
    })

rev = (panel.groupby("ref_month")[["vintage","value"]]
             .apply(lambda g: first_second_third(g))
             .sort_index())

# 4) revision deltas (levels and %)
rev["rev_2nd_minus_1st"] = rev["second"] - rev["first"]
rev["rev_3rd_minus_1st"] = rev["third"]  - rev["first"]
rev["pct_rev_3rd_vs_3rd"] = 100 * rev["rev_3rd_minus_1st"] / rev["third"]

# 5) optional: tag recessions (latest “now” series is fine—no vintages needed)
usrec = fred("series/observations", series_id=USREC_SERIES,
             observation_start="1939-01-01")["observations"]
usrec = (pd.DataFrame(usrec)
           .assign(date=lambda d: pd.to_datetime(d["date"]),
                   USREC=lambda d: pd.to_numeric(d["value"]))
           .rename(columns={"date":"ref_month"})
           [["ref_month","USREC"]])
rev = rev.reset_index().merge(usrec, on="ref_month", how="left").set_index("ref_month")

# 6) save outputs
rev.to_csv(OUTDIR/"payems_monthly_revisions.csv", index=True)
rev.to_parquet(OUTDIR/"payems_monthly_revisions.parquet")
print("saved:", (OUTDIR/"payems_monthly_revisions.csv").resolve())

# quick sanity prints
print("\nsanity (post-1990):")
rev90 = rev.loc[rev.index>="1990-01-01"]
print("mean 2nd-1st:", round(rev90["rev_2nd_minus_1st"].mean(),1),
      "| mean 3rd-1st:", round(rev90["rev_3rd_minus_1st"].mean(),1),
      "| rows:", len(rev90))
print("recent tail:")
print(rev.tail(6)[["first","second","third","rev_2nd_minus_1st","rev_3rd_minus_1st","USREC"]])


PAYEMS vintages: 848 (first=1955-05-06, last=2025-09-05)
panel rows: 19757 | months: 1040
saved: C:\Users\Max\Documents\BLS_revision\data\revisions\payems_monthly_revisions.csv

sanity (post-1990):
mean 2nd-1st: 14.4 | mean 3rd-1st: 25.7 | rows: 428
recent tail:
             first  second   third rev_2nd_minus_1st rev_3rd_minus_1st  USREC
ref_month                                                                    
2025-03-01  159398  159340  159275               -58              -123      0
2025-04-01  159517  159422  159433               -95               -84      0
2025-05-01  159561  159577  159452                16              -109      0
2025-06-01  159724  159466  159439              -258              -285      0
2025-07-01  159539  159518    <NA>               -21              <NA>      0
2025-08-01  159540    <NA>    <NA>              <NA>              <NA>      0


In [17]:
rev.tail(30)

Unnamed: 0_level_0,first,second,third,n_vintages,rev_2nd_minus_1st,rev_3rd_minus_1st,pct_rev_3rd_vs_3rd,USREC
ref_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-03-01,155569,155420.0,155472.0,5,-149.0,-97.0,-0.062391,0
2023-04-01,155673,155766.0,155689.0,5,93.0,16.0,0.010277,0
2023-05-01,156105,155995.0,155970.0,5,-110.0,-135.0,-0.086555,0
2023-06-01,156204,156155.0,156075.0,5,-49.0,-129.0,-0.082653,0
2023-07-01,156342,156232.0,156311.0,5,-110.0,-31.0,-0.019832,0
2023-08-01,156419,156538.0,156476.0,5,119.0,57.0,0.036427,0
2023-09-01,156874,156773.0,156738.0,5,-101.0,-136.0,-0.086769,0
2023-10-01,156923,156888.0,156843.0,5,-35.0,-80.0,-0.051006,0
2023-11-01,157087,157016.0,157014.0,4,-71.0,-73.0,-0.046493,0
2023-12-01,157232,157347.0,157304.0,4,115.0,72.0,0.045771,0
