In [1]:
# ============================================================
# Risk-Free Rate Construction (Treasury Yields)
# ============================================================

import pandas as pd
import numpy as np
from pathlib import Path

import warnings
warnings.filterwarnings("ignore")

In [2]:
# ============================================================
# Data directory
# ------------------------------------------------------------
# Folder used for this notebook's CSV inputs and outputs.
# ============================================================

DATA_DIR = Path("../data")

# parents=True creates any missing intermediate folders, so the cell keeps
# working if DATA_DIR is later pointed at a nested location;
# exist_ok=True makes re-running the cell a no-op.
DATA_DIR.mkdir(parents=True, exist_ok=True)

In [3]:
# ============================================================
# Download 3-Month Treasury Bill from FRED
# ------------------------------------------------------------
# Series:
#   DTB3 = 3-Month Treasury Bill (secondary market)
#
# Units:
#   Percent per annum (e.g. 0.45 = 0.45%)
#
# NOTE(review): FRED describes DTB3 as a discount-basis rate —
# confirm that is the convention downstream consumers expect.
# ============================================================

import pandas_datareader.data as web

# Calendar window requested from FRED.
START_DATE = "2010-01-01"
END_DATE   = "2023-12-31"

rf_raw = web.DataReader("DTB3", data_source="fred", start=START_DATE, end=END_DATE)

# Preview the first rows of the raw download.
rf_raw.head()

Unnamed: 0_level_0,DTB3
DATE,Unnamed: 1_level_1
2010-01-01,
2010-01-04,0.08
2010-01-05,0.07
2010-01-06,0.06
2010-01-07,0.05


In [4]:
# ============================================================
# Clean risk-free rate series
# ------------------------------------------------------------
# - Move the DATE index into a regular QUOTE_DATE column
# - Rename DTB3 to RF_RATE
# - Rescale from percent to decimal (0.08 -> 0.0008)
# ============================================================

rf = (
    rf_raw
    .reset_index()
    .rename(columns={"DATE": "QUOTE_DATE", "DTB3": "RF_RATE"})
    .assign(RF_RATE=lambda frame: frame["RF_RATE"] / 100.0)
)

rf.head()

Unnamed: 0,QUOTE_DATE,RF_RATE
0,2010-01-01,
1,2010-01-04,0.0008
2,2010-01-05,0.0007
3,2010-01-06,0.0006
4,2010-01-07,0.0005


In [5]:
# ============================================================
# Handle missing values correctly
# ------------------------------------------------------------
# - Treasury yields are business-day series
# - Options trade on trading days
# - A date without a published yield reuses the last available
#   one (forward-fill)
# ============================================================

rf = (
    rf
    .sort_values("QUOTE_DATE")
    .reset_index(drop=True)
    # Carry the last published yield forward over gaps.
    .assign(RF_RATE=lambda frame: frame["RF_RATE"].ffill())
    # Rows ahead of the first published yield have nothing to carry
    # forward and are removed.
    .dropna(subset=["RF_RATE"])
)

# Sanity check: no gaps may remain in the rate column.
assert rf["RF_RATE"].notna().all()

quote_dates = rf["QUOTE_DATE"]
print("RF date range:", quote_dates.min(), "→", quote_dates.max())

RF date range: 2010-01-04 00:00:00 → 2023-12-29 00:00:00


In [6]:
# ============================================================
# Quick sanity checks
# ------------------------------------------------------------
# Summary statistics for the cleaned frame, followed by the
# extreme rates for a quick range check.
# ============================================================

rf_rates = rf["RF_RATE"]

print(rf.describe())

print("\nMin RF:", rf_rates.min())
print("Max RF:", rf_rates.max())

                QUOTE_DATE      RF_RATE
count                 3650  3650.000000
mean   2016-12-31 12:00:00     0.009400
min    2010-01-04 00:00:00    -0.000500
25%    2013-07-03 06:00:00     0.000500
50%    2016-12-31 12:00:00     0.001500
75%    2020-06-30 18:00:00     0.015300
max    2023-12-29 00:00:00     0.053600
std                    NaN     0.014467

Min RF: -0.0005
Max RF: 0.0536


In [7]:
# ============================================================
# Save cleaned risk-free rate
# ------------------------------------------------------------
# Writes QUOTE_DATE / RF_RATE to CSV without the row index.
# ============================================================

OUT_PATH = DATA_DIR.joinpath("risk_free_rate_3m.csv")
rf.to_csv(OUT_PATH, index=False)

print("Saved risk-free rate to:", OUT_PATH, sep="\n")

Saved risk-free rate to:
../data/risk_free_rate_3m.csv


In [8]:
# ============================================================
# OPTIONAL: validate merge with SPY options
# ------------------------------------------------------------
# - Left-join the rate onto every option row by QUOTE_DATE
# - Count rows whose quote date has no exact match in the rates
# - Fill only those rows with the latest rate dated on or before
#   the quote date (as-of lookup)
# - Report what is still missing afterwards
# ============================================================

try:
    opts = pd.read_csv(
        DATA_DIR / "options_clean_SPY.csv",
        parse_dates=["QUOTE_DATE"]
    )
except FileNotFoundError:
    print("options_clean_SPY.csv not found — skipping validation")
else:
    # validate= raises if rf ever carries duplicate dates, which would
    # otherwise silently duplicate option rows in the join.
    merged = opts.merge(
        rf,
        on="QUOTE_DATE",
        how="left",
        validate="many_to_one"
    )

    # Record exact-date misses BEFORE filling; counting afterwards would
    # hide them and make this validation meaningless.
    unmatched = merged["RF_RATE"].isna()

    # Fill by date, not by file row order: look each unmatched quote date up
    # in the date-sorted rate series and take the last rate on or before it.
    rate_by_date = rf.set_index("QUOTE_DATE")["RF_RATE"].sort_index()
    merged.loc[unmatched, "RF_RATE"] = rate_by_date.reindex(
        merged.loc[unmatched, "QUOTE_DATE"],
        method="ffill"
    ).to_numpy()

    print("After merge:")
    print("Rows without an exact-date RF match:", unmatched.sum())
    print("Missing RF:", merged["RF_RATE"].isna().sum())


After merge:
Missing RF: 0
