# Credit Spread Prediction – Data Collection
This notebook downloads and cleans macro-financial data from FRED and Yahoo Finance.

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from fredapi import Fred
from curl_cffi import requests



In [22]:

# session = requests.Session(impersonate="chrome")
START_DATE = "1960-01-01"
END_DATE   = None  # or '2025-01-01'

FRED_API_KEY = "29d8cc35b9e6d25028c3de96e6782c6f"
fred = Fred(api_key=FRED_API_KEY)

# Helper to ensure tz-naive
def make_tz_naive(idx):
    if getattr(idx, "tz", None) is not None:
        return idx.tz_convert(None)
    return idx


In [23]:
aaa = fred.get_series("AAA", observation_start=START_DATE, observation_end=END_DATE)
aaa.index = pd.to_datetime(aaa.index)
# aaa = aaa.round(4)
aaa.name = "AAA_Yield"

# Moody's Seasoned Baa Corporate Bond Yield
baa = fred.get_series("BAA", observation_start=START_DATE, observation_end=END_DATE)
baa.index = pd.to_datetime(baa.index)
# baa = baa.round(4)
baa.name = "BAA_Yield"

# 10Y Treasury
t10 = fred.get_series("GS10", observation_start=START_DATE, observation_end=END_DATE)
t10.index = pd.to_datetime(t10.index)
# t10 = t10.round(4)
t10.name = "T10Y"

# Convert yields from % to decimals (optional but usually nicer)
aaa = aaa / 100.0
baa = baa / 100.0
t10 = t10 / 100.0

spread_AAA = aaa - t10
spread_AAA.name = "AAA_10Y_Spread"

spread_BAA = baa - t10
spread_BAA.name = "BAA_10Y_Spread"

# Market Variables

In [24]:
# VIX (daily → monthly)
vix_daily = yf.download("^VIX", start=START_DATE, end=END_DATE, progress=False)["Close"]
vix_daily.index = make_tz_naive(vix_daily.index)
vix_daily.name = "VIX"
vix = vix_daily
# S&P500 price and monthly returns (proper monthly returns)
sp500_daily = yf.download("^GSPC", start=START_DATE, end=END_DATE, progress=False)["Close"]
sp500_daily.index = make_tz_naive(sp500_daily.index)
sp500_daily.name = "SP500_Close"

sp500_monthly = sp500_daily
sp500_ret = sp500_monthly.pct_change()
sp500_ret.name = "SP500_Return"


1 Failed download:
['^VIX']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')

1 Failed download:
['^GSPC']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


# Macro/ Yield_curve

In [25]:
# 2Y Treasury
t2 = fred.get_series("GS2", observation_start=START_DATE, observation_end=END_DATE)
t2.index = pd.to_datetime(t2.index)
t2 = t2
t2.name = "T2Y"
t2 = t2 / 100.0  # percent to decimal

yield_curve_slope = t10 - t2
yield_curve_slope.name = "YieldCurveSlope"

# Unemployment rate (monthly already, but resample for safety)
unemp = fred.get_series("UNRATE", observation_start=START_DATE, observation_end=END_DATE)
unemp.index = pd.to_datetime(unemp.index)
unemp = unemp
unemp.name = "Unemployment"

# CPI and inflation (monthly)
cpi = fred.get_series("CPIAUCSL", observation_start=START_DATE, observation_end=END_DATE)
cpi.index = pd.to_datetime(cpi.index)
cpi = cpi
inflation = cpi.pct_change()  # month-over-month inflation
inflation.name = "Inflation"

# GDP (quarterly → monthly, forward-filled, then growth)
gdp = fred.get_series("GDP", observation_start=START_DATE, observation_end=END_DATE)
gdp.index = pd.to_datetime(gdp.index)
# Quarterly to monthly with forward-fill
gdp_m = gdp
gdp_growth = gdp_m.pct_change()
gdp_growth.name = "GDP_Growth"


# Firm Level

In [26]:
# # S&P500 EPS proxy via price growth (if you don't have real EPS)
# sp_eps_daily = yf.Ticker("^GSPC").history(period="max")["Close"]
# sp_eps_daily.index = make_tz_naive(sp_eps_daily.index)
# sp_eps_daily = sp_eps_daily[sp_eps_daily.index >= pd.to_datetime(START_DATE)]
# if END_DATE is not None:
#     sp_eps_daily = sp_eps_daily[sp_eps_daily.index <= pd.to_datetime(END_DATE)]

# sp_eps_m = sp_eps_daily.resample("M").last()
# sp_eps = sp_eps_m.pct_change()
# sp_eps.name = "SP500_EPS_Proxy"

# Corporate leverage proxy (total credit market debt / something)
corp_lev_raw = fred.get_series("TCMDO", observation_start=START_DATE, observation_end=END_DATE)
corp_lev_raw.index = pd.to_datetime(corp_lev_raw.index)
corp_lev_m = corp_lev_raw
corp_leverage = corp_lev_m.pct_change()
corp_leverage.name = "Corporate_Leverage"

In [None]:
df = pd.concat(
    [
        spread_AAA,
        # spread_BAA,
        vix,
        sp500_ret,
        yield_curve_slope,
        unemp,
        inflation,
        gdp_growth,
        corp_leverage,
        # sp_eps,
    ],
    axis=1,
)

# print(df.head())


# df.set_index('Date')
df = df.interpolate(method='linear')

# # Delete the first row 

# # Combine all into a single DataFrame
df.to_csv("data.csv")

# print(df.shape)
# print(df.head())