# Credit Spread Prediction – Data Collection
This notebook downloads and cleans macro-financial data from FRED and Yahoo Finance.

In [2]:
!mkdir MF703_CreditSpreadProject
%cd MF703_CreditSpreadProject

!pip install pandas numpy matplotlib seaborn scikit-learn xgboost lightgbm fredapi yfinance shap statsmodels



/content/MF703_CreditSpreadProject
Collecting fredapi
  Downloading fredapi-0.5.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fredapi-0.5.2-py3-none-any.whl (11 kB)
Installing collected packages: fredapi
Successfully installed fredapi-0.5.2


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from fredapi import Fred

In [15]:
import pandas as pd
import yfinance as yf

# --- Corporate & Treasury Yields ---
aaa = fred.get_series('AAA').resample('M').last()
aaa.name = 'AAA_Yield'

baa = fred.get_series('BAA').resample('M').last()
baa.name = 'BAA_Yield'

t10 = fred.get_series('GS10').resample('M').last()
t10.name = 'T10Y'

spread_AAA = (aaa - t10)
spread_AAA.name = 'AAA_10Y_Spread'

spread_BAA = (baa - t10)
spread_BAA.name = 'BAA_10Y_Spread'


# --- MARKET VARIABLES ---

# VIX (daily → monthly)
vix_daily = yf.download('^VIX', start='1960-01-01')['Close']
# make sure index is tz-naive
if vix_daily.index.tz is not None:
    vix_daily = vix_daily.tz_convert(None)
vix_daily.name = 'VIX'
vix = vix_daily.resample('M').last()

# S&P500 returns (daily → monthly)
sp500_close = yf.download('^GSPC', start='1960-01-01')['Close']
if sp500_close.index.tz is not None:
    sp500_close = sp500_close.tz_convert(None)
sp500_close.name = 'SP500_Close'
sp500_ret = sp500_close.pct_change().resample('M').last()
sp500_ret.name = 'SP500_Return'


# --- MACRO / YIELD CURVE ---

t2 = fred.get_series('GS2').resample('M').last()
t2.name = 'T2Y'
yield_curve_slope = (t10 - t2)
yield_curve_slope.name = 'YieldCurveSlope'

unemp = fred.get_series('UNRATE').resample('M').last()
unemp.name = 'Unemployment'

cpi = fred.get_series('CPIAUCSL').resample('M').last()
inflation = cpi.pct_change()
inflation.name = 'Inflation'

gdp = fred.get_series('GDP').resample('M').ffill()
gdp_growth = gdp.pct_change()
gdp_growth.name = 'GDP_Growth'


# --- FIRM-LEVEL PROXIES ---

corp_leverage = fred.get_series('TCMDO').resample('M').ffill().pct_change()
corp_leverage.name = 'Corporate_Leverage'

sp_eps_daily = yf.Ticker('^GSPC').history(period="max")['Close']
# THIS was tz-aware → strip timezone
if sp_eps_daily.index.tz is not None:
    sp_eps_daily = sp_eps_daily.tz_convert(None)
sp_eps = sp_eps_daily.pct_change().resample('M').last()
sp_eps.name = 'SP500_EPS_Proxy'


# --- MERGE EVERYTHING ---

df = pd.concat([
    spread_AAA,
    spread_BAA,

    vix,
    sp500_ret,
    yield_curve_slope,

    unemp,
    inflation,
    gdp_growth,

    corp_leverage,
    sp_eps
], axis=1).dropna()

print(df.shape)
df.head()


  aaa = fred.get_series('AAA').resample('M').last()
  baa = fred.get_series('BAA').resample('M').last()
  t10 = fred.get_series('GS10').resample('M').last()
  vix_daily = yf.download('^VIX', start='1960-01-01')['Close']
[*********************100%***********************]  1 of 1 completed
  vix = vix_daily.resample('M').last()
  sp500_close = yf.download('^GSPC', start='1960-01-01')['Close']
[*********************100%***********************]  1 of 1 completed
  sp500_ret = sp500_close.pct_change().resample('M').last()
  t2 = fred.get_series('GS2').resample('M').last()
  unemp = fred.get_series('UNRATE').resample('M').last()
  cpi = fred.get_series('CPIAUCSL').resample('M').last()
  gdp = fred.get_series('GDP').resample('M').ffill()
  corp_leverage = fred.get_series('TCMDO').resample('M').ffill().pct_change()
  corp_leverage = fred.get_series('TCMDO').resample('M').ffill().pct_change()


(424, 10)


  sp_eps = sp_eps_daily.pct_change().resample('M').last()


Unnamed: 0,AAA_10Y_Spread,BAA_10Y_Spread,^VIX,^GSPC,YieldCurveSlope,Unemployment,Inflation,GDP_Growth,Corporate_Leverage,SP500_EPS_Proxy
1990-01-31,0.78,1.73,25.360001,0.018887,0.12,5.4,0.009501,0.02183,0.016624,0.018887
1990-02-28,0.75,1.67,21.99,0.004936,0.1,5.3,0.003922,0.0,0.0,0.004936
1990-03-31,0.78,1.62,19.73,-0.002494,-0.04,5.2,0.004687,0.0,0.0,-0.002494
1990-04-30,0.67,1.51,19.52,0.005135,0.07,5.4,0.002333,0.01487,0.016369,0.005135
1990-05-31,0.71,1.65,17.370001,0.001025,0.12,5.4,0.001552,0.0,0.0,0.001025
