# Credit Spread Prediction â€“ Data Collection
This notebook downloads and cleans macro-financial data from FRED and Yahoo Finance.

In [2]:
!mkdir MF703_CreditSpreadProject
%cd MF703_CreditSpreadProject

!pip install pandas numpy matplotlib seaborn scikit-learn xgboost lightgbm fredapi yfinance shap statsmodels



/content/MF703_CreditSpreadProject
Collecting fredapi
  Downloading fredapi-0.5.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fredapi-0.5.2-py3-none-any.whl (11 kB)
Installing collected packages: fredapi
Successfully installed fredapi-0.5.2


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from fredapi import Fred

In [11]:
fred = Fred(api_key="2dee14a9f6222157bf414d8fd93746dc")

# Corporate yields
aaa_yield = fred.get_series('AAA')     # Moody's AAA Corporate Bond Yield
baa_yield = fred.get_series('BAA')     # Moody's BAA Corporate Bond Yield

# Treasury yield (10-year constant maturity)
treasury_10y = fred.get_series('GS10') # 10-year Treasury

# Monthly versions
aaa_m = aaa_yield.resample('M').last()
baa_m = baa_yield.resample('M').last()
t10_m = treasury_10y.resample('M').last()

# Credit spreads vs Treasuries
spread_AAA_10Y = (aaa_m - t10_m).rename('AAA_10Y_Spread')
spread_BAA_10Y = (baa_m - t10_m).rename('BAA_10Y_Spread')

# Optional: AA corporate yield
try:
    aa_yield = fred.get_series('DAAA').resample('M').last()
    spread_AA_10Y = (aa_yield - t10_m).rename('AA_10Y_Spread')
except:
    pass

# Spread between credit qualities (BAA - AAA)
credit_curve_slope = (spread_BAA_10Y - spread_AAA_10Y).rename('BAA_minus_AAA')

# Combine spreads
spread_df = pd.concat([
    spread_AAA_10Y,
    spread_BAA_10Y,
    credit_curve_slope
], axis=1).dropna()

spread_df.head(), spread_df.shape



  aaa_m = aaa_yield.resample('M').last()
  baa_m = baa_yield.resample('M').last()
  t10_m = treasury_10y.resample('M').last()
  aa_yield = fred.get_series('DAAA').resample('M').last()


(            AAA_10Y_Spread  BAA_10Y_Spread  BAA_minus_AAA
 1953-04-30            0.40            0.82           0.42
 1953-05-31            0.29            0.73           0.44
 1953-06-30            0.29            0.75           0.46
 1953-07-31            0.35            0.93           0.58
 1953-08-31            0.29            0.90           0.61,
 (871, 3))

In [10]:
df = pd.read_csv('credit_spread_dataset.csv', index_col=0, parse_dates=True)
df.head()
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 102 entries, 2000-01-31 to 2025-04-30
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Spread        102 non-null    float64
 1   VIX           102 non-null    float64
 2   GDP           102 non-null    float64
 3   Unemployment  102 non-null    float64
 4   YieldCurve    102 non-null    float64
dtypes: float64(5)
memory usage: 4.8 KB


Unnamed: 0,Spread,VIX,GDP,Unemployment,YieldCurve
count,102.0,102.0,102.0,102.0,102.0
mean,1.021961,20.084902,17635.671676,5.702941,1.091275
std,0.411796,7.933551,5515.639091,2.050185,0.987462
min,0.55,10.18,10002.179,3.4,-0.91
25%,0.78,14.6,13782.615,4.2,0.205
50%,0.925,18.47,16369.98,5.05,1.055
75%,1.1375,23.515,20888.08275,6.35,1.9175
max,3.09,59.889999,30485.729,14.8,2.84
