In [1]:
import requests
import numpy as np
import pandas as pd
import json

import datetime

import yfinance as yf
from fredapi import Fred 

from bs4 import BeautifulSoup
import re

import seaborn as sns
import matplotlib.pyplot as plt

# Data for Final Project:

I collected my data from 3 sources: [FRED database](https://fred.stlouisfed.org/), [Office of Financial Research](https://www.financialresearch.gov/financial-stress-index/), and [Yahoo Finance](https://finance.yahoo.com/).

The steps to collect the data were different for each of the 3 sources. Once I cleaned the data from each source, I merged it into one dataframe and saved it as a CSV (all_data.csv).

Not all of the features were available on a daily timeframe, so I used forward fill (ffill) to repeat values that only exist on a weekly, monthly, or quarterly timeframe.

Most features that are not updated on a daily basis have a lag between the observation date and the date the observation is actually published. To avoid any forward-looking bias, I synchronized the values across all features based on their publishing date and labeled this date as 'model_date'.

Although my dataset is now on a daily timeframe, I will restrict my observations to one day of the week (such as Friday) to reduce noise in the data. The dates include weekends even though most values are only published on weekdays. I keep weekends so that, when I switch to a weekly basis, the time difference between observations is always the same and is not affected by holidays. In the case of a holiday, the previous day's values are assigned to the holiday.

Now that all the data is collected and cleaned, I can start to analyze and conduct feature selection.

In [2]:
# Load the merged dataset, using the 'model_date' column as the row index.
all_data = pd.read_csv('all_data_Apr28.csv', index_col='model_date')

# Show the index range plus per-column dtypes and non-null counts.
all_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7752 entries, 2003-02-07 to 2024-04-28
Data columns (total 60 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   AE                    7752 non-null   float64
 1   ASPUS                 7752 non-null   float64
 2   BAMLC0A0CM            7752 non-null   float64
 3   BAMLEMCBPIOAS         7752 non-null   float64
 4   BAMLEMRACRPIASIAOAS   7752 non-null   float64
 5   BAMLH0A0HYM2          7752 non-null   float64
 6   BAMLH0A0HYM2EY        7752 non-null   float64
 7   BAMLHE00EHYIOAS       7752 non-null   float64
 8   CIVPART               7752 non-null   float64
 9   CORESTICKM159SFRBATL  7752 non-null   float64
 10  CPIAUCSL              7752 non-null   float64
 11  CPILFESL              7752 non-null   float64
 12  CSUSHPINSA            7752 non-null   float64
 13  Credit                7752 non-null   float64
 14  DAAA                  7752 non-null   float64
 15  DFF        

In [3]:
# Dimensions of the loaded dataset as (rows, columns).
all_data.shape

(7752, 60)

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
all_data.describe()

Unnamed: 0,AE,ASPUS,BAMLC0A0CM,BAMLEMCBPIOAS,BAMLEMRACRPIASIAOAS,BAMLH0A0HYM2,BAMLH0A0HYM2EY,BAMLHE00EHYIOAS,CIVPART,CORESTICKM159SFRBATL,...,PSAVERT,RRPONTSYD,T10YIE,T5YIE,UNRATE,US,Volatility,WALCL,^GSPC,^VIX
count,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,...,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0,7752.0
mean,-0.145258,339923.826109,1.556046,3.394034,2.594453,5.186489,7.68355,5.167094,63.938287,2.563584,...,4.55783,244.704243,2.087681,1.924354,5.874497,-0.140759,-0.402171,3701084.0,2134.098025,19.093411
std,2.163126,76858.119036,0.898017,1.752037,1.312416,2.586461,2.579344,3.264253,1.603443,1.134876,...,4.243106,579.910414,0.407139,0.579814,2.044059,1.824247,1.507751,2530878.0,1138.917476,8.57402
min,-2.691,232500.0,0.79,1.44,1.01,2.41,3.92,1.78,60.2,0.66,...,-1.7,0.0,0.04,-2.24,3.4,-2.008,-2.597,714292.0,676.530029,9.14
25%,-1.515,278000.0,1.03,2.57,1.79,3.68,6.14,3.43,62.7,2.059257,...,2.6,1.43,1.83,1.61,4.4,-1.20725,-1.4,899317.5,1237.910034,13.42
50%,-0.755,322100.0,1.33,3.11,2.46,4.45,7.41,4.17,63.3,2.34,...,4.2,3.5,2.19,1.95,5.3,-0.6125,-0.668,3662035.0,1692.475037,16.76
75%,0.547,377900.0,1.66,3.81,2.94,5.92,8.4,5.66,65.9,2.67,...,5.6,102.741,2.37,2.32,7.2,0.131,0.205,4481799.0,2799.370056,21.9125
max,14.057,542900.0,6.56,13.86,9.79,21.82,23.26,23.26,66.6,6.606693,...,33.0,2553.716,3.02,3.59,14.7,13.279,9.787,8965487.0,5254.350098,82.690002


In [5]:
# Preview the first 10 rows to inspect the start of the date range.
all_data.head(10)

Unnamed: 0_level_0,AE,ASPUS,BAMLC0A0CM,BAMLEMCBPIOAS,BAMLEMRACRPIASIAOAS,BAMLH0A0HYM2,BAMLH0A0HYM2EY,BAMLHE00EHYIOAS,CIVPART,CORESTICKM159SFRBATL,...,PSAVERT,RRPONTSYD,T10YIE,T5YIE,UNRATE,US,Volatility,WALCL,^GSPC,^VIX
model_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2003-02-07,1.631,232500.0,1.7,4.65,2.26,8.39,11.5,8.11,66.3,2.8,...,4.1,2.5,1.89,1.6,5.7,2.762,1.039,720659.0,829.690002,34.009998
2003-02-08,1.719,232500.0,1.7,4.65,2.26,8.39,11.5,8.11,66.3,2.8,...,4.1,2.5,1.89,1.6,5.7,2.833,1.089,720659.0,829.690002,34.009998
2003-02-09,1.709,232500.0,1.7,4.65,2.26,8.39,11.5,8.11,66.3,2.8,...,4.1,2.5,1.89,1.6,5.7,2.878,1.09,720659.0,829.690002,34.009998
2003-02-10,1.709,232500.0,1.69,4.63,2.28,8.35,11.54,8.1,66.3,2.8,...,4.1,2.5,1.93,1.66,5.7,2.878,1.09,720659.0,835.969971,33.990002
2003-02-11,1.709,232500.0,1.69,4.6,2.25,8.37,11.52,8.04,66.3,2.8,...,4.1,2.5,1.93,1.62,5.7,2.878,1.09,720659.0,829.200012,33.68
2003-02-12,1.602,232500.0,1.69,4.6,2.29,8.5,11.59,8.13,66.3,2.8,...,4.1,2.5,1.89,1.6,5.7,2.736,1.073,720659.0,818.679993,34.330002
2003-02-13,1.501,232500.0,1.72,4.65,2.34,8.69,11.7,8.21,66.3,2.8,...,4.1,2.5,1.88,1.56,5.7,2.728,0.976,714292.0,817.369995,33.700001
2003-02-14,1.491,232500.0,1.71,4.63,2.31,8.62,11.71,8.2,66.3,2.78,...,4.1,2.5,1.92,1.62,5.7,2.751,0.997,714292.0,834.890015,32.619999
2003-02-15,1.573,232500.0,1.71,4.63,2.31,8.62,11.71,8.2,66.3,2.78,...,4.1,2.5,1.92,1.62,5.7,2.887,1.013,714292.0,834.890015,32.619999
2003-02-16,1.425,232500.0,1.71,4.63,2.31,8.62,11.71,8.2,66.3,2.78,...,4.1,2.5,1.92,1.62,5.7,2.68,0.933,714292.0,834.890015,32.619999


In [6]:
# Preview the last 10 rows to inspect the end of the date range.
all_data.tail(10)

Unnamed: 0_level_0,AE,ASPUS,BAMLC0A0CM,BAMLEMCBPIOAS,BAMLEMRACRPIASIAOAS,BAMLH0A0HYM2,BAMLH0A0HYM2EY,BAMLHE00EHYIOAS,CIVPART,CORESTICKM159SFRBATL,...,PSAVERT,RRPONTSYD,T10YIE,T5YIE,UNRATE,US,Volatility,WALCL,^GSPC,^VIX
model_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-04-19,-0.297,492300.0,0.94,1.95,1.31,3.37,8.1,3.66,62.7,4.509212,...,3.6,397.234,2.41,2.44,3.8,-0.861,-0.179,7405506.0,4967.22998,18.709999
2024-04-20,-0.407,492300.0,0.94,1.95,1.31,3.37,8.1,3.66,62.7,4.509212,...,3.6,397.234,2.41,2.44,3.8,-0.887,-0.301,7405506.0,4967.22998,18.709999
2024-04-21,-0.263,492300.0,0.94,1.95,1.31,3.37,8.1,3.66,62.7,4.509212,...,3.6,397.234,2.41,2.44,3.8,-0.857,-0.137,7405506.0,4967.22998,18.709999
2024-04-22,-0.263,492300.0,0.93,1.95,1.32,3.29,8.01,3.65,62.7,4.509212,...,3.6,409.816,2.41,2.45,3.8,-0.857,-0.137,7405506.0,5010.600098,16.940001
2024-04-23,-0.263,492300.0,0.92,1.94,1.32,3.2,7.88,3.59,62.7,4.509212,...,3.6,435.88,2.41,2.44,3.8,-0.857,-0.137,7405506.0,5070.549805,15.69
2024-04-24,-0.422,492300.0,0.92,1.93,1.31,3.19,7.9,3.55,62.7,4.509212,...,3.6,441.215,2.41,2.42,3.8,-0.959,-0.359,7405506.0,5071.629883,15.97
2024-04-25,-0.52,492300.0,0.92,1.92,1.3,3.24,8.0,3.57,62.7,4.509212,...,3.6,443.928,2.42,2.43,3.8,-1.031,-0.493,7402434.0,5048.419922,15.37
2024-04-26,-0.545,492300.0,0.92,1.92,1.3,3.24,8.0,3.57,62.7,4.509212,...,3.2,464.912,2.43,2.44,3.8,-1.005,-0.489,7402434.0,5099.959961,15.03
2024-04-27,-0.545,492300.0,0.92,1.92,1.3,3.24,8.0,3.57,62.7,4.509212,...,3.2,464.912,2.43,2.44,3.8,-1.005,-0.489,7402434.0,5099.959961,15.03
2024-04-28,-0.545,492300.0,0.92,1.92,1.3,3.24,8.0,3.57,62.7,4.509212,...,3.2,464.912,2.43,2.44,3.8,-1.005,-0.489,7402434.0,5099.959961,15.03
