In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Raise the pandas display limits so frames of up to 500 columns / 500 rows are shown untruncated.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

## S&P 500 Earnings Yield

From the Compustat index data; we can't use our own data because it only contains annual earnings.

In [None]:
# Read the Compustat index price data from the zipped CSV (pandas infers the compression from the extension).
compustat_index = pd.read_csv('../../data/indices/compustat_na_index_prices.zip')

In [None]:
# Keep only the rows with index key gvkeyx == 3 (presumably the S&P 500 -- TODO confirm against Compustat).
comp_sp500 = compustat_index[compustat_index["gvkeyx"] == 3]

In [None]:
# Display the filtered frame for a visual check.
comp_sp500

In [None]:
# Keep only the fields used downstream and give them readable names.
# Built as a single chain so the result is a fresh frame: renaming in place and
# assigning columns on a slice of `compustat_index` raises SettingWithCopyWarning.
comp_sp500 = (
    comp_sp500[["datadate", "prccm", "epsx12", "dvpsxm"]]
    .rename(columns={"datadate": "date", "prccm": "price", "epsx12": "eps_12m", "dvpsxm": "dvps"})
    # Parse the dates and move every observation forward by one calendar day.
    .assign(date=lambda frame: pd.to_datetime(frame["date"]) + pd.DateOffset(days=1))
)

In [None]:
# Per-row dividend yield plus 12-row rolling aggregates, and the earnings yield.
# NOTE(review): rolling(12) is a 12-row window; it equals 12 months only if the rows
# are consecutive months in date order -- confirm.
sp500_price = comp_sp500["price"]
sp500_dvps = comp_sp500["dvps"]
comp_sp500["dividend_yield"] = sp500_dvps.div(sp500_price)
comp_sp500["dvps_12m"] = sp500_dvps.rolling(window=12).sum()
comp_sp500["dividend_yield_12m"] = comp_sp500["dividend_yield"].rolling(window=12).sum()
comp_sp500["earnings_yield_12m"] = comp_sp500["eps_12m"].div(sp500_price)

In [None]:
# Plot the eps_12m column against date.
comp_sp500.plot(x="date", y=["eps_12m"])

In [None]:
# Plot the 12-row rolling sum of dividends per share against date.
comp_sp500.plot(x="date", y=["dvps_12m"])

In [None]:
# Plot earnings yield and dividend yield together on a logarithmic y-axis.
comp_sp500.plot(x="date", y=["earnings_yield_12m", "dividend_yield_12m"], logy=True)

In [None]:
# Write each S&P 500 indicator to its own two-column (date, value) parquet file.
sp500_output_paths = {
    "earnings_yield_12m": "../../data/indicators/US/sp500_earnings_yield.parquet",
    "dividend_yield_12m": "../../data/indicators/US/sp500_dividend_yield.parquet",
    "eps_12m": "../../data/indicators/US/sp500_eps.parquet",
    "dvps_12m": "../../data/indicators/US/sp500_dvps.parquet",
}
for indicator_column, output_path in sp500_output_paths.items():
    comp_sp500[["date", indicator_column]].to_parquet(output_path, index=False)

## Unemployment

https://fred.stlouisfed.org/series/UNRATE
https://www.bls.gov/bls/archived_sched.htm
Monthly; released roughly 15 days after the end of the month

In [None]:
# Load the raw unemployment CSV, rename its columns and parse the dates.
us_unemployment = (
    pd.read_csv("../../data/indicators/raw/fred_unemployment_us.csv")
    .rename(columns={"DATE": "date", "UNRATE": "unemployment"})
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

In [None]:
# Display the cleaned unemployment frame.
us_unemployment

In [None]:
# Save the unemployment series to parquet (row index not written).
us_unemployment.to_parquet("../../data/indicators/US/us_unemployment.parquet", index=False)

# Moody's Baa-Aaa credit spread

https://fred.stlouisfed.org/series/AAA
https://fred.stlouisfed.org/series/Baa
Daily

In [None]:
# Load the two Moody's yield series; each gets a "date" column and a named yield column.
moodys_Aaa = (
    pd.read_csv("../../data/indicators/raw/fred_moodys_Aaa.csv")
    .rename(columns={"DATE": "date", "AAA": "moodys_Aaa"})
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)
moodys_Baa = (
    pd.read_csv("../../data/indicators/raw/fred_moodys_Baa.csv")
    .rename(columns={"DATE": "date", "BAA": "moodys_Baa"})
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)


In [None]:
# Outer-join on date so that dates present in only one of the two series are kept (the other side is NaN).
moodys_credit = pd.merge(moodys_Aaa, moodys_Baa, on="date", how="outer")

In [None]:
# Credit spread: Baa yield minus Aaa yield.
moodys_credit["spread"] = moodys_credit["moodys_Baa"] - moodys_credit["moodys_Aaa"]

In [None]:
# Display the merged frame including the computed spread.
moodys_credit

In [None]:
# Plot the spread over time.
moodys_credit.plot(x="date", y="spread", title="Moody's Baa - Aaa spread")

In [None]:
# Save the date, both yield series and the spread to parquet (row index not written).
moodys_credit.to_parquet("../../data/indicators/US/moodys_credit.parquet", index=False)

# ISM Manufacturers index

EIKON
First business day of each month
Does not need to be changed

In [None]:
# The raw export is semicolon-delimited and uses a comma as the decimal mark.
ISM_index = pd.read_csv("../../data/indicators/raw/ISMSURVEY.csv", delimiter=";", decimal=",")

In [None]:
# Replace the verbose export headers with short column names.
ism_column_names = {
    "Name": "date",
    "US ISM MANUFACTURERS SURVEY: PRODUCTION INDEX SADJ": "ISM_prod_index",
    "US ISM MANUFACTURERS SURVEY: NEW ORDERS INDEX SADJ": "ISM_new_orders_index",
    "US ISM MANUFACTURERS SURVEY: EMPLOYMENT INDEX SADJ": "ISM_emp_index",
}
ISM_index = ISM_index.rename(columns=ism_column_names)

In [None]:
# Parse the day.month.year date strings, then keep only the date and the three index columns.
ism_parsed_dates = pd.to_datetime(ISM_index["date"], format="%d.%m.%Y")
ism_kept_columns = ["date", "ISM_prod_index", "ISM_new_orders_index", "ISM_emp_index"]
ISM_index = ISM_index.assign(date=ism_parsed_dates).loc[:, ism_kept_columns]

In [None]:
# Display the cleaned ISM frame.
ISM_index

In [None]:
# Drop observations dated before 1947-01-01.
ISM_index = ISM_index[ISM_index["date"] >= "1947-01-01"]

In [None]:
# Save the ISM indices to parquet (row index not written).
ISM_index.to_parquet("../../data/indicators/US/ISM_index.parquet", index=False)

# Initial unemployment claims (FRED)

https://fred.stlouisfed.org/series/ICSA
Approx. one week

In [None]:
# Load the raw initial-claims CSV.
initial_claims = pd.read_csv("../../data/indicators/raw/ICSA.csv")

In [None]:
# Switch to the lowercase column names used by the other indicator files.
initial_claims = initial_claims.rename(columns={"DATE": "date", "ICSA": "initial_claims"})

In [None]:
# Parse the date strings into datetimes.
initial_claims["date"] = pd.to_datetime(initial_claims["date"])

In [None]:
# Save the initial-claims series to parquet (row index not written).
initial_claims.to_parquet("../../data/indicators/US/initial_claims.parquet", index=False)

# Yields (FED)

https://www.federalreserve.gov/releases/h15/
Daily

In [None]:
# Load the raw H.15 export as-is; the real header row is promoted in a later cell.
fed_interest_rates = pd.read_csv('../../data/indicators/raw/FRB_H15.csv', sep=',',)

In [None]:
# Mapping from the raw export headers (series identifiers) to readable column names,
# listed from the shortest to the longest maturity.
fed_interest_rates_dict = dict(
    [
        ("Time Period", "date"),
        ("RIFLGFCM01_N.B", "rate_1_month"),
        ("RIFLGFCM03_N.B", "rate_3_month"),
        ("RIFLGFCM06_N.B", "rate_6_month"),
        ("RIFLGFCY01_N.B", "rate_1_year"),
        ("RIFLGFCY02_N.B", "rate_2_year"),
        ("RIFLGFCY03_N.B", "rate_3_year"),
        ("RIFLGFCY05_N.B", "rate_5_year"),
        ("RIFLGFCY07_N.B", "rate_7_year"),
        ("RIFLGFCY10_N.B", "rate_10_year"),
        ("RIFLGFCY20_N.B", "rate_20_year"),
        ("RIFLGFCY30_N.B", "rate_30_year"),
    ]
)

In [None]:
# Row 4 of the raw export carries the real column headers; the rows before it
# (presumably descriptive metadata -- confirm against the file) are dropped,
# so the data starts at row 5.
fed_interest_rates.columns = fed_interest_rates.iloc[4]
fed_interest_rates = fed_interest_rates.iloc[5:].rename(columns=fed_interest_rates_dict)

# The values are still the raw strings from the CSV. Parse them here, as is done for
# M1M2, so the parquet file stores datetimes and floats instead of text.
# Only columns named in the mapping are converted; any non-numeric placeholder in a
# rate column becomes NaN.
fed_rate_columns = [
    column
    for column in fed_interest_rates_dict.values()
    if column != "date" and column in fed_interest_rates.columns
]
fed_interest_rates = fed_interest_rates.assign(
    date=pd.to_datetime(fed_interest_rates["date"]),
    **{column: pd.to_numeric(fed_interest_rates[column], errors="coerce") for column in fed_rate_columns},
)

In [None]:
# Save the yields to parquet (row index not written).
fed_interest_rates.to_parquet("../../data/indicators/US/fed_yields.parquet", index=False)

# Federal funds (FED)

https://www.federalreserve.gov/datadownload/Download.aspx?rel=H15&series=c5025f4bbbed155a6f17c587772ed69e&filetype=csv&label=include&layout=seriescolumn&from=01/01/1919&to=04/16/2024

Changes irregularly

In [None]:
# Load the raw federal funds export as-is; the real header row is promoted in the next cell.
fed_FF = pd.read_csv('../../data/indicators/raw/FRB_H15_FF.csv', sep=',')


In [None]:
# Row 4 of the raw export carries the real column headers; the rows before it
# (presumably descriptive metadata -- confirm against the file) are dropped,
# so the data starts at row 5.
fed_FF.columns = fed_FF.iloc[4]
fed_FF = fed_FF.iloc[5:].rename(columns={"Time Period" : "date", "RIFSPFF_N.D" : "rate_fed_funds"})

# The values are still the raw strings from the CSV. Parse them here, as is done for
# M1M2, so the parquet file stores datetimes and floats instead of text.
# Any non-numeric placeholder in the rate column becomes NaN.
fed_FF = fed_FF.assign(
    date=pd.to_datetime(fed_FF["date"]),
    rate_fed_funds=pd.to_numeric(fed_FF["rate_fed_funds"], errors="coerce"),
)

In [None]:
# Display the cleaned federal funds frame.
fed_FF

In [None]:
# Save the federal funds rate to parquet (row index not written).
fed_FF.to_parquet("../../data/indicators/US/fed_funds_rate.parquet", index=False)

# REAL GDP and GNP (FRED)

https://fred.stlouisfed.org/series/GDPC1
https://fred.stlouisfed.org/series/GNPC96

Data is at the start of quarter

Assume 2 months after quarter end

https://www.bea.gov/news/archive?field_related_product_target_id=451&created_1=29&title=


In [None]:
# Load the raw real GDP CSV.
GDP = pd.read_csv('../../data/indicators/raw/REAL_GDP_B_FRED.csv', sep=',')

In [None]:
# Rename the raw columns and parse the dates.
GDP = GDP.rename(columns={"DATE": "date", "GDPC1": "real_gdp"})
GDP = GDP.assign(date=pd.to_datetime(GDP["date"]))

In [None]:
# Save real GDP to parquet (row index not written).
GDP.to_parquet("../../data/indicators/US/real_gdp.parquet", index=False)

In [None]:
# Load the raw real GNP CSV.
GNP = pd.read_csv('../../data/indicators/raw/REAL_GNP_B_FRED.csv', sep=',')

In [None]:
# Rename the raw columns and parse the dates.
GNP = GNP.rename(columns={"DATE": "date", "GNPC96": "real_gnp"})
GNP = GNP.assign(date=pd.to_datetime(GNP["date"]))

In [None]:
# Save real GNP to parquet (row index not written).
GNP.to_parquet("../../data/indicators/US/real_gnp.parquet", index=False)

# Money Supply (FED)

https://www.federalreserve.gov/datadownload/Download.aspx?rel=H6&series=c15a520be72b938dd7e44d42e744319a&filetype=csv&label=include&layout=seriescolumn&from=01/01/1959&to=04/30/2024

Monthly, released on the fourth Tuesday of every month. Add one month.

In [None]:
# Load the raw money-supply export as-is; the real header row is promoted in the next cell.
M1M2 = pd.read_csv('../../data/indicators/raw/FRB_M1M2.csv', sep=',')

In [None]:
# Promote row 4 to the header, drop rows 0-4, then rename and type the three columns.
M1M2.columns = M1M2.iloc[4]
M1M2 = M1M2.iloc[5:].rename(columns={"Time Period": "date", "M1.M": "M1", "M2.M": "M2"})
M1M2 = M1M2.assign(
    date=pd.to_datetime(M1M2["date"]),
    M1=pd.to_numeric(M1M2["M1"]),
    M2=pd.to_numeric(M1M2["M2"]),
)

In [None]:
# Save M1 and M2 to parquet (row index not written).
M1M2.to_parquet("../../data/indicators/US/M1M2.parquet", index=False)

# Consumer Price index, Inflation (FRED)

https://fred.stlouisfed.org/series/CPIAUCSL

https://www.bls.gov/bls/archived_sched.htm

Around 1 month after month end

In [None]:
# Load the raw CPI CSV.
CPI = pd.read_csv('../../data/indicators/raw/CPIAUCSL.csv', sep=',')

In [None]:
# Rename the raw columns and parse the dates.
CPI = CPI.rename(columns={"DATE": "date", "CPIAUCSL": "cpi"})
CPI = CPI.assign(date=pd.to_datetime(CPI["date"]))


In [None]:
# Save the CPI series to parquet (row index not written).
CPI.to_parquet("../../data/indicators/US/cpi.parquet", index=False)

# Consumption (FRED)

https://www.bea.gov/news/archive?field_related_product_target_id=716&created_1=29&title=

1 month + some days after month end

In [None]:
# Load the raw PCE CSV.
PCE = pd.read_csv('../../data/indicators/raw/PCE.csv', sep=',')

In [None]:
# Rename the raw columns and parse the dates.
PCE = PCE.rename(columns={"DATE": "date", "PCE": "pce"})
PCE = PCE.assign(date=pd.to_datetime(PCE["date"]))

In [None]:
# Save the PCE series to parquet (row index not written).
PCE.to_parquet("../../data/indicators/US/pce.parquet", index=False)

# Recessions NBER (FRED) https://fred.stlouisfed.org/series/USRECDM

In [None]:
# Load the raw NBER recession indicator CSV.
nber = pd.read_csv("../../data/indicators/raw/USRECDM.csv")

In [None]:
# Rename the raw columns and parse the dates, as is done for the other FRED series
# in this notebook, so the parquet file stores datetimes rather than plain strings.
nber = nber.rename(columns={"DATE" : "date", "USRECDM" : "recession"})
nber["date"] = pd.to_datetime(nber["date"])

In [None]:
# Save the recession indicator to parquet (row index not written).
nber.to_parquet("../../data/indicators/US/nber_recession.parquet", index=False)

In [None]:
# Split the dates by indicator value (1 = recession, 0 = expansion) and export each
# list as a single-column CSV.
# Uses .loc instead of chained indexing, and removes the stray doubled separator
# from the expansion output path.
nber_recessions_dates = nber.loc[nber['recession'] == 1, "date"]
nber_expansion_dates = nber.loc[nber['recession'] == 0, "date"]
nber_recessions_dates.to_frame().to_csv('../../time_periods/model_train_ready/nber_recession_dates.csv', index=False)
nber_expansion_dates.to_frame().to_csv('../../time_periods/model_train_ready/nber_expansion_dates.csv', index=False)

# VIX index (WRDS CBOE)

In [None]:
# Read the zipped VIX file with the pyarrow parser, rename the raw columns and parse the dates.
vix_column_names = {
    'Date': 'date',
    'vix': 'vix_SP500_close',
    'vxo': 'vix_SP100_close',
    'vxn': 'vix_NASDAQ_close',
    'vxd': 'vix_DJIA_close',
}
vix_data = pd.read_csv('../../data/indices/VIX.zip', engine='pyarrow').rename(columns=vix_column_names)
vix_data["date"] = pd.to_datetime(vix_data['date'])

In [None]:
# Where a date occurs more than once, keep only its first row.
vix_data = vix_data.drop_duplicates(subset=['date'], keep='first')

In [None]:
# Keep only the date and the two renamed columns vix_SP500_close and vix_SP100_close.
vix_data = vix_data[['date', 'vix_SP500_close', 'vix_SP100_close']]

In [None]:
# Write the result twice: as CSV next to the raw index data and as parquet with the other US indicators.
vix_data.to_csv('../../data/indices/VIX_daily.csv', index=False)
vix_data.to_parquet('../../data/indicators/US/VIX_daily.parquet', index=False)