In [1]:
# %pip install --upgrade xlrd
# %pip install pyarrow
# %pip install openpyxl
# %pip install yahoo_fin
# %pip install alpha_vantage
# %pip install yfinance

import os
import time
from datetime import datetime, date, timedelta
from functools import reduce

import numpy as np
import pandas as pd
import requests
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta

# import yfinance as yf
# from yahoo_fin.stock_info import get_data
# from alpha_vantage.timeseries import TimeSeries

In [2]:
#pd.set_option("display.max_rows", None, "display.max_columns", None)
# Naive local timestamp captured once per run. It is reused further down as the
# end date of the MarketWatch SPX download request and as the upper date cutoff
# applied to the joined dataset.
now = datetime.now()

In [3]:
# FRED series page: https://fred.stlouisfed.org/series/T10Y2Y/
# Download the series workbook and cache it as Data/T10Y2Y.ftr.

t10y2y_url = 'https://fred.stlouisfed.org/graph/fredgraph.xls?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1168&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=T10Y2Y&scale=left&cosd=1976-06-01&coed=2022-07-06&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Daily&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2022-07-07&revision_date=2022-07-07&nd=1976-06-01'

# header=10 makes row 10 (0-based) the header row; `names` then overrides
# whatever labels that row carries.
df = (
    pd.read_excel(t10y2y_url, header=10, names=['date', '10yr-2yr_bonds'])
    .sort_values(by="date")
    .reset_index(drop=True)  # feather needs a default RangeIndex
)
df.to_feather('Data/T10Y2Y.ftr')

In [4]:
# FRED series page: https://fred.stlouisfed.org/series/FEDFUNDS
# Download the series workbook and cache it as Data/FEDFUNDS.ftr.

fedfunds_url = 'https://fred.stlouisfed.org/graph/fredgraph.xls?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1168&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=FEDFUNDS&scale=left&cosd=1954-07-01&coed=2022-06-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2022-07-06&revision_date=2022-07-06&nd=1954-07-01'

# header=10 makes row 10 (0-based) the header row; `names` then overrides
# whatever labels that row carries.
df = (
    pd.read_excel(fedfunds_url, header=10, names=['date', 'FEDrate'])
    .sort_values(by="date")
    .reset_index(drop=True)  # feather needs a default RangeIndex
)
df.to_feather('Data/FEDFUNDS.ftr')

In [5]:
# Seed Data/SPX.ftr from the historical SPX workbook kept in Data/ as a backup.

spx_backup = pd.read_excel('Data/SPX.xlsx')
spx_backup['Date'] = spx_backup['Date'].apply(parse)

# Normalise headers: lower-case them and strip any '*' characters.
spx_backup = spx_backup.rename(columns=lambda label: label.lower().replace('*', ''))

# Daily trading range.
spx_backup['high-low'] = spx_backup['high'] - spx_backup['low']

# Keep only the columns used downstream, in chronological order.
df = (
    spx_backup[['date', 'close', 'high-low']]
    .copy()
    .sort_values(by="date")
    .reset_index(drop=True)
)
df.to_feather('Data/SPX.ftr')

In [6]:
# Extend Data/SPX.ftr with the daily SPX rows published since the last stored date.
# Source page: https://www.marketwatch.com/investing/index/spx/download-data

df_old = pd.read_feather('Data/SPX.ftr')
# Ask only for days strictly after the most recent row already on disk.
last_date = df_old['date'].max() + relativedelta(days=1)

df = pd.read_csv('https://www.marketwatch.com/investing/index/spx/downloaddatapartial?startdate={}%2000:00:00&enddate={}%2023:59:59&frequency=p1d&csvdownload=true'
                        .format(last_date.strftime('%m/%d/%Y'), 
                                now.strftime('%m/%d/%Y')))

df.columns = [col.lower() for col in df.columns]
for col in [i for i in df.columns if i != 'date']:
    # Strip thousands separators before converting. Going through astype(str)
    # keeps this working when pandas has already parsed the column as numeric
    # (no commas present), where a per-value `x.replace` would raise
    # AttributeError on floats.
    df[col] = pd.to_numeric(df[col].astype(str).str.replace(',', '', regex=False))

df['high-low'] = df['high'] - df['low']
# Vectorised parse with the same '%m/%d/%Y' format; unlike a per-row strptime
# apply, it yields a datetime64 column even when the download has no rows.
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
df = df.drop(['open','high','low'], axis=1)

# The remaining columns are relabelled positionally with the stored frame's
# column names so both frames concatenate column-for-column.
df.columns = df_old.columns
df = df.sort_values(by='date')
df = df.reset_index(drop=True)

df = pd.concat((df_old, df), ignore_index=True)
df.to_feather('Data/SPX.ftr')

In [7]:
# Build one monthly CPI table with a column per BLS category file and cache it
# as Data/CPI.ftr.
# File format documentation: https://download.bls.gov/pub/time.series/cu/cu.txt

CPI_types = ['11.USFoodBeverage', '12.USHousing', '13.USApparel', '14.USTransportation', '15.USMedical', '16.USRecreation', '17.USEducationAndCommunication', '18.USOtherGoodsAndServices', '20.USCommoditiesServicesSpecial']
CPI_dict = {}

for name in CPI_types:
    raw = pd.read_csv(f'https://download.bls.gov/pub/time.series/cu/cu.data.{name}', delimiter='\t')
    raw = raw.iloc[:, :4]
    raw.columns = ['series_id', 'year', 'month', 'CPI']

    # Keep true monthly observations only. Besides M01-M12 the period column
    # can carry M13 and, per the BLS documentation linked above, S-prefixed
    # half-year/annual codes; stripping the first character blindly would turn
    # e.g. 'S01' into month 1 and blend it into January.
    period = raw['month'].astype(str).str.strip()
    is_monthly = period.str.startswith('M') & (period != 'M13')
    raw = raw[is_monthly].copy()
    raw['month'] = period[is_monthly].str[1:].astype(int)

    # NOTE(review): coercion guards against non-numeric placeholders in the
    # value column; it is a no-op when the column is already numeric.
    raw['CPI'] = pd.to_numeric(raw['CPI'], errors='coerce')

    # Mean CPI over every series in this category file, per (year, month).
    new_name = name.split('.')[1]
    CPI_dict[name] = (
        raw.groupby(['year', 'month'])['CPI']
        .mean()
        .reset_index()
        .rename(columns={'CPI': f'CPI_{new_name}'})
    )

# Align the categories on their (year, month) keys. The category files do not
# necessarily cover the same years, so lining the frames up by row position
# would attach values to the wrong dates. Months missing from a category are NaN.
cpi_by_month = (
    pd.concat(
        [frame.set_index(['year', 'month']).iloc[:, 0].rename(key)
         for key, frame in CPI_dict.items()],
        axis=1,
    )
    .sort_index()
    .reset_index()
)

# Each observation is stamped with the first day of its month.
df = cpi_by_month.drop(columns=['year', 'month'])
df['date'] = pd.to_datetime(cpi_by_month[['year', 'month']].assign(day=1))

df = df.sort_values(by='date')
df = df.reset_index(drop=True)
df.to_feather('Data/CPI.ftr')

In [8]:
# Join every cached source in Data/ onto the SPX rows, matching each SPX date
# with the latest row of the other source at or before it (merge_asof default).

# Only real '.ftr' files are loaded, and the list is sorted so the column order
# of the result does not depend on the order os.listdir happens to return.
file_list = sorted(
    os.path.splitext(file)[0]
    for file in os.listdir('Data')
    if file.endswith('.ftr') and file != 'cleaned_dataset.ftr'
)
joining_dict = {filename: pd.read_feather(f'Data/{filename}.ftr') for filename in file_list}

# Start where the youngest source starts, so every source has data from row one.
start_date = max(frame['date'].min() for frame in joining_dict.values())
joining_dict = {key: frame[frame['date'] >= start_date] for key, frame in joining_dict.items()}

# SPX is the left-hand base of the join; remove it from the dict first so it is
# not merged onto itself.
spx = joining_dict.pop('SPX')
joined = reduce(lambda left, right: pd.merge_asof(left, right, on='date'), joining_dict.values(), spx)
joined = joined[joined['date'] <= now].reset_index(drop=True)

In [9]:
# Feature/label engineering on the joined frame, then cache the result as
# Data/cleaned_dataset.ftr.

# Trailing 7- and 15-row rolling means of the SPX close. The rolling Series
# share `joined`'s index, so they are attached directly as new columns.
spx_close = joined['close']
joined = joined.assign(**{
    'SPX_close-7': spx_close.rolling(window=7).mean(),
    'SPX_close-15': spx_close.rolling(window=15).mean(),
})
joined = joined.rename(columns={'close':'SPX_close'})

# Forward-fill gaps, then back-fill whatever is still missing at the top of
# the frame (e.g. the first rows of the rolling means).
joined = joined.ffill().bfill()

percent_change = 0.03  # fractional move (0.03 == 3%) that counts as a drop/bump
lookahead = 7          # number of following rows inspected for each row

smoothed = joined['SPX_close-7']
threshold = percent_change * smoothed
# Column k holds the smoothed value k rows ahead; rows past the end are NaN
# and are ignored by min/max below.
upcoming = pd.concat([smoothed.shift(-step) for step in range(1, lookahead + 1)], axis=1)

# Change in the Fed rate between this row and the next; the last row has no
# successor and is set to 0.
FEDrate_delta = joined['FEDrate'].shift(-1) - joined['FEDrate']
FEDrate_delta.iloc[-1:] = 0

# 1 when, within the next `lookahead` rows, the 7-row mean falls (SPX_drop) or
# rises (SPX_bump) by more than `percent_change` of its current value, else 0.
# Comparing against the window min/max is equivalent to testing each of the
# following rows in turn. The last row has no following rows and gets 0.
SPX_drop = ((smoothed - upcoming.min(axis=1)) > threshold).astype(int)
SPX_bump = ((upcoming.max(axis=1) - smoothed) > threshold).astype(int)

joined['FEDrate_delta'] = FEDrate_delta
joined['SPX_drop'] = SPX_drop
joined['SPX_bump'] = SPX_bump

joined.to_feather('Data/cleaned_dataset.ftr')

In [10]:
# Show the final dataset as the cell output (pandas elides the middle rows of a long frame).
joined

Unnamed: 0,date,SPX_close,high-low,11.USFoodBeverage,12.USHousing,13.USApparel,14.USTransportation,15.USMedical,16.USRecreation,17.USEducationAndCommunication,18.USOtherGoodsAndServices,20.USCommoditiesServicesSpecial,FEDrate,10yr-2yr_bonds,SPX_close-7,SPX_close-15,FEDrate_delta,SPX_drop,SPX_bump
0,1976-06-01,99.85,1.38,63.337333,50.389286,45.416667,30.112500,25.687500,120.066176,168.391429,59.100000,49.880435,5.48,0.68,99.360000,100.871333,0.0,0,0
1,1976-06-02,100.22,1.43,63.337333,50.389286,45.416667,30.112500,25.687500,120.066176,168.391429,59.100000,49.880435,5.48,0.71,99.360000,100.871333,0.0,0,0
2,1976-06-03,100.13,1.42,63.337333,50.389286,45.416667,30.112500,25.687500,120.066176,168.391429,59.100000,49.880435,5.48,0.70,99.360000,100.871333,0.0,0,0
3,1976-06-04,99.15,1.48,63.337333,50.389286,45.416667,30.112500,25.687500,120.066176,168.391429,59.100000,49.880435,5.48,0.77,99.360000,100.871333,0.0,0,0
4,1976-06-07,98.63,1.42,63.337333,50.389286,45.416667,30.112500,25.687500,120.066176,168.391429,59.100000,49.880435,5.48,0.79,99.360000,100.871333,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11620,2022-06-29,3818.83,37.48,254.565194,229.346117,116.256000,135.168675,222.422222,126.300000,159.750000,330.528395,157.150862,1.21,0.04,3824.662857,3828.253333,0.0,0,0
11621,2022-06-30,3785.38,80.32,254.565194,229.346117,116.256000,135.168675,222.422222,126.300000,159.750000,330.528395,157.150862,1.21,0.06,3827.604286,3806.227333,0.0,0,0
11622,2022-07-01,3825.33,77.72,254.565194,229.346117,114.302000,135.478313,223.722222,126.300000,160.271429,330.528395,157.295690,1.21,0.04,3836.952857,3793.394667,0.0,0,0
11623,2022-07-05,3831.39,90.13,254.565194,229.346117,114.302000,135.478313,223.722222,126.300000,160.271429,330.528395,157.295690,1.21,0.00,3842.047143,3788.763333,0.0,0,0
