In [8]:
import pandas as pd
from datetime import datetime
from dateutil.parser import parse

# !pip install --upgrade xlrd
# !pip install pyarrow
# !pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.0.10-py2.py3-none-any.whl (242 kB)
     ------------------------------------- 242.1/242.1 kB 14.5 MB/s eta 0:00:00
Collecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.10


In [106]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [93]:
# https://fred.stlouisfed.org/series/FEDFUNDS

df = pd.read_excel('Data/FEDFUNDS.xls', header=10, names=['date','FEDrate'])
df = df[df['date'] >= datetime(year=2017, month=1, day=1)]
df = df.reset_index(drop=True)
df['year'] = df['date'].apply(lambda x: x.year)
df['month'] = df['date'].apply(lambda x: x.month)
df.to_feather('Data/FEDFUNDS.ftr')

In [107]:
df.head()

Unnamed: 0,date,FEDrate,year,month
0,2017-01-01,0.65,2017,1
1,2017-02-01,0.66,2017,2
2,2017-03-01,0.79,2017,3
3,2017-04-01,0.9,2017,4
4,2017-05-01,0.91,2017,5


In [79]:
# https://fred.stlouisfed.org/series/EFFRVOL

df = pd.read_excel('Data/EFFRVOL.xls', header=10, names=['date', 'EFFRVol'])
df['year'] = df['date'].apply(lambda x: x.year)
df['month'] = df['date'].apply(lambda x: x.month)
df['day'] = df['date'].apply(lambda x: x.day)
df.to_feather('Data/EFFRVOL.ftr')

In [108]:
df.head()

Unnamed: 0,date,FEDrate,year,month
0,2017-01-01,0.65,2017,1
1,2017-02-01,0.66,2017,2
2,2017-03-01,0.79,2017,3
3,2017-04-01,0.9,2017,4
4,2017-05-01,0.91,2017,5


In [81]:
df = pd.read_excel('Data/SP500.xlsx', header=5)
df = df.iloc[:,0:2].copy()
df.columns = ['date','last_price']
df['year'] = df['date'].apply(lambda x: x.year)
df['month'] = df['date'].apply(lambda x: x.month)
df['day'] = df['date'].apply(lambda x: x.day)
df.to_feather('Data/SP500.ftr')

In [109]:
df.head()

Unnamed: 0,date,FEDrate,year,month
0,2017-01-01,0.65,2017,1
1,2017-02-01,0.66,2017,2
2,2017-03-01,0.79,2017,3
3,2017-04-01,0.9,2017,4
4,2017-05-01,0.91,2017,5


In [157]:
a = pd.read_feather('Data/FEDFUNDS.ftr')
b = pd.read_feather('Data/EFFRVOL.ftr')
c = pd.read_feather('Data/SP500.ftr')

# Merging datasets, filling in values of missing dates using the previous value.
joined = pd.merge_asof(c, a, on='date', by=['year', 'month'])
joined = pd.merge_asof(joined, b, on='date', by=['year', 'month', 'day'])
joined = joined.dropna()
joined = joined.reset_index(drop=True)

# There were some places where EFFRVol = 0, so I filled them with the mean of preceding and following values
for i in range(len(joined)):
    if joined['EFFRVol'][i] == 0:
        joined['EFFRVol'][i] = (joined['EFFRVol'][i-1] + joined['EFFRVol'][i+1]) / 2

# Creating a column to signal when (and how) the Fed changes rate
FEDrate_delta = []
for i in range(len(joined) - 1):
    FEDrate_delta.append(joined['FEDrate'][i+1] - joined['FEDrate'][i])
FEDrate_delta.append(0)
joined['FEDrate_delta'] = FEDrate_delta

joined = joined[['date', 'year', 'month', 'day', 'FEDrate', 'FEDrate_delta', 'EFFRVol', 'last_price']]
joined.to_feather('Data/cleaned_dataset.ftr')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  joined['EFFRVol'][i] = (joined['EFFRVol'][i-1] + joined['EFFRVol'][i+1]) / 2


In [158]:
joined.head()

Unnamed: 0,date,year,month,day,FEDrate,FEDrate_delta,EFFRVol,last_price
0,2017-01-03,2017,1,3,0.65,0.0,80.0,2257.83
1,2017-01-04,2017,1,4,0.65,0.0,79.0,2270.75
2,2017-01-05,2017,1,5,0.65,0.0,77.0,2269.0
3,2017-01-06,2017,1,6,0.65,0.0,79.0,2276.98
4,2017-01-09,2017,1,9,0.65,0.0,74.0,2268.9
