In [66]:
import pandas as pd
from pandas.tseries.offsets import *
import numpy as np

In [67]:
# Directory layout, relative to this notebook.
resource = '../../data/'            # root folder the input files are read from
results = resource + 'generated/'   # folder the generated output is written to

In [68]:
# Load the Datastream workbook. The context manager closes the workbook's file
# handle as soon as both sheets are parsed, instead of leaving it open for the
# lifetime of the kernel.
with pd.ExcelFile(resource + 'external/datastream.xlsx') as xl:
    ind_prod = xl.parse('ind_prod', skiprows=2, parse_dates=[0])
    gold = xl.parse('gold', skiprows=2, parse_dates=[0])

# Industrial production: name the two columns and move every date to the last
# day of its month (MonthEnd(0) leaves dates already on a month end unchanged).
ind_prod = ind_prod.set_axis(['date', 'ind_prod'], axis=1)
ind_prod['date'] = ind_prod['date'] + MonthEnd(0)

# Note: no inflation series is taken from this workbook.

# Gold index: reduce to one row per calendar month by keeping the last
# non-null entry of each month, then move the date to the month end so it
# lines up with the industrial-production dates.
gold = gold.set_axis(['date', 'gold'], axis=1)
gold['month'] = gold['date'].dt.to_period('M')
gold = gold.groupby('month').last().reset_index(drop=True)
gold['date'] = gold['date'] + MonthEnd(0)

# Outer merge: keep every month that appears in either series.
datastream = pd.merge(ind_prod, gold, on='date', how='outer')

In [69]:
# Load the 'Data' sheet of the workbook from Shiller's website. The context
# manager closes the workbook's file handle once the sheet has been parsed.
with pd.ExcelFile(resource + 'external/ie_data.xls') as xl:
    ie_m = xl.parse('Data', skiprows=7)

# Keep price, dividend and CPI, renamed to the column names used in `ts`.
ie_m = ie_m[['Date', 'P', 'D', 'CPI']]
ie_m = ie_m.set_axis(['date', 'sp_price', 'sp_div', 'cpi'], axis=1)

# The date column is formatted with exactly two decimals so that it can be
# parsed as '%Y.%m' (presumably the raw values are numbers such as 1871.1,
# which must become '1871.10' to read as October), then moved to month end.
ie_m['date'] = ie_m['date'].map('{:.2f}'.format)
ie_m['date'] = pd.to_datetime(ie_m['date'], format='%Y.%m') + MonthEnd(0)

# Drop the final row.
# NOTE(review): presumably a trailing non-data row of the sheet - confirm.
ie_m = ie_m.iloc[:-1]

# Outer merge: keep every month present in either the Shiller or the
# Datastream data.
ts = pd.merge(ie_m, datastream, on='date', how='outer')

In [70]:
# Load the 3-factor data from Kenneth French's website
factor_cols = ['ex_mkt', 'smb', 'hml', 'rf']
ff_m = pd.read_csv(resource + 'external/F-F_Research_Data_Factors.CSV', skiprows=3)
ff_m = ff_m.set_axis(['date'] + factor_cols, axis=1)

# Keep only the rows above the ' Annual Factors: ...' marker row
is_annual_marker = ff_m['date'] == ' Annual Factors: January-December '
annual_start = ff_m[is_annual_marker].index[0]
ff_m = ff_m.loc[:annual_start - 1]

# Parse YYYYMM dates and move them to the last day of the month
ff_m['date'] = pd.to_datetime(ff_m['date'], format='%Y%m')
ff_m['date'] = ff_m['date'] + MonthEnd(0)

# Convert the factor columns to numbers and divide by 100
# (presumably the file quotes them in percent - confirm)
ff_m[factor_cols] = ff_m[factor_cols].apply(pd.to_numeric)
ff_m[factor_cols] = ff_m[factor_cols].div(100)

# Left merge: only months already present in `ts` are kept
ts = pd.merge(ts, ff_m, on='date', how='left')

In [71]:
# Load the momentum factor from Kenneth French's website
mom_m = pd.read_csv(resource + 'external/F-F_Momentum_Factor.CSV', skiprows=13)
mom_m = mom_m.set_axis(['date', 'mom'], axis=1)

# Keep only the rows above the 'Annual Factors:' marker row.
annual_start = mom_m[mom_m['date'] == 'Annual Factors:'].index[0]
mom_m = mom_m.loc[:annual_start - 1]

# Parse YYYYMM dates and move them to the last day of the month.
mom_m['date'] = pd.to_datetime(mom_m['date'], format='%Y%m') + MonthEnd(0)

# pd.to_numeric is vectorised, so the whole column is converted in one call
# (rather than once per element through Series.apply) before dividing by 100.
# Presumably the file quotes the factor in percent - confirm.
mom_m['mom'] = pd.to_numeric(mom_m['mom']) / 100

# Left merge: only months already present in `ts` are kept.
ts = pd.merge(ts, mom_m, on='date', how='left')

In [72]:
# Load the series pulled from FRED
fred_cols = ['DTB3', 'DGS10', 'DGS1', 'AAA', 'BAA', 'TWEXBMTH', 'TWEXBGSMTH']
fred = pd.read_csv(resource + 'pulled/fred.csv', parse_dates=['date'])
fred = fred[['date'] + fred_cols]

# TWEXBGSMTH is the only column coerced explicitly: entries that cannot be
# parsed as numbers become NaN
fred['TWEXBGSMTH'] = pd.to_numeric(fred['TWEXBGSMTH'], errors='coerce')

# NOTE(review): every selected series, including the TWEX* ones, is divided
# by 100 - confirm that this scaling is intended for all of them.
fred[fred_cols] = fred[fred_cols].div(100)

# Left merge: only months already present in `ts` are kept
ts = pd.merge(ts, fred, on='date', how='left')

In [73]:
# Load bond returns data (high yield and government) from Bloomberg. The
# context manager closes the workbook's file handle once the sheet is parsed.
with pd.ExcelFile(resource + 'external/bloomberg.xlsx') as xl:
    bl = xl.parse('data', skiprows=2, parse_dates=['Dates'])

# Name the columns, divide the three return series by 100 (presumably they are
# quoted in percent - confirm) and move the dates to the last day of the month.
ret_cols = ['long_gov_ret', 'medium_gov_ret', 'high_yd_bd_ret']
bl = bl.set_axis(['date'] + ret_cols, axis=1)
bl[ret_cols] = bl[ret_cols].div(100)
bl['date'] = bl['date'] + MonthEnd(0)

# Left merge: only months already present in `ts` are kept.
ts = pd.merge(ts, bl, on='date', how='left')

In [74]:
# Load dsv (change in the aggregate survival probability) data. The sheet has
# no header row; the context manager closes the workbook's file handle once
# the sheet is parsed.
with pd.ExcelFile(resource + 'external/DSV.xls') as xl:
    dsv = xl.parse('Sheet1', header=None)

# Name the two columns, parse the YYYYMM dates and move them to month end.
dsv = dsv.set_axis(['date', 'dsv'], axis=1)
dsv['date'] = pd.to_datetime(dsv['date'], format='%Y%m') + MonthEnd(0)

# Divide by 100 (presumably the sheet quotes the series in percent - confirm).
dsv['dsv'] = dsv['dsv'] / 100

# Left merge: only months already present in `ts` are kept.
ts = pd.merge(ts, dsv, on='date', how='left')

In [75]:
# Write the merged monthly table to the results folder, without the row index.
output_path = results + 'time_series.csv'
ts.to_csv(output_path, index=False)