# 0. Import Required Packages

In [67]:
import math
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

import warnings
warnings.filterwarnings("ignore")

# 1. Data Wrangling
## 1.1 Predictors

In [68]:
spy = pd.read_csv('./spy.csv', parse_dates=True, index_col=0)
data = pd.read_csv('./research_dataset_2022.csv', parse_dates=True, index_col=0)
data['DEF'] = data['BAA'] - data['AAA']
data['TERM'] = data['US10YR'] - data['US3M']
# Cointegrating Residual of Consumption, Assets, and Wealth
X = data[['Consumption_new', 'Income_new']][1800:]; Y = data['Assets_new'][1800:]
LR = LinearRegression().fit(X, Y)
residual = Y - LR.predict(X)
data['CAY'] = [np.nan] * 1800 + residual.to_list()
data['VRP'] = data['trans_VIX'] - data['garch']
data['PCR'] = data['trans_spy_open'] / data['trans_SPGSCI']
# the current price versus the past 10 month simple moving average.
data['MA'] = data['trans_spy_open'] / data['trans_spy_open'].rolling(217).mean()
var = data[['DividendToPriceRatio', 'trans_PE_RATIO', 'PxToBook', 'CAPE',
            'bbyield', 'DEF', 'TERM', 'CAY', 'SIM', 'VRP', 'IC', 'trans_BDIY',
            'NOS', 'CPI', 'PCR', 'MA', 'SI', 'trans_MVOLE', 'BER', 'NAPMPRIC',
            'CATY', 'IND_PROD']]
var.rename(columns = {'DividendToPriceRatio':'DP', 'trans_PE_RATIO':'PE',
                      'PxToBook':'BM', 'bbyield':'BY', 'trans_BDIY':'BDI',
                      'trans_MVOLE':'MVOLE', 'IND_PROD':'IP'}, inplace = True)

## 1.2 Future Returns

In [69]:
def calculate_returns(ndays):
    spy[str(ndays)+'DaysReturn'] = np.nan
    for i in range(len(spy)):
        try:
            present = spy.iloc[i]['Open']
            future = spy.iloc[i+ndays]['Open']
            spy[str(ndays)+'DaysReturn'].iloc[i] = (future - present) / present
        except:
            pass

In [70]:
# 1-month return
calculate_returns(22)
var['1MReturn'] = spy['22DaysReturn']
# 3-month return
calculate_returns(65)
var['3MReturn'] = spy['65DaysReturn']
# 6-month return
calculate_returns(130)
var['6MReturn'] = spy['130DaysReturn']
# 1-year return
calculate_returns(260)
var['1YReturn'] = spy['22DaysReturn']

## 1.3 Correlation between Predictors and Future Returns

In [71]:
pd.DataFrame({'1MReturn': var[var.columns].corr()['1MReturn'][:22],
              '3MReturn': var[var.columns].corr()['3MReturn'][:22],
              '6MReturn': var[var.columns].corr()['6MReturn'][:22],
              '1YReturn': var[var.columns].corr()['1YReturn'][:22]})

Unnamed: 0,1MReturn,3MReturn,6MReturn,1YReturn
DP,0.078258,0.137155,0.230952,0.078258
PE,-0.065558,-0.102665,-0.086845,-0.065558
BM,-0.040313,-0.053655,-0.089445,-0.040313
CAPE,-0.053041,-0.078239,-0.130288,-0.053041
BY,-0.005242,0.00023,-0.000864,-0.005242
DEF,-0.03385,-0.052801,0.009482,-0.03385
TERM,-0.053682,-0.095321,-0.078274,-0.053682
CAY,-0.015589,-0.041409,-0.087364,-0.015589
SIM,0.07246,0.046447,-0.008314,0.07246
VRP,0.045568,0.115506,0.133785,0.045568


# 2. Modeling

In [80]:
var.dropna(subset=['3MReturn'], inplace=True)
# Replace NaN Values with Zeros
var = var.fillna(0)
X = var[var.columns.drop(['1MReturn', '3MReturn', '6MReturn', '1YReturn'])].to_numpy()
# Select 10 years' data as training set
X_train = X[:2599]; X_rest = X[2600:]
Y = var['3MReturn'].to_numpy()
Y_train = Y[:2599]; Y_rest = X[2600:]

## 2.1 Kitchen Sinking Regression

In [36]:
len(X_rest)
math.ceil((len(X_rest)/22))

112

In [None]:
for i in math.ceil((len(X_rest)/22)):
    