# Linear model for prediction of equity risk premium

This notebook follows the implementation of Rapach (2010), which is contained in the Jupyter notebook [MODEL_forecast_combining.ipynb](./MODEL_forecast_combining.ipynb).

In this notebook, we predict the equity risk premium of `IVV` and `IEF` rather than that of `SPY`, which is used in (Rapach, 2010).

We average the prediction performance of each equity to get the final prediction performance.

Table of contents:
* [Linear prediction using individual macroeconomic variables](#linear-prediction-using-individual-macroeconomic-variables)
* [Combined linear prediction](#combined-linear-prediction)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('../module')

from model import get_ts_predictions
from model import get_combined_prediction
from model import get_benchmark_of_equity_premium_prediction
from analysis import get_period_return
from data_handler import get_econ_predictors
from data_handler import get_monthly_date_format
from data_handler import get_quarterly_date_format
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit

%matplotlib inline

Load the macroeconomic predictor data and the equity return data.

In [2]:
# Sample window requested from the macroeconomic data set (year-month strings).
START_DATE, END_DATE = '2002-06', '2019-12'

# Load the monthly macroeconomic predictors for the requested window.
econ_predictors = get_econ_predictors(
    START_DATE=START_DATE,
    END_DATE=END_DATE,
    data_freq='monthly',
)

# Split the 'Equity Premium' column off so that only predictor columns
# remain in `econ_predictors`.
equity_premium = econ_predictors.pop('Equity Premium')

In [3]:
# Read the daily IVV/IEF returns; the first CSV column becomes a parsed date index.
IVV_IEF_daily_ret = pd.read_csv(
    '../../data/IVV_IEF_daily_ret.csv',
    index_col=0,
    parse_dates=True,
)

# Collapse every calendar month of daily returns into a single row using
# the project helper `get_period_return`.
IVV_IEF_monthly_ret = (
    IVV_IEF_daily_ret
    .groupby(pd.Grouper(freq='M'))
    .apply(get_period_return)
)

# Relabel the monthly index with the project's monthly date format.
# NOTE(review): presumably this makes it comparable to the predictor index - confirm.
IVV_IEF_monthly_ret.index = list(map(get_monthly_date_format, IVV_IEF_monthly_ret.index))

# One monthly return series per ticker.
equity_return_ivv, equity_return_ief = (
    IVV_IEF_monthly_ret[ticker] for ticker in ('IVV', 'IEF')
)

In [4]:
# Turn the data into a one-step-ahead prediction problem: each predictor row
# is meant to be paired with the return of the following month.
# NOTE(review): this cell overwrites its own inputs, so running it a second
# time shifts the data again; re-run the loading cells before re-running it.

# The last predictor row has no out-of-sample true value to compare against.
econ_predictors = econ_predictors.iloc[:-1]

# Drop the first return so the labels are shifted by one period (one month,
# since the returns are monthly) relative to the predictors.
equity_return_ivv = equity_return_ivv.iloc[1:]
equity_return_ief = equity_return_ief.iloc[1:]

In [5]:
# Display the predictor frame as it stands after the alignment cell dropped its last row.
econ_predictors

Unnamed: 0,Dividend Price Ratio,Dividend Yield,Earnings Price Ratio,Earnings Payout Ratio,Stock Variance,Book To Market,Net Equity Expansion,Treasury Bill,Long Term Yield,Long Term Return,Term Spread,Default Yield Spread,Default Return Spread,Inflation
2002-07,-4.045200,-4.127490,-3.488749,-0.556451,0.015343,0.282032,0.028441,0.0168,0.0544,0.0303,0.0376,0.0137,-0.0209,0.001112
2002-08,-4.055095,-4.050226,-3.454868,-0.600227,0.009409,0.284412,0.024748,0.0162,0.0510,0.0464,0.0348,0.0121,-0.0012,0.003331
2002-09,-3.943597,-4.060146,-3.301014,-0.642583,0.007366,0.324555,0.029299,0.0163,0.0480,0.0417,0.0317,0.0125,-0.0087,0.001660
2002-10,-4.020712,-3.937810,-3.411480,-0.609232,0.011590,0.293437,0.025671,0.0158,0.0508,-0.0294,0.0350,0.0141,0.0054,0.001657
2002-11,-4.070465,-4.014964,-3.495322,-0.575143,0.004449,0.276975,0.022908,0.0123,0.0521,-0.0122,0.0398,0.0131,0.0252,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-07,-3.966309,-3.953266,-3.098391,-0.867918,0.000594,0.244274,-0.012604,0.0210,0.0206,0.0024,-0.0004,0.0099,0.0060,0.001671
2019-08,-3.941330,-3.959587,-3.086025,-0.855304,0.004318,0.248539,-0.010142,0.0195,0.0163,0.0797,-0.0032,0.0089,-0.0059,-0.000051
2019-09,-3.951689,-3.934654,-3.108987,-0.842702,0.000605,0.243797,-0.010838,0.0189,0.0170,-0.0192,-0.0019,0.0088,0.0002,0.000783
2019-10,-3.965984,-3.945758,-3.112869,-0.853115,0.001510,0.242630,-0.013157,0.0165,0.0171,-0.0052,0.0006,0.0091,0.0058,0.002286


In [6]:
# Display the IVV monthly return series used as the prediction target.
equity_return_ivv

2002-08    0.004708
2002-09   -0.103629
2002-10    0.083160
2002-11    0.057272
2002-12   -0.053539
             ...   
2019-08   -0.016576
2019-09    0.019498
2019-10    0.021607
2019-11    0.036364
2019-12    0.029248
Freq: M, Name: IVV, Length: 209, dtype: float64

## Linear prediction using individual macroeconomic variables

In [9]:
# Rolling window length in periods (months): 12 months for each of 5 years.
rolling_window_size = 12 * 5

In [19]:
def _predict_with_each_predictor(equity_return):
    """Run `get_ts_predictions` once per predictor column for one return series.

    Parameters
    ----------
    equity_return : pandas.Series
        Return series used as the target `y`.

    Returns
    -------
    dict
        Maps each column name of `econ_predictors` to whatever
        `get_ts_predictions` returns for that single predictor.
    """
    target = equity_return.values
    # Everything after the first `rolling_window_size` observations is predicted.
    test_size = len(equity_return) - rolling_window_size
    predictions = {}
    for predictor in econ_predictors.columns:
        predictions[predictor] = get_ts_predictions(X=econ_predictors[predictor].values,
                                                    y=target,
                                                    X_test_size=test_size)
    return predictions


prediction_dict_ivv = _predict_with_each_predictor(equity_return_ivv)
prediction_dict_ief = _predict_with_each_predictor(equity_return_ief)

In [20]:
def _to_prediction_frame(prediction_dict, equity_return):
    """Build a frame with one column per predictor, labelled with the predicted dates.

    The row labels are the entries of `equity_return.index` that follow the
    first `rolling_window_size` observations.
    """
    frame = pd.DataFrame(prediction_dict)
    frame.index = equity_return.index[rolling_window_size:]
    return frame


prediction_df_ivv = _to_prediction_frame(prediction_dict_ivv, equity_return_ivv)
prediction_df_ief = _to_prediction_frame(prediction_dict_ief, equity_return_ief)
prediction_df_ivv

Unnamed: 0,Dividend Price Ratio,Dividend Yield,Earnings Price Ratio,Earnings Payout Ratio,Stock Variance,Book To Market,Net Equity Expansion,Treasury Bill,Long Term Yield,Long Term Return,Term Spread,Default Yield Spread,Default Return Spread,Inflation
2007-08,0.017176,0.011226,0.011464,0.009829,0.009917,0.008231,0.010987,0.008866,0.007887,0.009550,0.010216,0.009558,0.013247,0.012360
2007-09,0.016656,0.017295,0.011157,0.009885,0.010841,0.008295,0.010944,0.009286,0.010865,0.009714,0.010214,0.008937,0.010319,0.013885
2007-10,0.013483,0.017686,0.011197,0.010431,0.010240,0.009366,0.012566,0.010246,0.010946,0.010331,0.010950,0.009451,0.008575,0.009891
2007-11,0.012653,0.013728,0.010502,0.010385,0.010205,0.009597,0.013245,0.010338,0.012880,0.010393,0.011080,0.009248,0.011222,0.010621
2007-12,0.019863,0.011919,0.009563,0.009459,0.011611,0.007664,0.009550,0.009388,0.018745,0.009196,0.009749,0.009913,0.011367,0.005776
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08,0.007830,0.007770,0.008316,0.008244,0.011424,0.005692,0.006460,0.007229,0.015102,0.007876,0.010610,0.008761,0.009280,0.008257
2019-09,0.008090,0.007503,0.008171,0.008154,0.006257,0.005630,0.006999,0.007279,0.016226,0.017424,0.010203,0.009216,0.007054,0.007080
2019-10,0.007986,0.008135,0.008269,0.008241,0.011307,0.005558,0.006875,0.007429,0.016030,0.005046,0.010340,0.009333,0.008186,0.007659
2019-11,0.007833,0.007946,0.008341,0.008279,0.010147,0.005698,0.006328,0.007821,0.016112,0.006908,0.010384,0.009225,0.009228,0.008634


## Combined linear prediction

In [21]:
def _combine_individual_predictions(equity_return, prediction_df):
    """Call `get_combined_prediction` on the single-predictor forecasts of one asset.

    The true values are the entries of `equity_return` at the dates in
    `prediction_df.index`, passed as a column vector.
    """
    realised = equity_return[prediction_df.index].values.reshape(-1, 1)
    return get_combined_prediction(true_values=realised,
                                   prediction=prediction_df.values,
                                   prediction_index=prediction_df.index,
                                   holdout_size=5 * 12)


combined_prediction_ivv = _combine_individual_predictions(equity_return_ivv, prediction_df_ivv)
combined_prediction_ief = _combine_individual_predictions(equity_return_ief, prediction_df_ief)
combined_prediction_ivv

Unnamed: 0,Mean,Median,Trimmed mean,DMSPE theta 1,DMSPE theta 0.9
2012-08,0.007271,0.006628,0.006981,0.007224,0.007337
2012-09,0.007141,0.006466,0.006776,0.007122,0.007193
2012-10,0.007318,0.006775,0.006918,0.007294,0.007313
2012-11,0.007181,0.006436,0.006934,0.007140,0.007202
2012-12,0.006236,0.006561,0.005950,0.006208,0.006219
...,...,...,...,...,...
2019-08,0.008775,0.008251,0.008505,0.008769,0.008763
2019-09,0.008949,0.007796,0.008519,0.008975,0.009001
2019-10,0.008600,0.008160,0.008276,0.008587,0.008591
2019-11,0.008777,0.008310,0.008423,0.008773,0.008798


## Benchmark of linear prediction
Combine the historical average and the true values into the prediction data frame.

In [22]:
def _build_benchmark(equity_return, prediction_df):
    """Call `get_benchmark_of_equity_premium_prediction` for one asset.

    The full return series is passed as `equity_premium`; the prediction size
    and index are taken from `prediction_df`.
    """
    return get_benchmark_of_equity_premium_prediction(equity_premium=equity_return.values,
                                                      prediction_size=len(prediction_df),
                                                      holdout_size=5 * 12,
                                                      prediction_index=prediction_df.index)


benchmark_df_ivv = _build_benchmark(equity_return_ivv, prediction_df_ivv)
benchmark_df_ief = _build_benchmark(equity_return_ief, prediction_df_ief)
benchmark_df_ivv

Unnamed: 0,Historical Average,Equity Premium
2012-08,0.006093,0.024774
2012-09,0.006247,0.023375
2012-10,0.006387,-0.020222
2012-11,0.006171,0.008694
2012-12,0.006191,0.009487
...,...,...
2019-08,0.008300,-0.016576
2019-09,0.008179,0.019498
2019-10,0.008234,0.021607
2019-11,0.008298,0.036364


## Results

In [26]:
def _assemble_results(individual, combined, benchmark):
    """Join the three frames column-wise and drop every row that has a missing value.

    Only the dates for which all three frames have values survive `dropna()`.
    """
    side_by_side = pd.concat([individual, combined, benchmark], axis=1)
    return side_by_side.dropna()


prediction_total_ivv = _assemble_results(prediction_df_ivv,
                                         combined_prediction_ivv,
                                         benchmark_df_ivv)
prediction_total_ief = _assemble_results(prediction_df_ief,
                                         combined_prediction_ief,
                                         benchmark_df_ief)

In [30]:
# Display the IVV results: individual predictions, combined predictions and benchmark columns.
prediction_total_ivv

Unnamed: 0,Dividend Price Ratio,Dividend Yield,Earnings Price Ratio,Earnings Payout Ratio,Stock Variance,Book To Market,Net Equity Expansion,Treasury Bill,Long Term Yield,Long Term Return,...,Default Yield Spread,Default Return Spread,Inflation,Mean,Median,Trimmed mean,DMSPE theta 1,DMSPE theta 0.9,Historical Average,Equity Premium
2012-08,0.006250,0.007420,0.004249,0.005125,0.009501,0.006897,-0.000820,0.007287,0.018832,0.007171,...,0.004741,0.015874,0.002901,0.007271,0.006628,0.006981,0.007224,0.007337,0.006093,0.024774
2012-09,0.006421,0.007635,0.004632,0.005387,0.010810,0.007146,-0.000250,0.007557,0.018904,0.005297,...,0.005134,0.005569,0.009216,0.007141,0.006466,0.006776,0.007122,0.007193,0.006247,0.023375
2012-10,0.006557,0.007717,0.004992,0.005624,0.010483,0.007205,0.000933,0.007789,0.018507,0.004976,...,0.005656,0.006933,0.008463,0.007318,0.006775,0.006918,0.007294,0.007313,0.006387,-0.020222
2012-11,0.006401,0.007350,0.004610,0.005440,0.010360,0.007013,0.001303,0.007380,0.016030,0.005575,...,0.006471,0.012099,0.004147,0.007181,0.006436,0.006934,0.007140,0.007202,0.006171,0.008694
2012-12,0.006462,0.007950,0.004671,0.005528,0.008970,0.007072,0.002713,0.007407,0.016101,0.006660,...,0.006926,-0.000186,0.000619,0.006236,0.006561,0.005950,0.006208,0.006219,0.006191,0.009487
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08,0.007830,0.007770,0.008316,0.008244,0.011424,0.005692,0.006460,0.007229,0.015102,0.007876,...,0.008761,0.009280,0.008257,0.008775,0.008251,0.008505,0.008769,0.008763,0.008300,-0.016576
2019-09,0.008090,0.007503,0.008171,0.008154,0.006257,0.005630,0.006999,0.007279,0.016226,0.017424,...,0.009216,0.007054,0.007080,0.008949,0.007796,0.008519,0.008975,0.009001,0.008179,0.019498
2019-10,0.007986,0.008135,0.008269,0.008241,0.011307,0.005558,0.006875,0.007429,0.016030,0.005046,...,0.009333,0.008186,0.007659,0.008600,0.008160,0.008276,0.008587,0.008591,0.008234,0.021607
2019-11,0.007833,0.007946,0.008341,0.008279,0.010147,0.005698,0.006328,0.007821,0.016112,0.006908,...,0.009225,0.009228,0.008634,0.008777,0.008310,0.008423,0.008773,0.008798,0.008298,0.036364


In [1]:
# Draw one panel per column of the IEF results frame on a 7 x 3 grid.
# The frame is built in the Results cell above, which must have been run first.
subplot_options = dict(
    subplots=True,
    use_index=True,
    fontsize=10,
    layout=(7, 3),
    figsize=(40, 50),
)
prediction_total_ief.plot(**subplot_options)

NameError: name 'prediction_total_ief' is not defined

In [29]:
# Write the assembled results of both assets to CSV, IVV first and then IEF.
results_by_path = {
    '../../data/linear_prediction_ivv_2012_2019.csv': prediction_total_ivv,
    '../../data/linear_prediction_ief_2012_2019.csv': prediction_total_ief,
}
for csv_path, results in results_by_path.items():
    results.to_csv(csv_path)