# Implied Expected Return

In [1]:
from __future__ import annotations
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
from datetime import datetime
import matplotlib.pyplot as plt
import sys
sys.path.append('../module')
import re

import statsmodels.api as sm
from sklearn.covariance import LedoitWolf
from sklearn.model_selection import TimeSeriesSplit

from model import get_shrunk_covariance_matrix
from model import get_implied_expected_return
from analysis import get_return_forecast_performance
from IO_handler import post_dataframe_to_latex_table
from data_handler import get_econ_predictors
from data_handler import get_monthly_date_format, get_quarterly_date_format

Load equity return daily data and market portfolio weight daily time series data

In [2]:
# load data 
START_DATE = '1947-01'
END_DATE = '2021-12'
data_freq = 'quarterly'

date_freq_to_data_func_map = {'monthly': ('../../data/econ_predictors_monthly_2021_Amit_Goyal.csv',
                                        get_monthly_date_format), 
                            'quarterly': ('../../data/econ_predictors_quarterly_2021_Amit_Goyal.csv',
                                        get_quarterly_date_format)}
data_path, date_format_func = date_freq_to_data_func_map[data_freq]

data = pd.read_csv(data_path, index_col=0)
data.index = [date_format_func(str(x), {'format':'%Y%m'}) for x in data.index]
econ_data = data[START_DATE:END_DATE]

equity_price = econ_data['Index'].apply(lambda x: re.sub(r'[^\w\s|.]', '', x))
equity_price = equity_price.astype(float)
equity_return = equity_price.pct_change().dropna()
rf = econ_data['Rfree']

equities_returns_df = pd.DataFrame([equity_return, rf]).T.dropna()
equities_returns_df.columns = ['Stock', 'Bond']
equities_returns_df

Unnamed: 0,Stock,Bond
1947-06,0.002637,0.000950
1947-09,-0.006575,0.000950
1947-12,0.012574,0.002000
1948-03,-0.014379,0.002375
1948-06,0.110080,0.002500
...,...,...
2020-12,0.116881,0.000275
2021-03,0.057725,0.000225
2021-06,0.081706,0.000075
2021-09,0.002336,0.000100


In [3]:
# portfolio weights
W_equal = pd.DataFrame(np.ones_like(equities_returns_df)/2, index=equities_returns_df.index, columns=equities_returns_df.columns)
W_equal

Unnamed: 0,Stock,Bond
1947-06,0.5,0.5
1947-09,0.5,0.5
1947-12,0.5,0.5
1948-03,0.5,0.5
1948-06,0.5,0.5
...,...,...
2020-12,0.5,0.5
2021-03,0.5,0.5
2021-06,0.5,0.5
2021-09,0.5,0.5


Hyperparameters

In [7]:
W_efficient = W_equal.copy()
risk_free_rate = equities_returns_df['Bond'].copy()
SAMPLE_SIZE = len(equities_returns_df)
N = stock_num = len(W_efficient.columns)
l = lagrange_multiplier = risk_free_rate
gamma = risk_averse = 2.4
equity_name_list = W_efficient.columns

In [8]:
DATA_FREQUENCY = 4
INIT_WINDOW_SIZE = 17
SAMPLE_SIZE = equities_returns_df.shape[0]
tscv = TimeSeriesSplit(n_splits = SAMPLE_SIZE - DATA_FREQUENCY * INIT_WINDOW_SIZE,
                       test_size=1)

In [9]:
sigma_t = [get_shrunk_covariance_matrix(equities_returns_df.iloc[train_index]) for i, (train_index, test_index) in enumerate(tscv.split(equities_returns_df))]

Model for implied expected return
\begin{equation}
\mu=l 1+\gamma \sum w
\end{equation}
In terms of two asset, we have
$$
\left[\begin{array}{l}\mu_1 \\ \mu_2\end{array}\right]=L\left[\begin{array}{l}1 \\ 1\end{array}\right]+\gamma\left[\begin{array}{ll}\Sigma_{11} & \Sigma_{12} \\ \Sigma_{21} & \Sigma_{22}\end{array}\right]\left[\begin{array}{l}w_1 \\ w_2\end{array}\right]
$$

In [11]:
pred_size = len(sigma_t)
W_efficient = W_efficient.iloc[-pred_size:]
l = l.iloc[-pred_size:]

In [12]:
mu_implied = \
    l.values.reshape(-1, 1, 1) + \
    gamma * np.array(sigma_t).reshape((-1, N, N)) @ W_efficient.values.reshape(-1, N, 1)

In [13]:
mu_implied = pd.DataFrame(mu_implied.reshape(-1, N), columns=equity_name_list)
mu_implied = mu_implied[:-1] # remove the last forecast
mu_implied.index = l.index[1:]
mu_implied

Unnamed: 0,Stock,Bond
1964-09,0.013403,0.009055
1964-12,0.013187,0.008904
1965-03,0.013247,0.009028
1965-06,0.013966,0.009798
1965-09,0.014131,0.010021
...,...,...
2004-03,0.009172,0.002480
2004-06,0.009085,0.002375
2004-09,0.009156,0.002475
2004-12,0.009952,0.003301


In [96]:
mu_implied = \
    rf.values.reshape(-1, 1, 1) + \
    gamma * np.array(sigma_t).reshape(-1,2,2) @ W_MKTCAP_t.values.reshape(-1, 2, 1)
    
mu_implied = pd.DataFrame(mu_implied.reshape(-1, 2), columns=['IVV', 'IEF'], index=forecast_time_index)
mu_implied = mu_implied[:-1] # remove the last forecast
mu_implied

Unnamed: 0,IVV,IEF
2003-09-30,0.000817,0.000694
2003-10-31,0.000916,0.000798
2003-11-30,0.000816,0.000700
2003-12-31,0.000817,0.000699
2004-01-31,0.000918,0.000796
...,...,...
2019-07-31,0.001953,0.001780
2019-08-31,0.002053,0.001880
2019-09-30,0.001753,0.001580
2019-10-31,0.001959,0.001779


Wrap up into a function

In [14]:
def get_implied_expected_return(risk_free_rate:pd.DataFrame, 
                                equities_returns_df:pd.DataFrame,
                                W_efficient:pd.DataFrame,
                                DATA_FREQUENCY:int = 4,
                                INIT_WINDOW_SIZE:int = 17) -> pd.DataFrame:
    '''
    Returns the expected implied return for a given risk free rate and equity return matrix.

    Parameters
    ----------
    risk_free_rate : pd.DataFrame
        Risk free rate.
    equities_returns_df : pd.DataFrame
        Equity returns.
    W_efficient : pd.DataFrame
        MV-efficient portfolio weights.
    DATA_FREQUENCY : int, optional
        DESCRIPTION. The default is 12.
    INIT_WINDOW_SIZE : int, optional
        DESCRIPTION. The default is 17.

    Returns
    -------
    expected_return : pd.DataFrame

    '''
    # TO DO
    # check return df columns match weights df columns

    # hyper-parameter
    SAMPLE_SIZE = len(equities_returns_df)
    N = stock_num = len(W_efficient.columns)
    l = lagrange_multiplier = risk_free_rate
    gamma = risk_averse = 2.4
    equity_name_list = W_efficient.columns

    SAMPLE_SIZE = equities_returns_df.shape[0]
    tscv = TimeSeriesSplit(n_splits = SAMPLE_SIZE - DATA_FREQUENCY * INIT_WINDOW_SIZE,
                           test_size=1)
    sigma_t = [get_shrunk_covariance_matrix(equities_returns_df.iloc[train_index]) for i, (train_index, test_index) in enumerate(tscv.split(equities_returns_df))]

    pred_size = len(sigma_t)
    W_efficient = W_efficient.iloc[-pred_size:]
    l = l.iloc[-pred_size:]

    mu_implied = \
        l.values.reshape(-1, 1, 1) + \
        gamma * np.array(sigma_t).reshape((-1, N, N)) @ W_efficient.values.reshape(-1, N, 1)
    
    mu_implied = pd.DataFrame(mu_implied.reshape(-1, N), columns=equity_name_list)
    mu_implied = mu_implied[:-1] # remove the last forecast
    mu_implied.index = l.index[1:]
    
    return(mu_implied)
    

In [15]:
mu_implied_equal = get_implied_expected_return(risk_free_rate=equities_returns_df.Bond, equities_returns_df=equities_returns_df, W_efficient=W_equal)
mu_implied_equal.head()

Unnamed: 0,Stock,Bond
1964-09,0.013403,0.009055
1964-12,0.013187,0.008904
1965-03,0.013247,0.009028
1965-06,0.013966,0.009798
1965-09,0.014131,0.010021


In [100]:
# save the expected return forecast
mu_implied_mktcap.to_csv('../../data/implied_expected_return_MKTCAP.csv')
mu_implied_equal.to_csv('../../data/implied_expected_return_equal.csv')
mu_implied_svm.to_csv('../../data/implied_expected_return_svm.csv')
mu_implied_knn.to_csv('../../data/implied_expected_return_knn.csv')

# Implied Expected Return for Rapach Prediction Period

In [4]:
# load data 
START_DATE = '1947-01'
END_DATE = '2021-12'
data_freq = 'quarterly'

date_freq_to_data_func_map = {'monthly': ('../../data/econ_predictors_monthly_2021_Amit_Goyal.csv',
                                        get_monthly_date_format), 
                            'quarterly': ('../../data/econ_predictors_quarterly_2021_Amit_Goyal.csv',
                                        get_quarterly_date_format)}
data_path, date_format_func = date_freq_to_data_func_map[data_freq]

data = pd.read_csv(data_path, index_col=0)
data.index = [date_format_func(str(x), {'format':'%Y%m'}) for x in data.index]
econ_data = data[START_DATE:END_DATE]

equity_price = econ_data['Index'].apply(lambda x: re.sub(r'[^\w\s|.]', '', x))
equity_price = equity_price.astype(float)
equity_return = equity_price.pct_change().dropna()
rf = econ_data['Rfree']

equities_returns_df = pd.DataFrame([equity_return, rf]).T.dropna()
equities_returns_df.columns = ['Stock', 'Bond']
equities_returns_df

Unnamed: 0,Stock,Bond
1947-06,0.002637,0.000950
1947-09,-0.006575,0.000950
1947-12,0.012574,0.002000
1948-03,-0.014379,0.002375
1948-06,0.110080,0.002500
...,...,...
2020-12,0.116881,0.000275
2021-03,0.057725,0.000225
2021-06,0.081706,0.000075
2021-09,0.002336,0.000100


MV efficient portfolio

In [5]:
W_equal = pd.DataFrame(np.ones_like(equities_returns_df)/2, index=equities_returns_df.index, columns=equities_returns_df.columns)
W_equal

Unnamed: 0,Stock,Bond
1947-06,0.5,0.5
1947-09,0.5,0.5
1947-12,0.5,0.5
1948-03,0.5,0.5
1948-06,0.5,0.5
...,...,...
2020-12,0.5,0.5
2021-03,0.5,0.5
2021-06,0.5,0.5
2021-09,0.5,0.5


Calculate implied expected return for each equity within the portoflio.

In [6]:
mu_implied_equal = get_implied_expected_return(risk_free_rate=equities_returns_df.Bond, equities_returns_df=equities_returns_df, W_efficient=W_equal)
mu_implied_equal.head()

Unnamed: 0,Stock,Bond
1964-09,0.013403,0.009055
1964-12,0.013187,0.008904
1965-03,0.013247,0.009028
1965-06,0.013966,0.009798
1965-09,0.014131,0.010021


Calculate the equity premium from the implied expected return.

In [7]:
equity_premium_implied = mu_implied_equal['Stock'] - mu_implied_equal['Bond']
equity_premium_implied.head()

1964-09    0.004348
1964-12    0.004283
1965-03    0.004219
1965-06    0.004168
1965-09    0.004110
Freq: M, dtype: float64

In [None]:
equity_premium_implied.to_csv('../../data/prediction_implied_quarterly_1947_1964_2021.csv')

# Implied Expected Return for Reinforcement Learning Prediction

The RL prediction is gathered from the jupyter notebook [MODEL_RL_prediction.ipynb](MODEL_RL_prediction.ipynb).

In [3]:
# load data 
START_DATE = '1947-01'
END_DATE = '2021-12'
data_freq = 'quarterly'

date_freq_to_data_func_map = {'monthly': ('../../data/econ_predictors_monthly_2021_Amit_Goyal.csv',
                                        get_monthly_date_format), 
                            'quarterly': ('../../data/econ_predictors_quarterly_2021_Amit_Goyal.csv',
                                        get_quarterly_date_format)}
data_path, date_format_func = date_freq_to_data_func_map[data_freq]

data = pd.read_csv(data_path, index_col=0)
data.index = [date_format_func(str(x), {'format':'%Y%m'}) for x in data.index]
econ_data = data[START_DATE:END_DATE]

equity_price = econ_data['Index'].apply(lambda x: re.sub(r'[^\w\s|.]', '', x))
equity_price = equity_price.astype(float)
equity_return = equity_price.pct_change().dropna()
rf = econ_data['Rfree']

equities_returns_df = pd.DataFrame([equity_return, rf]).T.dropna()
equities_returns_df.columns = ['Stock', 'Bond']
equities_returns_df

Unnamed: 0,Stock,Bond
1947-06,0.002637,0.000950
1947-09,-0.006575,0.000950
1947-12,0.012574,0.002000
1948-03,-0.014379,0.002375
1948-06,0.110080,0.002500
...,...,...
2020-12,0.116881,0.000275
2021-03,0.057725,0.000225
2021-06,0.081706,0.000075
2021-09,0.002336,0.000100


RL efficient portfolio

In [12]:
pred_rl = pd.read_csv('../../log/pred_rl_1946-12_2021-12.csv', index_col=0, usecols=[0,1],parse_dates=True, names=['','Stock'], skiprows=[0])
pred_rl.index = pd.PeriodIndex(pred_rl.index, freq='Q')
pred_rl['Bond'] = 1 - pred_rl['Stock']
pred_rl

Unnamed: 0,Stock,Bond
,,
1965Q1,0.188341,0.811659
1965Q2,1.000000,0.000000
1965Q3,0.000000,1.000000
1965Q4,0.000000,1.000000
1966Q1,0.000000,1.000000
...,...,...
2020Q4,1.000000,0.000000
2021Q1,0.702568,0.297432
2021Q2,0.000000,1.000000


implied expected return for each equity within the RL portoflio.

In [26]:
W_efficient = pred_rl
risk_free_rate = equities_returns_df.Bond
DATA_FREQUENCY = 4
INIT_WINDOW_SIZE = 18


SAMPLE_SIZE = len(equities_returns_df)
N = stock_num = len(W_efficient.columns)
l = lagrange_multiplier = risk_free_rate
gamma = risk_averse = 2.4
equity_name_list = W_efficient.columns

In [27]:
SAMPLE_SIZE = equities_returns_df.shape[0]
tscv = TimeSeriesSplit(n_splits = SAMPLE_SIZE - DATA_FREQUENCY * INIT_WINDOW_SIZE,
                        test_size=1)
sigma_t = [get_shrunk_covariance_matrix(equities_returns_df.iloc[train_index]) for i, (train_index, test_index) in enumerate(tscv.split(equities_returns_df))]

In [31]:
for i, (train_index, test_index) in enumerate(tscv.split(equities_returns_df)):
    print(i)
    print(equities_returns_df.index[train_index])

0
PeriodIndex(['1947-06', '1947-09', '1947-12', '1948-03', '1948-06', '1948-09',
             '1948-12', '1949-03', '1949-06', '1949-09', '1949-12', '1950-03',
             '1950-06', '1950-09', '1950-12', '1951-03', '1951-06', '1951-09',
             '1951-12', '1952-03', '1952-06', '1952-09', '1952-12', '1953-03',
             '1953-06', '1953-09', '1953-12', '1954-03', '1954-06', '1954-09',
             '1954-12', '1955-03', '1955-06', '1955-09', '1955-12', '1956-03',
             '1956-06', '1956-09', '1956-12', '1957-03', '1957-06', '1957-09',
             '1957-12', '1958-03', '1958-06', '1958-09', '1958-12', '1959-03',
             '1959-06', '1959-09', '1959-12', '1960-03', '1960-06', '1960-09',
             '1960-12', '1961-03', '1961-06', '1961-09', '1961-12', '1962-03',
             '1962-06', '1962-09', '1962-12', '1963-03', '1963-06', '1963-09',
             '1963-12', '1964-03', '1964-06', '1964-09', '1964-12', '1965-03'],
            dtype='period[M]')
1
PeriodIndex(['19

In [35]:
mu_implied_rl = get_implied_expected_return(risk_free_rate=equities_returns_df.Bond, equities_returns_df=equities_returns_df, W_efficient=pred_rl, INIT_WINDOW_SIZE=18)
mu_implied_rl.head()

Unnamed: 0,Stock,Bond
1965-09,0.018512,0.009749
1965-12,0.009419,0.009953
1966-03,0.009725,0.010248
1966-06,0.010876,0.011395
1966-09,0.011392,0.011904


calculate the equity premium from the implied expected return.

In [37]:
equity_premium_implied = mu_implied_rl['Stock'] - mu_implied_rl['Bond']
equity_premium_implied.head()

1965-09    0.008763
1965-12   -0.000534
1966-03   -0.000523
1966-06   -0.000519
1966-09   -0.000512
Freq: M, dtype: float64

In [None]:
equity_premium_implied.to_csv('../../data/prediction_implied_quarterly_rl_1947_1964_2021.csv')

# Performance analyse

The following measurement is used to evaluate the performance of the implied expected return. It's originated from the paper [Implied Expected Returns and the Choice of a Mean–Variance Efficient Portfolio Proxy](https://jpm.pm-research.com/content/41/4/68). But now we use a different measurement to evaluate the performance.

### Hit ratio (HR)

$$
HR = \frac{N_c}{N}
$$

Where $N_c$ is the number of predictions' sign is aligned with the true returns' sign, and $N$ is the number of predictions.

In [23]:
N_c = (np.sign(IVV_IEF_monthly_ret_oos) == np.sign(mu_implied)).sum().sum()
N = IVV_IEF_monthly_ret_oos.count().sum()
HR = N_c / N
HR_percentage = HR * 100

### Root-Mean-Squared Forecast Error (RMSFE)

$$
RMSFE = \sqrt{\frac{\sum_{t=1}^n\left(Y_t-\hat{Y}_t\right)^2}{n}}
$$

In [24]:
RMSFE = np.sqrt(np.square(mu_implied - IVV_IEF_monthly_ret_oos).sum().mean())
RMSFE_percentage = RMSFE * 100

### Cross-Sectional Standard Deviation

$$
\sigma_i = \frac{\sum_{t=1}^T \sigma_{i,t}}{T}
$$
Where $\sigma_{i,t}$ is cross-sectional standard deviation at time $t$.

In [25]:
sigma_i = mu_implied.std(axis=1).mean()
sigma_i_percentage = sigma_i * 100

### Time Series Volatility
$$
\sigma_t = \frac{\sum_{i=1}^N \sigma_{t,i}}{N}
$$
Where $\sigma_{t,i}$ is the time series volatility of equity $i$.

In [26]:
sigma_t = mu_implied.std(axis=0).mean()
sigma_t_percentage = sigma_t * 100

### The First-order Autocorrelation

$$
\rho_1=\frac{\sum_{t=2}^T\left(r_t-\bar{r}\right)\left(r_{t-1}-\bar{r}\right)}{\sum_{t=1}^T\left(r_t-\bar{r}\right)^2}
$$

In [27]:
#calculate autocorrelations
rho_1_vector = mu_implied.apply(lambda x: sm.tsa.acf(x, nlags=1)[1], axis=0)
rho_1 = rho_1_vector.mean()
rho_1_percentage = rho_1 * 100

### $R^2$

$$
\begin{equation}
R^2 = 1-\frac{\sum_i\left(y_i-f_i\right)^2}{\sum_i\left(y_i-\bar{y}\right)^2}
\end{equation}
$$

In [35]:
ss_res = ((IVV_IEF_monthly_ret_oos - mu_implied_svm) ** 2).values.sum()
ss_tot = ((IVV_IEF_monthly_ret_oos - IVV_IEF_monthly_ret_oos.mean()) ** 2).values.sum()
R_2 = 1 - ss_res / ss_tot
R_2

-1.2315275624154625

### Performance function

In [36]:
def get_return_forecast_performance(y_hat:pd.DataFrame, y:pd.DataFrame, forecast_name:str='forecast performance') -> tuple:
    '''Evaluate the return forecast in terms of following measurement:
    1. Hit ratio (HR)
    2. Root-Mean-Squared Forecast Error (RMSFE)
    3. Cross-Sectional Standard Deviation
    4. Time Series Volatility
    5. The First-order Autocorrelation

    ----------
    Args:
    y_hat: return forecast
    y: the true value

    ----------
    Returns:
    a dataframe of five measurement
    '''

    N_c = (np.sign(y) == np.sign(y_hat)).sum().sum()
    N = y.count().sum()
    HR = N_c / N
    HR_percentage = HR * 100

    RMSFE = np.sqrt(np.square(y_hat - y).sum().mean())
    RMSFE_percentage = RMSFE * 100

    sigma_i = y_hat.std(axis=1).mean()
    sigma_i_percentage = sigma_i * 100

    sigma_t = y_hat.std(axis=0).mean()
    sigma_t_percentage = sigma_t * 100

    rho_1_vector = y_hat.apply(lambda x: sm.tsa.acf(x, nlags=1)[1], axis=0)
    rho_1 = rho_1_vector.mean()
    rho_1_percentage = rho_1 * 100

    ss_res = ((y - y_hat) ** 2).values.sum()
    ss_tot = ((y - y.mean()) ** 2).values.sum()
    R_2 = 1 - ss_res / ss_tot

    performance_df = pd.DataFrame([HR_percentage, RMSFE_percentage, sigma_i_percentage, sigma_t_percentage, rho_1_percentage, R_2], 
                                  index=['HR', ' RMSFE', 'sigma_i', 'sigma_t', 'rho_1', 'R^2'],
                                  columns=[forecast_name])

    return(performance_df)

In [101]:
performance_df_svm = get_return_forecast_performance(y_hat= mu_implied_svm, y = IVV_IEF_monthly_ret_oos, forecast_name='SVM')
performance_df_knn = get_return_forecast_performance(y_hat= mu_implied_knn, y = IVV_IEF_monthly_ret_oos, forecast_name='KNN')
performance_df_equal = get_return_forecast_performance(y_hat= mu_implied_equal, y = IVV_IEF_monthly_ret_oos, forecast_name='Equal Weight')
performance_df_mktcap = get_return_forecast_performance(y_hat= mu_implied_mktcap, y = IVV_IEF_monthly_ret_oos, forecast_name='MKTCAP')
performance_df = pd.concat([performance_df_mktcap, performance_df_equal, performance_df_svm, performance_df_knn], axis=1).T
performance_df

Unnamed: 0,HR,RMSFE,sigma_i,sigma_t,rho_1,R^2
MKTCAP,63.076923,2.942319,0.020376,0.128204,98.674953,-1.225107
Equal Weight,63.076923,2.931166,0.008763,0.129899,98.719797,-1.20827
SVM,63.076923,2.928542,0.011093,0.129917,98.632392,-1.204318
KNN,62.820513,2.937362,0.012887,0.12949,98.590325,-1.217616


In [102]:
post_dataframe_to_latex_table(performance_df, 'implied_expected_return_forecast_performance', kwargs={'header': False})

Save table to:../../table/


  df.to_latex(target_folder_path + table_name + '.tex', float_format = float_format, **kwargs)


In [40]:
performance_df.to_latex('/Users/cheng/Google Drive/PhD/Research/Tactical asset allocation/table/monthly_return_forecasts_performance_12_12.tex', float_format='%.3f')

  performance_df.to_latex('/Users/cheng/Google Drive/PhD/Research/Tactical asset allocation/table/monthly_return_forecasts_performance_12_12.tex', float_format='%.3f')
