# Implied Expected Return

In [1]:
from __future__ import annotations
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
from datetime import datetime
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.covariance import LedoitWolf
from sklearn.model_selection import TimeSeriesSplit
sys.path.append('../module')
from model import get_shrunk_covariance_matrix
from analysis import get_return_forecast_performance
from IO_handler import post_dataframe_to_latex_table

Load equity return daily data and market portfolio weight daily time series data

In [2]:
IVV_IEF_daily_ret = pd.read_csv('../../data/IVV_IEF_daily_ret.csv', index_col=0, parse_dates=True)
W_MKTCAP_t = pd.read_csv('../../data/mktcap_w_ief_ivv.csv', index_col=0, parse_dates=True)
rf = pd.read_csv('../../data/F-F_Research_Data_Factors_Monthly.csv', usecols=[0,4],index_col=0, parse_dates=True, date_parser = lambda x: datetime.strptime(x, "%Y%m"))
w_svm = pd.read_csv('../../data/weights_svm.csv', index_col=0, parse_dates=True)
w_knn = pd.read_csv('../../data/weights_knn.csv', index_col=0, parse_dates=True)

In [3]:
# select time period for predicting
DATE_START = '2003-08-01'
DATE_END = '2019-11-30'
IVV_IEF_daily_ret = IVV_IEF_daily_ret.loc[DATE_START:DATE_END]
W_MKTCAP_t = W_MKTCAP_t[DATE_START:DATE_END]
rf = rf[DATE_START:DATE_END] / 100 # FF report rf in percentage

In [4]:
# get test data
FORECAST_DATE_START = '2003-09-30'
FORECAST_DATE_END = '2020-01-01'
forecast_time_index = pd.date_range(start=FORECAST_DATE_START, end=FORECAST_DATE_END, freq='M')
IVV_IEF_monthly_ret = IVV_IEF_daily_ret.groupby(pd.Grouper(freq='M')).mean()
IVV_IEF_monthly_ret_oos = IVV_IEF_monthly_ret.loc[forecast_time_index[:-1]] # drop the last date

In [42]:
sigma_t = IVV_IEF_daily_ret.groupby(pd.Grouper(freq='M')).apply(get_shrunk_covariance_matrix)
W_MKTCAP_t = W_MKTCAP_t.groupby(pd.Grouper(freq='M')).mean()
W_equal = pd.DataFrame(np.ones_like(W_MKTCAP_t)/2, index=W_MKTCAP_t.index, columns=W_MKTCAP_t.columns)

In [89]:
sigma_t.shape

(196,)

In [90]:
DATA_FREQUENCY = 252
WINDOW_SIZE = 5
tscv = TimeSeriesSplit(n_splits = 196,
                       test_size=20, 
                       max_train_size= DATA_FREQUENCY * WINDOW_SIZE)

In [95]:
sigma_t = [get_shrunk_covariance_matrix(IVV_IEF_daily_ret.iloc[train_index]) for i, (train_index, test_index) in enumerate(tscv.split(IVV_IEF_daily_ret))]

Model for implied expected return
\begin{equation}
\mu=l 1+\gamma \sum w
\end{equation}
In terms of two asset, we have
$$
\left[\begin{array}{l}\mu_1 \\ \mu_2\end{array}\right]=L\left[\begin{array}{l}1 \\ 1\end{array}\right]+\gamma\left[\begin{array}{ll}\Sigma_{11} & \Sigma_{12} \\ \Sigma_{21} & \Sigma_{22}\end{array}\right]\left[\begin{array}{l}w_1 \\ w_2\end{array}\right]
$$

In [20]:
# hyper-parameter
N = stock_num = 2
l = lagrange_multiplier = 0.001
gamma = risk_averse = 2.4

In [96]:
mu_implied = \
    rf.values.reshape(-1, 1, 1) + \
    gamma * np.array(sigma_t).reshape(-1,2,2) @ W_MKTCAP_t.values.reshape(-1, 2, 1)
    
mu_implied = pd.DataFrame(mu_implied.reshape(-1, 2), columns=['IVV', 'IEF'], index=forecast_time_index)
mu_implied = mu_implied[:-1] # remove the last forecast
mu_implied

Unnamed: 0,IVV,IEF
2003-09-30,0.000817,0.000694
2003-10-31,0.000916,0.000798
2003-11-30,0.000816,0.000700
2003-12-31,0.000817,0.000699
2004-01-31,0.000918,0.000796
...,...,...
2019-07-31,0.001953,0.001780
2019-08-31,0.002053,0.001880
2019-09-30,0.001753,0.001580
2019-10-31,0.001959,0.001779


Wrap up into a function

In [97]:
def get_implied_expected_return(risk_free_rate:pd.DataFrame, 
                                equity_daily_return_df:pd.DataFrame,
                                W_efficient:pd.DataFrame,
                                date_index) -> pd.DataFrame:
    '''
    Returns the expected implied return for a given risk free rate and equity daily return.

    Parameters
    ----------
    risk_free_rate : pd.DataFrame
    equity_daily_return_df : pd.DataFrame
    W_efficient : pd.DataFrame
    date_index : pd.DatetimeIndex
    
    Returns
    -------
    expected_return : pd.DataFrame

    '''
    # TO DO
    # check return df columns match weights df columns

    # hyper-parameter
    N = stock_num = len(W_efficient.columns)
    l = lagrange_multiplier = risk_free_rate
    gamma = risk_averse = 2.4
    equity_name_list = W_efficient.columns

    # sigma_t = equity_daily_return_df.groupby(pd.Grouper(freq='M')).apply(get_shrunk_covariance_matrix)
    DATA_FREQUENCY = 252
    WINDOW_SIZE = 5
    tscv = TimeSeriesSplit(n_splits = 196, test_size=20, max_train_size= DATA_FREQUENCY * WINDOW_SIZE)
    sigma_t = [get_shrunk_covariance_matrix(equity_daily_return_df.iloc[train_index]) for i, (train_index, test_index) in enumerate(tscv.split(equity_daily_return_df))]

    mu_implied = \
        l.values.reshape(-1, 1, 1) + \
        gamma * np.array(sigma_t).reshape((-1, N, N)) @ W_efficient.values.reshape(-1, N, 1)
    
    mu_implied = pd.DataFrame(mu_implied.reshape(-1, N), columns=equity_name_list, index=date_index)
    mu_implied = mu_implied[:-1] # remove the last forecast
    
    return(mu_implied)
    

In [98]:
mu_implied_mktcap = get_implied_expected_return(risk_free_rate=rf, equity_daily_return_df=IVV_IEF_daily_ret, W_efficient=W_MKTCAP_t, date_index=forecast_time_index)
mu_implied_equal = get_implied_expected_return(risk_free_rate=rf, equity_daily_return_df=IVV_IEF_daily_ret, W_efficient=W_equal, date_index=forecast_time_index)
mu_implied_svm = get_implied_expected_return(risk_free_rate=rf, equity_daily_return_df=IVV_IEF_daily_ret, W_efficient=w_svm, date_index=forecast_time_index)
mu_implied_knn = get_implied_expected_return(risk_free_rate=rf, equity_daily_return_df=IVV_IEF_daily_ret, W_efficient=w_knn, date_index=forecast_time_index)

In [100]:
# save the expected return forecast
mu_implied_mktcap.to_csv('../../data/implied_expected_return_MKTCAP.csv')
mu_implied_equal.to_csv('../../data/implied_expected_return_equal.csv')
mu_implied_svm.to_csv('../../data/implied_expected_return_svm.csv')
mu_implied_knn.to_csv('../../data/implied_expected_return_knn.csv')

## Performance analyse

### Hit ratio (HR)

$$
HR = \frac{N_c}{N}
$$

Where $N_c$ is the number of predictions' sign is aligned with the true returns' sign, and $N$ is the number of predictions.

In [23]:
N_c = (np.sign(IVV_IEF_monthly_ret_oos) == np.sign(mu_implied)).sum().sum()
N = IVV_IEF_monthly_ret_oos.count().sum()
HR = N_c / N
HR_percentage = HR * 100

### Root-Mean-Squared Forecast Error (RMSFE)

$$
RMSFE = \sqrt{\frac{\sum_{t=1}^n\left(Y_t-\hat{Y}_t\right)^2}{n}}
$$

In [24]:
RMSFE = np.sqrt(np.square(mu_implied - IVV_IEF_monthly_ret_oos).sum().mean())
RMSFE_percentage = RMSFE * 100

### Cross-Sectional Standard Deviation

$$
\sigma_i = \frac{\sum_{t=1}^T \sigma_{i,t}}{T}
$$
Where $\sigma_{i,t}$ is cross-sectional standard deviation at time $t$.

In [25]:
sigma_i = mu_implied.std(axis=1).mean()
sigma_i_percentage = sigma_i * 100

### Time Series Volatility
$$
\sigma_t = \frac{\sum_{i=1}^N \sigma_{t,i}}{N}
$$
Where $\sigma_{t,i}$ is the time series volatility of equity $i$.

In [26]:
sigma_t = mu_implied.std(axis=0).mean()
sigma_t_percentage = sigma_t * 100

### The First-order Autocorrelation

$$
\rho_1=\frac{\sum_{t=2}^T\left(r_t-\bar{r}\right)\left(r_{t-1}-\bar{r}\right)}{\sum_{t=1}^T\left(r_t-\bar{r}\right)^2}
$$

In [27]:
#calculate autocorrelations
rho_1_vector = mu_implied.apply(lambda x: sm.tsa.acf(x, nlags=1)[1], axis=0)
rho_1 = rho_1_vector.mean()
rho_1_percentage = rho_1 * 100

### $R^2$

$$
\begin{equation}
R^2 = 1-\frac{\sum_i\left(y_i-f_i\right)^2}{\sum_i\left(y_i-\bar{y}\right)^2}
\end{equation}
$$

In [35]:
ss_res = ((IVV_IEF_monthly_ret_oos - mu_implied_svm) ** 2).values.sum()
ss_tot = ((IVV_IEF_monthly_ret_oos - IVV_IEF_monthly_ret_oos.mean()) ** 2).values.sum()
R_2 = 1 - ss_res / ss_tot
R_2

-1.2315275624154625

### Performance function

In [36]:
def get_return_forecast_performance(y_hat:pd.DataFrame, y:pd.DataFrame, forecast_name:str='forecast performance') -> tuple:
    '''Evaluate the return forecast in terms of following measurement:
    1. Hit ratio (HR)
    2. Root-Mean-Squared Forecast Error (RMSFE)
    3. Cross-Sectional Standard Deviation
    4. Time Series Volatility
    5. The First-order Autocorrelation

    ----------
    Args:
    y_hat: return forecast
    y: the true value

    ----------
    Returns:
    a dataframe of five measurement
    '''

    N_c = (np.sign(y) == np.sign(y_hat)).sum().sum()
    N = y.count().sum()
    HR = N_c / N
    HR_percentage = HR * 100

    RMSFE = np.sqrt(np.square(y_hat - y).sum().mean())
    RMSFE_percentage = RMSFE * 100

    sigma_i = y_hat.std(axis=1).mean()
    sigma_i_percentage = sigma_i * 100

    sigma_t = y_hat.std(axis=0).mean()
    sigma_t_percentage = sigma_t * 100

    rho_1_vector = y_hat.apply(lambda x: sm.tsa.acf(x, nlags=1)[1], axis=0)
    rho_1 = rho_1_vector.mean()
    rho_1_percentage = rho_1 * 100

    ss_res = ((y - y_hat) ** 2).values.sum()
    ss_tot = ((y - y.mean()) ** 2).values.sum()
    R_2 = 1 - ss_res / ss_tot

    performance_df = pd.DataFrame([HR_percentage, RMSFE_percentage, sigma_i_percentage, sigma_t_percentage, rho_1_percentage, R_2], 
                                  index=['HR', ' RMSFE', 'sigma_i', 'sigma_t', 'rho_1', 'R^2'],
                                  columns=[forecast_name])

    return(performance_df)

In [101]:
performance_df_svm = get_return_forecast_performance(y_hat= mu_implied_svm, y = IVV_IEF_monthly_ret_oos, forecast_name='SVM')
performance_df_knn = get_return_forecast_performance(y_hat= mu_implied_knn, y = IVV_IEF_monthly_ret_oos, forecast_name='KNN')
performance_df_equal = get_return_forecast_performance(y_hat= mu_implied_equal, y = IVV_IEF_monthly_ret_oos, forecast_name='Equal Weight')
performance_df_mktcap = get_return_forecast_performance(y_hat= mu_implied_mktcap, y = IVV_IEF_monthly_ret_oos, forecast_name='MKTCAP')
performance_df = pd.concat([performance_df_mktcap, performance_df_equal, performance_df_svm, performance_df_knn], axis=1).T
performance_df

Unnamed: 0,HR,RMSFE,sigma_i,sigma_t,rho_1,R^2
MKTCAP,63.076923,2.942319,0.020376,0.128204,98.674953,-1.225107
Equal Weight,63.076923,2.931166,0.008763,0.129899,98.719797,-1.20827
SVM,63.076923,2.928542,0.011093,0.129917,98.632392,-1.204318
KNN,62.820513,2.937362,0.012887,0.12949,98.590325,-1.217616


In [102]:
post_dataframe_to_latex_table(performance_df, 'implied_expected_return_forecast_performance', kwargs={'header': False})

Save table to:../../table/


  df.to_latex(target_folder_path + table_name + '.tex', float_format = float_format, **kwargs)


In [40]:
performance_df.to_latex('/Users/cheng/Google Drive/PhD/Research/Tactical asset allocation/table/monthly_return_forecasts_performance_12_12.tex', float_format='%.3f')

  performance_df.to_latex('/Users/cheng/Google Drive/PhD/Research/Tactical asset allocation/table/monthly_return_forecasts_performance_12_12.tex', float_format='%.3f')
