# FINM 367 HW 7

In [272]:
import pandas as pd
import numpy as np
import os
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS


from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn import tree
from sklearn.neural_network import MLPRegressor

import warnings
warnings.filterwarnings("ignore")

pwd = os.getcwd()

## Helper Function

include:
- performance summary
- time series regression
- ...

In [273]:
def performance_summary(return_data):
    """
    Summarize performance and tail risk for each column of a return table.

    Means are scaled by 12 and standard deviations by sqrt(12), i.e. the
    annualization assumes monthly observations.

    Parameters
    ----------
    return_data : pd.DataFrame
        Periodic returns, one column per asset or strategy.

    Returns
    -------
    pd.DataFrame
        One row per input column with: Mean, Volatility, Sharpe Ratio,
        Skewness, Excess Kurtosis, VaR (0.05), CVaR (0.05), Min, Max,
        Max Drawdown, and the Peak / Bottom / Recovery index labels of the
        maximum drawdown (Recovery is missing when wealth never regains the
        pre-drawdown peak).
    """
    annual_mean = return_data.mean() * 12
    annual_vol = return_data.std() * np.sqrt(12)
    var_05 = return_data.quantile(.05, axis=0)

    summary_stats = annual_mean.to_frame('Mean')
    summary_stats['Volatility'] = annual_vol
    summary_stats['Sharpe Ratio'] = summary_stats['Mean'] / summary_stats['Volatility']

    summary_stats['Skewness'] = return_data.skew()
    summary_stats['Excess Kurtosis'] = return_data.kurtosis()
    summary_stats['VaR (0.05)'] = var_05
    # CVaR: average of the observations at or below the 5% quantile.
    summary_stats['CVaR (0.05)'] = return_data[return_data <= var_05].mean()
    summary_stats['Min'] = return_data.min()
    summary_stats['Max'] = return_data.max()

    # Drawdowns are measured on a wealth index that starts from 1000.
    wealth = 1000 * (1 + return_data).cumprod()
    running_peak = wealth.cummax()
    drawdowns = (wealth - running_peak) / running_peak

    summary_stats['Max Drawdown'] = drawdowns.min()

    peak_dates = []
    recovery_dates = []
    for col in wealth.columns:
        trough = drawdowns[col].idxmin()
        peaks_until_trough = running_peak[col][:trough]
        peak_dates.append(peaks_until_trough.idxmax())

        # First label at/after the trough where wealth is back at the old peak.
        wealth_after_trough = wealth[col][trough:]
        recovered = wealth_after_trough[wealth_after_trough >= peaks_until_trough.max()]
        recovery_dates.append(recovered.index.min())

    summary_stats['Peak'] = peak_dates
    summary_stats['Bottom'] = drawdowns.idxmin()
    summary_stats['Recovery'] = recovery_dates

    return summary_stats

In [274]:
def time_series_regression(portfolio, factors, multiple_factors = False, resid = False):
    """
    Regress every column of `portfolio` on `factors` (plus an intercept) by OLS.

    Parameters
    ----------
    portfolio : pd.DataFrame
        Left-hand-side series; one regression is run per column.
    factors : pd.Series or pd.DataFrame
        Right-hand-side regressor(s). A Series must be named; the name (or the
        DataFrame column names) labels the '<factor> beta' columns.
    multiple_factors : bool, default False
        Retained for backward compatibility only. The number of factors is
        inferred from `factors`, so any number of regressors is reported.
    resid : bool, default False
        If True, return the residuals instead of the statistics table.

    Returns
    -------
    pd.DataFrame
        If `resid` is False: one row per portfolio column with 'alpha_hat'
        (intercept x 12), one '<factor> beta' column per factor, 'info_ratio',
        'treynor_ratio' (based on the first factor's beta), 'R-squared' and
        'Tracking Error' (residual std x sqrt(12)).
        If `resid` is True: the regression residuals, one column per portfolio.
    """
    if isinstance(factors, pd.DataFrame):
        factor_names = list(factors.columns)
    else:
        factor_names = [factors.name]

    ff_report = pd.DataFrame(index=portfolio.columns)
    bm_residuals = pd.DataFrame(columns=portfolio.columns)

    rhs = sm.add_constant(factors)

    for portf in portfolio.columns:
        lhs = portfolio[portf]
        res = sm.OLS(lhs, rhs, missing='drop').fit()

        # add_constant prepends 'const', so the slopes start at position 1.
        # .iloc is used because plain integer keys on a label-indexed Series
        # are deprecated positional lookups in recent pandas.
        betas = res.params.iloc[1:]
        resid_vol = res.resid.std()

        ff_report.loc[portf, 'alpha_hat'] = res.params['const'] * 12
        for name, beta in zip(factor_names, betas):
            ff_report.loc[portf, f'{name} beta'] = beta

        ff_report.loc[portf, 'info_ratio'] = np.sqrt(12) * res.params['const'] / resid_vol
        ff_report.loc[portf, 'treynor_ratio'] = 12 * lhs.mean() / betas.iloc[0]
        ff_report.loc[portf, 'R-squared'] = res.rsquared
        ff_report.loc[portf, 'Tracking Error'] = (resid_vol*np.sqrt(12))

        if resid:
            bm_residuals[portf] = res.resid

    if resid:
        return bm_residuals

    return ff_report

In [275]:
def tangency_weights(returns, cov_mat = 1):
    """
    Compute tangency-portfolio weights, normalized to sum to one.

    Parameters
    ----------
    returns : pd.DataFrame
        Periodic returns, one column per asset. Every column is used.
    cov_mat : float, default 1
        Blend between the sample covariance matrix and its diagonal:
        `cov_mat * cov + (1 - cov_mat) * diag(cov)`. The default of 1 uses
        the full sample covariance; 0 uses only the variances.

    Returns
    -------
    pd.DataFrame
        Single column 'Tangent Weights', indexed by the columns of `returns`.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the (blended) covariance matrix is singular.
    """
    cov = returns.cov().to_numpy()
    if cov_mat != 1:
        cov = cov_mat * cov + (1 - cov_mat) * np.diag(np.diag(cov))

    # Mean and covariance are both scaled by 12, as in the rest of the notebook.
    cov_inv = np.linalg.inv(cov * 12)
    mu = returns.mean().to_numpy() * 12

    # The vector of ones must span ALL assets so it conforms with cov_inv and mu.
    ones = np.ones(len(returns.columns))
    unscaled = cov_inv @ mu
    scaling = 1 / (ones @ unscaled)

    tangency_wts = pd.DataFrame(index = returns.columns, data = scaling * unscaled, columns = ['Tangent Weights'])

    return tangency_wts

In [276]:
def OOS_r2(df, factors, start):
    """
    Out-of-sample R-squared of an expanding-window OLS forecast of df['SPY'].

    For every row position t >= start, the model is fit on rows [0, t) and
    used to predict row t. The benchmark forecast for row t is the mean of
    df['SPY'] over rows [0, t).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a 'SPY' column (the series being forecast).
    factors : pd.Series or pd.DataFrame
        Predictors, row-aligned with `df`; a constant is added here.
    start : int
        First row position that is forecast.

    Returns
    -------
    tuple
        (1 - RSS/TSS, fitted results object of the last regression).
    """
    target = df['SPY']
    design = sm.add_constant(factors)

    forecast_err = []
    null_err = []

    for t in range(len(df.index)):
        if t < start:
            continue

        # Fit only on information available strictly before row t.
        past_y = target.iloc[:t]
        reg = sm.OLS(past_y, design.iloc[:t], missing = 'drop').fit()

        realized = target.iloc[[t]]
        forecast_err.append(reg.predict(design.iloc[[t]]) - realized)
        null_err.append(past_y.mean() - realized)

    RSS = (np.array(forecast_err)**2).sum()
    TSS = (np.array(null_err)**2).sum()

    return ((1 - RSS/TSS),reg)

In [277]:
def OOS_strat(df, factors, start, weight):
    """
    Returns of a strategy that scales SPY by an expanding-window OLS forecast.

    For every row position i >= start, df['SPY'] is regressed on `factors`
    (plus a constant) using rows [0, i). The position for row i is
    `weight * forecast_i` and the strategy return is that position times
    df['SPY'] at row i.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a 'SPY' column.
    factors : pd.Series or pd.DataFrame
        Predictors, row-aligned with `df`.
    start : int
        First row position that is traded.
    weight : float
        Multiplier applied to the forecast to obtain the SPY position.

    Returns
    -------
    pd.DataFrame
        Single column 'Strat Returns', indexed by the traded rows of `df`
        (empty when no row satisfies i >= start).
    """
    returns = []
    y = df['SPY']
    X = sm.add_constant(factors)

    for i in range(len(df.index)):
        if i < start:
            continue
        reg = sm.OLS(y.iloc[:i], X.iloc[:i], missing = 'drop').fit()
        pred = reg.predict(X.iloc[[i]])
        w = pred * weight
        # `w` is a one-element Series labelled by the row's index value; use
        # .iloc for the positional read (plain [0] is a deprecated fallback).
        returns.append((y.iloc[i] * w).iloc[0])

    # Slice by an explicit offset: iloc[-0:] would select the whole frame
    # when nothing was traded.
    traded_index = df.index[len(df.index) - len(returns):]
    df_strat = pd.DataFrame(data = returns, index = traded_index, columns = ['Strat Returns'])
    return df_strat

## Sec 2

In [278]:
# Load the three sheets of the GMO workbook, each indexed by its 'date' column.
gmo_workbook = '../data/gmo_data.xlsx'

signals = pd.read_excel(gmo_workbook, sheet_name = 'signals').set_index('date')
# The risk-free sheet is divided by 12 (presumably an annualized rate converted to monthly; confirm against the data).
risk_free = pd.read_excel(gmo_workbook, sheet_name = 'risk-free rate').set_index('date') / 12
total_ret = pd.read_excel(gmo_workbook, sheet_name = 'total returns').set_index('date')

### 2.1

In [279]:
# Excess returns: every total-return column minus the per-period T-bill rate.
tbill_rate = risk_free['TBill 3M']
excess_ret = total_ret.assign(**{col: total_ret[col] - tbill_rate for col in total_ret.columns})

excess_ret.tail()

Unnamed: 0_level_0,SPY,GMWAX,GMGEX
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-06-28,0.030822,-0.011654,-0.017546
2024-07-31,0.007706,0.025997,0.030278
2024-08-30,0.019106,0.010434,0.015282
2024-09-30,0.017162,0.008589,0.011512
2024-10-31,-0.012708,-0.040693,-0.04329


In [280]:
# Annualized mean / volatility / Sharpe of GMWAX excess returns in each sub-sample.
sub_samples = {
              '1996-2011' : ['1996','2011'],
              '2012-2024' : ['2012','2024'],
              '1996-2024' : ['1996','2024'],
              }

# Collected in a dedicated list; naming it `sum` would shadow the builtin.
period_summaries = []
for k, (first_year, last_year) in sub_samples.items():
    sub_gmo = excess_ret.loc[first_year:last_year,['GMWAX']].dropna()
    period_summary = performance_summary(sub_gmo)
    period_summary.index = [k]
    period_summaries.append(period_summary)

# `summary` is reused by the tail-risk cell that follows.
summary = pd.concat(period_summaries)
summary.loc[:,['Mean','Volatility','Sharpe Ratio']]

Unnamed: 0,Mean,Volatility,Sharpe Ratio
1996-2011,0.046422,0.110499,0.42011
2012-2024,0.043423,0.094949,0.457326
1996-2024,0.045043,0.10349,0.43524


**Has the mean, vol, and Sharpe changed much since the case?**

Not much. The mean excess return fell slightly (about 4.6% to 4.3%) and volatility fell (about 11.0% to 9.5%) in 2012-2024, so the Sharpe ratio rose marginally (0.42 to 0.46). This suggests that GMO's forecasts, and the resulting asset allocation for GMWAX, held up about as well in 2012-2024 as in the previous sub-period of 1996-2011.

### 2.2

In [281]:
# Tail-risk table for GMWAX. Min and 5% VaR are taken from the excess-return
# `summary` built in the previous cell; Max Drawdown is computed here from total returns.
sub_samples = {
              '1996-2011' : ['1996','2011'],
              '2012-2024' : ['2012','2024'],
              '1996-2024' : ['1996','2024'],
              }

drawdown_frames = []
for k, (first_year, last_year) in sub_samples.items():
    sub_gmo = total_ret.loc[first_year:last_year,['GMWAX']].dropna()
    drawdown = performance_summary(sub_gmo)[['Max Drawdown']]
    drawdown.index = [k]
    drawdown_frames.append(drawdown)

mdd = pd.concat(drawdown_frames)
# Both frames are indexed by the sub-sample label, so an index join lines them up.
mdd_summary = summary[['Min','VaR (0.05)']].join(mdd, how='inner').rename_axis('Sub-Sample')
mdd_summary

Unnamed: 0_level_0,Min,VaR (0.05),Max Drawdown
Sub-Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1996-2011,-0.14915,-0.044003,-0.293614
2012-2024,-0.115018,-0.040854,-0.216795
1996-2024,-0.14915,-0.041368,-0.293614


**2.2.a) Does GMWAX have high or low tail-risk as seen by these stats**

GMWAX seems to have low tail-risk as depicted by the tail risk statistics above.

**2.2.b) Does that vary much across the two subsamples?**

Tail risk is lower in the later sub-period of 2012-2024 on all three measures. This could reflect two things: better forecasting by GMO, or a shorter sub-sample that contains fewer market downturns.

**2.3) For all three samples, regress excess returns of GMWAX on excess returns of SPY.**
- sub1 - 1996-2011
- sub2 - 2012-2024
- sub3 - 1996-2024

In [282]:
# Regress GMWAX excess returns on SPY excess returns, once per sample window.
gmwax_windows = [('1996', '2011'), ('2012', '2024'), ('1996', '2024')]

sub_1, sub_2, sub_3 = (
    time_series_regression(
        excess_ret.loc[first_year:last_year, ['GMWAX']],
        excess_ret.loc[first_year:last_year, 'SPY'],
    ).set_axis([f'GMWAX {first_year}-{last_year}'], axis=0)
    for first_year, last_year in gmwax_windows
)

reg_sub = pd.concat([sub_1,sub_2,sub_3])

**a) Report the estimated alpha, beta, and r-squared.**

In [283]:
# Market beta, annualized alpha and R-squared for each GMWAX sample window.
reg_sub[['SPY beta', 'alpha_hat', 'R-squared']]

Unnamed: 0,SPY beta,alpha_hat,R-squared
GMWAX 1996-2011,0.542128,0.027,0.648686
GMWAX 2012-2024,0.581793,-0.03396,0.748747
GMWAX 1996-2024,0.552608,0.000558,0.680167


**2.3.b) Is GMWAX a low-beta strategy? Has that changed since the case?**

GMWAX has a moderate market beta of roughly 0.55-0.60. Its beta is well below 1, so we can consider it a relatively low-beta strategy, although it is far from market-neutral. The beta remains quite stable across both sub-samples.

In [284]:
# Post-case window only, kept as a one-row table.
reg_sub.loc[['GMWAX 2012-2024'], ['SPY beta', 'alpha_hat', 'R-squared']]

Unnamed: 0,SPY beta,alpha_hat,R-squared
GMWAX 2012-2024,0.581793,-0.03396,0.748747


**2.3.c) Does GMWAX provide alpha? Has that changed across the subsamples?**

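GMWAX provides positive alpha in 1996-2011 (about 2.7% annualized), but the alpha is negative in 2012-2024 (about -3.4%) and essentially zero over the full 1996-2024 sample. The alpha has therefore not persisted since the case.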

### 2.4 Repeat of 2.1 - 2.3 for `GMGEX`

In [285]:
# Annualized mean / volatility / Sharpe of GMGEX excess returns in each sub-sample.
sub_samples = {
              '1996-2011' : ['1996','2011'],
              '2012-2024' : ['2012','2024'],
              '1996-2024' : ['1996','2024'],
              }

# Collected in a dedicated list; naming it `sum` would shadow the builtin.
period_summaries = []
for k, (first_year, last_year) in sub_samples.items():
    sub_gmo = excess_ret.loc[first_year:last_year,['GMGEX']].dropna()
    period_summary = performance_summary(sub_gmo)
    period_summary.index = [k]
    period_summaries.append(period_summary)

# `summary` is reused by the tail-risk cell that follows.
summary = pd.concat(period_summaries)
summary.loc[:,['Mean','Volatility','Sharpe Ratio']]

Unnamed: 0,Mean,Volatility,Sharpe Ratio
1996-2011,-0.003823,0.147253,-0.025963
2012-2024,0.001311,0.235554,0.005566
1996-2024,-0.001463,0.192622,-0.007595


**Has the mean, vol, and Sharpe changed much since the case?**

The mean excess return increased slightly during 2012-2024 (from about -0.4% to 0.1%), but volatility also increased substantially (from about 14.7% to 23.6%), hence all the Sharpe Ratios are close to 0. Unlike GMWAX, these statistics give little evidence that GMO's forecasts and asset allocations for GMGEX worked in either sub-period.

In [286]:
# Tail-risk table for GMGEX. Min and 5% VaR are taken from the excess-return
# `summary` built in the previous cell; Max Drawdown is computed here from total returns.
sub_samples = {
              '1996-2011' : ['1996','2011'],
              '2012-2024' : ['2012','2024'],
              '1996-2024' : ['1996','2024'],
              }

drawdown_frames = []
for k, (first_year, last_year) in sub_samples.items():
    sub_gmo = total_ret.loc[first_year:last_year,['GMGEX']].dropna()
    drawdown = performance_summary(sub_gmo)[['Max Drawdown']]
    drawdown.index = [k]
    drawdown_frames.append(drawdown)

mdd = pd.concat(drawdown_frames)
# Both frames are indexed by the sub-sample label, so an index join lines them up.
mdd_summary = summary[['Min','VaR (0.05)']].join(mdd, how='inner').rename_axis('Sub-Sample')
mdd_summary

Unnamed: 0_level_0,Min,VaR (0.05),Max Drawdown
Sub-Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1996-2011,-0.151592,-0.082292,-0.55563
2012-2024,-0.658863,-0.068027,-0.737364
1996-2024,-0.658863,-0.076213,-0.761812


In [287]:
# Regress GMGEX excess returns on SPY excess returns, once per sample window.
gmgex_windows = [('1996', '2011'), ('2012', '2024'), ('1996', '2024')]

sub_1, sub_2, sub_3 = (
    time_series_regression(
        excess_ret.loc[first_year:last_year, ['GMGEX']],
        excess_ret.loc[first_year:last_year, 'SPY'],
    ).set_axis([f'GMGEX {first_year}-{last_year}'], axis=0)
    for first_year, last_year in gmgex_windows
)

reg_sub = pd.concat([sub_1,sub_2,sub_3])

**a) Report the estimated alpha, beta, and r-squared.**

In [288]:
# Market beta, annualized alpha and R-squared for each GMGEX sample window.
reg_sub[['SPY beta', 'alpha_hat', 'R-squared']]

Unnamed: 0,SPY beta,alpha_hat,R-squared
GMGEX 1996-2011,0.764237,-0.031201,0.725898
GMGEX 2012-2024,0.838118,-0.110164,0.252468
GMGEX 1996-2024,0.786683,-0.06479,0.397891


**2.2.a) Does GMGEX have high or low tail-risk as seen by these stats**

GMGEX has considerably higher tail risk than GMWAX: its 5% VaR and maximum drawdown are roughly twice as large.

**2.2.b) Does that vary much across the two subsamples?**

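Tail risk is lower in the earlier sub-period of 1996-2011. In 2012-2024 the worst month (about -65.9%) and the maximum drawdown (about -73.7%) are far more severe, even though the 5% VaR is slightly milder. It seems that GMGEX's management of tail risk was not effective.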

**2.3) For all three samples, regress excess returns of GMGEX on excess returns of SPY.**
- sub1 - 1996-2011
- sub2 - 2012-2024
- sub3 - 1996-2024

In [289]:
# Post-case window only, kept as a one-row table.
reg_sub.loc[['GMGEX 2012-2024'], ['SPY beta', 'alpha_hat', 'R-squared']]

Unnamed: 0,SPY beta,alpha_hat,R-squared
GMGEX 2012-2024,0.838118,-0.110164,0.252468


**2.3.c) Does GMGEX provide alpha? Has that changed across the subsamples?**

GMGEX does not provide alpha in either subsample as alpha is negative.

Comparisons:

With a higher beta, GMGEX is more sensitive to market movements, whereas GMWAX demonstrates stronger alpha and lower beta, highlighting its relatively independent performance. GMGEX also offers worse protection against tail risk (larger VaR and maximum drawdown), and its higher volatility detracts from its overall risk-adjusted returns, as reflected in weaker Sharpe ratios.

Additionally, GMWAX excels with better Sharpe ratios, driven by its lower volatility and stable performance.

## Sec 3

### 3.1

In [290]:
# In-sample forecasting regressions: SPY total return on one-period-lagged signals.
SPY = total_ret[['SPY']]
signals.columns = ['DP', 'EP', 'US10Y']

# (a) dividend-price ratio only
signal_1 = ['DP']
factor_1 = signals[signal_1].shift(1).squeeze()
signal_reg_1 = time_series_regression(SPY, factor_1, multiple_factors=False, resid=False)
signal_reg_1.index = ['DP']

# (b) earnings-price ratio only
signal_2 = ['EP']
factor_2 = signals[signal_2].shift(1).squeeze()
signal_reg_2 = time_series_regression(SPY, factor_2, multiple_factors=False, resid=False)
signal_reg_2.index = ['EP']

# (c) all three signals together
signal_3 = ['DP', 'EP', 'US10Y']
factor_3 = signals[signal_3].shift(1)
signal_reg_3 = time_series_regression(SPY, factor_3, multiple_factors=True, resid=False)
signal_reg_3.index = ['DP,EP,US10Y']

display(signal_reg_1, signal_reg_2, signal_reg_3)

Unnamed: 0,alpha_hat,DP beta,info_ratio,treynor_ratio,R-squared,Tracking Error
DP,-0.132069,0.0108,-0.858926,9.45028,0.009422,0.153761


Unnamed: 0,alpha_hat,EP beta,info_ratio,treynor_ratio,R-squared,Tracking Error
EP,0.256374,-0.000646,1.662722,-158.084886,0.0039,0.154189


Unnamed: 0,alpha_hat,DP beta,EP beta,US10Y beta,info_ratio,treynor_ratio,R-squared,Tracking Error
"DP,EP,US10Y",-0.18497,0.011771,0.000257,-0.000696,-1.203434,8.670337,0.010184,0.153702


### 3.2

In [None]:
# 3.2: turn each in-sample forecast into a strategy that holds SPY with weight 100 * forecast.
# time_series_regression stores alpha_hat as intercept * 12, so alpha_hat / 12 recovers the
# per-period intercept. Each regression table has exactly one row, read with .iloc[0]
# (calling float() on a one-element Series is deprecated in recent pandas).
spy_total_ret = total_ret['SPY']

# --- DP only ---
DP_return = signals[['DP']].shift(1) * signal_reg_1['DP beta'].iloc[0] + signal_reg_1['alpha_hat'].iloc[0] / 12
DP_return = DP_return.rename(columns={'DP':'Forecasted Return'}) * 100
# Column keeps the name 'Forecasted Return' but holds the realized strategy return (weight x SPY).
DP_forecast_return = (DP_return['Forecasted Return'] * spy_total_ret).reindex(DP_return.index).to_frame('Forecasted Return')

# --- EP only ---
EP_return = signals[['EP']].shift(1) * signal_reg_2['EP beta'].iloc[0] + signal_reg_2['alpha_hat'].iloc[0] / 12
EP_return = EP_return.rename(columns={'EP':'Forecasted Return'}) * 100
EP_forecast_return = (EP_return['Forecasted Return'] * spy_total_ret).reindex(EP_return.index).to_frame('Forecasted Return')

# --- DP, EP and US10Y jointly ---
lagged_signals = signals[['DP','EP','US10Y']].shift(1)
forecasted_rets = lagged_signals.to_numpy() @ signal_reg_3[['DP beta','EP beta','US10Y beta']].to_numpy().T
multiple_factor_return = pd.DataFrame(forecasted_rets, columns = ['Forecasted Return'], index = signals.index)
multiple_factor_return['Forecasted Return'] = (multiple_factor_return['Forecasted Return'] + signal_reg_3['alpha_hat'].iloc[0] / 12)*100
multiple_forecast_return = (multiple_factor_return['Forecasted Return'] * spy_total_ret).reindex(multiple_factor_return.index).to_frame('Forecasted Return')

In [292]:
# Performance, tail-risk and market-regression statistics for the three in-sample strategies.
strategy = {'DP': DP_forecast_return.dropna(),
          'EP': EP_forecast_return.dropna(),
          'DP-EP-US10Y': multiple_forecast_return.dropna()
         }
# Market factor for the regressions below: SPY excess returns.
# NOTE(review): the strategy returns are built from SPY total returns, while the factor is
# SPY excess returns; confirm that mixing the two is intended.
factor = excess_ret.loc[:,['SPY']]
total_strategy_summary = []

for key,value in strategy.items():
    strat = strategy[key]
    strat_summary = performance_summary(strat)
    strat_summary.index = [key]
    # NOTE(review): despite its name, the 'Forecasted Return' column of these frames holds the
    # realized strategy return (weight x SPY return), so this counts months in which the strategy
    # return is below the T-bill rate, not months in which the forecast is below it.
    # Confirm which of the two question 3.3.c intends.
    strat_summary['Negative Risk Premium Months'] = len(strat[strat['Forecasted Return'] - risk_free['TBill 3M'] <0])
    strat_summary['Total Months'] = len(strat)
    # Regress the strategy on the market factor from the strategy's first date onward.
    ts = time_series_regression(strat, factor[strat.index[0]:].squeeze(), False)
    strat_summary['Market Beta'] = ts['SPY beta'].values
    strat_summary['Market Alpha'] = ts['alpha_hat'].values
    strat_summary['Market Information Ratio'] = ts['info_ratio'].values
    
    total_strategy_summary.append(strat_summary)
    
# One row per strategy.
total_strategy_df = pd.concat(total_strategy_summary)
  
total_strategy_df.loc[:,['Mean','Volatility','Sharpe Ratio','Max Drawdown','Market Beta','Market Alpha','Market Information Ratio']]

Unnamed: 0,Mean,Volatility,Sharpe Ratio,Max Drawdown,Market Beta,Market Alpha,Market Information Ratio
DP,0.111181,0.172764,0.643543,-0.716268,0.948558,0.033654,0.368125
EP,0.09804,0.143765,0.681949,-0.581668,0.88146,0.025997,0.565802
DP-EP-US10Y,0.112994,0.173571,0.650993,-0.699447,0.947413,0.03556,0.381485


**3.3) GMO believes a risk premium is compensation for a security's tendency to lose money at "bad times". Let's consider risk characteristics.**

**3.3.a) For both strategies, the market, and GMO, calculate the monthly VaR for $\pi = 0.05$. Just use the quantile of the historic data for this VaR calculation.**

In [293]:
# 5% historical VaR for the three strategies, the market (SPY) and GMWAX, stacked into one table.
var_column = ['VaR (0.05)']

market_summary = performance_summary(excess_ret[['SPY']])
# Note: this overwrites the `summary` table from Section 2 with the full-sample GMWAX summary.
summary = performance_summary(excess_ret[['GMWAX']].dropna())

strat_var = pd.concat([frame.loc[:, var_column] for frame in (total_strategy_df, market_summary, summary)])
strat_var

Unnamed: 0,VaR (0.05)
DP,-0.059401
EP,-0.06509
DP-EP-US10Y,-0.061992
SPY,-0.079673
GMWAX,-0.041368


**3.3b) The GMO case mentions that stocks under-performed short-term bonds from 2000-2011. Does the dynamic portfolio above under-perform the risk-free rate over this time?**

In [294]:
# Performance of the in-sample strategies restricted to 2000-2011.
strats = {
    'DP': DP_forecast_return.dropna(),
    'EP': EP_forecast_return.dropna(),
    'DP-EP-US10Y': multiple_forecast_return.dropna(),
}

strat_summary_0011 = []
for k, strat_returns in strats.items():
    strat = (
        strat_returns.loc['2000':'2011', ['Forecasted Return']]
        .rename(columns={'Forecasted Return': 'Forecasted Returns'})
    )
    perf_summary = performance_summary(strat)
    perf_summary.index = [k]
    strat_summary_0011.append(perf_summary)

strat_summary_df_0011 = pd.concat(strat_summary_0011)
strat_summary_df_0011[['Mean', 'Volatility', 'Sharpe Ratio', 'Max Drawdown']]

Unnamed: 0,Mean,Volatility,Sharpe Ratio,Max Drawdown
DP,0.055427,0.218711,0.253427,-0.716268
EP,0.044407,0.168106,0.26416,-0.581668
DP-EP-US10Y,0.05213,0.212834,0.24493,-0.699447


All dynamic portfolios outperform the risk-free rate.

**3.3.c) Based on the regression estimates, in how many periods do we estimate a negative risk premium?**

In [295]:
# Share of months flagged as negative risk premium, per in-sample strategy.
neg_risk_premium = total_strategy_df[['Negative Risk Premium Months', 'Total Months']].assign(
    **{'Negative Risk Premium Months (%)':
       lambda counts: counts['Negative Risk Premium Months'] * 100 / counts['Total Months']}
)
neg_risk_premium

Unnamed: 0,Negative Risk Premium Months,Total Months,Negative Risk Premium Months (%)
DP,128,334,38.323353
EP,124,334,37.125749
DP-EP-US10Y,130,334,38.922156


**3.3.d) Do you believe the dynamic strategy takes on extra risk?**

No, judging by the tail risk metrics and volatility of the dynamic strategies compared to SPY it does not seem like these strategies take on extra risk on the whole.

However, we must keep in mind that the strategies are dependent on running regressions with very little prediction power, so badly estimated parameters could lead to terrible performance. This is not evident in terms of very high volatility and tail risk in our backtesting period though.

In [296]:
# Risk/return profile of the in-sample strategies, for comparison with SPY below.
total_strategy_df[['Mean', 'Volatility', 'Sharpe Ratio', 'VaR (0.05)', 'Max Drawdown',
                   'Market Beta', 'Market Alpha', 'Market Information Ratio']]

Unnamed: 0,Mean,Volatility,Sharpe Ratio,VaR (0.05),Max Drawdown,Market Beta,Market Alpha,Market Information Ratio
DP,0.111181,0.172764,0.643543,-0.059401,-0.716268,0.948558,0.033654,0.368125
EP,0.09804,0.143765,0.681949,-0.06509,-0.581668,0.88146,0.025997,0.565802
DP-EP-US10Y,0.112994,0.173571,0.650993,-0.061992,-0.699447,0.947413,0.03556,0.381485


In [297]:
# Same statistics for SPY excess returns.
market_summary[['Mean', 'Volatility', 'Sharpe Ratio', 'VaR (0.05)', 'Max Drawdown']]

Unnamed: 0,Mean,Volatility,Sharpe Ratio,VaR (0.05),Max Drawdown
SPY,0.080499,0.154451,0.521198,-0.079673,-0.559175


## Sec 4

### 4.1 Out-of-Sample R^2

In [298]:
# Expanding-window OOS R^2 with lagged EP as the only predictor; forecasting starts at row 60.
factor = signals[['EP']].shift(1)
fund_ret = total_ret.loc[factor.index[0]:, ['SPY']]
reg_ep = OOS_r2(fund_ret, factor, 60)
OOS_RSquared_ep, reg_ep_params = reg_ep
OOS_r2_ep = pd.DataFrame({'OOS R-Squared': [OOS_RSquared_ep]}, index=['EP'])

In [299]:
# Expanding-window OOS R^2 with lagged DP as the only predictor; forecasting starts at row 60.
factor = signals[['DP']].shift(1)
fund_ret = total_ret.loc[factor.index[0]:, ['SPY']]
reg_dp = OOS_r2(fund_ret, factor, 60)
OOS_RSquared_dp, reg_dp_params = reg_dp
OOS_r2_dp = pd.DataFrame({'OOS R-Squared': [OOS_RSquared_dp]}, index=['DP'])

In [300]:
# Expanding-window OOS R^2 with lagged DP and EP as predictors; forecasting starts at row 60.
factor = signals.loc[:,['DP','EP']].shift(1)
fund_ret = total_ret.loc[factor.index[0]:,['SPY']]
reg_epdp = OOS_r2(fund_ret,factor,60)
# Scalar kept under its own name (as in the sibling cells) instead of reusing
# `OOS_r2_epdp` for both the number and the one-row table.
OOS_RSquared_epdp = reg_epdp[0]
OOS_r2_epdp = pd.DataFrame([[OOS_RSquared_epdp]], columns = ['OOS R-Squared'], index = ['DP-EP'])
reg_epdp_params = reg_epdp[1]

In [301]:
# Expanding-window OOS R^2 with all three lagged signals; forecasting starts at row 60.
factor = signals[['DP', 'EP', 'US10Y']].shift(1)
fund_ret = total_ret.loc[factor.index[0]:, ['SPY']]
reg_all = OOS_r2(fund_ret, factor, 60)
OOS_RSquared_all, reg_all_params = reg_all
OOS_r2_all = pd.DataFrame({'OOS R-Squared': [OOS_RSquared_all]}, index=['All'])

In [302]:
# Stack the four one-row OOS R^2 tables into a single comparison table.
oos_r2_tables = [OOS_r2_dp, OOS_r2_ep, OOS_r2_epdp, OOS_r2_all]
oos_r2_sum = pd.concat(oos_r2_tables)
oos_r2_sum

Unnamed: 0,OOS R-Squared
DP,-0.045636
EP,-0.013791
DP-EP,-0.0643
All,-0.080918


**Did this forecasting strategy produce a positive OOS r-squared?**

This forecasting strategy produces a negative OOS r-squared, which indicates our strategy fits the data worse than a horizontal line given by the expanding mean of the sample.

### 4.2 Re-do problem 3.2 using this OOS forecast

In [303]:
# OOS strategy driven by lagged EP: SPY weight = 100 x forecast, trading from row 60.
factor = signals[['EP']].shift(1)
fund_ret = total_ret.loc[factor.index[0]:, ['SPY']]
OOS_EP_predict = OOS_strat(fund_ret, factor, start=60, weight=100).rename(columns={'Strat Returns': 'EP_OOS_Returns'})

In [304]:
# OOS strategy driven by lagged DP: SPY weight = 100 x forecast, trading from row 60.
factor = signals[['DP']].shift(1)
fund_ret = total_ret.loc[factor.index[0]:, ['SPY']]
OOS_DP_predict = OOS_strat(fund_ret, factor, start=60, weight=100).rename(columns={'Strat Returns': 'DP_OOS_Returns'})

In [305]:
# OOS strategy driven by lagged DP and EP: SPY weight = 100 x forecast, trading from row 60.
factor = signals[['DP', 'EP']].shift(1)
fund_ret = total_ret.loc[factor.index[0]:, ['SPY']]
OOS_EPDP_predict = OOS_strat(fund_ret, factor, start=60, weight=100).rename(columns={'Strat Returns': 'DP-EP_OOS_Returns'})

In [306]:
# OOS strategy driven by all three lagged signals: SPY weight = 100 x forecast, trading from row 60.
factor = signals[['DP', 'EP', 'US10Y']].shift(1)
fund_ret = total_ret.loc[factor.index[0]:, ['SPY']]
OOS_all_predict = OOS_strat(fund_ret, factor, start=60, weight=100).rename(columns={'Strat Returns': 'All_OOS_Returns'})

In [307]:
# Side-by-side table of the OOS strategy returns.
# NOTE(review): `oos_prediction_sum` omits the DP-EP strategy and is not read again in this
# cell; confirm whether it is still needed.
oos_prediction_sum = pd.concat([OOS_DP_predict.T,OOS_EP_predict.T,OOS_all_predict.T])
oos_prediction_sum = oos_prediction_sum.T

# Return series to summarize. Each frame's single column is named '<key>_OOS_Returns',
# which the loop below relies on.
# NOTE(review): the four strategy series are built from SPY total returns, whereas the 'SPY'
# entry uses excess returns; confirm the comparison is on the intended basis.
strats = {'DP': OOS_DP_predict.dropna(),
          'EP': OOS_EP_predict.dropna(),
          'DP-EP':OOS_EPDP_predict.dropna(),
          'All': OOS_all_predict.dropna(),
          'SPY':excess_ret.loc[OOS_all_predict.index[0]:,['SPY']].rename(columns={'SPY':'SPY_OOS_Returns'}),
          'US3M':risk_free['TBill 3M'].to_frame('US3M_OOS_Returns')
         }
# Market factor for the regressions below: SPY excess returns.
factor = excess_ret.loc[:,['SPY']]
strat_summary =[]
for k,v in strats.items():
    strat = strats[k]
    perf_summary = performance_summary(strat)
    # NOTE(review): this counts months in which the realized return of the series is below the
    # T-bill rate. For 'SPY' (already an excess return) the T-bill rate is subtracted a second
    # time, and for 'US3M' the difference is identically zero, so its count is always 0.
    perf_summary['Negative Risk Premium Months'] = len(strat[strat[k+'_OOS_Returns'] - risk_free['TBill 3M'] <0])
    perf_summary['Total Months'] = len(strat)
    perf_summary.index = [k]
    # Regress the series on the market factor from the series' first date onward.
    reg = time_series_regression(strat, factor[strat.index[0]:].squeeze(), False)
    perf_summary['Market Beta'] = reg['SPY beta'].values
    perf_summary['Market Alpha'] = reg['alpha_hat'].values
    perf_summary['Market Information Ratio'] = reg['info_ratio'].values
    strat_summary.append(perf_summary)
    

# One row per series.
strat_summary_df = pd.concat(strat_summary)
strat_summary_df.loc[:,['Mean','Volatility','Sharpe Ratio','VaR (0.05)','Max Drawdown','Market Beta','Market Alpha','Market Information Ratio']]

Unnamed: 0,Mean,Volatility,Sharpe Ratio,VaR (0.05),Max Drawdown,Market Beta,Market Alpha,Market Information Ratio
DP,0.081471,0.194486,0.418903,-0.061275,-0.685397,0.911001,0.004267071,0.030697
EP,0.101335,0.157383,0.643875,-0.062768,-0.541476,0.906053,0.02455078,0.305276
DP-EP,0.075989,0.234507,0.324039,-0.066831,-0.742527,0.716024,0.01530892,0.073347
All,0.039635,0.2153,0.184092,-0.062,-0.709924,0.499489,-0.002694775,-0.013342
SPY,0.084746,0.149312,0.567577,-0.07649,-0.517254,1.0,-2.7077950000000003e-17,-0.325156
US3M,0.021563,0.006069,3.552851,1.1e-05,-1.7e-05,-0.001246,0.02166375,3.571173


**How much better/worse is the OOS Earnings-Price ratio strategy compared to the in-sample version of 3.2?**

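The OOS Earnings-Price strategy performs only slightly worse than the in-sample version of 3.2.

Its mean is similar (about 10.1% OOS vs. 9.8% in-sample), while its volatility is somewhat higher (about 15.7% vs. 14.4%), so the Sharpe ratio is slightly lower (0.64 vs. 0.68). The tail-risk measures are comparable (VaR of about -6.3% vs. -6.5%), and the market beta stays positive at roughly 0.9 in both cases. The other OOS strategies (DP, DP-EP, All) deteriorate much more relative to their in-sample counterparts. Note that the OOS sample excludes the first 60 months, so the two samples are not identical.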

**4.3) Re-do problem 3.3 using this OOS forecast.**

In [308]:
# Side-by-side table of the OOS strategy returns.
# NOTE(review): `oos_prediction_sum` omits the DP-EP strategy and is not read again in this
# cell; confirm whether it is still needed.
oos_prediction_sum = pd.concat([OOS_DP_predict.T,OOS_EP_predict.T,OOS_all_predict.T])
oos_prediction_sum = oos_prediction_sum.T

# Return series to summarize over 2000-2011. Each frame's single column is named
# '<key>_OOS_Returns', which the loop below relies on.
strats = {'DP': OOS_DP_predict.dropna(),
          'EP': OOS_EP_predict.dropna(),
          'DP-EP':OOS_EPDP_predict.dropna(),
          'All': OOS_all_predict.dropna(),
          'US3M':risk_free['TBill 3M'].to_frame('US3M_OOS_Returns')
         }
# Market factor for the regressions below: SPY excess returns, restricted to 2000-2011.
factor = excess_ret.loc[:,['SPY']]['2000':'2011']
strat_summary =[]
for k,v in strats.items():
    # Restrict each series to the 2000-2011 window before computing statistics.
    strat = strats[k]['2000':'2011']
    perf_summary = performance_summary(strat)
    # NOTE(review): this counts months in which the realized return of the series is below the
    # T-bill rate; for 'US3M' the difference is identically zero, so its count is always 0.
    perf_summary['Negative Risk Premium Months'] = len(strat[strat[k+'_OOS_Returns'] - risk_free['2000':'2011']['TBill 3M'] <0])
    perf_summary['Total Months'] = len(strat)
    perf_summary.index = [k]
    # Regress the series on the market factor from the series' first date onward.
    reg = time_series_regression(strat, factor[strat.index[0]:].squeeze(), False)
    perf_summary['Market Beta'] = reg['SPY beta'].values
    perf_summary['Market Alpha'] = reg['alpha_hat'].values
    perf_summary['Market Information Ratio'] = reg['info_ratio'].values
    strat_summary.append(perf_summary)
    

# One row per series; this overwrites the in-sample table of the same name from Section 3.
strat_summary_df_0011 = pd.concat(strat_summary)
strat_summary_df_0011.loc[:,['Mean','Volatility','Sharpe Ratio','VaR (0.05)','Max Drawdown','Market Beta','Market Alpha','Market Information Ratio']]

Unnamed: 0,Mean,Volatility,Sharpe Ratio,VaR (0.05),Max Drawdown,Market Beta,Market Alpha,Market Information Ratio
DP,0.032176,0.266068,0.120931,-0.128644,-0.685397,1.094867,0.00664,0.03282
EP,0.1006,0.204432,0.492097,-0.070299,-0.541476,1.127577,0.074301,0.738662
DP-EP,0.027455,0.334128,0.082168,-0.153181,-0.742527,0.744736,0.010085,0.032244
All,-0.022747,0.305751,-0.074396,-0.134926,-0.709807,0.324792,-0.030322,-0.100596
US3M,0.022875,0.005782,3.956389,3.4e-05,-1.3e-05,-0.002864,0.022862,3.966904


In [309]:
# Share of months flagged as negative risk premium, per OOS series (full OOS sample).
neg_risk_premium = strat_summary_df[['Negative Risk Premium Months', 'Total Months']].assign(
    **{'Negative Risk Premium Months (%)':
       lambda counts: counts['Negative Risk Premium Months'] * 100 / counts['Total Months']}
)
neg_risk_premium

Unnamed: 0,Negative Risk Premium Months,Total Months,Negative Risk Premium Months (%)
DP,100,275,36.363636
EP,100,275,36.363636
DP-EP,99,275,36.0
All,103,275,37.454545
SPY,102,275,37.090909
US3M,0,335,0.0


**Is the point-in-time version of the strategy riskier?**

Compared to the full OOS sample, the mean returns go down significantly during 2000-2011. The volatility increases, thus the strategy experiences a lower Sharpe Ratio. Given the lower performance of the strategy and worse tail risk measures compared to SPY, the strategy does take on extra risk.