## 36700 Homework 7

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from arch import arch_model
from arch.univariate import GARCH, EWMAVariance 
from sklearn import linear_model
import scipy.stats as stats
from statsmodels.regression.rolling import RollingOLS
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

### Descriptions

In [108]:
descriptions = pd.read_excel('gmo_analysis_data.xlsx', sheet_name='descriptions').dropna()
descriptions.rename( columns={'Unnamed: 0':'Ticker'}, inplace=True )
descriptions

Unnamed: 0,Ticker,Unit,Type,Description
0,DP,Ratio,Index,Dividend-Price Ratio of the S&P500
1,EP,Ratio,Index,Earnings-Price Ratio of the S&P500
2,US10Y,Yield,Index,10-Year Tnotes
3,SPY,Total Return,ETF,S&P 500
4,GMWAX,Total Return,Mutual Fund,GMO
5,RF,Total Return,Index,3-Month Tbills


### Signals

In [91]:
signals = pd.read_excel('gmo_analysis_data.xlsx', sheet_name='signals').dropna()
signals.rename( columns={'Unnamed: 0':'Date'}, inplace=True )
signals.set_index('Date',inplace=True, drop=True)
signals.head()

Unnamed: 0_level_0,DP,EP,US10Y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1993-02-28,2.82,4.44,6.03
1993-03-31,2.77,4.41,6.03
1993-04-30,2.82,4.44,6.05
1993-05-31,2.81,4.38,6.16
1993-06-30,2.79,4.31,5.8


### Returns

In [80]:
ret = pd.read_excel('gmo_analysis_data.xlsx', sheet_name='returns (total)')
ret.rename( columns={'Unnamed: 0':'Date'}, inplace=True )
ret.set_index('Date',inplace=True, drop=True)
returns = ret.dropna().subtract(riskfreerate['US3M'], axis=0).dropna()
returns.head()

Unnamed: 0_level_0,SPY,GMWAX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1996-11-30,0.06873,0.04091
1996-12-31,-0.02815,-0.01563
1997-01-31,0.05749,0.01044
1997-02-28,0.00522,0.01792
1997-03-31,-0.0486,-0.01961


### Risk-free rate

In [61]:
riskfreerate = pd.read_excel('gmo_analysis_data.xlsx', sheet_name='risk-free rate').dropna()
riskfreerate.rename( columns={'Unnamed: 0':'Date'}, inplace=True )
riskfreerate.set_index('Date',inplace=True, drop=True)
riskfreerate.head()

Unnamed: 0_level_0,US3M
Date,Unnamed: 1_level_1
1993-02-28,0.00251
1993-03-31,0.00246
1993-04-30,0.00248
1993-05-31,0.00261
1993-06-30,0.00258


# 2. Analyzing GMO

#### 2.1

In [62]:
def summary_stats(df, annual_fac = 12):
    stats_df = pd.DataFrame(data = None)
    stats_df['Mean'] = df.mean()*annual_fac
    stats_df['Vol'] = df.std()*np.sqrt(annual_fac)
    stats_df['Sharpe'] = df.mean()/df.std()*np.sqrt(annual_fac)
    
    return stats_df

#### From inception to 2011

In [63]:
summary_stats(returns.loc[:'2011', ['GMWAX']])

Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.01583,0.12501,0.1266


#### 2012 to present

In [64]:
summary_stats(returns.loc['2012':, ['GMWAX']])

Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.03664,0.092,0.39823


#### Inception to present

In [65]:
summary_stats(returns[['GMWAX']])

Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.0245,0.11231,0.21811


Even though the return has stayed fairly constant over time, the volatility was relatively higher until 2011. This is reflected in the lower sharpe ratio before 2012.

#### 2.2

In [66]:
def tail_risk(df):
    tr_df = pd.DataFrame(data = None)
    tr_df['Min return'] = df.min()
    tr_df['VaR-5th'] = df.quantile(.05)
    cum_ret = (1 + df).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    tr_df['Max Drawdown'] = drawdown.min()
    
    return tr_df

Minimum return, 5th percentile, maximum drawdown

#### From inception to 2011

In [67]:
tail_risk(returns.loc[:'2011', ['GMWAX','SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.14918,-0.05981,-0.47295
SPY,-0.16557,-0.08022,-0.56001


#### 2012 to present

In [68]:
tail_risk(returns.loc['2012':, ['GMWAX','SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.11865,-0.03969,-0.22605
SPY,-0.12473,-0.06866,-0.24813


In [69]:
tail_risk(returns[['GMWAX','SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.14918,-0.04829,-0.47295
SPY,-0.16557,-0.08001,-0.56001


GMWAX shows higher tail risk from inception to 2011 than from 2012 till date. Same is the case with maximum drawdown as well. Minimum returns of GMWAX are lower pre-2011 than post-2012.

#### 2.3

In [70]:
def reg_params(df, y_col, X_col, intercept = True, annual_fac=12):
    y = df[y_col]
    if intercept == True:
        X = sm.add_constant(df[X_col])
    else:
        X = df[X_col]
    
    model = sm.OLS(y, X, missing = 'drop').fit()
    reg_df = model.params.to_frame('Regression Parameters')
    reg_df.loc[r'$R^{2}$'] = model.rsquared
    
    if intercept == True:
        reg_df.loc['const'] *= annual_fac
    
    return reg_df

a. alpha, beta, R squared

In [71]:
reg_params(returns, 'GMWAX', 'SPY')

Unnamed: 0,Regression Parameters
const,-0.01699
SPY,0.5456
$R^{2}$,0.57774


b. While GMO has moderate exposure to the market and it's market beta is not very low, we can consider it a low-beta strategy. The beta is consistent across the subsamples so GMO's exposure to the market has not changed.

c. GMO does not provide alpha in either subsample as alpha is negative.

# 3. Forecast regressions

In [92]:
signals = signals.shift()
signals['SPY'] = ret['SPY']
signals.head()

Unnamed: 0_level_0,DP,EP,US10Y,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1993-02-28,,,,0.01067
1993-03-31,2.82,4.44,6.03,0.02241
1993-04-30,2.77,4.41,6.03,-0.02559
1993-05-31,2.82,4.44,6.05,0.02697
1993-06-30,2.81,4.38,6.16,0.00367


Dividend-price ratio

In [94]:
DP = reg_params(signals, 'SPY', 'DP')
DP

Unnamed: 0,Regression Parameters
const,-0.11286
DP,0.00943
$R^{2}$,0.00936


Earning-price ratio

In [96]:
EP = reg_params(signals, 'SPY', 'EP')
EP

Unnamed: 0,Regression Parameters
const,-0.07121
EP,0.0032
$R^{2}$,0.00861


Dividend-price ratio, earnings-price ratio, and 10Y yeild

In [97]:
EP_DP_10Y = reg_params(signals, 'SPY', ['EP','DP','US10Y'])
EP_DP_10Y

Unnamed: 0,Regression Parameters
const,-0.1792
EP,0.00265
DP,0.00798
US10Y,-0.00097
$R^{2}$,0.01633


#### 3.2

In [101]:
w_DP = 100 * (DP.loc['const'][0]/12 + DP.loc['DP'][0] * signals['DP'])
r_DP = (w_DP * signals['SPY']).dropna()
w_EP = 100 * (EP.loc['const'][0]/12 + EP.loc['EP'][0] * signals['EP'])
r_EP = (w_EP * signals['SPY']).dropna()
w_3fac = 100 * (EP_DP_10Y.loc['const'][0]/12 + EP_DP_10Y.loc['EP'][0] * signals['EP']\
                                             + EP_DP_10Y.loc['DP'][0] * signals['DP']\
                                             + EP_DP_10Y.loc['US10Y'][0] * signals['US10Y'])
r_3fac = (w_3fac * signals['SPY']).dropna()

In [121]:
def summary_stats_bm(series, bm, series_total,annual_fac=12):
    ss_df = pd.DataFrame(data = None, index = ['Summary Stats'])
    ss_df['Mean'] = series.mean() * annual_fac
    ss_df['Vol'] = series.std() * np.sqrt(annual_fac)
    ss_df['Sharpe (Mean/Vol)'] = ss_df['Mean'] / ss_df['Vol']
    
    y = series
    X = sm.add_constant(bm.loc[series.index])
    reg = sm.OLS(y,X).fit()
    ss_df[r'$\alpha$'] = reg.params[0] * annual_fac
    ss_df[r'$\beta^{SPY}$'] = reg.params[1]
    
    cum_ret = (1 + series_total).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    ss_df['Max Drawdown'] = drawdown.min()
    
    ss_df['Information ratio'] = (reg.params[0] / reg.resid.std()) * np.sqrt(annual_fac)
    
    return round(ss_df, 4)

Mean, vol, sharpe, max-drawdown, alpha, beta

In [122]:
r_DP_total = r_DP.add(riskfreerate['US3M'])
r_DP_total.dropna()
summary_stats_bm(r_DP, signals[['SPY']],r_DP_total )

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.1095,0.149,0.7348,0.0207,0.8611,-0.6463,0.2759


#### 3.3

(a.)

In [124]:
VaR = pd.DataFrame([r_DP.quantile(.05), r_EP.quantile(.05), r_3fac.quantile(.05), 
                    signals['SPY'].quantile(.05), 
                    ret['GMWAX'].quantile(.05)],
                   index = ['DP Strat','EP Strat','3-factor Strat','SPY','GMO'], 
                   columns = ['5% VaR'])

VaR

Unnamed: 0,5% VaR
DP Strat,-0.05226
EP Strat,-0.05413
3-factor Strat,-0.06416
SPY,-0.07394
GMO,-0.04731


(b.)

In [125]:
summary_stats(r_DP.to_frame('DP Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
DP Strat,0.03933,0.18421,0.21353


In [126]:
summary_stats(r_EP.to_frame('EP Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
EP Strat,0.03727,0.1339,0.27835


In [127]:
summary_stats(r_3fac.to_frame('3-factor Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
3-factor Strat,0.0608,0.15739,0.3863


In [129]:
summary_stats(riskfreerate.loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
US3M,0.02306,0.00578,3.98663


All the dynamic strategies outperform the risk-free rate during 2000-2011.

(c.)

In [131]:
r_df = r_3fac.to_frame('3-factor Strat')
r_df['DP Strat'] = r_DP
r_df['EP Strat'] = r_EP
r_df['rf'] = riskfreerate['US3M']

df_riskprem = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df.columns[:3]:
    df_riskprem[col] = len(r_df[r_df[col] < r_df['rf']])/len(r_df) * 100
    
df_riskprem

Unnamed: 0,3-factor Strat,DP Strat,EP Strat
% of periods underperforming $r^{f}$,37.07865,37.35955,37.35955


(d.)

As per the tail risk metrics and volatility of the dynamic strategies compared to SPY it **does not seem** like these strategies take on extra risk on the whole. But the strategies are dependent on running regressions with very little prediction power, so badly estimated parameters could lead to terrible performance. This is not evident in terms of very high volatility and tail risk in our backtesting period though.

# 4. Out-of Sample Forecasting

#### 4.1

In [132]:
def OOS_r2(df, factors, start):
    y = df['SPY']
    X = sm.add_constant(df[factors])

    forecast_err, null_err = [], []

    for i,j in enumerate(df.index):
        if i >= start:
            currX = X.iloc[:i]
            currY = y.iloc[:i]
            reg = sm.OLS(currY, currX, missing = 'drop').fit()
            null_forecast = currY.mean()
            reg_predict = reg.predict(X.iloc[[i]])
            actual = y.iloc[[i]]
            forecast_err.append(reg_predict - actual)
            null_err.append(null_forecast - actual)
            
    RSS = (np.array(forecast_err)**2).sum()
    TSS = (np.array(null_err)**2).sum()
    
    return 1 - RSS/TSS

In [133]:
EP_OOS_r2 = OOS_r2(signals, ['EP'], 60)

print('EP OOS R-squared: ' + str(round(EP_OOS_r2, 4)))

EP OOS R-squared: -0.007


In [134]:
DP_OOS_r2 = OOS_r2(signals, ['DP'], 60)

print('DP OOS R-squared: ' + str(round(DP_OOS_r2, 4)))

DP OOS R-squared: -0.0024


No, this forecasting strategy produced a negative OOS R-squared.

#### 4.2

In [135]:
def OOS_strat(df, factors, start, weight):
    returns = []
    y = df['SPY']
    X = sm.add_constant(df[factors])

    for i,j in enumerate(df.index):
        if i >= start:
            currX = X.iloc[:i]
            currY = y.iloc[:i]
            reg = sm.OLS(currY, currX, missing = 'drop').fit()
            pred = reg.predict(X.iloc[[i]])
            w = pred * weight
            returns.append((df.iloc[i]['SPY'] * w)[0])

    df_strat = pd.DataFrame(data = returns, index = df.iloc[-(len(returns)):].index, columns = ['Strat Returns'])
    return df_strat

In [140]:
OOS_EP = OOS_strat(signals, ['EP'], 60, 100)

OOS_EP_total = OOS_EP.add(riskfreerate['US3M'])
summary_stats_bm(OOS_EP['Strat Returns'], signals['SPY'],OOS_EP_total)

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.0819,0.1654,0.4953,0.0353,0.5435,,0.249


In [141]:
OOS_DP = OOS_strat(signals, ['DP'], 60, 100)

OOS_DP_total = OOS_DP.add(riskfreerate['US3M'], axis=0).dropna()
summary_stats_bm(OOS_DP['Strat Returns'], signals['SPY'],OOS_DP_total)

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.0798,0.1763,0.4525,-0.0069,1.0109,,-0.088


Based on the summary stats, the strategy in 3.2 seems better.

#### 4.3

(a.)

In [142]:
VaR_OOS = pd.DataFrame([OOS_EP['Strat Returns'].quantile(.05),OOS_DP['Strat Returns'].quantile(.05),
                    signals['SPY'].quantile(.05), 
                    ret['GMWAX'].quantile(.05)],
                   index = ['EP Strat','DP Strat','SPY','GMO'], 
                   columns = ['5% VaR'])

VaR_OOS

Unnamed: 0,5% VaR
EP Strat,-0.07097
DP Strat,-0.07256
SPY,-0.07394
GMO,-0.04731


(b.)

In [143]:
summary_stats_bm(OOS_EP.loc['2000':'2011']['Strat Returns'], ret[['SPY']],OOS_EP_total)

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.0388,0.1959,0.1979,0.0333,0.2994,,0.1757


(c.)

In [146]:
r_df_OOS = OOS_EP.rename(columns={"Strat Returns": "EP Strat"})
r_df_OOS['rf'] = riskfreerate['US3M']

df_riskprem2 = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df_OOS.columns[:3]:
    df_riskprem2[col] = len(r_df_OOS[r_df_OOS[col] < r_df_OOS['rf']])/len(r_df_OOS) * 100
    
df_riskprem2

Unnamed: 0,EP Strat,rf
% of periods underperforming $r^{f}$,38.38384,0.0


(d.)

In [147]:
r_df_OOS_DP = OOS_DP.rename(columns={"Strat Returns": "DP Strat"})
r_df_OOS_DP['rf'] = riskfreerate['US3M']

riskprem2_DP = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df_OOS_DP.columns[:3]:
    riskprem2_DP[col] = len(r_df_OOS_DP[r_df_OOS_DP[col] < r_df_OOS_DP['rf']])/len(r_df_OOS_DP) * 100
    
riskprem2_DP

Unnamed: 0,DP Strat,rf
% of periods underperforming $r^{f}$,39.05724,0.0


Based on the results in 4.3, we can see that these two strategies don't significantly outperform the risk-free rate in terms of premium. Also, it is riskier than SPY.