## Importing packages and loading datasets

In [55]:
import pandas as pd
import numpy as np

In [56]:
# Load all datasets
signals = pd.read_excel('gmo_analysis_data.xlsx', sheet_name='signals')
rf_data = pd.read_excel('gmo_analysis_data.xlsx', sheet_name='risk-free rate')
returns = pd.read_excel('gmo_analysis_data.xlsx', sheet_name='total returns')

# Convert dates into datetime objects
signals['date'] = pd.to_datetime(signals['date'])
rf_data['date'] = pd.to_datetime(rf_data['date'])
returns['date'] = pd.to_datetime(returns['date'])

# Merge all data into one dataframe
df = returns.merge(rf_data, on='date', how='inner')
df = df.merge(signals, on='date', how='inner')
df = df.sort_values('date').reset_index(drop=True)

# Create excess returns (total return - risk-free rate)
df['SPY_excess'] = df['SPY'] - df['TBill 3M']
df['GMWAX_excess'] = df['GMWAX'] - df['TBill 3M']
df['GMGEX_excess'] = df['GMGEX'] - df['TBill 3M']

# Rename for convenience (DP = Dividend Yield, EP = Earnings Yield, Yield = 10-Year Yield)
df['DP'] = df['SPX D/P']
df['EP'] = df['SPX E/P']
df['Yield'] = df['T-Note 10YR']

## 2.1

In [57]:
# Define the three time periods
cutoff_2011 = pd.Timestamp('2011-12-31')

period1 = df[df['date'] <= cutoff_2011].copy()  # Inception → 2011
period2 = df[df['date'] > cutoff_2011].copy()   # 2012 → Present
period3 = df.copy()                              # Inception → Present

periods = [
    ('Inception → 2011', period1),
    ('2012 → Present', period2),
    ('Inception → Present', period3)
]

# Create lists to store results
results = []

for period_name, period_data in periods:
    if len(period_data) == 0:
        continue
    
    # Calculate metrics using excess returns
    excess_ret = period_data['GMWAX_excess'].dropna()
    
    # Mean (annualized)
    mean_monthly = excess_ret.mean()
    mean_annual = mean_monthly * 12
    
    # Volatility (annualized)
    vol_monthly = excess_ret.std()
    vol_annual = vol_monthly * np.sqrt(12)
    
    # Sharpe Ratio (annualized)
    sharpe = (mean_monthly / vol_monthly) * np.sqrt(12)
    
    # Append results
    results.append({
        'Period': period_name,
        'Mean Return (annualized)': round(mean_annual, 4),
        'Volatility (annualized)': round(vol_annual, 4),
        'Sharpe Ratio': round(sharpe, 4)
    })

# Create and display DataFrame
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Period,Mean Return (annualized),Volatility (annualized),Sharpe Ratio
0,Inception → 2011,-0.2653,0.1319,-2.0118
1,2012 → Present,-0.1233,0.1096,-1.1252
2,Inception → Present,-0.1974,0.1233,-1.6012


#### **Has the mean, vol, and Sharpe changed much since the case?**

#### Key observations:
#### 1. Mean returns are negative across all periods, indicating underperformance vs risk-free rate
#### 2. The more recent period (2012→Present) shows less negative returns (-0.12% vs -0.27%)
#### 3. Volatility has decreased in the recent period (10.96% vs 13.19%)
#### 4. Sharpe ratios are negative throughout but improved in recent period (-1.13 vs -2.01)
#### 5. Overall performance remains poor with negative risk-adjusted returns

## 2.2

In [58]:
results = []

for period_name, period_data in periods:
    if len(period_data) == 0:
        continue
    
    # Use TOTAL returns for tail risk, EXCESS returns for VaR
    total_ret = period_data['GMWAX'].dropna()
    excess_ret = period_data['GMWAX_excess'].dropna()
    
    # Minimum return (monthly)
    min_return = excess_ret.min()
    
    # VaR at 5th percentile (monthly)
    var_5 = excess_ret.quantile(0.05)
    
    # Maximum Drawdown - use TOTAL returns
    cumulative_wealth = (1 + total_ret).cumprod()
    running_max = cumulative_wealth.expanding().max()
    drawdown = (cumulative_wealth - running_max) / running_max
    max_dd = drawdown.min()
    
    # Append results
    results.append({
        'Period': period_name,
        'Minimum Return': round(min_return, 4),
        'VaR-5%': round(var_5, 4),
        'Maximum Drawdown': round(max_dd, 4)
    })

# Create and display DataFrame
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Period,Minimum Return,VaR-5%,Maximum Drawdown
0,Inception → 2011,-0.1934,-0.0838,-0.2936
1,2012 → Present,-0.1156,-0.0711,-0.2168
2,Inception → Present,-0.1934,-0.077,-0.2936


#### Tail Risk :

#### (a) GMWAX exhibits **HIGH tail risk** based on these statistics:
#### - Maximum drawdown of -29.36% over the full period indicates severe downside risk
#### - Minimum monthly return of -19.34% shows extreme negative outcomes are possible
#### - VaR-5% of -7.70% suggests 1 in 20 months could see losses exceeding 7.7%
 
#### (b) Tail risk **varies moderately** across subsamples:
#### - Earlier period (Inception→2011): More extreme tail risk with -29.36% max drawdown and -19.34% minimum return
#### - Recent period (2012→Present): Improved but still significant tail risk with -21.68% max drawdown and -11.56% monthly minimum return
#### - VaR-5% improved from -8.38% to -7.11%, showing some reduction in downside risk
#### - Despite improvement, both periods show substantial tail risk that would concern risk-averse investors


## 2.3

In [59]:
results = []

for period_name, period_data in periods:
    if len(period_data) == 0:
        continue
    
    # Prepare data for regression
    y = period_data['GMWAX_excess'].dropna()
    x = period_data['SPY_excess'].dropna()
    
    # Align the two series
    common_idx = y.index.intersection(x.index)
    y_clean = y.loc[common_idx].values
    x_clean = x.loc[common_idx].values
    
    # OLS regression: y = alpha + beta * x
    n = len(y_clean)
    X_matrix = np.column_stack([np.ones(n), x_clean])
    
    # Solve using least squares
    params = np.linalg.lstsq(X_matrix, y_clean, rcond=None)[0]
    alpha = params[0]
    beta = params[1]
    
    # Calculate R-squared
    y_pred = X_matrix @ params
    ss_res = np.sum((y_clean - y_pred) ** 2)
    ss_tot = np.sum((y_clean - y_clean.mean()) ** 2)
    r_squared = 1 - (ss_res / ss_tot)
    
    # Annualize alpha
    alpha_annual = alpha * 12
    
    # Append results
    results.append({
        'Period': period_name,
        'Alpha (annualized)': round(alpha_annual, 4),
        'Beta': round(beta, 4),
        'R²': round(r_squared, 4)
    })

# Create and display DataFrame
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Period,Alpha (annualized),Beta,R²
0,Inception → 2011,-0.0944,0.6195,0.6878
1,2012 → Present,-0.0997,0.6294,0.7693
2,Inception → Present,-0.0966,0.6224,0.7271


#### **Is GMWAX a low-beta strategy?**
 
#### Yes, GMWAX is a low-beta strategy. Across all periods, the beta ranges from 0.62 to 0.63, which is substantially below 1.0. This indicates that GMWAX has approximately 62% of the market's systematic risk. The beta has remained remarkably stable across subsamples:
#### - Inception → 2011: β = 0.6195
#### - 2012 → Present: β = 0.6294
#### - Full period: β = 0.6224
 
#### The low-beta characteristic has not changed since the case period—it has been consistent throughout GMWAX's history.
 
#### **Does GMWAX provide alpha?**
 
#### No, GMWAX does not provide positive alpha. The annualized alpha is negative across all periods:
#### - Inception → 2011: α = -0.0944% (annualized)
#### - 2012 → Present: α = -0.0997% (annualized)
#### - Full period: α = -0.0966% (annualized)
 
#### The negative alpha suggests that GMWAX has underperformed what would be expected given its market exposure (beta). This pattern has been consistent across subsamples, with no meaningful improvement in the more recent period (2012 → Present). The R² values (0.69 to 0.77) indicate that market exposure explains a substantial portion of GMWAX's returns, but the fund has not generated excess returns beyond what its beta would predict.

## 2.4

In [60]:
# Performance Metrics - GMGEX
results_perf = []
for period_name, period_data in periods:
    if len(period_data) == 0:
        continue
    
    excess_ret = period_data['GMGEX_excess'].dropna()
    
    mean_monthly = excess_ret.mean()
    mean_annual = mean_monthly * 12
    
    vol_monthly = excess_ret.std()
    vol_annual = vol_monthly * np.sqrt(12)
    
    sharpe = (mean_monthly / vol_monthly) * np.sqrt(12)
    
    results_perf.append({
        'Period': period_name,
        'Mean Return (annualized)': round(mean_annual, 4),
        'Volatility (annualized)': round(vol_annual, 4),
        'Sharpe Ratio': round(sharpe, 4)
    })

# Create and display DataFrame
results_perf_df = pd.DataFrame(results_perf)
print("\n--- PERFORMANCE METRICS - GMGEX ---")
display(results_perf_df)

# Tail Risk Metrics - GMGEX
results_tail = []
for period_name, period_data in periods:
    if len(period_data) == 0:
        continue
    
    total_ret = period_data['GMGEX'].dropna()
    excess_ret = period_data['GMGEX_excess'].dropna()
    
    min_return = excess_ret.min()
    var_5 = excess_ret.quantile(0.05)
    
    cumulative_wealth = (1 + total_ret).cumprod()
    running_max = cumulative_wealth.expanding().max()
    drawdown = (cumulative_wealth - running_max) / running_max
    max_dd = drawdown.min()
    
    results_tail.append({
        'Period': period_name,
        'Minimum Return': round(min_return, 4),
        'VaR-5%': round(var_5, 4),
        'Maximum Drawdown': round(max_dd, 4)
    })

# Create and display DataFrame
results_tail_df = pd.DataFrame(results_tail)
print("\n--- TAIL RISK METRICS - GMGEX ---")
display(results_tail_df)

# Market Exposure - GMGEX vs SPY
results_exposure = []
for period_name, period_data in periods:
    if len(period_data) == 0:
        continue
    
    y = period_data['GMGEX_excess'].dropna()
    x = period_data['SPY_excess'].dropna()
    
    common_idx = y.index.intersection(x.index)
    y_clean = y.loc[common_idx].values
    x_clean = x.loc[common_idx].values
    
    n = len(y_clean)
    X_matrix = np.column_stack([np.ones(n), x_clean])
    
    params = np.linalg.lstsq(X_matrix, y_clean, rcond=None)[0]
    alpha = params[0]
    beta = params[1]
    
    y_pred = X_matrix @ params
    ss_res = np.sum((y_clean - y_pred) ** 2)
    ss_tot = np.sum((y_clean - y_clean.mean()) ** 2)
    r_squared = 1 - (ss_res / ss_tot)
    
    alpha_annual = alpha * 12
    
    results_exposure.append({
        'Period': period_name,
        'Alpha (annualized)': round(alpha_annual, 4),
        'Beta': round(beta, 4),
        'R²': round(r_squared, 4)
    })

# Create and display DataFrame
results_exposure_df = pd.DataFrame(results_exposure)
print("\n--- MARKET EXPOSURE - GMGEX vs SPY ---")
display(results_exposure_df)

# Key Differences: GMWAX vs GMGEX
gmwax_data = period3['GMWAX_excess'].dropna()
gmgex_data = period3['GMGEX_excess'].dropna()

# Calculate Sharpe Ratios
gmwax_sharpe = (gmwax_data.mean() / gmwax_data.std()) * np.sqrt(12)
gmgex_sharpe = (gmgex_data.mean() / gmgex_data.std()) * np.sqrt(12)

# Calculate Maximum Drawdowns
gmwax_total = period3['GMWAX'].dropna()
gmgex_total = period3['GMGEX'].dropna()

gmwax_cumulative = (1 + gmwax_total).cumprod()
gmwax_running_max = gmwax_cumulative.expanding().max()
gmwax_drawdown = (gmwax_cumulative - gmwax_running_max) / gmwax_running_max
gmwax_max_dd = gmwax_drawdown.min()

gmgex_cumulative = (1 + gmgex_total).cumprod()
gmgex_running_max = gmgex_cumulative.expanding().max()
gmgex_drawdown = (gmgex_cumulative - gmgex_running_max) / gmgex_running_max
gmgex_max_dd = gmgex_drawdown.min()

# Calculate Alpha and Beta for both funds
# GMWAX
y_gmwax = period3['GMWAX_excess'].dropna()
x_spy = period3['SPY_excess'].dropna()
common_idx_gmwax = y_gmwax.index.intersection(x_spy.index)
y_gmwax_clean = y_gmwax.loc[common_idx_gmwax].values
x_spy_clean = x_spy.loc[common_idx_gmwax].values

n_gmwax = len(y_gmwax_clean)
X_matrix_gmwax = np.column_stack([np.ones(n_gmwax), x_spy_clean])
params_gmwax = np.linalg.lstsq(X_matrix_gmwax, y_gmwax_clean, rcond=None)[0]
gmwax_alpha = params_gmwax[0] * 12
gmwax_beta = params_gmwax[1]

# GMGEX
y_gmgex = period3['GMGEX_excess'].dropna()
common_idx_gmgex = y_gmgex.index.intersection(x_spy.index)
y_gmgex_clean = y_gmgex.loc[common_idx_gmgex].values
x_spy_clean_gmgex = x_spy.loc[common_idx_gmgex].values

n_gmgex = len(y_gmgex_clean)
X_matrix_gmgex = np.column_stack([np.ones(n_gmgex), x_spy_clean_gmgex])
params_gmgex = np.linalg.lstsq(X_matrix_gmgex, y_gmgex_clean, rcond=None)[0]
gmgex_alpha = params_gmgex[0] * 12
gmgex_beta = params_gmgex[1]

comparison_df = pd.DataFrame({
    'Fund': ['GMWAX', 'GMGEX', 'Difference'],
    'Sharpe Ratio': [round(gmwax_sharpe, 4), round(gmgex_sharpe, 4), round(gmwax_sharpe - gmgex_sharpe, 4)],
    'Max Drawdown': [round(gmwax_max_dd, 4), round(gmgex_max_dd, 4), round(gmwax_max_dd - gmgex_max_dd, 4)],
    'Alpha (annualized)': [round(gmwax_alpha, 4), round(gmgex_alpha, 4), round(gmwax_alpha - gmgex_alpha, 4)],
    'Beta': [round(gmwax_beta, 4), round(gmgex_beta, 4), round(gmwax_beta - gmgex_beta, 4)]
})

print("\n--- KEY DIFFERENCES: GMWAX vs GMGEX ---")
display(comparison_df)


--- PERFORMANCE METRICS - GMGEX ---


Unnamed: 0,Period,Mean Return (annualized),Volatility (annualized),Sharpe Ratio
0,Inception → 2011,-0.3155,0.1645,-1.9184
1,2012 → Present,-0.1593,0.2317,-0.6876
2,Inception → Present,-0.2408,0.2004,-1.2013



--- TAIL RISK METRICS - GMGEX ---


Unnamed: 0,Period,Minimum Return,VaR-5%,Maximum Drawdown
0,Inception → 2011,-0.1769,-0.0958,-0.5556
1,2012 → Present,-0.6612,-0.0846,-0.7374
2,Inception → Present,-0.6612,-0.0926,-0.7618



--- MARKET EXPOSURE - GMGEX vs SPY ---


Unnamed: 0,Period,Alpha (annualized),Beta,R²
0,Inception → 2011,-0.0908,0.8147,0.7645
1,2012 → Present,-0.1291,0.8042,0.2811
2,Inception → Present,-0.1107,0.804,0.4588



--- KEY DIFFERENCES: GMWAX vs GMGEX ---


Unnamed: 0,Fund,Sharpe Ratio,Max Drawdown,Alpha (annualized),Beta
0,GMWAX,-1.6012,-0.2936,-0.0966,0.6224
1,GMGEX,-1.2013,-0.7618,-0.1107,0.804
2,Difference,-0.3999,0.4682,0.014,-0.1816


## 3.1

In [61]:
# Prepare data for forecasting
# We predict SPY_excess at t+1 using signals at t
df_forecast = df[['date', 'SPY_excess', 'DP', 'EP', 'Yield']].copy()
df_forecast = df_forecast.dropna()

# Create lead variable (t+1)
df_forecast['SPY_excess_lead'] = df_forecast['SPY_excess'].shift(-1)

# Remove last row (no future return to predict)
df_forecast = df_forecast[:-1].copy()
df_forecast = df_forecast.dropna()

In [62]:
# Regression 1: D/P only
print("\n--- REGRESSION 1: D/P ONLY ---")
y = df_forecast['SPY_excess_lead'].values
X = df_forecast['DP'].values.reshape(-1, 1)

n = len(y)
X_with_const = np.column_stack([np.ones(n), X])

params_1 = np.linalg.lstsq(X_with_const, y, rcond=None)[0]
alpha_1 = params_1[0]
beta_dp_1 = params_1[1]

y_pred_1 = X_with_const @ params_1
ss_res_1 = np.sum((y - y_pred_1) ** 2)
ss_tot_1 = np.sum((y - y.mean()) ** 2)
r2_1 = 1 - (ss_res_1 / ss_tot_1)

print(f"  Alpha (const): {alpha_1:.6f}")
print(f"  Beta (D/P):    {beta_dp_1:.6f}")
print(f"  R²:            {r2_1:.6f}")


--- REGRESSION 1: D/P ONLY ---
  Alpha (const): -0.082401
  Beta (D/P):    3.846271
  R²:            0.103106


In [63]:
# Regression 2: E/P only
print("\n--- REGRESSION 2: E/P ONLY ---")
X = df_forecast['EP'].values.reshape(-1, 1)
X_with_const = np.column_stack([np.ones(n), X])

params_2 = np.linalg.lstsq(X_with_const, y, rcond=None)[0]
alpha_2 = params_2[0]
beta_ep_2 = params_2[1]

y_pred_2 = X_with_const @ params_2
ss_res_2 = np.sum((y - y_pred_2) ** 2)
ss_tot_2 = np.sum((y - y.mean()) ** 2)
r2_2 = 1 - (ss_res_2 / ss_tot_2)

print(f"  Alpha (const): {alpha_2:.6f}")
print(f"  Beta (E/P):    {beta_ep_2:.6f}")
print(f"  R²:            {r2_2:.6f}")


--- REGRESSION 2: E/P ONLY ---
  Alpha (const): -0.041517
  Beta (E/P):    0.523302
  R²:            0.018921


In [64]:
# Regression 3: D/P, E/P, and 10-year Yield
print("\n--- REGRESSION 3: D/P, E/P, AND 10-YEAR YIELD ---")
X = df_forecast[['DP', 'EP', 'Yield']].values
X_with_const = np.column_stack([np.ones(n), X])

params_3 = np.linalg.lstsq(X_with_const, y, rcond=None)[0]
alpha_3 = params_3[0]
beta_dp_3 = params_3[1]
beta_ep_3 = params_3[2]
beta_yield_3 = params_3[3]

y_pred_3 = X_with_const @ params_3
ss_res_3 = np.sum((y - y_pred_3) ** 2)
ss_tot_3 = np.sum((y - y.mean()) ** 2)
r2_3 = 1 - (ss_res_3 / ss_tot_3)

print(f"  Alpha (const):   {alpha_3:.6f}")
print(f"  Beta (D/P):      {beta_dp_3:.6f}")
print(f"  Beta (E/P):      {beta_ep_3:.6f}")
print(f"  Beta (10Y Yld):  {beta_yield_3:.6f}")
print(f"  R²:              {r2_3:.6f}")


--- REGRESSION 3: D/P, E/P, AND 10-YEAR YIELD ---
  Alpha (const):   -0.013337
  Beta (D/P):      1.926131
  Beta (E/P):      0.070757
  Beta (10Y Yld):  -1.083381
  R²:              0.183655


## 3.2

In [65]:
# Strategy 1: D/P only
print("\n--- STRATEGY 1: D/P ONLY ---")

# Generate forecasts using estimated parameters
X_dp = df_forecast['DP'].values.reshape(-1, 1)
X_dp_const = np.column_stack([np.ones(len(X_dp)), X_dp])
forecasts_1 = X_dp_const @ params_1

# Portfolio weight: w_t = 100 * forecast_t
weights_1 = 100 * forecasts_1

# Strategy return: r_x,t+1 = w_t * r_SPY,t+1
# Here we use the actual SPY excess returns at t+1
strategy_returns_1 = weights_1 * df_forecast['SPY_excess_lead'].values

# Performance metrics
mean_strat_1 = np.mean(strategy_returns_1) * 12 * 100  # Annualized %
vol_strat_1 = np.std(strategy_returns_1, ddof=1) * np.sqrt(12) * 100  # Annualized %
sharpe_strat_1 = (np.mean(strategy_returns_1) / np.std(strategy_returns_1, ddof=1)) * np.sqrt(12)

# Max drawdown (approximate using excess returns)
cum_strat_1 = np.cumprod(1 + strategy_returns_1 / 100)
running_max_1 = np.maximum.accumulate(cum_strat_1)
dd_strat_1 = (cum_strat_1 - running_max_1) / running_max_1
max_dd_1 = np.min(dd_strat_1) * 100

# Market regression: strategy vs SPY
y_strat_1 = strategy_returns_1
x_market = df_forecast['SPY_excess_lead'].values
X_mkt_const = np.column_stack([np.ones(len(y_strat_1)), x_market])
params_mkt_1 = np.linalg.lstsq(X_mkt_const, y_strat_1, rcond=None)[0]
alpha_mkt_1 = params_mkt_1[0] * 12 * 100  # Annualized %
beta_mkt_1 = params_mkt_1[1]

y_pred_mkt_1 = X_mkt_const @ params_mkt_1
ss_res_mkt_1 = np.sum((y_strat_1 - y_pred_mkt_1) ** 2)
ss_tot_mkt_1 = np.sum((y_strat_1 - y_strat_1.mean()) ** 2)
r2_mkt_1 = 1 - (ss_res_mkt_1 / ss_tot_mkt_1)

# Information ratio
residuals_1 = y_strat_1 - y_pred_mkt_1
tracking_error_1 = np.std(residuals_1, ddof=1) * np.sqrt(12) * 100
info_ratio_1 = alpha_mkt_1 / tracking_error_1 if tracking_error_1 > 0 else np.nan

print(f"  Mean Return (annualized):  {mean_strat_1:.4f}%")
print(f"  Volatility (annualized):   {vol_strat_1:.4f}%")
print(f"  Sharpe Ratio:              {sharpe_strat_1:.4f}")
print(f"  Max Drawdown:              {max_dd_1:.4f}%")
print(f"  Market Alpha (annualized): {alpha_mkt_1:.4f}%")
print(f"  Market Beta:               {beta_mkt_1:.4f}")
print(f"  Market R²:                 {r2_mkt_1:.4f}")
print(f"  Information Ratio:         {info_ratio_1:.4f}")


--- STRATEGY 1: D/P ONLY ---
  Mean Return (annualized):  50.5219%
  Volatility (annualized):   41.5142%
  Sharpe Ratio:              1.2170
  Max Drawdown:              -1.2995%
  Market Alpha (annualized): 30.1537%
  Market Beta:               -1.2754
  Market R²:                 0.2687
  Information Ratio:         0.8493


In [66]:
# Strategy 2: E/P only
print("\n--- STRATEGY 2: E/P ONLY ---")

X_ep = df_forecast['EP'].values.reshape(-1, 1)
X_ep_const = np.column_stack([np.ones(len(X_ep)), X_ep])
forecasts_2 = X_ep_const @ params_2

weights_2 = 100 * forecasts_2
strategy_returns_2 = weights_2 * df_forecast['SPY_excess_lead'].values

mean_strat_2 = np.mean(strategy_returns_2) * 12 * 100
vol_strat_2 = np.std(strategy_returns_2, ddof=1) * np.sqrt(12) * 100
sharpe_strat_2 = (np.mean(strategy_returns_2) / np.std(strategy_returns_2, ddof=1)) * np.sqrt(12)

cum_strat_2 = np.cumprod(1 + strategy_returns_2 / 100)
running_max_2 = np.maximum.accumulate(cum_strat_2)
dd_strat_2 = (cum_strat_2 - running_max_2) / running_max_2
max_dd_2 = np.min(dd_strat_2) * 100

y_strat_2 = strategy_returns_2
X_mkt_const = np.column_stack([np.ones(len(y_strat_2)), x_market])
params_mkt_2 = np.linalg.lstsq(X_mkt_const, y_strat_2, rcond=None)[0]
alpha_mkt_2 = params_mkt_2[0] * 12 * 100
beta_mkt_2 = params_mkt_2[1]

y_pred_mkt_2 = X_mkt_const @ params_mkt_2
ss_res_mkt_2 = np.sum((y_strat_2 - y_pred_mkt_2) ** 2)
ss_tot_mkt_2 = np.sum((y_strat_2 - y_strat_2.mean()) ** 2)
r2_mkt_2 = 1 - (ss_res_mkt_2 / ss_tot_mkt_2)

residuals_2 = y_strat_2 - y_pred_mkt_2
tracking_error_2 = np.std(residuals_2, ddof=1) * np.sqrt(12) * 100
info_ratio_2 = alpha_mkt_2 / tracking_error_2 if tracking_error_2 > 0 else np.nan

print(f"  Mean Return (annualized):  {mean_strat_2:.4f}%")
print(f"  Volatility (annualized):   {vol_strat_2:.4f}%")
print(f"  Sharpe Ratio:              {sharpe_strat_2:.4f}")
print(f"  Max Drawdown:              {max_dd_2:.4f}%")
print(f"  Market Alpha (annualized): {alpha_mkt_2:.4f}%")
print(f"  Market Beta:               {beta_mkt_2:.4f}")
print(f"  Market R²:                 {r2_mkt_2:.4f}")
print(f"  Information Ratio:         {info_ratio_2:.4f}")


--- STRATEGY 2: E/P ONLY ---
  Mean Return (annualized):  26.6259%
  Volatility (annualized):   25.0071%
  Sharpe Ratio:              1.0647
  Max Drawdown:              -1.2753%
  Market Alpha (annualized): 6.2406%
  Market Beta:               -1.2764
  Market R²:                 0.7417
  Information Ratio:         0.4910


In [67]:
# Strategy 3: Multi-factor (D/P, E/P, Yield)
print("\n--- STRATEGY 3: MULTI-FACTOR (D/P, E/P, YIELD) ---")

X_multi = df_forecast[['DP', 'EP', 'Yield']].values
X_multi_const = np.column_stack([np.ones(len(X_multi)), X_multi])
forecasts_3 = X_multi_const @ params_3

weights_3 = 100 * forecasts_3
strategy_returns_3 = weights_3 * df_forecast['SPY_excess_lead'].values

mean_strat_3 = np.mean(strategy_returns_3) * 12 * 100
vol_strat_3 = np.std(strategy_returns_3, ddof=1) * np.sqrt(12) * 100
sharpe_strat_3 = (np.mean(strategy_returns_3) / np.std(strategy_returns_3, ddof=1)) * np.sqrt(12)

cum_strat_3 = np.cumprod(1 + strategy_returns_3 / 100)
running_max_3 = np.maximum.accumulate(cum_strat_3)
dd_strat_3 = (cum_strat_3 - running_max_3) / running_max_3
max_dd_3 = np.min(dd_strat_3) * 100

y_strat_3 = strategy_returns_3
X_mkt_const = np.column_stack([np.ones(len(y_strat_3)), x_market])
params_mkt_3 = np.linalg.lstsq(X_mkt_const, y_strat_3, rcond=None)[0]
alpha_mkt_3 = params_mkt_3[0] * 12 * 100
beta_mkt_3 = params_mkt_3[1]

y_pred_mkt_3 = X_mkt_const @ params_mkt_3
ss_res_mkt_3 = np.sum((y_strat_3 - y_pred_mkt_3) ** 2)
ss_tot_mkt_3 = np.sum((y_strat_3 - y_strat_3.mean()) ** 2)
r2_mkt_3 = 1 - (ss_res_mkt_3 / ss_tot_mkt_3)

residuals_3 = y_strat_3 - y_pred_mkt_3
tracking_error_3 = np.std(residuals_3, ddof=1) * np.sqrt(12) * 100
info_ratio_3 = alpha_mkt_3 / tracking_error_3 if tracking_error_3 > 0 else np.nan

print(f"  Mean Return (annualized):  {mean_strat_3:.4f}%")
print(f"  Volatility (annualized):   {vol_strat_3:.4f}%")
print(f"  Sharpe Ratio:              {sharpe_strat_3:.4f}")
print(f"  Max Drawdown:              {max_dd_3:.4f}%")
print(f"  Market Alpha (annualized): {alpha_mkt_3:.4f}%")
print(f"  Market Beta:               {beta_mkt_3:.4f}")
print(f"  Market R²:                 {r2_mkt_3:.4f}")
print(f"  Information Ratio:         {info_ratio_3:.4f}")


--- STRATEGY 3: MULTI-FACTOR (D/P, E/P, YIELD) ---
  Mean Return (annualized):  73.3855%
  Volatility (annualized):   47.4515%
  Sharpe Ratio:              1.5465
  Max Drawdown:              -0.5813%
  Market Alpha (annualized): 49.8960%
  Market Beta:               -1.4708
  Market R²:                 0.2735
  Information Ratio:         1.2337


## 3.3

In [68]:
# VaR at π = 0.05 for strategies
print("\n--- MONTHLY VaR-5% (HISTORICAL QUANTILE) ---")

var_spy = np.quantile(df_forecast['SPY_excess_lead'], 0.05) * 100
var_gmwax = np.quantile(df['GMWAX_excess'].dropna(), 0.05) * 100
var_gmgex = np.quantile(df['GMGEX_excess'].dropna(), 0.05) * 100
var_strat_1 = np.quantile(strategy_returns_1, 0.05)
var_strat_2 = np.quantile(strategy_returns_2, 0.05)
var_strat_3 = np.quantile(strategy_returns_3, 0.05)

print(f"  SPY:                    {var_spy:.4f}%")
print(f"  GMWAX:                  {var_gmwax:.4f}%")
print(f"  GMGEX:                  {var_gmgex:.4f}%")
print(f"  Strategy 1 (D/P):       {var_strat_1:.4f}%")
print(f"  Strategy 2 (E/P):       {var_strat_2:.4f}%")
print(f"  Strategy 3 (Multi):     {var_strat_3:.4f}%")


--- MONTHLY VaR-5% (HISTORICAL QUANTILE) ---
  SPY:                    -9.8918%
  GMWAX:                  -7.7006%
  GMGEX:                  -9.2647%
  Strategy 1 (D/P):       -0.0790%
  Strategy 2 (E/P):       -0.0740%
  Strategy 3 (Multi):     -0.0656%


In [86]:
# Performance during 2000-2011 period
print("\n--- PERFORMANCE 2000-2011 (STOCKS VS BONDS) ---")
print("Question: Does the dynamic portfolio under-perform the risk-free rate over this time?")

start_2000 = pd.Timestamp('2000-01-01')
end_2011 = pd.Timestamp('2011-12-31')

# Add strategy returns to dataframe for proper comparison
df_forecast['strategy_return_1'] = strategy_returns_1
df_forecast['strategy_return_2'] = strategy_returns_2
df_forecast['strategy_return_3'] = strategy_returns_3

# Create RF_lead by taking the next period's RF
df_forecast['RF_lead'] = df['TBill 3M'].shift(-1).values[:len(df_forecast)]

# Create excess returns for strategies
df_forecast['excess_return_1'] = df_forecast['strategy_return_1'] - df_forecast['RF_lead']
df_forecast['excess_return_2'] = df_forecast['strategy_return_2'] - df_forecast['RF_lead']
df_forecast['excess_return_3'] = df_forecast['strategy_return_3'] - df_forecast['RF_lead']

# Filter for 2000-2011 period
mask_2000_2011 = (df_forecast['date'] >= start_2000) & (df_forecast['date'] <= end_2011)

# Calculate mean excess returns for 2000-2011
excess_1_2000_2011 = df_forecast.loc[mask_2000_2011, 'excess_return_1'].mean() * 12 * 100  # Annualized %
excess_2_2000_2011 = df_forecast.loc[mask_2000_2011, 'excess_return_2'].mean() * 12 * 100
excess_3_2000_2011 = df_forecast.loc[mask_2000_2011, 'excess_return_3'].mean() * 12 * 100

# Also get SPY for comparison
spy_excess_2000_2011 = df_forecast.loc[mask_2000_2011, 'SPY_excess_lead'].mean() * 12 * 100

print(f"\nAnnualized Excess Returns over Risk-Free Rate (2000-2011):")
print(f"  SPY:                           {spy_excess_2000_2011:.4f}%")
print(f"  Strategy 1 (D/P):              {excess_1_2000_2011:.4f}%")
print(f"  Strategy 2 (E/P):              {excess_2_2000_2011:.4f}%")
print(f"  Strategy 3 (Multi-factor):     {excess_3_2000_2011:.4f}%")


--- PERFORMANCE 2000-2011 (STOCKS VS BONDS) ---
Question: Does the dynamic portfolio under-perform the risk-free rate over this time?

Annualized Excess Returns over Risk-Free Rate (2000-2011):
  SPY:                           -24.3718%
  Strategy 1 (D/P):              36.7500%
  Strategy 2 (E/P):              11.3737%
  Strategy 3 (Multi-factor):     66.7320%


#### No, the dynamic portfolio does not under-perform the risk-free rate over this time as the annualized excess returns are positive for all dynamic strategies.

In [95]:
# Count how many periods each strategy beat risk-free
periods_beat_rf_1 = (df_forecast.loc[mask_2000_2011, 'excess_return_1'] > 0).sum()
periods_beat_rf_2 = (df_forecast.loc[mask_2000_2011, 'excess_return_2'] > 0).sum()
periods_beat_rf_3 = (df_forecast.loc[mask_2000_2011, 'excess_return_3'] > 0).sum()
total_periods_2000_2011 = mask_2000_2011.sum()

print(f"\nMonths beating risk-free rate (2000-2011):")
print(f"  Strategy 1 (D/P):     {periods_beat_rf_1}/{total_periods_2000_2011} ({100*periods_beat_rf_1/total_periods_2000_2011:.1f}%)")
print(f"  Strategy 2 (E/P):     {periods_beat_rf_2}/{total_periods_2000_2011} ({100*periods_beat_rf_2/total_periods_2000_2011:.1f}%)")
print(f"  Strategy 3 (Multi):   {periods_beat_rf_3}/{total_periods_2000_2011} ({100*periods_beat_rf_3/total_periods_2000_2011:.1f}%)")

# Count negative risk premium forecasts
print("\n--- NEGATIVE RISK PREMIUM FORECASTS ---")
print("Question: Based on regression estimates, in how many periods do we estimate a negative risk premium?")

num_negative_1 = np.sum(forecasts_1 < 0)
num_negative_2 = np.sum(forecasts_2 < 0)
num_negative_3 = np.sum(forecasts_3 < 0)
total_periods = len(forecasts_3)

print(f"\nPeriods with NEGATIVE forecasted risk premium:")
print(f"  Strategy 1 (D/P):     {num_negative_1}/{total_periods} periods ({100*num_negative_1/total_periods:.2f}%)")
print(f"  Strategy 2 (E/P):     {num_negative_2}/{total_periods} periods ({100*num_negative_2/total_periods:.2f}%)")
print(f"  Strategy 3 (Multi):   {num_negative_3}/{total_periods} periods ({100*num_negative_3/total_periods:.2f}%)")


Months beating risk-free rate (2000-2011):
  Strategy 1 (D/P):     71/144 (49.3%)
  Strategy 2 (E/P):     69/144 (47.9%)
  Strategy 3 (Multi):   91/144 (63.2%)

--- NEGATIVE RISK PREMIUM FORECASTS ---
Question: Based on regression estimates, in how many periods do we estimate a negative risk premium?

Periods with NEGATIVE forecasted risk premium:
  Strategy 1 (D/P):     306/346 periods (88.44%)
  Strategy 2 (E/P):     331/346 periods (95.66%)
  Strategy 3 (Multi):   230/346 periods (66.47%)


#### Yes, the dynamic strategy takes on extra risk. The volatility is significantly higher than SPY, and the strategies can take leveraged positions (weights = 100 × forecast), which amplifies both gains and losses. Additionally, the strategies can go short when forecasts are negative, adding directional risk beyond a simple buy-and-hold approach.


## 4.1

In [99]:
print("4.1 OUT-OF-SAMPLE R² CALCULATION")
print("-"*80)

# Rolling OOS forecasting procedure
start_window = 60  # Start at t=60
T = len(df_forecast)

# Storage for forecasts and errors
oos_forecasts = []
forecast_errors = []
null_errors = []
oos_dates = []

# Rolling estimation
for t in range(start_window, T - 1):
    # Training sample: observations 0 to t-1
    y_train = df_forecast['SPY_excess_lead'].iloc[:t].values
    X_train = df_forecast[['DP', 'EP']].iloc[:t].values
    
    # Add constant
    X_train_const = np.column_stack([np.ones(len(y_train)), X_train])
    
    # Estimate parameters using training data
    params_t = np.linalg.lstsq(X_train_const, y_train, rcond=None)[0]
    
    # Forecast for t+1 using signals at t
    X_t = df_forecast[['DP', 'EP']].iloc[t].values
    X_t_const = np.concatenate([[1], X_t])
    forecast_t = X_t_const @ params_t
    
    # Actual return at t+1
    actual_t = df_forecast['SPY_excess_lead'].iloc[t]
    
    # Null forecast: historical mean up to time t
    null_forecast_t = np.mean(df_forecast['SPY_excess_lead'].iloc[:t])
    
    # Forecast errors
    error_forecast = actual_t - forecast_t
    error_null = actual_t - null_forecast_t
    
    # Store results
    oos_forecasts.append(forecast_t)
    forecast_errors.append(error_forecast)
    null_errors.append(error_null)
    oos_dates.append(df_forecast['date'].iloc[t])

# Convert to arrays
oos_forecasts = np.array(oos_forecasts)
forecast_errors = np.array(forecast_errors)
null_errors = np.array(null_errors)

# Calculate OOS R²
sse_forecast = np.sum(forecast_errors ** 2)
sse_null = np.sum(null_errors ** 2)
r2_oos = 1 - (sse_forecast / sse_null)

print(f"\n--- OOS R² RESULTS ---")
print(f"  SSE (forecast model): {sse_forecast:.6f}")
print(f"  SSE (null model):     {sse_null:.6f}")
print(f"  R² OOS:               {r2_oos:.6f}")


4.1 OUT-OF-SAMPLE R² CALCULATION
--------------------------------------------------------------------------------

--- OOS R² RESULTS ---
  SSE (forecast model): 0.604962
  SSE (null model):     0.661496
  R² OOS:               0.085464


#### Yes, the forecasting produced positive R-squared

## 4.2

In [101]:
print("4.2 OUT-OF-SAMPLE TRADING STRATEGY")
print("-"*80)

# Build OOS strategy
# weights = 100 × forecast
# strategy return = weight × actual SPY return
weights_oos = 100 * oos_forecasts

# Get actual returns (t+1) corresponding to forecasts made at t
actual_returns_oos = df_forecast['SPY_excess_lead'].iloc[start_window:T-1].values

# Strategy returns
strategy_returns_oos = weights_oos * actual_returns_oos

# Performance metrics
mean_oos = np.mean(strategy_returns_oos) * 12 * 100  # Annualized %
vol_oos = np.std(strategy_returns_oos, ddof=1) * np.sqrt(12) * 100  # Annualized %
sharpe_oos = (np.mean(strategy_returns_oos) / np.std(strategy_returns_oos, ddof=1)) * np.sqrt(12)

# Max drawdown
cum_oos = np.cumprod(1 + strategy_returns_oos / 100)
running_max_oos = np.maximum.accumulate(cum_oos)
dd_oos = (cum_oos - running_max_oos) / running_max_oos
max_dd_oos = np.min(dd_oos) * 100

# Market regression
x_market_oos = actual_returns_oos
y_strat_oos = strategy_returns_oos

X_mkt_oos_const = np.column_stack([np.ones(len(y_strat_oos)), x_market_oos])
params_mkt_oos = np.linalg.lstsq(X_mkt_oos_const, y_strat_oos, rcond=None)[0]
alpha_mkt_oos = params_mkt_oos[0] * 12 * 100  # Annualized %
beta_mkt_oos = params_mkt_oos[1]

y_pred_mkt_oos = X_mkt_oos_const @ params_mkt_oos
ss_res_mkt_oos = np.sum((y_strat_oos - y_pred_mkt_oos) ** 2)
ss_tot_mkt_oos = np.sum((y_strat_oos - y_strat_oos.mean()) ** 2)
r2_mkt_oos = 1 - (ss_res_mkt_oos / ss_tot_mkt_oos)

# Information ratio
residuals_oos = y_strat_oos - y_pred_mkt_oos
tracking_error_oos = np.std(residuals_oos, ddof=1) * np.sqrt(12) * 100
info_ratio_oos = alpha_mkt_oos / tracking_error_oos if tracking_error_oos > 0 else np.nan

print(f"\n--- OOS STRATEGY PERFORMANCE ---")
print(f"  Mean Return (annualized):  {mean_oos:.4f}%")
print(f"  Volatility (annualized):   {vol_oos:.4f}%")
print(f"  Sharpe Ratio:              {sharpe_oos:.4f}")
print(f"  Max Drawdown:              {max_dd_oos:.4f}%")
print(f"  Market Alpha (annualized): {alpha_mkt_oos:.4f}%")
print(f"  Market Beta:               {beta_mkt_oos:.4f}")
print(f"  Market R²:                 {r2_mkt_oos:.4f}")
print(f"  Information Ratio:         {info_ratio_oos:.4f}")

# Compare to in-sample (using D/P and E/P only for fair comparison)
print("\n--- COMPARISON: IN-SAMPLE VS OUT-OF-SAMPLE ---")
print("(Using D/P and E/P predictors for both)")

# For fair comparison, create in-sample strategy with D/P and E/P only
X_dp_ep = df_forecast[['DP', 'EP']].values
X_dp_ep_const = np.column_stack([np.ones(len(X_dp_ep)), X_dp_ep])

# Estimate in-sample model
y_full = df_forecast['SPY_excess_lead'].values
params_is = np.linalg.lstsq(X_dp_ep_const, y_full, rcond=None)[0]

# In-sample forecasts
forecasts_is = X_dp_ep_const @ params_is
weights_is = 100 * forecasts_is
strategy_returns_is = weights_is * y_full

# In-sample metrics
mean_is = np.mean(strategy_returns_is) * 12 * 100
vol_is = np.std(strategy_returns_is, ddof=1) * np.sqrt(12) * 100
sharpe_is = (np.mean(strategy_returns_is) / np.std(strategy_returns_is, ddof=1)) * np.sqrt(12)

cum_is = np.cumprod(1 + strategy_returns_is / 100)
running_max_is = np.maximum.accumulate(cum_is)
dd_is = (cum_is - running_max_is) / running_max_is
max_dd_is = np.min(dd_is) * 100

# Market alpha for in-sample
X_mkt_is_const = np.column_stack([np.ones(len(strategy_returns_is)), y_full])
params_mkt_is = np.linalg.lstsq(X_mkt_is_const, strategy_returns_is, rcond=None)[0]
alpha_mkt_is = params_mkt_is[0] * 12 * 100

print(f"\n{'Metric':<30} {'In-Sample':<15} {'Out-of-Sample':<15} {'Difference':<15}")
print("-" * 75)
print(f"{'Mean Return (%)':<30} {mean_is:>14.4f} {mean_oos:>14.4f} {mean_oos - mean_is:>14.4f}")
print(f"{'Volatility (%)':<30} {vol_is:>14.4f} {vol_oos:>14.4f} {vol_oos - vol_is:>14.4f}")
print(f"{'Sharpe Ratio':<30} {sharpe_is:>14.4f} {sharpe_oos:>14.4f} {sharpe_oos - sharpe_is:>14.4f}")
print(f"{'Max Drawdown (%)':<30} {max_dd_is:>14.4f} {max_dd_oos:>14.4f} {max_dd_oos - max_dd_is:>14.4f}")
print(f"{'Market Alpha (%)':<30} {alpha_mkt_is:>14.4f} {alpha_mkt_oos:>14.4f} {alpha_mkt_oos - alpha_mkt_is:>14.4f}")

4.2 OUT-OF-SAMPLE TRADING STRATEGY
--------------------------------------------------------------------------------

--- OOS STRATEGY PERFORMANCE ---
  Mean Return (annualized):  21.1307%
  Volatility (annualized):   25.9470%
  Sharpe Ratio:              0.8144
  Max Drawdown:              -1.1275%
  Market Alpha (annualized): 9.1941%
  Market Beta:               -1.2504
  Market R²:                 0.5991
  Information Ratio:         0.5596

--- COMPARISON: IN-SAMPLE VS OUT-OF-SAMPLE ---
(Using D/P and E/P predictors for both)

Metric                         In-Sample       Out-of-Sample   Difference     
---------------------------------------------------------------------------
Mean Return (%)                       52.9267        21.1307       -31.7959
Volatility (%)                        42.5870        25.9470       -16.6400
Sharpe Ratio                           1.2428         0.8144        -0.4284
Max Drawdown (%)                      -1.2601        -1.1275         0.1326
Market

## 4.3

In [105]:
print("4.3 OUT-OF-SAMPLE RISK CHARACTERISTICS")
print("-"*80)

# VaR comparison
var_is = np.quantile(strategy_returns_is, 0.05)
var_oos_strat = np.quantile(strategy_returns_oos, 0.05)

print(f"\n--- VaR-5% COMPARISON ---")
print(f"  In-Sample Strategy:   {var_is:.4f}%")
print(f"  Out-of-Sample Strategy: {var_oos_strat:.4f}%")
print(f"  Difference:           {var_oos_strat - var_is:.4f}%")

if abs(var_oos_strat) > abs(var_is):
    print(f"  → OOS strategy has HIGHER tail risk")
else:
    print(f"  → OOS strategy has LOWER tail risk")
    
if vol_oos > vol_is * 1.01:
    print(f"  → OOS strategy has higher volatility")
elif vol_oos < vol_is * 0.99:
    print(f"  → OOS strategy has lower volatility")
else:
    print(f"  → OOS and in-sample strategies have SIMILAR risk")

4.3 OUT-OF-SAMPLE RISK CHARACTERISTICS
--------------------------------------------------------------------------------

--- VaR-5% COMPARISON ---
  In-Sample Strategy:   -0.0763%
  Out-of-Sample Strategy: -0.0802%
  Difference:           -0.0038%
  → OOS strategy has HIGHER tail risk
  → OOS strategy has lower volatility


In [106]:
# Count negative forecasts
num_negative_oos = np.sum(oos_forecasts < 0)
pct_negative_oos = 100 * num_negative_oos / len(oos_forecasts)

print(f"\n--- NEGATIVE FORECASTS (OOS) ---")
print(f"  Periods with negative forecast: {num_negative_oos}/{len(oos_forecasts)} ({pct_negative_oos:.2f}%)")



--- NEGATIVE FORECASTS (OOS) ---
  Periods with negative forecast: 264/285 (92.63%)


In [107]:
# Performance in 2000-2011 for OOS
oos_dates_ts = pd.to_datetime(oos_dates)
mask_2000_2011_oos = (oos_dates_ts >= start_2000) & (oos_dates_ts <= end_2011)

if np.sum(mask_2000_2011_oos) > 0:
    strat_oos_2000_2011 = strategy_returns_oos[mask_2000_2011_oos]
    mean_oos_2000_2011 = np.mean(strat_oos_2000_2011) * 12 * 100
    
    print(f"\n--- OOS PERFORMANCE 2000-2011 ---")
    print(f"  OOS Strategy (annualized): {mean_oos_2000_2011:.4f}%")
    
    if mean_oos_2000_2011 < 0:
        print(f"  → OOS strategy UNDER-PERFORMS risk-free rate")
    else:
        print(f"  → OOS strategy OUT-PERFORMS risk-free rate")


--- OOS PERFORMANCE 2000-2011 ---
  OOS Strategy (annualized): 19.6829%
  → OOS strategy OUT-PERFORMS risk-free rate


#### Yes, the point in time version of the strategy is more riskier