In [5]:
import numpy as np
import pandas as pd
import yfinance as yf
import statsmodels.api as sm
import cvxpy as cp
from datetime import date, timedelta
from scipy.optimize import minimize

### Step 1: instantiate returns and Fama French data

In [None]:
# Universe of ETFs used throughout the notebook.
etfs = ['FXE', 'EWJ', 'GLD', 'QQQ', 'SPY', 'SHV', 'DBA', 'USO', 'XBI', 'ILF', 'EPP', 'FEZ']

# auto_adjust is passed explicitly so that 'Close' keeps meaning "adjusted close"
# regardless of the yfinance default (the library reports that the default changed).
etf_data = yf.download(etfs, start='2006-03-01', end='2024-12-31', auto_adjust=True)['Close']

# Daily simple returns; rows where any ETF has no return are dropped.
etf_returns = etf_data.pct_change().dropna()

# Fama-French daily factors. Rows containing missing values are dropped
# (presumably this removes a non-data footer row -- TODO confirm against the file).
ff_factors = pd.read_csv('F-F_Research_Data_Factors_daily.CSV', skiprows=3, index_col=0).dropna()

# The raw index holds YYYYMMDD labels, so slicing it with '2006-03-01' does not
# cut at the intended date. Parse it to real dates before filtering.
ff_factors.index = pd.to_datetime(ff_factors.index.astype(str).str.strip(), format='%Y%m%d')
ff_factors = ff_factors.sort_index().loc['2006-03-01':]
ff_factors.head()

Failed to get ticker 'EWJ' reason: HTTPSConnectionPool(host='fc.yahoo.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)')))


YF.download() has changed argument auto_adjust default to True


Failed to get ticker 'SPY' reason: HTTPSConnectionPool(host='fc.yahoo.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)')))
[                       0%                       ]Failed to get ticker 'FEZ' reason: HTTPSConnectionPool(host='fc.yahoo.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)')))
[********              17%                       ]  2 of 12 completedFailed to get ticker 'QQQ' reason: HTTPSConnectionPool(host='fc.yahoo.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)')))
[****


$$
\text{(Strategy I)}
\begin{cases}
\underset{\omega\in\mathbb{R}^n}{\max\;}\rho^T \omega-\lambda\sqrt{\omega^T \Sigma\omega}\\
-0.5\leq\sum_{i=1}^{n}\beta_i^m\omega_i\leq 0.5\\
\sum_{i=1}^{n}\omega_i=1,\; -2\leq\omega_i\leq 2,
\end{cases}
$$

In [None]:
def strategy_I(expected_returns, cov_matrix, betas, beta_constraints, lambd):
    """
    Solve Strategy I: maximise expected return minus a volatility penalty.

    The problem solved is

        max_w   mu' w - lambd * sqrt(w' Sigma w)
        s.t.    beta_min <= beta' w <= beta_max
                sum(w) == 1,   -2 <= w_i <= 2

    Parameters:
    - expected_returns: Expected return of each asset (array-like with n values)
    - cov_matrix: Covariance matrix of asset returns (n x n array-like)
    - betas: Market beta of each asset (array-like with n values; a Series, a
      one-column DataFrame or a plain array are all accepted)
    - beta_constraints: Pair (min_beta, max_beta) bounding the portfolio beta
    - lambd: Risk aversion parameter multiplying portfolio volatility; it must
      be non-negative for the problem to be convex

    Returns:
    - Optimal portfolio weights (1D numpy array), or None when the solver does
      not produce a solution (for example when the constraints are infeasible)

    Raises:
    - ValueError: if the inputs do not share the same number of assets
    """
    # Flatten to plain 1-D float arrays so that (n,), (n, 1) and (1, n) inputs
    # all behave the same way inside the optimisation.
    mu = np.asarray(expected_returns, dtype=float).ravel()
    beta_vec = np.asarray(betas, dtype=float).ravel()
    sigma = np.asarray(cov_matrix, dtype=float)
    n = mu.size
    if sigma.shape != (n, n) or beta_vec.size != n:
        raise ValueError(
            f"Inconsistent input sizes: {n} expected returns, "
            f"{beta_vec.size} betas, covariance of shape {sigma.shape}"
        )

    # Ensure covariance matrix is symmetric for numerical stability
    sigma = (sigma + sigma.T) / 2

    # sqrt(quad_form(w, Sigma)) is not DCP-compliant, so cvxpy rejects it.
    # Write the volatility as the Euclidean norm ||M w|| with M'M = Sigma, which
    # is convex by construction. M comes from the eigen-decomposition, with
    # tiny negative eigenvalues (round-off) clipped to zero.
    eigvals, eigvecs = np.linalg.eigh(sigma)
    sqrt_factor = np.sqrt(np.clip(eigvals, 0.0, None))[:, None] * eigvecs.T

    w = cp.Variable(n)
    portfolio_return = mu @ w
    portfolio_vol = cp.norm(sqrt_factor @ w, 2)
    portfolio_beta = beta_vec @ w

    constraints = [
        cp.sum(w) == 1,                          # fully invested
        portfolio_beta >= beta_constraints[0],   # minimum portfolio beta
        portfolio_beta <= beta_constraints[1],   # maximum portfolio beta
        w >= -2,                                 # per-asset lower bound
        w <= 2,                                  # per-asset upper bound
    ]

    # Objective: maximize risk-adjusted return (using standard deviation)
    objective = cp.Maximize(portfolio_return - lambd * portfolio_vol)
    prob = cp.Problem(objective, constraints)
    prob.solve()
    return w.value


$$
\text{(Strategy II)}
\begin{cases}
\underset{\omega\in\mathbb{R}^n}{\max\;}\frac{\rho^T \omega - r_{SPY}}{TEV(\omega)} -\lambda\sqrt{\omega^T \Sigma\omega}\\
-2\leq\sum_{i=1}^{n}\beta_i^m\omega_i\leq 2\\
\sum_{i=1}^{n}\omega_i=1,\; -2\leq\omega_i\leq 2,
\end{cases}
$$
$$
TEV(\omega) = \sqrt{\omega^T\Sigma\omega - 2\omega^T \operatorname{cov}(r,r_{SPY})+ \sigma^2_{SPY}}
$$

In [None]:
def TEV(weights, returns_data, benchmark_returns):
    """
    Tracking error volatility of a weighted portfolio against a benchmark.

    The per-period active return is the portfolio return
    (returns_data @ weights) minus benchmark_returns; the result is the
    square root of np.var of that active-return series.
    """
    active_returns = (returns_data @ weights) - benchmark_returns
    active_variance = np.var(active_returns)
    return np.sqrt(active_variance)

def portRisk(weights, covar):
    """Portfolio volatility: the square root of weights' @ covar @ weights."""
    variance = weights.T @ covar @ weights
    return np.sqrt(variance)

def strategy_II(expected_returns, returns_data, betas, beta_constraints, lambd, benchmark_returns, covar):
    """
    Optimize portfolio weights using the Strategy II approach.

    Maximises (expected excess return over SPY) / TEV minus lambd times the
    portfolio volatility, subject to full investment, a portfolio-beta range
    and per-asset weights in [-2, 2].

    Parameters:
    - expected_returns: Expected returns for each asset; must support label
      lookup of 'SPY' (e.g. a pandas Series indexed by ticker)
    - returns_data: Historical returns data (periods x assets)
    - betas: calculated betas of each ETF (Series, one-column DataFrame or array)
    - beta_constraints: Tuple of (min_beta, max_beta)
    - lambd: Risk aversion parameter
    - benchmark_returns: Returns of benchmark portfolio
    - covar: covariance matrix (assets x assets)

    Returns:
    - Weights found by the optimizer (1D numpy array). This is the final
      iterate of scipy's minimize; convergence is not checked here.
    """
    # Flatten to 1-D float arrays so (n,), (n, 1) and (1, n) inputs all work;
    # a one-column DataFrame of betas would otherwise not broadcast against
    # the weight vector.
    mu = np.asarray(expected_returns, dtype=float).ravel()
    beta_values = np.asarray(betas, dtype=float).ravel()
    sigma = np.asarray(covar, dtype=float)
    n = mu.size
    benchmark_expected = float(expected_returns['SPY'])

    # Floor for the tracking error so the ratio stays finite when the candidate
    # portfolio reproduces the benchmark exactly.
    min_tracking_vol = 1e-10

    def objective(weights):
        excess_return = mu @ weights - benchmark_expected
        tracking_vol = max(TEV(weights, returns_data, benchmark_returns), min_tracking_vol)
        portfolio_risk = portRisk(weights, sigma)
        # minimize() minimises, so negate the quantity to be maximised
        return -(excess_return / tracking_vol - lambd * portfolio_risk)

    # Per-asset limits are enforced through `bounds` below, so only the
    # portfolio-level conditions are expressed as constraints.
    constraints = [
        {'type': 'eq', 'fun': lambda w: np.sum(w) - 1},  # Full investment
        {'type': 'ineq', 'fun': lambda w: beta_constraints[1] - beta_values @ w},  # Max beta
        {'type': 'ineq', 'fun': lambda w: beta_values @ w - beta_constraints[0]},  # Min beta
    ]

    # Set bounds and initial guess (equal weights)
    bounds = [(-2, 2) for _ in range(n)]
    initial_weights = np.ones(n) / n

    # SLSQP is what minimize() selects when constraints are given; naming it
    # makes the choice explicit.
    result = minimize(objective, initial_weights, method='SLSQP',
                      bounds=bounds, constraints=constraints)
    return result.x

In [None]:

def getRetCovEst(retLag, covLag, date):
    """
    Estimate covariance, market betas and expected returns as of `date`.

    Uses the notebook-level `etf_data` (prices) and `ff_factors`
    (Fama-French factors).

    Parameters:
    - retLag: number of most recent return observations (dated on or before
      `date`) used for the factor regression and the betas
    - covLag: number of most recent return observations (dated on or before
      `date`) used for the covariance matrix
    - date: as-of date (anything usable as a label on the returns' date index)

    Returns a tuple (cov, betas, return_est, factorRets):
    - cov: covariance matrix of ETF returns (DataFrame, tickers x tickers)
    - betas: DataFrame indexed by ticker with one float column 'Beta',
      computed as cov(ETF, SPY) / var(SPY)
    - return_est: Series 'ExpectedReturn' indexed by ticker, the three-factor
      estimate RF + sum of (factor loading * average factor value)
    - factorRets: the ETF returns used for the regression, restricted to the
      dates that also have factor data

    Raises:
    - ValueError: if no date has both ETF returns and factor data
    - KeyError: if 'SPY' is not among the ETF return columns
    """
    factor_cols = ['Mkt-RF', 'SMB', 'HML']

    etf_returns = etf_data.pct_change().dropna()
    # Dates live in the index of the price data (there is no 'Date' column),
    # so the history up to `date` is selected with a label slice.
    history = etf_returns.sort_index().loc[:date]

    # Windows are defined in number of observations, not calendar days.
    cov = history.tail(covLag).cov()
    factorRets = history.tail(retLag)

    tfactors = ff_factors.copy()
    # Convert ff_factors index to datetime if it's not already
    if not isinstance(tfactors.index, pd.DatetimeIndex):
        tfactors.index = pd.to_datetime(tfactors.index.astype(str).str.strip(), format='%Y%m%d')
    # The factor file is quoted in percent (the single-factor betas printed in
    # this notebook are ~0.01 for SPY), while ETF returns are decimals.
    # Rescale so RF and the factor premia share the units of the returns.
    tfactors = tfactors[factor_cols + ['RF']].astype(float) / 100.0

    # Keep only dates present in both data sets
    tickers = list(factorRets.columns)
    merged_data = pd.concat([factorRets, tfactors], axis=1, join='inner').dropna()
    if merged_data.empty:
        raise ValueError("No overlapping ETF return and factor data on or before the given date.")
    factorRets = merged_data[tickers]

    # Three-factor regression for each ETF
    X = sm.add_constant(merged_data[factor_cols])
    factor_means = merged_data[factor_cols].mean()
    rf_mean = merged_data['RF'].mean()

    return_est = pd.Series(index=tickers, dtype=float, name='ExpectedReturn')
    for etf in tickers:
        model = sm.OLS(merged_data[etf], X).fit()
        loadings = model.params[factor_cols]
        # Expected return = risk-free rate + sum of loading * average factor value
        return_est[etf] = rf_mean + (loadings * factor_means).sum()

    # Market beta of every ETF against SPY over the same window
    if 'SPY' not in factorRets.columns:
        raise KeyError("'SPY' is required among the ETF returns to compute betas.")
    spy_variance = factorRets['SPY'].var()
    betas = (factorRets.cov()['SPY'] / spy_variance).to_frame('Beta')

    return cov, betas, return_est, factorRets

In [20]:

def getRebalDates(ret_dates, min, max):
    """
    Build the list of rebalance dates between `min` and `max`.

    The start date `min` is always the first rebalance date. After that, each
    Monday from the first Monday on or after `min` up to `max` contributes the
    first available date in `ret_dates` that falls on or after that Monday, so
    a Monday without data rolls forward to the next date that has data.

    Parameters:
    - ret_dates: iterable of dates for which returns are available
    - min: first date of the period (an object with weekday(), e.g. a date)
    - max: last date of the period (inclusive)

    Returns:
    - List of rebalance dates in ascending order, without duplicates

    Note: the parameter names `min` and `max` hide the builtins of the same
    name inside this function, so the builtins are deliberately not used here.
    """
    from bisect import bisect_left

    available = sorted(ret_dates)

    # Advance to the first Monday on or after the start date (Monday is 0)
    current_monday = min
    while current_monday.weekday() != 0:
        current_monday += timedelta(days=1)

    # The period always starts with a rebalance on the start date itself
    rebalDates = [min]
    while current_monday <= max:
        # First available date on or after this Monday, if there is one
        pos = bisect_left(available, current_monday)
        if pos < len(available):
            candidate = available[pos]
            if candidate <= max and candidate not in rebalDates:
                rebalDates.append(candidate)
        current_monday += timedelta(days=7)

    return rebalDates



In [None]:
def runBacktest(startdt, enddt, ret_est_per, cov_est_per, lambda_val):
    """
    Compute Strategy I and Strategy II weights on every rebalance date.

    Parameters:
    - startdt, enddt: first and last date of the backtest (date strings or
      timestamps, both inclusive)
    - ret_est_per: number of observations used for expected returns and betas
    - cov_est_per: number of observations used for the covariance matrix
    - lambda_val: risk aversion parameter passed to both strategies

    Returns:
    - (strat1results, strat2results): two DataFrames indexed by rebalance date
      with one column per ETF holding the portfolio weights. A row is all-NaN
      when Strategy I returns no solution for that date.
    """
    start, end = pd.Timestamp(startdt), pd.Timestamp(enddt)

    # Dates are the index of the returns frame (there is no 'Date' column)
    trading_days = etf_returns.sort_index().loc[start:end].index

    # sorted() returns the ordered list; list.sort() sorts in place and returns None
    rebalDates = sorted(getRebalDates(list(trading_days), start, end))

    strat1_rows, strat2_rows = {}, {}
    for d in rebalDates:
        # get covar, ret, betas for the given estimate periods
        cov, betas, retest, inputRets = getRetCovEst(ret_est_per, cov_est_per, d)
        s1 = strategy_I(retest, cov, betas, [-0.5, 0.5], lambda_val)
        s2 = strategy_II(retest, inputRets, betas, [-2, 2], lambda_val, inputRets['SPY'], cov)

        # Weights come back in the column order of the estimation inputs, which
        # need not match the order of the `etfs` list, so label them from `cov`.
        tickers = list(cov.columns)
        strat1_rows[d] = pd.Series(s1, index=tickers, dtype=float)
        strat2_rows[d] = pd.Series(s2, index=tickers, dtype=float)

    strat1results = pd.DataFrame.from_dict(strat1_rows, orient='index')
    strat2results = pd.DataFrame.from_dict(strat2_rows, orient='index')
    return strat1results, strat2results

""" for performance analysis
convert all returns to 1-based (gross) returns
for each rebal date d:
- get weights on d
- multiply weights * returns
- sum weighted returns as the daily portfolio return
- drift stock weights
    - new weight = weight * ret / portfolio ret
- repeat for each day until the next rebal day
- repeat for each rebal day
"""

### running the backtest here

In [None]:
# Smoke test: run the backtest from 2007-03-01 to 2007-04-01 with
# ret_est_per=40, cov_est_per=60 and lambda_val=1.
# NOTE: untested. The betas and expected-return frames may need reshaping before they can be
# multiplied inside the optimizations (they are believed to be 1xN at the moment),
# e.g. by passing betas.T to the optimization functions.
t = runBacktest('2007-03-01', '2007-04-01',40,60,1)

### deprecated

In [17]:
# NOTE: deprecated exploratory cell. It reads `merged_data` and
# `market_risk_premiums`, which are created by the alignment cell further down
# in this notebook, so that cell has to be run before this one.

# Run regression for each ETF using only the market factor
X = sm.add_constant(merged_data['Mkt-RF'])
avg_market_return = merged_data['Mkt-RF'].mean()
betas = pd.DataFrame(index=etf_returns.columns, columns=['Beta'])

# Each model is fitted once; its market loading feeds both the beta table and
# the market risk premium (beta * average market excess return).
for etf in etf_returns.columns:
    y = merged_data[etf]
    model = sm.OLS(y, X).fit()
    beta = model.params['Mkt-RF']
    betas.loc[etf, 'Beta'] = beta
    market_risk_premium = beta * avg_market_return
    market_risk_premiums.loc[etf, 'Market Risk Premium'] = market_risk_premium

# Display results
# NOTE(review): the printed values below are not actually rounded to 4 decimals
# (see the cell output) -- presumably because the frames hold object columns; confirm.
print("Market Risk Premiums for each ETF:")
print(market_risk_premiums.round(4))

# Display the betas and average market return for reference
print("\nMarket Betas:")
print(betas.round(4))

print("\nAverage Market Return (Mkt-RF):")
print(f"{merged_data['Mkt-RF'].mean():.4f}")


Market Risk Premiums for each ETF:
       Market Risk Premium
Ticker                    
DBA               0.000102
EPP               0.000452
EWJ               0.000332
FEZ                0.00048
FXE               0.000047
GLD               0.000019
ILF               0.000537
QQQ               0.000439
SHV              -0.000001
SPY               0.000417
USO               0.000294
XBI                0.00045

Market Betas:
            Beta
Ticker          
DBA     0.002379
EPP     0.010523
EWJ     0.007733
FEZ     0.011182
FXE     0.001094
GLD     0.000448
ILF     0.012523
QQQ     0.010224
SHV    -0.000031
SPY     0.009721
USO     0.006845
XBI     0.010491

Average Market Return (Mkt-RF):
0.0429


In [None]:

# Deprecated alignment cell: rebuilds the notebook-level `etf_returns`,
# `ff_factors`, `merged_data` and `market_risk_premiums` objects.
# NOTE(review): this reassigns `etf_returns` and `ff_factors`, so any cell run
# afterwards sees the date-aligned versions rather than the originally loaded ones.

# Daily returns from prices; rows containing any missing value are dropped
etf_returns = etf_data.pct_change().dropna()

# Convert ff_factors index to datetime if it's not already
# (labels are parsed with the YYYYMMDD format)
ff_factors.index = pd.to_datetime(ff_factors.index, format='%Y%m%d')

# Align the data: keep only the dates that survive in both frames
etf_returns = etf_returns.reindex(ff_factors.index).dropna()
ff_factors = ff_factors.reindex(etf_returns.index).dropna()

# Create merged data: ETF return columns followed by the factor columns
merged_data = pd.concat([etf_returns, ff_factors], axis=1)

# Verify we have data
if len(merged_data) == 0:
    raise ValueError("No data after merging. Check date ranges and data alignment.")

# Empty table (one row per ETF) that the market-factor regression cell fills in
market_risk_premiums = pd.DataFrame(index=etf_returns.columns, columns=['Market Risk Premium'])
