In [None]:

#%%
import yfinance as yf
import numpy as np
import pandas as pd
import bottleneck as bn
import datetime
from numpy.lib.stride_tricks import sliding_window_view as sliding
from scipy.stats import chi2, norm
import scipy.stats as stats
import matplotlib.pyplot as plt
import scipy.optimize as opt
import cvxpy as cp
from scipy.linalg import cholesky, inv
from tqdm.auto import tqdm
import os
import time
import itertools
# from google.colab import drive
# drive.mount('/content/drive')


ValueError: mount failed

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
#%%
# Read data
sp500 = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]
tickers = sp500['Symbol'].tolist()
sectors = sp500.set_index('Symbol')

end_date = datetime.datetime.today()
start_date = end_date - datetime.timedelta(days=3*365+1)

data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=True)
adj_close = data['Close']
adj_close.dropna(how="all", axis=1, inplace=True)
adj_ret = adj_close.pct_change().iloc[1:, :]

sector_data = {}
for sector in sectors['GICS Sector'].unique():
    tickers_in_sector = sectors[sectors['GICS Sector'] == sector].index.tolist()
    tickers_in_sector = [t for t in tickers_in_sector if t in adj_ret.columns]
    sector_data[sector] = adj_ret[tickers_in_sector]


In [None]:
data = np.var(sector_data['Financials']).max()
data

0.000750552224582452

# Benchmark

In [None]:
# #%%
# # Benchmark Model
# def benchmark(mean_vec, cov_mat, hyper_params, risk_free_rate=0.0):
#   num_assets = len(mean_vec)

#   def neg_sharpe_ratio(weights, mean_vec, cov_mat, risk_free_rate):
#       port_return = np.dot(weights, mean_vec)
#       port_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_mat, weights)))
#       sharpe_ratio = (port_return - risk_free_rate) / port_volatility
#       return -sharpe_ratio

#   cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1}]
#   bounds = [(0, 1) for _ in range(num_assets)]
#   x0 = np.ones(num_assets) / num_assets

#   result = opt.minimize(neg_sharpe_ratio, x0, args=(mean_vec, cov_mat, risk_free_rate),
#                           method='SLSQP', bounds=bounds, constraints=cons)
#   return result.x

def benchmark(mean_vec, cov_mat, hyper_params, risk_free_rate=0.0):
    num_assets = len(mean_vec)
    v = hyper_params.get("V", 0.02)
    # print(v)
    def neg_expected_return(weights, mean_vec):
        return -np.dot(weights, mean_vec)

    cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
            {'type': 'ineq', 'fun': lambda x: v - np.dot(x.T, cov_mat @ x)}]

    bounds = tuple((0, 1) for _ in range(num_assets))
    x0 = np.ones(num_assets) / num_assets

    result = opt.minimize(neg_expected_return, x0,
                          args=(mean_vec,),
                          method='SLSQP',
                          bounds=bounds,
                          constraints=cons)

    optimal_weights = result.x
    optimal_return = -result.fun

    return optimal_weights

# Revised Optimizers: mean vector&cov mat&VaR

In [None]:

# # Revised Optimizers: mean vector

# def robust_opt_one(df, mean_vec, cov_mat, hyper_params, risk_free_rate=0.0):

#   num_assets = len(mean_vec)
#   kappa = hyper_params.get("kappa", 1.0)
#   var_thres = hyper_params.get('var_thres', 0.05)

#   def neg_sharpe_ratio_robust(weights, mean_vec, cov_mat, risk_free_rate, kappa):
#     def estimation_risk_1(x, mean_vec, cov_mat, kappa):
#       return kappa * np.sqrt(x.T @ cov_mat @ x)

#     penalty = estimation_risk_1(weights, mean_vec, cov_mat, kappa)

#     port_return = np.dot(weights, mean_vec)
#     port_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_mat, weights)))
#     sharpe_ratio = (port_return - risk_free_rate) / port_volatility
#     return -sharpe_ratio + penalty

#   cons = [
#         {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
#         {'type': 'ineq', 'fun': lambda x: var_thres - worst_case_var(df, x)}
#     ]

#   # cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1}]
#   bounds = [(0, 1) for _ in range(num_assets)]
#   x0 = np.ones(num_assets) / num_assets

#   result = opt.minimize(neg_sharpe_ratio_robust, x0, args=(mean_vec, cov_mat, risk_free_rate, kappa),
#                           method='SLSQP', bounds=bounds, constraints=cons)
#   if not result.success:
#     print(result.success)
#   return result.x

# def robust_opt_two(df, mean_vec, cov_mat, hyper_params, risk_free_rate=0.0):
#   num_assets = len(mean_vec)
#   kappa = hyper_params.get("kappa", 1.0)
#   var_thres = hyper_params.get('var_thres', 0.05)

#   def neg_sharpe_ratio_robust(weights, mean_vec, cov_mat, risk_free_rate, kappa):
#     def estimation_risk_2(x, mean_vec, cov_mat, kappa):
#         e = np.ones((cov_mat.shape[0], 1))
#         D = np.eye(cov_mat.shape[0])
#         denom = (e.T @ D @ cov_mat @ D.T @ e)[0, 0]
#         adjustment = (1 / denom) * (cov_mat @ D.T @ e @ e.T @ D @ cov_mat)
#         adjusted_cov = cov_mat - adjustment
#         # print(x.T @ adjusted_cov @ x)
#         penalty = np.sqrt(max((x.T @ adjusted_cov @ x).item(), 1e-12))
#         return kappa * penalty

#     penalty = estimation_risk_2(weights, mean_vec, cov_mat, kappa)

#     port_return = np.dot(weights, mean_vec)
#     port_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_mat, weights)))
#     sharpe_ratio = (port_return - risk_free_rate) / port_volatility
#     return -sharpe_ratio + penalty

#   cons = [
#         {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
#         {'type': 'ineq', 'fun': lambda x: var_thres - worst_case_var(df, x)}
#     ]

#   # cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1}]
#   bounds = [(0, 1) for _ in range(num_assets)]
#   x0 = np.ones(num_assets) / num_assets

#   result = opt.minimize(neg_sharpe_ratio_robust, x0, args=(mean_vec, cov_mat, risk_free_rate, kappa),
#                           method='SLSQP', bounds=bounds, constraints=cons)
#   if not result.success:
#     print(result.success)

#   return result.x

# def robust_opt_three(df, mean_vec, cov_mat, hyper_params, risk_free_rate=0.0):
#   num_assets = len(mean_vec)
#   kappa = hyper_params.get("kappa", 1.0)
#   var_thres = hyper_params.get('var_thres', 0.05)

#   def neg_sharpe_ratio_robust(weights, mean_vec, cov_mat, risk_free_rate, kappa):
#     def estimation_risk_3(x, mean_vec, cov_mat, kappa):
#         L = cholesky(cov_mat, lower=True)
#         D = inv(L)
#         e = np.ones((cov_mat.shape[0], 1))
#         denom = (e.T @ D @ cov_mat @ D.T @ e)[0, 0]
#         adjustment = (1 / denom) * (cov_mat @ D.T @ e @ e.T @ D @ cov_mat)
#         adjusted_cov = cov_mat - adjustment
#         # print(x.T @ adjusted_cov @ x)
#         penalty = np.sqrt(max((x.T @ adjusted_cov @ x).item(), 1e-12))
#         return kappa * penalty

#     penalty = estimation_risk_3(weights, mean_vec, cov_mat, kappa)

#     port_return = np.dot(weights, mean_vec)
#     port_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_mat, weights)))
#     sharpe_ratio = (port_return - risk_free_rate) / port_volatility
#     return -sharpe_ratio+penalty


#   # cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1}]
#   cons = [
#         {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
#         {'type': 'ineq', 'fun': lambda x: var_thres - worst_case_var(df, x)}
#     ]

#   bounds = [(0, 1) for _ in range(num_assets)]
#   x0 = np.ones(num_assets) / num_assets

#   result = opt.minimize(neg_sharpe_ratio_robust, x0, args=(mean_vec, cov_mat, risk_free_rate, kappa),
#                           method='SLSQP', bounds = bounds, constraints=cons)

#   if not result.success:
#     print(result.success)

#   return result.x

In [None]:
def robust_opt_one(mean_vec, cov_mat, hyper_params, risk_free_rate=0.0):
    num_assets = len(mean_vec)
    kappa = hyper_params.get("kappa", 1.0)
    v = hyper_params.get("V", 0.02)
    var_thres = hyper_params.get('var_thres', 0.05)
    # print(kappa, v, var_thres)

    def objective(weights, mean_vec, cov_mat, kappa):
      def estimation_risk_1(x):
        return kappa * np.sqrt(x.T @ cov_mat @ x)

      estimation_risk1 = estimation_risk_1(weights)
      return -(np.dot(mean_vec, weights) - estimation_risk1)

    cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
            {'type': 'ineq', 'fun': lambda x: v - np.dot(x.T, cov_mat @ x)},
            {'type': 'ineq', 'fun': lambda x: var_thres - tail_risk_normal(x, mean_vec, cov_mat)}]

    bounds = tuple((0, 1) for _ in range(num_assets))
    x0 = np.ones(num_assets) / num_assets

    # iteration_counter = {'count': 0}

  #   def callbackF(xk):
  #     iteration_counter['count'] += 1
  #     if iteration_counter['count'] % 10 == 0:
  #         print(f"Iter {iteration_counter['count']}: x = {xk}")

  #   result = opt.minimize(
  #       objective, x0,
  #       args=(mean_vec, cov_mat, kappa),
  #       method='SLSQP',
  #       bounds=bounds,
  #       constraints=cons,
  #       options={'disp': False, 'maxiter': 100},
  #       callback=callbackF
  # )


    result = opt.minimize(objective, x0,
                          args=(mean_vec, cov_mat, kappa),
                          method='SLSQP',
                          bounds=bounds,
                          constraints=cons)

    optimal_weights = result.x
    return optimal_weights

def robust_opt_two(mean_vec, cov_mat, hyper_params, risk_free_rate=0.0):
    num_assets = len(mean_vec)
    kappa = hyper_params.get("kappa", 1.0)
    v = hyper_params.get("V", 0.02)
    var_thres = hyper_params.get('var_thres', 0.05)

    def objective(weights, mean_vec, cov_mat, kappa):
        e = np.ones((cov_mat.shape[0], 1))
        D = np.eye(cov_mat.shape[0])
        denom = (e.T @ D @ cov_mat @ D.T @ e)[0, 0]
        middle = cov_mat - (1 / denom) * (cov_mat @ D.T @ e @ e.T @ D @ cov_mat)
        penalty = np.sqrt(weights.T @ middle @ weights)
        return -(np.dot(mean_vec, weights) - kappa * penalty)

    cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
            {'type': 'ineq', 'fun': lambda x: v - np.dot(x.T, cov_mat @ x)},
            {'type': 'ineq', 'fun': lambda x: var_thres - tail_risk_normal(x, mean_vec, cov_mat)}]

    bounds = tuple((0, 1) for _ in range(num_assets))
    x0 = np.ones(num_assets) / num_assets

    result = opt.minimize(objective, x0,
                          args=(mean_vec, cov_mat, kappa),
                          method='SLSQP',
                          bounds=bounds,
                          constraints=cons)

    optimal_weights = result.x
    return optimal_weights


def robust_opt_three(mean_vec, cov_mat,hyper_params, risk_free_rate=0.0):
    num_assets = len(mean_vec)
    kappa = hyper_params.get("kappa", 1.0)
    v = hyper_params.get("V", 0.02)
    var_thres = hyper_params.get('var_thres', 0.05)
    # print(cov_mat)
    L = cholesky(cov_mat, lower=True)
    # print(L)
    D = inv(L)

    def objective(weights, mean_vec, cov_mat, kappa, D):
        e = np.ones((cov_mat.shape[0], 1))
        denom = (e.T @ D @ cov_mat @ D.T @ e)[0, 0]
        middle = cov_mat - (1 / denom) * (cov_mat @ D.T @ e @ e.T @ D @ cov_mat)
        penalty = np.sqrt(weights.T @ middle @ weights)
        return -(np.dot(mean_vec, weights) - kappa * penalty)

    cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
            {'type': 'ineq', 'fun': lambda x: v - np.dot(x.T, cov_mat @ x)},
            {'type': 'ineq', 'fun': lambda x: var_thres - tail_risk_normal(x, mean_vec, cov_mat)}]

    bounds = tuple((0, 1) for _ in range(num_assets))
    x0 = np.ones(num_assets) / num_assets

    result = opt.minimize(objective, x0,
                          args=(mean_vec, cov_mat, kappa, D),
                          method='SLSQP',
                          bounds=bounds,
                          constraints=cons)

    optimal_weights = result.x
    return optimal_weights


# Cov Mat

In [None]:
#%%
# Revised Optimizers: covariance matrix
def SCM(ret_data, decay_function=None):
    """
    Compute (possibly weighted) sample covariance matrix.

    Parameters:
        ret_data (pd.DataFrame): Asset returns, shape (T, N)
        decay_function (callable): Function that returns a weight vector of length T

    Returns:
        pd.DataFrame: Covariance matrix, shape (N, N)
    """

    T, N = ret_data.shape

    if decay_function is not None:
        weights = decay_function(T)  # Should return an array of length T (normalized to 1)
    else:
        weights = np.full(T, 1/T)      # Equal weights if no decay function is provided

    # Center the returns
    weighted_mean = np.average(ret_data, axis=0, weights=weights)

    demeaned = ret_data - weighted_mean  # shape (T, N)

    # Compute weighted covariance matrix
    cov_matrix = (demeaned.T * weights) @ demeaned  # shape (N, N)

    return pd.DataFrame(cov_matrix, index=ret_data.columns, columns=ret_data.columns)

# alpha设置建议: 0.2,0.4,0.6在我的训练数据上表现都差不多，其它的值没试过
def LedoitWolf(ret_data: pd.DataFrame, alpha, decay_function=None) -> np.ndarray:
    """
    Vectorized implementation of the target covariance matrix F under the single-index (market) model
    for the Ledoit-Wolf estimator, incorporating a decay function to weight time observations.

    Parameters:
        ret_data: DataFrame of shape (T, N), where each column represents a stock's returns,
                  T is the number of time periods and N is the number of stocks.
        decay_function: A callable that takes an integer T and returns an array of weights of length T.
                        If None, equal weights are used.

    Returns:
        F: ndarray of shape (N, N), the covariance matrix under the single-index model, defined as:
           Diagonal: F_ii = β_i^2 * Var(m) + Var(ε_i)
           Off-diagonal: F_ij = β_i * β_j * Var(m)
    """
    T, N = ret_data.shape

    # 1. Compute the market return: using the equal-weight average of all stocks.
    #    (This is done on each time observation)
    market = ret_data.mean(axis=1)  # Series of length T

    # Use the decay function if provided, otherwise use equal weights.
    if decay_function is not None:
        weights = decay_function(T)  # Should return an array of length T (normalized to 1)
    else:
        weights = np.full(T, 1/T)      # Equal weights if no decay function is provided

    # 2. Compute the weighted mean of the market returns using decay weights.
    m_mean = np.average(market, weights=weights)
    # Compute the weighted variance of market returns.
    m_var = np.average((market - m_mean)**2, weights=weights)

    # 3. Compute each stock's weighted mean.
    asset_means = np.average(ret_data, axis=0, weights=weights)  # Array of length N

    # 4. Demean the stock returns.
    ret_data_demeaned = ret_data - asset_means

    # 5. Vectorized computation of each stock's beta:
    #    β_i = Cov(r_i, market) / Var(market)
    #    where Cov(r_i, market) = sum_{t=1}^T weights[t]*(r_{it} - E[r_i])*(market[t] - m_mean)
    cov_with_market = np.average(ret_data_demeaned.multiply(market - m_mean, axis=0), axis=0, weights=weights)
    beta = cov_with_market / m_var  # Series of length N

    # 6. Compute each stock's intercept (alpha):
    #    α_i = E[r_i] - β_i * m_mean
    alpha = asset_means - beta * m_mean

    # 7. Compute the predicted returns and residuals in a fully vectorized manner.
    #    For each stock i: predicted[t, i] = α_i + β_i * market[t]
    predicted = market.values.reshape(-1, 1) * beta.reshape(1, -1) + alpha.reshape(1, -1)
    residuals = ret_data.values - predicted
    # Compute the weighted variance of residuals (using the provided decay weights)
    resid_var = np.array([np.average(residuals[:, i]**2, weights=weights) for i in range(N)])

    # 8. Construct the target covariance matrix F.
    #    Use the outer product to compute inter-stock covariance terms.
    #    For i != j: F_ij = β_i * β_j * Var(m)
    #    For i == j: F_ii = β_i^2 * Var(m) + Var(ε_i)
    b = beta  # Convert beta Series to ndarray with shape (N,)
    F = m_var * np.outer(b, b) + np.diag(resid_var)

    F = pd.DataFrame(F, index=ret_data.columns, columns=ret_data.columns)

    resulting_cov=alpha*F+(1-alpha)*SCM(ret_data,decay_function=decay_function)

    return resulting_cov

# ES

In [None]:
def tail_risk_normal(weights: np.array, mu: np.array, sigma: np.array,
                       significance_level: float = 0.05):
    """
    assume:
    w @ r is Normal(w.T @ mu, w.T @ sigma @ w)

    weights: shape (n,)
    mu: shape (n, 1)
    sigma: shape (n, n)

    """
    assert weights.shape[0] == mu.shape[0] == sigma.shape[0] == sigma.shape[1], "Dimension mismatch"

    n = mu.shape[0]
    mu = mu.reshape(n, 1)
    w = weights.reshape(n, 1)

    # Portfolio mean and standard deviation
    portfolio_mean = w.T @ mu
    portfolio_std = np.sqrt(w.T @ sigma @ w)
    # print(mu, sigma)

    # Value at Risk
    var = stats.norm.ppf(significance_level) * portfolio_std + portfolio_mean

    # Expected Shortfall
    es = -1 * stats.norm.pdf(stats.norm.ppf(significance_level)) / significance_level
    es = es * portfolio_std + portfolio_mean

    var = var.flatten()[0]
    es = es.flatten()[0]
    return -es

In [None]:
data = sector_data['Financials']
n = data.shape[1]
mu = np.mean(data, axis=0).values
sigma = SCM(data).values
w = np.full(n, 1/n)
tail_risk_normal(w, mu, sigma)

np.float64(0.02560708317453509)

In [None]:
# #%%
# # VaR
# def worst_case_var(returns,
#                    w: np.ndarray,
#                    confidence_level: float = 0.05,
#                    n_bootstrap: int = 100):
#     """
#     Given a fixed portfolio weight vector w, calculate the worst-case VaR
#     using bootstrapped confidence intervals.
#     """

#     # returns = np.array(returns.fillna(0))
#     n_assets = returns.shape[1]
#     T = returns.shape[0]

#     # Bootstrap sampling
#     bootstrap_means = np.zeros((n_bootstrap, n_assets))
#     bootstrap_covs = np.zeros((n_bootstrap, n_assets, n_assets))

#     for b in range(n_bootstrap):
#         indices = np.random.choice(T, size=T, replace=True)
#         bootstrap_sample = returns[indices, :]
#         bootstrap_means[b] = np.mean(bootstrap_sample, axis=0)
#         bootstrap_covs[b] = np.cov(bootstrap_sample, rowvar=False)

#     # Confidence intervals
#     alpha = confidence_level
#     mu_lower = np.percentile(bootstrap_means, alpha/2*100, axis=0)
#     mu_upper = np.percentile(bootstrap_means, (1-alpha/2)*100, axis=0)

#     Sigma_lower = np.zeros((n_assets, n_assets))
#     Sigma_upper = np.zeros((n_assets, n_assets))
#     for i in range(n_assets):
#         for j in range(n_assets):
#             Sigma_lower[i, j] = np.percentile(bootstrap_covs[:, i, j], alpha/2*100)
#             Sigma_upper[i, j] = np.percentile(bootstrap_covs[:, i, j], (1-alpha/2)*100)

#     # SDP formulation
#     epsilon = 0.05  # For 95% VaR
#     y_epsilon = np.sqrt((1-epsilon)/epsilon)

#     Q_plus = cp.Variable((n_assets, n_assets), symmetric=True)
#     Q_minus = cp.Variable((n_assets, n_assets), symmetric=True)
#     u_plus = cp.Variable(n_assets)
#     u_minus = cp.Variable(n_assets)
#     v = cp.Variable()

#     # Here: w is passed in as input
#     w_cp = cp.Parameter(n_assets)
#     w_cp.value = w
#     w_col = cp.reshape(w_cp, (n_assets, 1))

#     mat = [[Q_plus - Q_minus, w_col/2],
#            [w_col.T/2, cp.reshape(v, (1, 1))]]

#     constraints = [
#         Q_plus >> 0,
#         Q_minus >> 0,
#         u_plus >= 0,
#         u_minus >= 0,
#         cp.bmat(mat) >> 0,
#         u_minus - u_plus == w_cp
#     ]

#     objective = cp.Minimize(
#         cp.trace(Q_plus @ Sigma_upper) - cp.trace(Q_minus @ Sigma_lower)
#         + y_epsilon**2 * v
#         + u_plus.T @ mu_upper - u_minus.T @ mu_lower
#     )

#     problem = cp.Problem(objective, constraints)
#     problem.solve(solver=cp.SCS)  # 注意这里 solver 改对

#     # if problem.status == cp.OPTIMAL or problem.status == cp.OPTIMAL_INACCURATE:
#     #   print("Optimization succeeded.")
#     # else:
#     #   print(f"Optimization failed. Status: {problem.status}")

#     return problem.value

# #%%
# # # Time Decaying Functions  没什么用 直接等权得了
# # def equal_decay(data):
# #   return data

# # def exponential_decay(data, decay_param = 0.01) :
# #     T = data.shape[0]
# #     t = np.arange(T)[::-1]
# #     weights = np.exp(-decay_param * t)
# #     print(weights)
# #     weights /= weights.sum()
# #     return weights

# # def linear_decay(data):
# #     T = data.shape[0]
# #     t = np.arange(T)[::-1]
# #     weights = 1 - t / (t.max()+1)
# #     weights[weights < 0] = 0
# #     weights /= weights.sum()
# #     return weights

# # def sqrt_decay(data):
# #     T = data.shape[0]
# #     t = np.arange(T)[::-1]
# #     weights = 1 / np.sqrt(t + 1)
# #     weights /= weights.sum()
# #     return weights

In [None]:
#%%
# Others
def make_dirs(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)
    return directory

def sharpe(returns_df):
    mean_returns = returns_df.mean(axis=0)
    std_returns = returns_df.std(axis=0, ddof=1)
    sharpe_ratio = (mean_returns / std_returns) * np.sqrt(252)
    return sharpe_ratio

# Backtest

In [None]:

#%%
# Backtest

"""
Hyper params to tune:

optimizer : robust opt 1 or 3
kappa  # mean vec
alpha # cov mat
5%var_thres  # worst case VaR
n_lbw: default is 100  也可以不调整
"""

def calculate_portf_weights(lbw_data_, tomorrow_ret, optimizer,
                            hyper_params, lbw = 100):

    # mean vec
    lbw_data = lbw_data_.copy()
    mean_vec_esti = np.mean(lbw_data, axis=0)
    num_assets = lbw_data.shape[1]
    alpha = hyper_params.get("alpha", 0.02)
    kappa = hyper_params.get("kappa", 2)
    var_thres = hyper_params.get("var_thres", 0.05)
    V = hyper_params.get("V", 0.0009)
    # print(alpha, kappa, var_thres, V)

    # cov mat
    if optimizer.__name__ == 'benchmark':
      # print('hh')
      cov_mat = (SCM(pd.DataFrame(lbw_data))).values

    if lbw<=int(num_assets*1.5):
      cov_mat = (LedoitWolf(pd.DataFrame(lbw_data), alpha=alpha)).values
    else:
      cov_mat = (SCM(pd.DataFrame(lbw_data))).values


    # print(cov_mat)
    portf_weights = optimizer(mean_vec_esti, cov_mat, {'kappa':kappa, 'var_thres':var_thres, "V":V})

    # print("opt weight: \n w:{}".format(np.round(opt_w,3)))

    portf_ret=portf_weights.dot(tomorrow_ret)  # actual

    return portf_weights, portf_ret



def backtest_single(ret_data, optimizer, hyper_params, lbw = 100):   # Backtest for a single sector, no parameter tuning..

    current_param_ret=[]

    for row_i in tqdm(range(lbw, ret_data.shape[0]),desc="iterating dates",leave=False):
        # row_i is the tomorrow of the last day of the look-back window
        lbw_data=ret_data[row_i-lbw:row_i,:]

        tomorrow_ret=ret_data[row_i,:]

        portf_weights, portf_ret = calculate_portf_weights(lbw_data, tomorrow_ret,
                            optimizer, hyper_params)

        current_param_ret.append(portf_ret)

    return current_param_ret

def backtest_single_eq(ret_data, optimizer, hyper_params, lbw = 100):   # Backtest for a single sector, no parameter tuning..

    current_param_ret=[]

    for row_i in tqdm(range(lbw, ret_data.shape[0]),desc="iterating dates",leave=False):
        # row_i is the tomorrow of the last day of the look-back window
        lbw_data=ret_data[row_i-lbw:row_i,:]

        tomorrow_ret=ret_data[row_i,:]

        portf_ret = np.nanmean(tomorrow_ret)
        print(portf_ret)

        current_param_ret.append(portf_ret)

    return current_param_ret

#%%
# def backtest_single_combs(return_data, optimizer, hyper_params_grid, lbw = 100, output_dir:str="./output"):

#     # # Backtest with paramter tuning..

#     # # Construct the output file path using the optimizer's name and save the DataFrame as a CSV file
#     # complete_output_path = os.path.join(output_dir, f"{optimizer.__name__}.csv")

#     # if os.path.exists(complete_output_path):
#     #     print(f"Backtest result {complete_output_path} already exists, skip computation.")
#     #     strat_rets=pd.read_csv(complete_output_path,index_col=0)
#     #     strat_rets.index=pd.to_datetime(strat_rets.index)

#     # else:
#     #     print("Backtest result doesn't exist, start computing...")

#     return_data = return_data.fillna(0)
#     dates=pd.to_datetime(list(return_data.index)[lbw:]) # get dates, starting from the first date that has a complete look-back window

#     # strat_rets=pd.DataFrame(columns=hyper_params_grid,index=dates) # to store strategy returns
#     strat_rets = pd.DataFrame(columns=[str(x) for x in hyper_params_grid], index=dates)
#     ret_data = return_data.values

#     for hyper_params in tqdm(hyper_params_grid,desc="iterating hyper parameters"):
#         # print(hyper_params)
#         cur_param_ret = backtest_single(ret_data, optimizer, hyper_params, lbw)
#         cur_param_name = str(hyper_params)
#         strat_rets[cur_param_name] = cur_param_ret

#     sharps = sharpe(strat_rets)
#     best_param = sharps.idxmax()

#     # # Ensure the output directory exists; create it if it does not
#     #     os.makedirs(output_dir)

#     # strat_rets.to_csv(complete_output_path)
#     # print("{} successfully saved!".format(complete_output_path))

#     return strat_rets, best_param

def backtest_single_combs(return_data, sect, optimizer, hyper_params_grid, lbw=100,
                           output_dir="/content/drive/MyDrive/backtest_results"):
    """
    Run backtests for each hyperparameter combination and save results permanently to Google Drive.
    """
    print("🔁 Starting backtests with parameter tuning...")

    return_data = return_data.fillna(0)
    dates = pd.to_datetime(list(return_data.index)[lbw:])
    strat_rets = pd.DataFrame(columns=[str(x) for x in hyper_params_grid], index=dates)

    ret_data = return_data.values

    for hyper_params in tqdm(hyper_params_grid, desc="🔧 Iterating hyperparameters"):
        cur_param_ret = backtest_single(ret_data, optimizer, hyper_params, lbw)
        cur_param_name = str(hyper_params)
        strat_rets[cur_param_name] = cur_param_ret

    sharps = sharpe(strat_rets)
    best_param = sharps.idxmax()

    os.makedirs(output_dir, exist_ok=True)

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    complete_output_path = os.path.join(output_dir, f"{sect}_{optimizer.__name__}_{timestamp}.csv")

    #strat_rets.to_csv(complete_output_path)

    return strat_rets, best_param

In [None]:
#%%
data = sector_data['Financials']
kappas = [0.5, 1, 2, 3]
alphas = [0.4, 0.6]
var_thress = [0.01, 0.015]

param_list = [{'kappa': k, 'alpha': a, 'var_thres': v, 'V':0.0008} for k, a, v in itertools.product(kappas, alphas, var_thress )]
#%%
strat_rets, best_p = backtest_single_combs(data, 'Financials', robust_opt_one, param_list, 100)

🔁 Starting backtests with parameter tuning...


🔧 Iterating hyperparameters:   0%|          | 0/16 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

In [None]:
strat_rets

Unnamed: 0,"{'kappa': 0.5, 'alpha': 0.4, 'var_thres': 0.01, 'V': 0.0008}","{'kappa': 0.5, 'alpha': 0.4, 'var_thres': 0.015, 'V': 0.0008}","{'kappa': 0.5, 'alpha': 0.6, 'var_thres': 0.01, 'V': 0.0008}","{'kappa': 0.5, 'alpha': 0.6, 'var_thres': 0.015, 'V': 0.0008}","{'kappa': 1, 'alpha': 0.4, 'var_thres': 0.01, 'V': 0.0008}","{'kappa': 1, 'alpha': 0.4, 'var_thres': 0.015, 'V': 0.0008}","{'kappa': 1, 'alpha': 0.6, 'var_thres': 0.01, 'V': 0.0008}","{'kappa': 1, 'alpha': 0.6, 'var_thres': 0.015, 'V': 0.0008}","{'kappa': 2, 'alpha': 0.4, 'var_thres': 0.01, 'V': 0.0008}","{'kappa': 2, 'alpha': 0.4, 'var_thres': 0.015, 'V': 0.0008}","{'kappa': 2, 'alpha': 0.6, 'var_thres': 0.01, 'V': 0.0008}","{'kappa': 2, 'alpha': 0.6, 'var_thres': 0.015, 'V': 0.0008}","{'kappa': 3, 'alpha': 0.4, 'var_thres': 0.01, 'V': 0.0008}","{'kappa': 3, 'alpha': 0.4, 'var_thres': 0.015, 'V': 0.0008}","{'kappa': 3, 'alpha': 0.6, 'var_thres': 0.01, 'V': 0.0008}","{'kappa': 3, 'alpha': 0.6, 'var_thres': 0.015, 'V': 0.0008}"
2022-09-27,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030,-0.002030
2022-09-28,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197,0.013197
2022-09-29,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287,0.000287
2022-09-30,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111,-0.002111
2022-10-03,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777,0.021777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-28,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636,0.003636
2025-04-29,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482,0.017482
2025-04-30,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904,0.009904
2025-05-01,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321,0.002321


In [None]:
sharpe(strat_rets)

Unnamed: 0,0
"{'kappa': 0.5, 'alpha': 0.4, 'var_thres': 0.01, 'V': 0.0008}",1.569199
"{'kappa': 0.5, 'alpha': 0.4, 'var_thres': 0.015, 'V': 0.0008}",1.289908
"{'kappa': 0.5, 'alpha': 0.6, 'var_thres': 0.01, 'V': 0.0008}",1.569199
"{'kappa': 0.5, 'alpha': 0.6, 'var_thres': 0.015, 'V': 0.0008}",1.289908
"{'kappa': 1, 'alpha': 0.4, 'var_thres': 0.01, 'V': 0.0008}",1.610558
"{'kappa': 1, 'alpha': 0.4, 'var_thres': 0.015, 'V': 0.0008}",1.523449
"{'kappa': 1, 'alpha': 0.6, 'var_thres': 0.01, 'V': 0.0008}",1.610558
"{'kappa': 1, 'alpha': 0.6, 'var_thres': 0.015, 'V': 0.0008}",1.523449
"{'kappa': 2, 'alpha': 0.4, 'var_thres': 0.01, 'V': 0.0008}",1.613306
"{'kappa': 2, 'alpha': 0.4, 'var_thres': 0.015, 'V': 0.0008}",1.609281


In [None]:
output_dir="/content/drive/MyDrive/backtest_results"
optimizer = robust_opt_one
sect = 'Financials'
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
complete_output_path = os.path.join(output_dir, f"{sect}_{optimizer.__name__}_{timestamp}.csv")
# sharpe(strat_rets).to_csv(complete_output_path)

In [None]:
data = sector_data['Financials']

In [None]:
strat_rets_b, best_p_b = backtest_single_combs(data, 'Financials', benchmark, [{'V':0.0008}], 100)

🔁 Starting backtests with parameter tuning...


🔧 Iterating hyperparameters:   0%|          | 0/1 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/653 [00:00<?, ?it/s]

In [None]:
sharpe(strat_rets_b)

Unnamed: 0,0
{'V': 0.0008},-0.182546


In [None]:
strat_rets_e, best_p_e = backtest_single_combs(data, 'Financials', benchmark, [{'V':0.0008}], 100)

🔁 Starting backtests with parameter tuning...


🔧 Iterating hyperparameters:   0%|          | 0/1 [00:00<?, ?it/s]

iterating dates:   0%|          | 0/652 [00:00<?, ?it/s]

-0.003176526022795135
0.01927606331719249
-0.010809847876972676
-0.009638923578095034
0.029190865937165727
0.03935120988546296
-0.004402405221545368
-0.012616746082532508
-0.02307584839921314
-0.0030105327128383003
-0.010827452338206901
-0.005126662594855948
0.0361363680953838
-0.022250665422298685
0.02415820058486753
0.01372588955363246
-0.015249092305495161
-0.018953547865908287
0.026852594552362417
0.015438574785588093
0.014555981208329873
0.0027820328016444056
0.008080458903662761
0.027691320855418398
-0.006582770135417393
0.002603442918130091
-0.015495014811287948
-0.016789780984114894
0.018969658178280693
0.01047838184878114
0.007477732767594404
-0.018630225599579137
0.056070341128652376
0.005398678266278113
-0.017976527564520298
0.0038049398309625204
-0.005341560881231337
-0.006582887559670437
0.007673559268934129
0.002815869824205958
0.010381272690071906
0.00391336207843114
0.005255129530325102
-0.018466394491927853
0.005387662657208107
0.02219886728125405
-0.002647266511829267

In [None]:
strat_rets_e

Unnamed: 0,{'V': 0.0008}
2022-09-27,-0.003177
2022-09-28,0.019276
2022-09-29,-0.010810
2022-09-30,-0.009639
2022-10-03,0.029191
...,...
2025-04-28,0.004447
2025-04-29,0.009431
2025-04-30,0.000327
2025-05-01,-0.002304


In [None]:
sharpe(strat_rets_e)

Unnamed: 0,0
{'V': 0.0008},1.127366
