#### 1. Load Packages

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon
from joblib import Parallel, delayed
import multiprocessing

# Hidden Markov Model utilities
from hmmlearn.hmm import GaussianHMM
from sklearn.cluster import KMeans

# PyPortfolioOpt
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns
from pypfopt.black_litterman import BlackLittermanModel

# Sparse Jump Model utilities
from jumpmodels.sparse_jump import SparseJumpModel
from jumpmodels.preprocess import StandardScalerPD, DataClipperStd

#### 2. Data simulation

In [16]:
ASSETS = ["Value", "Growth", "LowVol", "Size", "Momentum", "Quality"]
N_ASSETS = len(ASSETS)

# For demonstration, these are the base parameters for the simulation only:
SIM_MEAN_1STATE = 0.000461
SIM_SIG_1STATE  = 0.008388

RISK_FREE_RATE = 0.0
TRANSACTION_COST = 0.0005  
BL_TAU = 0.1  # Black-Litterman parameter

def simulate_1state_data(num_days, seed=None):
    np_rng = np.random.default_rng(seed)
    mu = SIM_MEAN_1STATE
    sig = SIM_SIG_1STATE

    corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(corr, 1.0)
    cov = np.outer(np.full(N_ASSETS, sig), np.full(N_ASSETS, sig)) * corr

    rets = np_rng.multivariate_normal(mean=np.full(N_ASSETS, mu), 
                                      cov=cov, 
                                      size=num_days)
    return pd.DataFrame(rets, columns=ASSETS)

def simulate_2state_data(num_days, seed=None):
    np_rng = np.random.default_rng(seed)
    transmat = np.array([[0.9976, 0.0024],
                         [0.0232, 0.9768]])
    mu_dict  = {0: 0.0006,   1: -0.000881}
    sig_dict = {0: 0.00757, 1: 0.0163}

    base_corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(base_corr, 1.0)

    all_states = np.zeros((num_days, N_ASSETS), dtype=int)
    for i in range(N_ASSETS):
        s = np.zeros(num_days, dtype=int)
        s[0] = np_rng.integers(2)
        for t in range(1, num_days):
            s[t] = np_rng.choice(2, p=transmat[s[t - 1]])
        all_states[:, i] = s

    rets = np.zeros((num_days, N_ASSETS))
    for t in range(num_days):
        mu_vec  = np.zeros(N_ASSETS)
        sig_vec = np.zeros(N_ASSETS)
        for i in range(N_ASSETS):
            curr_state = all_states[t, i]
            mu_vec[i]  = mu_dict[curr_state]
            sig_vec[i] = sig_dict[curr_state]
        cov_t = np.outer(sig_vec, sig_vec) * base_corr
        rets[t] = np_rng.multivariate_normal(mean=mu_vec, cov=cov_t)

    return pd.DataFrame(rets, columns=ASSETS), all_states

def simulate_3state_data(num_days, seed=None):
    np_rng = np.random.default_rng(seed)
    transmat = np.array([[0.9989, 0.0004, 0.0007],
                         [0.0089, 0.9904, 0.0007],
                         [0.0089, 0.0004, 0.9907]])
    mu_list  = [0.0008, 0.0,     -0.003586]
    sig_list = [0.0070, 0.0050,  0.01897]

    base_corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(base_corr, 1.0)

    all_states = np.zeros((num_days, N_ASSETS), dtype=int)
    for i in range(N_ASSETS):
        s = np.zeros(num_days, dtype=int)
        s[0] = np_rng.integers(3)
        for t in range(1, num_days):
            s[t] = np_rng.choice(3, p=transmat[s[t - 1]])
        all_states[:, i] = s

    rets = np.zeros((num_days, N_ASSETS))
    for t in range(num_days):
        mu_vec  = np.zeros(N_ASSETS)
        sig_vec = np.zeros(N_ASSETS)
        for i in range(N_ASSETS):
            st_i   = all_states[t, i]
            mu_vec[i]  = mu_list[st_i]
            sig_vec[i] = sig_list[st_i]
        cov_t = np.outer(sig_vec, sig_vec) * base_corr
        rets[t] = np_rng.multivariate_normal(mean=mu_vec, cov=cov_t)

    return pd.DataFrame(rets, columns=ASSETS), all_states


#### 3. Training Regime Models

#### 3.1 Hidden Markov Model

In [17]:
# %%
def run_mle(observations, n_components=2, init_type='default', seed=None):
    model = GaussianHMM(n_components=n_components, covariance_type='diag',
                        n_iter=100, random_state=seed)

    if init_type == 'default':
        model.startprob_ = np.array([1.0, 0.0])
        model.transmat_  = np.array([[0.9, 0.1],
                                     [0.1, 0.9]])
        model.means_  = np.zeros((n_components, observations.shape[1]))
        model.covars_ = np.full((n_components, observations.shape[1]), 1e-10)
        model.init_params = ''
    elif init_type == 'kmeans':
        km = KMeans(n_clusters=n_components, n_init=10, random_state=seed)
        labels = km.fit_predict(observations)
        means, covars = [], []
        for i in range(n_components):
            obs_i = observations[labels == i]
            means.append(np.mean(obs_i, axis=0))
            covars.append(np.var(obs_i, axis=0) + 1e-10)
        model.startprob_ = np.ones(n_components) / n_components
        model.transmat_  = np.ones((n_components, n_components)) / n_components
        model.means_     = np.array(means)
        model.covars_    = np.array(covars)
        model.init_params = ''

    model.fit(observations)
    pred_states = model.predict(observations)
    return model, pred_states

def run_mle_default(observations, seed=None):
    return run_mle(observations, init_type='default', seed=seed)

def run_mle_kmeans(observations, seed=None):
    return run_mle(observations, init_type='kmeans', seed=seed)

def train_hmm_single_asset_default(series, n_components=2, random_state=42):
    X = series.values.reshape(-1, 1)
    model, _ = run_mle_default(X, seed=random_state)
    return model

def train_hmm_single_asset_kmeans(series, n_components=2, random_state=42):
    X = series.values.reshape(-1, 1)
    model, _ = run_mle_kmeans(X, seed=random_state)
    return model


#### 3.2 Feature selection and SJM training

In [18]:
# Feature selection
def compute_sjm_features(factor_ser: pd.Series) -> pd.DataFrame:
    """
    Build strictly backward-looking features for a single factor 'factor_ser'.
    Returns a DataFrame with 12 columns (EWMAs, RSI, Stoch, MACD, DownsideDev).
    """
    import numpy as np
    factor_price = 100.0 * (1.0 + factor_ser).cumprod()

    def ewma_return(returns, halflife):
        return returns.ewm(halflife=halflife).mean()

    def compute_rsi(price, window):
        delta = price.diff()
        gain  = delta.clip(lower=0)
        loss  = -delta.clip(upper=0)
        avg_gain = gain.rolling(window).mean()
        avg_loss = loss.rolling(window).mean()
        rs = avg_gain / avg_loss.replace(0, np.nan)
        return 100.0 - (100.0 / (1.0 + rs))

    def compute_stoch(price, window):
        rolling_min = price.rolling(window).min()
        rolling_max = price.rolling(window).max()
        return 100.0 * (price - rolling_min) / (rolling_max - rolling_min)

    def compute_macd(price, fast, slow):
        ema_fast = price.ewm(halflife=fast).mean()
        ema_slow = price.ewm(halflife=slow).mean()
        return ema_fast - ema_slow

    def compute_downside_dev_log(returns, window):
        def _downside(subarray):
            negatives = np.where(subarray < 0, subarray, 0.0)
            return np.sqrt((negatives**2).mean())
        dd = returns.rolling(window).apply(_downside, raw=True)
        return np.log(dd.replace(0, np.nan))

    feats = {}
    for hl in [8, 21, 63]:
        feats[f"FactorRet_EWMA_{hl}"] = ewma_return(factor_ser, hl)
    for w in [8, 21, 63]:
        feats[f"RSI_{w}"] = compute_rsi(factor_price, w)
    for w in [8, 21, 63]:
        feats[f"Stoch%K_{w}"] = compute_stoch(factor_price, w)

    feats["MACD_8_21"]   = compute_macd(factor_price, 8, 21)
    feats["MACD_21_63"]  = compute_macd(factor_price, 21, 63)
    feats["DownsideDev_log_21"] = compute_downside_dev_log(factor_ser, 21)
    
    return pd.DataFrame(feats)


In [19]:
# Train SJM
def train_sjm_single_asset(series, n_components=2, max_feats=12, lam=90, random_state=42):
    """
    Train SJM on one asset, using the backward-looking features from above.
    """
    feats_df = compute_sjm_features(series)
    feats_df = feats_df.replace([np.inf, -np.inf], np.nan).fillna(0.0)

    clipper = DataClipperStd(mul=3.0)
    scaler  = StandardScalerPD()

    X_clipped = clipper.fit_transform(feats_df)
    X_scaled  = scaler.fit_transform(X_clipped)
    X_arr = X_scaled.values

    sjm = SparseJumpModel(
        n_components=n_components,
        max_feats=max_feats,
        jump_penalty=lam,
        cont=False,
        max_iter=20,
        random_state=random_state
    )
    sjm.fit(X_arr)

    return sjm, clipper, scaler

#### 4. Allocation simulation

#### 4.1 Backtest a static portfolio with single allocaiton

In [20]:
# %%
def backtest_portfolio(returns, weights):
    """
    Backtest a static portfolio with a single allocation across the entire test period.
    Includes initial transaction cost.
    """
    T = len(returns)
    portfolio_vals = np.zeros(T)
    cost_init = np.sum(np.abs(weights)) * TRANSACTION_COST
    portfolio_vals[0] = 1.0 - cost_init

    for t in range(T - 1):
        ret_t = returns.iloc[t].values
        portfolio_vals[t + 1] = portfolio_vals[t] * (1.0 + np.dot(weights, ret_t))

    return portfolio_vals


#### 5.0 Performance Metric

In [21]:
# Performance Metric
def compute_performance_metrics(portfolio_vals, weight_history=None, annual_factor=250):
    """
    Calculate performance stats on the final portfolio_vals series.
    """
    pv = np.asarray(portfolio_vals)
    rets = np.diff(pv) / pv[:-1]

    ann_ret = rets.mean() * annual_factor
    cum_ret = pv[-1]/pv[0] - 1
    ann_vol = rets.std() * np.sqrt(annual_factor)

    negative_rets = rets[rets < 0]
    ddev = negative_rets.std() * np.sqrt(annual_factor) if len(negative_rets) > 0 else 0.0
    max_dd = (pv / np.maximum.accumulate(pv) - 1).min()

    sharpe = ann_ret/(ann_vol + 1e-12)
    sortino = ann_ret/ddev if ddev > 1e-12 else np.nan
    calmar  = ann_ret/abs(max_dd) if max_dd < 0 else np.nan

    if weight_history is not None and len(weight_history) > 1:
        turnovers = []
        for t in range(1, len(weight_history)):
            turnovers.append(np.sum(np.abs(weight_history[t] - weight_history[t-1])))
        avg_turnover = np.mean(turnovers)
    else:
        avg_turnover = 0.0

    return {
        "Annualized Return": ann_ret,
        "Cumulative Return": cum_ret,
        "Volatility": ann_vol,
        "Downside Deviation": ddev,
        "Max Drawdown": max_dd,
        "Sharpe Ratio": sharpe,
        "Sortino Ratio": sortino,
        "Calmar Ratio": calmar,
        "Turnover Rate": avg_turnover,
    }

#### 6. Helper Function: get per-regime means & std

In [22]:
def get_regime_means_stds_single_asset(asset_series, regime_assignments):
    """
    Returns two dicts:
      means_dict = {state: mean_return_in_that_state}
      stds_dict  = {state: std_return_in_that_state}
    """
    unique_states = np.unique(regime_assignments)
    regime_means = {}
    regime_stds  = {}
    for s in unique_states:
        data_in_s = asset_series[regime_assignments == s]
        if len(data_in_s) > 0:
            regime_means[s] = data_in_s.mean()
            regime_stds[s]  = data_in_s.std()
        else:
            # fallback if somehow empty
            regime_means[s] = asset_series.mean()
            regime_stds[s]  = asset_series.std()
    return regime_means, regime_stds

#### 7. Equal Unconditional Prior

In [23]:
# %%
def build_equal_unconditional_prior(df_train):
    """
    Build an 'equal unconditional prior' for BL:
      - One scalar unconditional return (average of all returns in df_train).
      - One scalar stdev (average stdev across assets).
      - One scalar correlation (average correlation).
    => Covariance has stdev^2 on diagonal and stdev^2 * avg_corr off-diagonal.
    => pi (prior) is a vector of identical means.
    """
    import numpy as np
    import pandas as pd

    overall_mean = df_train.stack().mean()            # scalar
    avg_stdev    = df_train.std(axis=0).mean()        # scalar
    corr_matrix  = df_train.corr()
    avg_corr     = corr_matrix.stack().mean()         # scalar

    n_assets = df_train.shape[1]
    uniform_corr = np.full((n_assets, n_assets), avg_corr)
    np.fill_diagonal(uniform_corr, 1.0)

    cov_flat = (avg_stdev**2) * uniform_corr

    assets = df_train.columns
    pi_series = pd.Series(np.full(n_assets, overall_mean), index=assets)
    cov_df    = pd.DataFrame(cov_flat, index=assets, columns=assets)
    return pi_series, cov_df


#### 8.0 Regime-Based BL with the 'Equal Unconditional Prior'

In [24]:
# %%
def regime_based_bl_backtest_flatprior(
    df_test, 
    states_test,
    regime_means_list,  # list of dict {state: mean}
    flat_pi,            # from build_equal_unconditional_prior
    flat_cov,           # from build_equal_unconditional_prior
    train_means_per_asset
):
    """
    Daily rebalancing using a 'flat' prior (equal unconditional mean, volatility, corr).
    The regime means are the 'views'. We do absolute views in Black-Litterman.
    """
    T_test = len(df_test)
    n_assets = len(df_test.columns)
    assets = df_test.columns

    portfolio_vals = np.zeros(T_test)
    portfolio_vals[0] = 1.0

    weight_history = np.zeros((T_test, n_assets))
    w_prev = np.ones(n_assets) / n_assets  # start equal weighted
    weight_history[0] = w_prev

    for t in range(T_test - 1):
        # 1) Construct daily 'absolute_views' from the regime
        view_vector = np.zeros(n_assets)
        for i in range(n_assets):
            current_regime = states_test[t, i]
            # fallback if not found
            mean_i = regime_means_list[i].get(current_regime, train_means_per_asset[i])
            view_vector[i] = mean_i
        day_views = dict(zip(assets, view_vector))

        # 2) Build BL with flat prior
        bl = BlackLittermanModel(
            cov_matrix    = flat_cov,
            pi            = flat_pi,
            absolute_views= day_views,
            tau           = BL_TAU,
            risk_aversion = 1.0
        )
        bl_rets = bl.bl_returns()
        bl_cov  = bl.bl_cov()

        # 3) Solve for weights
        ef = EfficientFrontier(bl_rets, bl_cov, weight_bounds=(0,1), solver="SCS")
        try:
            w_dict = ef.max_sharpe(risk_free_rate=RISK_FREE_RATE)
        except:
            w_dict = ef.min_volatility()
        w_array = np.array([w_dict[a] for a in assets])

        # 4) Transaction cost & portfolio update
        ret_t = df_test.iloc[t].values
        gross_growth = portfolio_vals[t] * (1.0 + np.dot(w_prev, ret_t))
        traded_fraction = np.sum(np.abs(w_array - w_prev))
        cost = gross_growth * traded_fraction * TRANSACTION_COST

        portfolio_vals[t + 1] = gross_growth - cost
        weight_history[t + 1] = w_array
        w_prev = w_array

    return portfolio_vals, weight_history


#### 9. Wrapper to run all strategies

In [25]:
# Strategy dedinition
def equal_weight_allocation(n_assets):
    return np.ones(n_assets)/n_assets

def inverse_vol_weights(returns):
    stds = returns.std(axis=0).values + 1e-12
    w = 1.0/stds
    return w / w.sum()

def static_mvo_allocation(returns):
    """
    Static mean-variance optimization using historical mean and sample covariance.
    A small ridge is added to improve numerical stability.
    If the SCS solver fails, fall back to the ECOS solver.
    """
    mu = expected_returns.mean_historical_return(returns, frequency=250)
    raw_cov = risk_models.sample_cov(returns)
    ridge_lambda = 1e-5
    cov = raw_cov + np.eye(len(raw_cov)) * ridge_lambda
    try:
        ef = EfficientFrontier(mu, cov, weight_bounds=(0,1), solver="SCS")
        ef_weights = ef.max_sharpe(risk_free_rate=RISK_FREE_RATE)
    except Exception as e:
        print("SCS solver in static MVO failed, switching to ECOS:", e)
        ef = EfficientFrontier(mu, cov, weight_bounds=(0,1), solver="ECOS")
        ef_weights = ef.max_sharpe(risk_free_rate=RISK_FREE_RATE)
    return ef.clean_weights()

In [26]:
#Full Run
def run_allocation(df, lam_sjm=90):
    """
    Trains HMM & SJM, runs:
     1) Equal Weight
     2) Inverse Vol
     3) Static MVO
     4) HMM-BL (flat prior)
     5) HMM-BL-KMeans (flat prior)
     6) SJM-BL (flat prior)
    """
    split_idx = int(len(df)*0.8)
    df_train = df.iloc[:split_idx]
    df_test  = df.iloc[split_idx:]

    # 1) Train per-asset models
    hmm_models_default = []
    hmm_models_kmeans  = []
    sjm_models         = []
    sjm_clippers       = []
    sjm_scalers        = []

    hmm_states_default_train = np.zeros((split_idx, N_ASSETS), dtype=int)
    hmm_states_kmeans_train  = np.zeros((split_idx, N_ASSETS), dtype=int)
    sjm_states_train         = np.zeros((split_idx, N_ASSETS), dtype=int)

    for i, asset in enumerate(ASSETS):
        series_train = df_train[asset]

        # HMM default
        hmm_d = train_hmm_single_asset_default(series_train)
        st_def = hmm_d.predict(series_train.values.reshape(-1,1))
        hmm_models_default.append(hmm_d)
        hmm_states_default_train[:, i] = st_def

        # HMM kmeans
        hmm_k = train_hmm_single_asset_kmeans(series_train)
        st_km = hmm_k.predict(series_train.values.reshape(-1,1))
        hmm_models_kmeans.append(hmm_k)
        hmm_states_kmeans_train[:, i] = st_km

        # SJM
        sjm_mod, sjm_clip, sjm_scale = train_sjm_single_asset(
            series_train, n_components=2, max_feats=12, lam=lam_sjm
        )
        feats_train = compute_sjm_features(series_train)
        feats_train = feats_train.replace([np.inf, -np.inf], np.nan).fillna(0.0)
        X_train_clip = sjm_clip.transform(feats_train)
        X_train_scl  = sjm_scale.transform(X_train_clip)
        st_sjm = sjm_mod.predict(X_train_scl)

        sjm_models.append(sjm_mod)
        sjm_clippers.append(sjm_clip)
        sjm_scalers.append(sjm_scale)
        sjm_states_train[:, i] = st_sjm

    # 2) In-sample regime means
    hmm_regime_means_default = []
    hmm_regime_means_kmeans  = []
    sjm_regime_means         = []
    train_means_per_asset    = []
    for i in range(N_ASSETS):
        asset_train = df_train.iloc[:, i]
        train_means_per_asset.append(asset_train.mean())

        m_def, _ = get_regime_means_stds_single_asset(asset_train, hmm_states_default_train[:, i])
        hmm_regime_means_default.append(m_def)

        m_km, _ = get_regime_means_stds_single_asset(asset_train, hmm_states_kmeans_train[:, i])
        hmm_regime_means_kmeans.append(m_km)

        m_sjm, _ = get_regime_means_stds_single_asset(asset_train, sjm_states_train[:, i])
        sjm_regime_means.append(m_sjm)

    # 3) Predict states on test
    T_test = len(df_test)
    hmm_states_default_test = np.zeros((T_test, N_ASSETS), dtype=int)
    hmm_states_kmeans_test  = np.zeros((T_test, N_ASSETS), dtype=int)
    sjm_states_test         = np.zeros((T_test, N_ASSETS), dtype=int)
    for i, asset in enumerate(ASSETS):
        arr_test = df_test[asset].values.reshape(-1,1)
        hmm_states_default_test[:, i] = hmm_models_default[i].predict(arr_test)
        hmm_states_kmeans_test[:, i]  = hmm_models_kmeans[i].predict(arr_test)

        feats_test = compute_sjm_features(df_test[asset])
        feats_test = feats_test.replace([np.inf, -np.inf], np.nan).fillna(0.0)
        X_test_clip = sjm_clippers[i].transform(feats_test)
        X_test_scl  = sjm_scalers[i].transform(X_test_clip)
        sjm_states_test[:, i] = sjm_models[i].predict(X_test_scl)

    # 4) Build the 'flat' unconditional prior
    flat_pi, flat_cov = build_equal_unconditional_prior(df_train)

    # 5) Evaluate strategies

    # (A) Equal Weight
    w_ew = equal_weight_allocation(N_ASSETS)
    pv_ew = backtest_portfolio(df_test, w_ew)
    w_hist_ew = np.tile(w_ew, (T_test, 1))

    # (B) Inverse Vol
    w_iv = inverse_vol_weights(df_test)
    pv_iv = backtest_portfolio(df_test, w_iv)
    w_hist_iv = np.tile(w_iv, (T_test, 1))

    # (C) Static MVO
    w_mvo_dict = static_mvo_allocation(df_train)
    w_mvo_arr = np.array([w_mvo_dict[a] for a in ASSETS])
    pv_mvo = backtest_portfolio(df_test, w_mvo_arr)
    w_hist_mvo = np.tile(w_mvo_arr, (T_test, 1))

    # (D) HMM-BL (Default)
    pv_hmmbl_def, w_hmmbl_def = regime_based_bl_backtest_flatprior(
        df_test,
        hmm_states_default_test,
        hmm_regime_means_default,
        flat_pi,
        flat_cov,
        train_means_per_asset
    )

    # (E) HMM-BL (KMeans)
    pv_hmmbl_km, w_hmmbl_km = regime_based_bl_backtest_flatprior(
        df_test,
        hmm_states_kmeans_test,
        hmm_regime_means_kmeans,
        flat_pi,
        flat_cov,
        train_means_per_asset
    )

    # (F) SJM-BL
    pv_sjmbl, w_sjmbl = regime_based_bl_backtest_flatprior(
        df_test,
        sjm_states_test,
        sjm_regime_means,
        flat_pi,
        flat_cov,
        train_means_per_asset
    )

    perf = {
        "EW": compute_performance_metrics(pv_ew, w_hist_ew),
        "IV": compute_performance_metrics(pv_iv, w_hist_iv),
        "MVO": compute_performance_metrics(pv_mvo, w_hist_mvo),
        "HMM-BL-Default": compute_performance_metrics(pv_hmmbl_def, w_hmmbl_def),
        "HMM-BL-KMeans":  compute_performance_metrics(pv_hmmbl_km, w_hmmbl_km),
        "SJM-BL":         compute_performance_metrics(pv_sjmbl, w_sjmbl)
    }

    return perf

#### 10. FUll scenario 1-state, 2-state, 3-state

In [27]:
def run_scenario_123(T_sim=1000, seed1=None, seed2=None, seed3=None):
    """
    Simulate & run 1-state, 2-state, 3-state data sets.
    """
    df1_full = simulate_1state_data(T_sim, seed=seed1)
    perf_1 = run_allocation(df1_full)

    df2_full, _ = simulate_2state_data(T_sim, seed=seed2)
    perf_2 = run_allocation(df2_full)

    df3_full, _ = simulate_3state_data(T_sim, seed=seed3)
    perf_3 = run_allocation(df3_full)

    return {
        "1state": perf_1,
        "2state": perf_2,
        "3state": perf_3
    }

def single_monte_carlo_run(run_id, T_sim=1000):
    print(f"Running simulation {run_id}...")
    seed_for_1state = run_id * 1000 + 11
    seed_for_2state = run_id * 1000 + 22
    seed_for_3state = run_id * 1000 + 33

    results = run_scenario_123(
        T_sim=T_sim,
        seed1=seed_for_1state,
        seed2=seed_for_2state,
        seed3=seed_for_3state
    )
    return results

def run_monte_carlo_study(n_runs=10, T_sim=1000):
    """
    Runs multiple replications in parallel. Then does Wilcoxon test on Sharpe Ratios.
    """
    n_cores = multiprocessing.cpu_count()
    print(f"detected {n_cores} cores")

    all_results = Parallel(n_jobs=n_cores)(
        delayed(single_monte_carlo_run)(i+1, T_sim) for i in range(n_runs)
    )

    # Strategies
    strategies = ["EW", "IV", "MVO", "HMM-BL-Default", "HMM-BL-KMeans", "SJM-BL"]
    scenarios  = ["1state", "2state", "3state"]

    # Sharpe data for Wilcoxon
    sharpe_data = {sc: {st: [] for st in strategies} for sc in scenarios}
    all_metrics = {sc: {} for sc in scenarios}
    for sc in scenarios:
        all_metrics[sc] = {}
        for st in strategies:
            all_metrics[sc][st] = {
                "Annualized Return": [],
                "Cumulative Return": [],
                "Volatility": [],
                "Downside Deviation": [],
                "Max Drawdown": [],
                "Sharpe Ratio": [],
                "Sortino Ratio": [],
                "Calmar Ratio": [],
                "Turnover Rate": [],
            }

    # Collect distribution of metrics
    for run_res in all_results:
        for sc in scenarios:
            for st in strategies:
                metrics_dict = run_res[sc][st]
                sharpe_data[sc][st].append(metrics_dict["Sharpe Ratio"])
                for mkey in all_metrics[sc][st]:
                    all_metrics[sc][st][mkey].append(metrics_dict[mkey])

    # Wilcoxon test: SJM-BL vs others (Sharpe)
    print("\n==== Wilcoxon Tests (SJM-BL vs. others) ====")
    wilcoxon_rows = []
    for sc in scenarios:
        sjm_sharpes = sharpe_data[sc]["SJM-BL"]
        for st in strategies:
            if st == "SJM-BL":
                continue
            other_sharpes = sharpe_data[sc][st]
            try:
                stat, pval = wilcoxon(sjm_sharpes, other_sharpes, alternative='two-sided')
            except ValueError:
                stat, pval = np.nan, np.nan
            print(f"{sc} | SJM-BL vs {st}: stat={stat:.4f}, p={pval:.4g}")
            wilcoxon_rows.append({
                "Scenario": sc,
                "Comparison": f"SJM-BL vs {st}",
                "Statistic": stat,
                "p-value": pval
            })

    df_wilcoxon = pd.DataFrame(wilcoxon_rows)
    print("\nWilcoxon Results Table:")
    print(df_wilcoxon.to_string(index=False))

    # Print average metrics
    print("\n==== Average Performance Metrics (across runs) ====")
    for sc in scenarios:
        rows = []
        for st in strategies:
            metric_means = {}
            for mkey, vals in all_metrics[sc][st].items():
                metric_means[mkey] = np.mean(vals)
            row = {"Strategy": st}
            row.update(metric_means)
            rows.append(row)
        df_avg = pd.DataFrame(rows)
        df_avg.set_index("Strategy", inplace=True)
        print(f"\n--- {sc.upper()} ---")
        print(df_avg.to_string())

    return sharpe_data, all_metrics, all_results, df_wilcoxon

#### Main Exectution

In [28]:

if __name__ == "__main__":
    # Example: run 5 replications
    n_simulations = 16
    T_sim = 5000

    # Run parallel simulation
    sharpe_data, all_metrics, all_runs, df_wilcoxon = run_monte_carlo_study(
        n_runs=n_simulations,
        T_sim=T_sim
    )


detected 8 cores
Running simulation 4...
Running simulation 3...
Running simulation 2...
Running simulation 8...
Running simulation 6...
Running simulation 7...
Running simulation 5...
Running simulation 1...


Model is not converging.  Current: 13399.97468682194 is not greater than 13400.026712611634. Delta is -0.05202578969328897
Model is not converging.  Current: 13430.843406489204 is not greater than 13430.876458913983. Delta is -0.033052424778361456
Model is not converging.  Current: 13417.123647420682 is not greater than 13417.135908083997. Delta is -0.012260663315828424
Model is not converging.  Current: 13461.444512960772 is not greater than 13461.462574048224. Delta is -0.018061087452224456
Model is not converging.  Current: 13477.861014403064 is not greater than 13477.897984230327. Delta is -0.03696982726251008
Model is not converging.  Current: 13384.939811275972 is not greater than 13384.963567917232. Delta is -0.023756641259751632
Model is not converging.  Current: 13458.44335717629 is not greater than 13458.466881097735. Delta is -0.02352392144530313
Model is not converging.  Current: 13447.338113765592 is not greater than 13447.353301179308. Delta is -0.015187413715466391
Model

Error in LDL factorization when computing the nonzero elements. The problem seems to be non-convex.
factor_status: 0, num_vars: 7
Error in LDL initial factorization.
ERROR: init_lin_sys_work failure
SCS solver in static MVO failed, switching to ECOS: ScsWork allocation error!


Model is not converging.  Current: 13448.95435023117 is not greater than 13448.955919831802. Delta is -0.0015696006321377354
Model is not converging.  Current: 13476.085714253271 is not greater than 13476.164444167523. Delta is -0.07872991425210785


Error in LDL factorization when computing the nonzero elements. The problem seems to be non-convex.
factor_status: 0, num_vars: 7
Error in LDL initial factorization.
ERROR: init_lin_sys_work failure
SCS solver in static MVO failed, switching to ECOS: ScsWork allocation error!




Error in LDL factorization when computing the nonzero elements. The problem seems to be non-convex.
factor_status: 0, num_vars: 7
Error in LDL initial factorization.
ERROR: init_lin_sys_work failure
SCS solver in static MVO failed, switching to ECOS: ScsWork allocation error!




Error in LDL factorization when computing the nonzero elements. The problem seems to be non-convex.
factor_status: 0, num_vars: 7
Error in LDL initial factorization.
ERROR: init_lin_sys_work failure
SCS solver in static MVO failed, switching to ECOS: ScsWork allocation error!




Error in LDL factorization when computing the nonzero elements. The problem seems to be non-convex.
factor_status: 0, num_vars: 7
Error in LDL initial factorization.
ERROR: init_lin_sys_work failure
SCS solver in static MVO failed, switching to ECOS: ScsWork allocation error!




Error in LDL factorization when computing the nonzero elements. The problem seems to be non-convex.
factor_status: 0, num_vars: 7
Error in LDL initial factorization.
ERROR: init_lin_sys_work failure
SCS solver in static MVO failed, switching to ECOS: ScsWork allocation error!




Error in LDL factorization when computing the nonzero elements. The problem seems to be non-convex.
factor_status: 0, num_vars: 7
Error in LDL initial factorization.
ERROR: init_lin_sys_work failure
SCS solver in static MVO failed, switching to ECOS: ScsWork allocation error!




Error in LDL factorization when computing the nonzero elements. The problem seems to be non-convex.
factor_status: 0, num_vars: 7
Error in LDL initial factorization.
ERROR: init_lin_sys_work failure
SCS solver in static MVO failed, switching to ECOS: ScsWork allocation error!


Model is not converging.  Current: 13475.803503442388 is not greater than 13475.807757561965. Delta is -0.004254119576216908
Model is not converging.  Current: 13475.802548561913 is not greater than 13475.804507009967. Delta is -0.0019584480542107485
Model is not converging.  Current: 13519.865165012987 is not greater than 13519.95135763685. Delta is -0.08619262386309856
Model is not converging.  Current: 13392.537879476446 is not greater than 13392.62322514505. Delta is -0.08534566860362247
Model is not converging.  Current: 13392.563560121065 is not greater than 13392.637893202103. Delta is -0.07433308103827585
Model is not converging.  Current: 13448.980465230197 is not greater than 13449.056881712593. Delta is -0.07641648239587084
Model is not converging.  Current: 13448.071973352226 is not greater than 13448.135226222103. Delta is -0.06325286987703294
Model is not converging.  Current: 13272.79084790031 is not greater than 13272.799533903004. Delta is -0.008686002693139017
Model i

SCS solver in static MVO failed, switching to ECOS: at least one of the assets must have an expected return exceeding the risk-free rate


ValueError: at least one of the assets must have an expected return exceeding the risk-free rate