### SJM-BL Simulation Study with Parallel Monte Carlo

This code runs multiple Monte Carlo simulations of the 1-state, 2-state, and 3-state processes, computes performance metrics for each run, and then uses a Wilcoxon test to compare SJM-BL against all other strategies.

#### 1.0 Loading packages

In [17]:

# 1.0 Loading packages (no change)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon
from joblib import Parallel, delayed
import multiprocessing

# Hidden Markov Model utilities
from hmmlearn.hmm import GaussianHMM
from sklearn.cluster import KMeans

# PyPortfolioOpt
from pypfopt.black_litterman import BlackLittermanModel
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models

# Sparse Jump Model utilities
from jumpmodels.sparse_jump import SparseJumpModel
from jumpmodels.preprocess import StandardScalerPD, DataClipperStd


### 2.0 Data Simulation

#### 2.1 Simulating the 1-state data (Gaussian, not Student-t)
- All assets share the same mean/vol and correlation structure.

In [18]:
ASSETS = ["Value", "Growth", "LowVol", "Size", "Momentum", "Quality"]
N_ASSETS = len(ASSETS)
CONST_RET = 0.000461   # Hypothetical daily return used
RISK_FREE_RATE = 0.02 / 252
TRANSACTION_COST = 0.0005  # Cost per dollar transacted
BL_TAU = 0.1  # Black-Litterman tau parameter

In [19]:
def simulate_1state_data(num_days, seed=None):
    """
    Single-state returns (Gaussian). 
    Kept correlation and variance, removed Student-t scaling.
    """
    local_rng = np.random.default_rng(seed)
    mu = 0.000461
    sig = 0.008388

    corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(corr, 1.0)
    cov = np.outer(np.full(N_ASSETS, sig), np.full(N_ASSETS, sig)) * corr

    # Gaussian draws
    rets = local_rng.multivariate_normal(mean=np.full(N_ASSETS, mu), 
                                         cov=cov, 
                                         size=num_days)
    return pd.DataFrame(rets, columns=ASSETS)

#### 2.2 Simulating 2-state data

This function simulates a 2-state HMM (bull/bear) with state‐dependent Student‑t returns.

In [20]:
def simulate_2state_data(num_days, seed=None):
    """
    2-state HMM-like simulation (Gaussian).
    """
    local_rng = np.random.default_rng(seed)

    # Transition matrix for 2-state
    transmat = np.array([
        [0.9976, 0.0024],
        [0.0232, 0.9768]
    ])
    states = np.zeros(num_days, dtype=int)
    states[0] = local_rng.integers(2)

    for t in range(1, num_days):
        states[t] = local_rng.choice(2, p=transmat[states[t - 1]])

    mu_dict = {0: 0.0006, 1: -0.000881}
    sig_dict = {0: 0.00757, 1: 0.0163}
    corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(corr, 1.0)

    rets = np.zeros((num_days, N_ASSETS))
    for t in range(num_days):
        s = states[t]
        mu_s = np.full(N_ASSETS, mu_dict[s])
        sig_s = np.full(N_ASSETS, sig_dict[s])
        cov_s = np.outer(sig_s, sig_s) * corr
        # Gaussian sample using the state's mean/cov
        rets[t] = local_rng.multivariate_normal(mean=mu_s, cov=cov_s)

    return pd.DataFrame(rets, columns=ASSETS), states

#### 2.3 Simulating 3-state data

We are simulating 6 fictional assets which are representing the 6 factors in our framework

In [21]:
def simulate_3state_data(num_days, seed=None):
    """
    3-state HMM-like simulation (Gaussian).
    """
    local_rng = np.random.default_rng(seed)

    transmat = np.array([
        [0.9989, 0.0004, 0.0007],
        [0.0089, 0.9904, 0.0007],
        [0.0089, 0.0004, 0.9907]
    ])
    states = np.zeros(num_days, dtype=int)
    states[0] = local_rng.integers(3)

    for t in range(1, num_days):
        states[t] = local_rng.choice(3, p=transmat[states[t - 1]])

    mu_list = [0.0008, 0.0, -0.003586]
    sig_list = [0.0070, 0.0050, 0.01897]
    corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(corr, 1.0)

    rets = np.zeros((num_days, N_ASSETS))
    for t in range(num_days):
        s = states[t]
        mu_s = np.full(N_ASSETS, mu_list[s])
        sig_s = np.full(N_ASSETS, sig_list[s])
        cov_s = np.outer(sig_s, sig_s) * corr
        # Gaussian sample
        rets[t] = local_rng.multivariate_normal(mean=mu_s, cov=cov_s)

    return pd.DataFrame(rets, columns=ASSETS), states

### 3.0 Training Regime Models

#### 3.1 Training HMM using kmeans clustering initialization

In [22]:
def run_mle(observations, n_components=2, init_type='default', seed=None):
    model = GaussianHMM(n_components=n_components, covariance_type='diag',
                        n_iter=100, random_state=seed)

    if init_type == 'default':
        model.startprob_ = np.array([1.0, 0.0])
        model.transmat_ = np.array([[0.9, 0.1],
                                    [0.1, 0.9]])
        model.means_ = np.zeros((n_components, observations.shape[1]))
        model.covars_ = np.full((n_components, observations.shape[1]), 1e-10)
        model.init_params = ''  # stops re-initializing
    elif init_type == 'kmeans':
        km = KMeans(n_clusters=n_components, n_init=10, random_state=seed)
        labels = km.fit_predict(observations)
        means, covars = [], []
        for i in range(n_components):
            obs_i = observations[labels == i]
            means.append(np.mean(obs_i, axis=0))
            covars.append(np.var(obs_i, axis=0) + 1e-10)
        model.startprob_ = np.ones(n_components) / n_components
        model.transmat_ = np.ones((n_components, n_components)) / n_components
        model.means_ = np.array(means)
        model.covars_ = np.array(covars)
        model.init_params = ''

    model.fit(observations)
    pred_states = model.predict(observations)
    return model, pred_states

In [23]:
def run_mle_default(observations, seed=None):
    return run_mle(observations, init_type='default', seed=seed)

def run_mle_kmeans(observations, seed=None):
    return run_mle(observations, init_type='kmeans', seed=seed)

def train_hmm_single_asset_default(series, n_components=2, random_state=42):
    X = series.values.reshape(-1, 1)
    model, _ = run_mle_default(X, seed=random_state)
    return model

def train_hmm_single_asset_kmeans(series, n_components=2, random_state=42):
    X = series.values.reshape(-1, 1)
    model, _ = run_mle_kmeans(X, seed=random_state)
    return model

#### 3.2 Training Sparse Jump model with max_feats=9 and lambda=90
##### 3.2.1 Defining feature selection framework

In [24]:
def compute_temporal_features_1d(y, window_len):
    """
    Creates simple temporal features (eg. mean, std in a window).
    """
    T = len(y)
    feats = np.zeros((T, 9))
    half = (window_len - 1) // 2

    for t in range(T):
        feats[t, 0] = y[t]
        feats[t, 1] = abs(y[t] - y[t - 1]) if t > 0 else 0.0
        feats[t, 2] = abs(y[t + 1] - y[t]) if t < T - 1 else 0.0

        left_c = max(0, t - half)
        right_c = min(T, t + half + 1)
        window_c = y[left_c:right_c]
        feats[t, 3] = np.mean(window_c)
        feats[t, 4] = np.std(window_c)

        left_l = max(0, t - window_len)
        window_l = y[left_l:t]
        feats[t, 5] = np.mean(window_l) if len(window_l) > 0 else 0.0
        feats[t, 6] = np.std(window_l) if len(window_l) > 0 else 0.0

        window_r = y[t:t + window_len]
        feats[t, 7] = np.mean(window_r) if len(window_r) > 0 else 0.0
        feats[t, 8] = np.std(window_r) if len(window_r) > 0 else 0.0

    return feats

def combine_features_1d(y, window_list=[5, 13]):
    """
    Stacks multiple window-based feature blocks.
    """
    feat_list = []
    for w in window_list:
        feat_list.append(compute_temporal_features_1d(y, w))
    return np.hstack(feat_list)

In [25]:
def train_sjm_single_asset(series, n_components=2, max_feats=9, lam=120, random_state=42):
    """
    Sparse Jump Model training for a single asset.
    """
    y = series.values
    X_raw = combine_features_1d(y)
    clipper = DataClipperStd(mul=3.0)
    scaler = StandardScalerPD()
    X_clipped = clipper.fit_transform(pd.DataFrame(X_raw))
    X_scaled = scaler.fit_transform(X_clipped)
    X_arr = X_scaled.values

    sjm = SparseJumpModel(
        n_components=n_components,
        max_feats=max_feats,
        jump_penalty=lam,
        cont=False,
        max_iter=20,
        random_state=random_state
    )
    sjm.fit(X_arr)
    return sjm

### 4.0 Allocation simulation

#### 4.1 Allocation workhorse functions
In this code we create the in which we fit the following models (each done in a seperate for loop such that we can store the relevant data such as return, weights, etc. in seperate dfs):
1. Equal weigted
2. Inverse volatility weighted
3. Mean-Variance-Optimal static portfolio
4. Hidden Markov Model Black Litterman where infered states are the identified regimes
5. Sparse Jump Model Black Litterman where infered states are the identified regimes

In [26]:
def backtest_portfolio(returns, weights):
    """
    Backtest a static portfolio with single allocation.
    Includes initial transaction cost for entering the position.
    """
    T = len(returns)
    portfolio_vals = np.zeros(T)
    # Subtract initial transaction cost on day 0
    cost_init = 1.0 * np.sum(np.abs(weights)) * TRANSACTION_COST
    portfolio_vals[0] = 1.0 - cost_init  # start net of initial cost

    for t in range(T - 1):
        ret_t = returns.iloc[t].values
        # No daily rebalancing, so no daily cost
        portfolio_vals[t + 1] = portfolio_vals[t] * (1.0 + np.dot(weights, ret_t))

    return portfolio_vals

def equal_weight_allocation(n_assets):
    """Equal weight."""
    return np.ones(n_assets) / n_assets

def inverse_vol_weights(returns):
    """Inverse volatility weights."""
    stds = returns.std(axis=0).values + 1e-12
    w = 1.0 / stds
    return w / w.sum()

def static_mvo_allocation(returns):
    """
    Simple static mean-variance optimization using PyPortfolioOpt.
    """
    from pypfopt import expected_returns
    # We keep a constant mu for demonstration
    mu = pd.Series(CONST_RET, index=returns.columns)
    cov = risk_models.sample_cov(returns)

    ef = EfficientFrontier(mu, cov, weight_bounds=(0, 1), solver="SCS")
    ef_weights = ef.max_sharpe(risk_free_rate=RISK_FREE_RATE)
    return ef.clean_weights()

def black_litterman_allocation(view_vector, prior_cov):
    """
    Basic Black-Litterman with absolute views on each asset.
    """
    pi = pd.Series(CONST_RET, index=prior_cov.columns)
    viewdict = {asset: v for asset, v in zip(ASSETS, view_vector)}

    bl = BlackLittermanModel(
        cov_matrix=prior_cov,
        pi=pi,
        absolute_views=viewdict,
        tau=BL_TAU,
        risk_aversion=1
    )
    bl_rets = bl.bl_returns()  
    bl_cov = bl.bl_cov()

    ef = EfficientFrontier(bl_rets, bl_cov, weight_bounds=(0, 1), solver="SCS")
    if max(bl_rets) <= RISK_FREE_RATE:
        ef_weights = ef.min_volatility()
    else:
        ef_weights = ef.max_sharpe(risk_free_rate=RISK_FREE_RATE)

    clean_weights = ef.clean_weights()
    w_array = np.array([clean_weights[a] for a in prior_cov.columns])
    return w_array

### 5.0 Performance metric evaluation:
Here we divide the performance metric into. We assume 250 data points to be 1 year off trading:
1. Return-Based Metrics 

Annualized Return: Average return per year. 

Cumulative Return: Total portfolio growth over time. 

2. Risk-Based Metrics 

Volatility: Standard deviation of returns. 

Downside Deviation: Measures negative return fluctuations. 

Max Drawdown (MDD): Largest portfolio decline from peak to trough. 

3. Risk-Adjusted Metrics 

Sharpe Ratio: Return per unit of total risk. 

Sortino Ratio: Return per unit of downside risk. 

Calmar Ratio: Return relative to max drawdown. 

4. Portfolio Stability & Adaptation 

Turnover Rate: Measures frequency of asset reallocation. 


We further split the performance three seperate tables with 1-state process, 2-state process, 3-state process




In [27]:
def compute_performance_metrics(portfolio_vals, weight_history=None, annual_factor=250):
    """
    Calculate performance stats on the final portfolio_vals series.
    """
    pv = np.asarray(portfolio_vals)
    rets = np.diff(pv) / pv[:-1]

    ann_ret = rets.mean() * annual_factor
    cum_ret = pv[-1] / pv[0] - 1
    ann_vol = rets.std() * np.sqrt(annual_factor)

    negative_rets = rets[rets < 0]
    ddev = negative_rets.std() * np.sqrt(annual_factor) if len(negative_rets) > 0 else 0.0
    max_dd = (pv / np.maximum.accumulate(pv) - 1).min()

    sharpe = ann_ret / (ann_vol + 1e-12)
    sortino = ann_ret / ddev if ddev > 1e-12 else np.nan
    calmar = ann_ret / abs(max_dd) if max_dd < 0 else np.nan

    if weight_history is not None and len(weight_history) > 1:
        turnovers = []
        for t in range(1, len(weight_history)):
            turnovers.append(np.sum(np.abs(weight_history[t] - weight_history[t - 1])))
        avg_turnover = np.mean(turnovers)
    else:
        avg_turnover = 0.0

    return {
        "Annualized Return": ann_ret,  # net of costs included in PV path
        "Cumulative Return": cum_ret,
        "Volatility": ann_vol,
        "Downside Deviation": ddev,
        "Max Drawdown": max_dd,
        "Sharpe Ratio": sharpe,
        "Sortino Ratio": sortino,
        "Calmar Ratio": calmar,
        "Turnover Rate": avg_turnover,
    }

#### 6.0 Helper function

In [28]:
def get_regime_means_single_asset(asset_series, regime_assignments):
    """
    Returns {regime_label: mean_return_in_that_regime}.
    """
    unique_states = np.unique(regime_assignments)
    regime_means = {}
    for s in unique_states:
        regime_means[s] = asset_series[regime_assignments == s].mean()
    return regime_means

#### 7.0 Regime Based Asset Allocaiton

In [29]:
def regime_based_bl_backtest(df_test, states_test, regime_means_list, prior_cov, train_means_per_asset):
    """
    Daily rebalancing. On each day t:
      1) Identify regime per asset -> set BL views
      2) Compute new BL weights
      3) Deduct transaction cost on rebal
    """
    T_test = len(df_test)
    portfolio_vals = np.zeros(T_test)
    portfolio_vals[0] = 1.0

    weight_history = np.zeros((T_test, N_ASSETS))
    w_prev = equal_weight_allocation(N_ASSETS)
    weight_history[0] = w_prev

    for t in range(T_test - 1):
        # Construct BL views from current states
        view_vector = []
        for i in range(N_ASSETS):
            current_regime = states_test[t, i]
            # if regime unknown, fallback to train mean
            view_val = regime_means_list[i].get(current_regime, train_means_per_asset[i])
            view_vector.append(view_val)

        # Rebalance
        w_bl = black_litterman_allocation(view_vector, prior_cov)

        # Calculate daily return
        ret_t = df_test.iloc[t].values
        gross_growth = portfolio_vals[t] * (1.0 + np.dot(w_prev, ret_t))

        # Calculate transaction cost (portfolio-based)
        traded_fraction = np.sum(np.abs(w_bl - w_prev))
        cost = gross_growth * traded_fraction * TRANSACTION_COST

        # Net portfolio after cost
        portfolio_vals[t + 1] = gross_growth - cost
        weight_history[t + 1] = w_bl
        w_prev = w_bl

    return portfolio_vals, weight_history

#### 8.0 Wrapper to run full allocation

In [30]:
def run_allocation(df, lam_sjm=120):
    """
    Splits df into train/test. Trains HMM and SJM per asset. 
    Then runs:
      1) Equal Weight (static)
      2) Inverse Vol (static)
      3) Static MVO
      4) HMM-BL (default)
      5) HMM-BL (kmeans)
      6) SJM-BL
    Returns performance dict for each strategy.
    """
    split_idx = int(len(df) * 0.8)
    df_train = df.iloc[:split_idx]
    df_test = df.iloc[split_idx:]
    prior_cov = df_train.cov()

    # Train per-asset models
    hmm_models_default = []
    hmm_models_kmeans = []
    sjm_models = []
    hmm_states_default_train = np.zeros((split_idx, N_ASSETS), dtype=int)
    hmm_states_kmeans_train = np.zeros((split_idx, N_ASSETS), dtype=int)
    sjm_states_train = np.zeros((split_idx, N_ASSETS), dtype=int)

    for i, asset in enumerate(ASSETS):
        series_train = df_train[asset]

        # HMM default
        hmm_mod_default = train_hmm_single_asset_default(series_train)
        hmm_states_default = hmm_mod_default.predict(series_train.values.reshape(-1, 1))
        hmm_models_default.append(hmm_mod_default)
        hmm_states_default_train[:, i] = hmm_states_default

        # HMM kmeans
        hmm_mod_kmeans = train_hmm_single_asset_kmeans(series_train)
        hmm_states_kmeans = hmm_mod_kmeans.predict(series_train.values.reshape(-1, 1))
        hmm_models_kmeans.append(hmm_mod_kmeans)
        hmm_states_kmeans_train[:, i] = hmm_states_kmeans

        # SJM
        sjm_mod = train_sjm_single_asset(series_train, n_components=2, lam=lam_sjm)
        X_raw = combine_features_1d(series_train.values)
        clipper = DataClipperStd(mul=3.0)
        scaler = StandardScalerPD()
        X_test_clipped = clipper.fit_transform(pd.DataFrame(X_raw))
        X_test_scaled = scaler.fit_transform(X_test_clipped)
        sjm_states = sjm_mod.predict(X_test_scaled.values)
        sjm_models.append(sjm_mod)
        sjm_states_train[:, i] = sjm_states

    # In-sample regime means
    hmm_regime_means_default = []
    hmm_regime_means_kmeans = []
    sjm_regime_means = []
    train_means_per_asset = []

    for i in range(N_ASSETS):
        asset_train = df_train.iloc[:, i]
        train_means_per_asset.append(asset_train.mean())

        hmm_regime_means_default.append(get_regime_means_single_asset(asset_train, 
                                            hmm_states_default_train[:, i]))
        hmm_regime_means_kmeans.append(get_regime_means_single_asset(asset_train, 
                                            hmm_states_kmeans_train[:, i]))
        sjm_regime_means.append(get_regime_means_single_asset(asset_train, 
                                            sjm_states_train[:, i]))

    # Predict states on test set
    T_test = len(df_test)
    hmm_states_default_test = np.zeros((T_test, N_ASSETS), dtype=int)
    hmm_states_kmeans_test = np.zeros((T_test, N_ASSETS), dtype=int)
    sjm_states_test = np.zeros((T_test, N_ASSETS), dtype=int)

    for i, asset in enumerate(ASSETS):
        asset_series_test = df_test[asset].values.reshape(-1, 1)
        hmm_states_default_test[:, i] = hmm_models_default[i].predict(asset_series_test)
        hmm_states_kmeans_test[:, i] = hmm_models_kmeans[i].predict(asset_series_test)

        X_test_raw = combine_features_1d(df_test[asset].values)
        clipper = DataClipperStd(mul=3.0)
        scaler = StandardScalerPD()
        X_test_clipped = clipper.fit_transform(pd.DataFrame(X_test_raw))
        X_test_scaled = scaler.fit_transform(X_test_clipped)
        sjm_states_test[:, i] = sjm_models[i].predict(X_test_scaled.values)

    # Strategy 1: Equal Weight (static)
    w_ew = equal_weight_allocation(N_ASSETS)
    pv_ew = backtest_portfolio(df_test, w_ew)
    w_hist_ew = np.tile(w_ew, (T_test, 1))

    # Strategy 2: Inverse Vol (static)
    w_iv = inverse_vol_weights(df_test)
    pv_iv = backtest_portfolio(df_test, w_iv)
    w_hist_iv = np.tile(w_iv, (T_test, 1))

    # Strategy 3: Static MVO
    w_mvo_dict = static_mvo_allocation(df_train)
    w_mvo_arr = np.array([w_mvo_dict[a] for a in ASSETS])
    pv_mvo = backtest_portfolio(df_test, w_mvo_arr)
    w_hist_mvo = np.tile(w_mvo_arr, (T_test, 1))

    # Strategy 4: HMM-BL (Default)
    pv_hmmbl_default, w_hmmbl_default = regime_based_bl_backtest(
        df_test, hmm_states_default_test,
        hmm_regime_means_default, prior_cov, train_means_per_asset
    )

    # Strategy 5: HMM-BL (KMeans)
    pv_hmmbl_kmeans, w_hmmbl_kmeans = regime_based_bl_backtest(
        df_test, hmm_states_kmeans_test,
        hmm_regime_means_kmeans, prior_cov, train_means_per_asset
    )

    # Strategy 6: SJM-BL
    pv_sjmbl, w_sjmbl = regime_based_bl_backtest(
        df_test, sjm_states_test,
        sjm_regime_means, prior_cov, train_means_per_asset
    )

    # Perf metrics for each
    perf = {
        "EW": compute_performance_metrics(pv_ew, w_hist_ew),
        "IV": compute_performance_metrics(pv_iv, w_hist_iv),
        "MVO": compute_performance_metrics(pv_mvo, w_hist_mvo),
        "HMM-BL-Default": compute_performance_metrics(pv_hmmbl_default, w_hmmbl_default),
        "HMM-BL-KMeans": compute_performance_metrics(pv_hmmbl_kmeans, w_hmmbl_kmeans),
        "SJM-BL": compute_performance_metrics(pv_sjmbl, w_sjmbl)
    }

    return perf

#### 9.0 Full scenario: 1-state, 2-state, 3-state runs



In [31]:
def run_scenario_123(T_sim=1000, seed1=None, seed2=None, seed3=None):
    """
    Simulate & run 1-state, 2-state, 3-state data sets.
    """
    df1_full = simulate_1state_data(T_sim, seed=seed1)
    perf_1 = run_allocation(df1_full)

    df2_full, _ = simulate_2state_data(T_sim, seed=seed2)
    perf_2 = run_allocation(df2_full)

    df3_full, _ = simulate_3state_data(T_sim, seed=seed3)
    perf_3 = run_allocation(df3_full)

    return {
        "1state": perf_1,
        "2state": perf_2,
        "3state": perf_3
    }

def single_monte_carlo_run(run_id, T_sim=1000):
    """
    A single replication with 3 scenarios (1-,2-,3-state).
    """
    print(f"Running simulation {run_id}...")
    seed_for_1state = run_id * 1000 + 11
    seed_for_2state = run_id * 1000 + 22
    seed_for_3state = run_id * 1000 + 33

    results = run_scenario_123(
        T_sim=T_sim,
        seed1=seed_for_1state,
        seed2=seed_for_2state,
        seed3=seed_for_3state
    )
    return results

def run_monte_carlo_study(n_runs=10, T_sim=1000):
    """
    Runs multiple replications in parallel. Then does Wilcoxon test on Sharpe Ratios.
    """
    n_cores = multiprocessing.cpu_count()
    print(f"detected {n_cores} cores")

    all_results = Parallel(n_jobs=n_cores)(
        delayed(single_monte_carlo_run)(i+1, T_sim) for i in range(n_runs)
    )

    # Strategies
    strategies = ["EW", "IV", "MVO", "HMM-BL-Default", "HMM-BL-KMeans", "SJM-BL"]
    scenarios = ["1state", "2state", "3state"]

    # Sharpe data for Wilcoxon
    sharpe_data = {sc: {st: [] for st in strategies} for sc in scenarios}

    # Store entire distribution of metrics
    all_metrics = {}
    for sc in scenarios:
        all_metrics[sc] = {}
        for st in strategies:
            all_metrics[sc][st] = {
                "Annualized Return": [],
                "Cumulative Return": [],
                "Volatility": [],
                "Downside Deviation": [],
                "Max Drawdown": [],
                "Sharpe Ratio": [],
                "Sortino Ratio": [],
                "Calmar Ratio": [],
                "Turnover Rate": [],
            }

    # Collect metrics
    for run_res in all_results:
        for sc in scenarios:
            for st in strategies:
                metrics_dict = run_res[sc][st]
                sharpe_data[sc][st].append(metrics_dict["Sharpe Ratio"])
                for mkey in all_metrics[sc][st]:
                    all_metrics[sc][st][mkey].append(metrics_dict[mkey])

    # Wilcoxon test: SJM-BL vs others (Sharpe)
    print("\n==== Wilcoxon Tests (SJM-BL vs. others) ====")
    wilcoxon_rows = []
    for sc in scenarios:
        sjm_sharpes = sharpe_data[sc]["SJM-BL"]
        for st in strategies:
            if st == "SJM-BL":
                continue
            other_sharpes = sharpe_data[sc][st]
            stat, pval = wilcoxon(sjm_sharpes, other_sharpes, alternative='two-sided')
            print(f"{sc} | SJM-BL vs {st}: stat={stat:.4f}, p={pval:.4g}")
            wilcoxon_rows.append({
                "Scenario": sc,
                "Comparison": f"SJM-BL vs {st}",
                "Statistic": stat,
                "p-value": pval
            })

    df_wilcoxon = pd.DataFrame(wilcoxon_rows)
    print("\nWilcoxon Results Table:")
    print(df_wilcoxon.to_string(index=False))

    # Print average metrics
    print("\n==== Average Performance Metrics (across runs) ====")
    for sc in scenarios:
        rows = []
        for st in strategies:
            metric_means = {}
            for mkey, vals in all_metrics[sc][st].items():
                metric_means[mkey] = np.mean(vals)
            row = {"Strategy": st}
            row.update(metric_means)
            rows.append(row)
        df_avg = pd.DataFrame(rows)
        df_avg.set_index("Strategy", inplace=True)
        print(f"\n--- {sc.upper()} ---")
        print(df_avg.to_string())

    return sharpe_data, all_metrics, all_results, df_wilcoxon

### 10.0 Main execution: Run simulation and output performance metrics

In [32]:
if __name__ == "__main__":
    # Example: run 5 replications
    n_simulations = 24
    T_sim = 5000

    # Run parallel simulation
    sharpe_data, all_metrics, all_runs, df_wilcoxon = run_monte_carlo_study(
        n_runs=n_simulations,
        T_sim=T_sim
    )

detected 8 cores
Running simulation 1...
Running simulation 3...Running simulation 5...
Running simulation 2...Running simulation 4...

Running simulation 7...

Running simulation 8...
Running simulation 6...


Model is not converging.  Current: 13399.97468682194 is not greater than 13400.026712611634. Delta is -0.05202578969328897
Model is not converging.  Current: 13461.444512960772 is not greater than 13461.462574048224. Delta is -0.018061087452224456
Model is not converging.  Current: 13430.843406489204 is not greater than 13430.876458913983. Delta is -0.033052424778361456
Model is not converging.  Current: 13417.123647420682 is not greater than 13417.135908083997. Delta is -0.012260663315828424
Model is not converging.  Current: 13458.44335717629 is not greater than 13458.466881097735. Delta is -0.02352392144530313
Model is not converging.  Current: 13477.861014403064 is not greater than 13477.897984230327. Delta is -0.03696982726251008
Model is not converging.  Current: 13384.283015944642 is not greater than 13384.291657760492. Delta is -0.008641815849841805
Model is not converging.  Current: 13384.939811275972 is not greater than 13384.963567917232. Delta is -0.023756641259751632
Model

Running simulation 9...


Model is not converging.  Current: 13441.111122645774 is not greater than 13441.278215071468. Delta is -0.16709242569413618


Running simulation 10...
Running simulation 11...


Model is not converging.  Current: 13501.287998620848 is not greater than 13501.35608111622. Delta is -0.06808249537243682
Model is not converging.  Current: 13350.612405658294 is not greater than 13350.694256326968. Delta is -0.0818506686737237


Running simulation 12...
Running simulation 13...
Running simulation 14...


Model is not converging.  Current: 13391.312943307968 is not greater than 13391.321683719358. Delta is -0.00874041138922621
Model is not converging.  Current: 13424.002702103417 is not greater than 13424.009890215908. Delta is -0.007188112491348875
Model is not converging.  Current: 13460.147096765184 is not greater than 13460.149037876685. Delta is -0.001941111500855186
Model is not converging.  Current: 13362.825559522327 is not greater than 13362.836050140642. Delta is -0.010490618315088795


Running simulation 15...


Model is not converging.  Current: 13428.56798655874 is not greater than 13428.568863662867. Delta is -0.0008771041266300017


Running simulation 16...


Model is not converging.  Current: 13386.609967656319 is not greater than 13386.663039708545. Delta is -0.05307205222561606
Model is not converging.  Current: 13421.852146701824 is not greater than 13421.994462960345. Delta is -0.1423162585215323
Model is not converging.  Current: 13466.133829042172 is not greater than 13466.20364730947. Delta is -0.06981826729861496
Model is not converging.  Current: 13440.440116597048 is not greater than 13440.457665303096. Delta is -0.017548706047818996
Model is not converging.  Current: 13504.409534598817 is not greater than 13504.422883945816. Delta is -0.013349346998438705
Model is not converging.  Current: 13489.590602488648 is not greater than 13489.611823687817. Delta is -0.021221199169303873
Model is not converging.  Current: 13484.500505571692 is not greater than 13484.546952155819. Delta is -0.046446584126897505
Model is not converging.  Current: 13491.561806120206 is not greater than 13491.580038901171. Delta is -0.018232780965263373
Model

Running simulation 17...


Model is not converging.  Current: 13394.138145633517 is not greater than 13394.138211438629. Delta is -6.580511217180174e-05


Running simulation 18...


Model is not converging.  Current: 13370.169138487212 is not greater than 13370.179664070394. Delta is -0.010525583182243281


Running simulation 19...


Model is not converging.  Current: 13460.029115354886 is not greater than 13460.071400553283. Delta is -0.042285198396712076
Model is not converging.  Current: 14029.146400294378 is not greater than 14029.200452579595. Delta is -0.05405228521703975
Model is not converging.  Current: 14029.165986752825 is not greater than 14029.183608114407. Delta is -0.017621361581404926
Model is not converging.  Current: 14267.894492712867 is not greater than 14267.924576564335. Delta is -0.030083851468589273
Model is not converging.  Current: 13547.851132413787 is not greater than 13547.853400464779. Delta is -0.0022680509919155156
Model is not converging.  Current: 13464.94356690127 is not greater than 13464.960022798152. Delta is -0.016455896882689558
Model is not converging.  Current: 13437.6790216329 is not greater than 13437.6849410678. Delta is -0.00591943490144331


Running simulation 20...


Model is not converging.  Current: 13473.204918454023 is not greater than 13473.206972505383. Delta is -0.0020540513596642995
Model is not converging.  Current: 13440.805976431695 is not greater than 13440.811281520917. Delta is -0.0053050892220198875
Model is not converging.  Current: 13369.590072539348 is not greater than 13369.597748396938. Delta is -0.007675857590584201


Running simulation 21...


Model is not converging.  Current: 13408.275005413312 is not greater than 13408.275534154029. Delta is -0.0005287407166179037


Running simulation 22...


Model is not converging.  Current: 13477.638338012399 is not greater than 13477.692898305653. Delta is -0.0545602932543261
Model is not converging.  Current: 13490.30394504551 is not greater than 13490.48785366415. Delta is -0.18390861863917962
Model is not converging.  Current: 13428.636882648263 is not greater than 13428.638880987106. Delta is -0.001998338842895464
Model is not converging.  Current: 13392.938164590169 is not greater than 13392.947773072821. Delta is -0.009608482652765815
Model is not converging.  Current: 13452.149420906348 is not greater than 13452.156253442077. Delta is -0.0068325357296998845


Running simulation 23...


Model is not converging.  Current: 13438.028018536314 is not greater than 13438.243258998333. Delta is -0.2152404620192101
Model is not converging.  Current: 13558.351905831527 is not greater than 13558.35418775756. Delta is -0.002281926033901982


Running simulation 24...


Model is not converging.  Current: 13382.785678680428 is not greater than 13382.805647562176. Delta is -0.01996888174835476
Model is not converging.  Current: 13486.146854520031 is not greater than 13486.175755882263. Delta is -0.02890136223140871
Model is not converging.  Current: 13425.006320928293 is not greater than 13425.011057698655. Delta is -0.00473677036279696
Model is not converging.  Current: 13451.169187163314 is not greater than 13451.196617909753. Delta is -0.027430746438767528
Model is not converging.  Current: 13400.069631411581 is not greater than 13400.103308893365. Delta is -0.03367748178425245
Model is not converging.  Current: 13490.28296921142 is not greater than 13490.312914608254. Delta is -0.02994539683459152
Model is not converging.  Current: 13391.404207861628 is not greater than 13391.41277504056. Delta is -0.008567178932935349
Model is not converging.  Current: 13364.293263604644 is not greater than 13364.349539818119. Delta is -0.056276213474120595
Model i


==== Wilcoxon Tests (SJM-BL vs. others) ====
1state | SJM-BL vs EW: stat=4.0000, p=8.345e-07
1state | SJM-BL vs IV: stat=4.0000, p=8.345e-07
1state | SJM-BL vs MVO: stat=0.0000, p=1.192e-07
1state | SJM-BL vs HMM-BL-Default: stat=1.0000, p=2.384e-07
1state | SJM-BL vs HMM-BL-KMeans: stat=0.0000, p=1.192e-07
2state | SJM-BL vs EW: stat=101.0000, p=0.1688
2state | SJM-BL vs IV: stat=100.0000, p=0.16
2state | SJM-BL vs MVO: stat=59.0000, p=0.007921
2state | SJM-BL vs HMM-BL-Default: stat=138.0000, p=0.7469
2state | SJM-BL vs HMM-BL-KMeans: stat=123.0000, p=0.4559
3state | SJM-BL vs EW: stat=137.0000, p=0.7257
3state | SJM-BL vs IV: stat=137.0000, p=0.7257
3state | SJM-BL vs MVO: stat=61.0000, p=0.009576
3state | SJM-BL vs HMM-BL-Default: stat=113.0000, p=0.3029
3state | SJM-BL vs HMM-BL-KMeans: stat=77.0000, p=0.03665

Wilcoxon Results Table:
Scenario               Comparison  Statistic      p-value
  1state             SJM-BL vs EW        4.0 8.344650e-07
  1state             SJM-BL vs 