#### 1. Load Packages

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon
from joblib import Parallel, delayed
import multiprocessing

# Hidden Markov Model utilities
from hmmlearn.hmm import GaussianHMM
from sklearn.cluster import KMeans

# PyPortfolioOpt
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns
from pypfopt.black_litterman import BlackLittermanModel

# Sparse Jump Model utilities
from jumpmodels.sparse_jump import SparseJumpModel
from jumpmodels.preprocess import StandardScalerPD, DataClipperStd

#### 2. Data simulation

In [None]:
ASSETS = ["Value", "Growth", "LowVol", "Size", "Momentum", "Quality"]
N_ASSETS = len(ASSETS)

def simulate_1state_data(num_days, seed=None):
    np_rng = np.random.default_rng(seed)
    SIM_MEAN_1STATE = 0.000461
    SIM_SIG_1STATE  = 0.008388
    corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(corr, 1.0)
    cov = np.outer(np.full(N_ASSETS, SIM_SIG_1STATE),
                   np.full(N_ASSETS, SIM_SIG_1STATE)) * corr
    rets = np_rng.multivariate_normal(
        mean=np.full(N_ASSETS, SIM_MEAN_1STATE),
        cov=cov,
        size=num_days
    )
    return pd.DataFrame(rets, columns=ASSETS)

def simulate_2state_data(num_days, seed=None):
    np_rng = np.random.default_rng(seed)
    transmat = np.array([[0.9976, 0.0024],
                         [0.0232, 0.9768]])
    mu_dict  = {0: 0.0006,   1: -0.000881}
    sig_dict = {0: 0.00757, 1: 0.0163}
    base_corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(base_corr, 1.0)

    all_states = np.zeros((num_days, N_ASSETS), dtype=int)
    for i in range(N_ASSETS):
        s = np.zeros(num_days, dtype=int)
        s[0] = np_rng.integers(2)
        for t in range(1, num_days):
            s[t] = np_rng.choice(2, p=transmat[s[t - 1]])
        all_states[:, i] = s

    rets = np.zeros((num_days, N_ASSETS))
    for t in range(num_days):
        mu_vec  = np.zeros(N_ASSETS)
        sig_vec = np.zeros(N_ASSETS)
        for i in range(N_ASSETS):
            curr_state = all_states[t, i]
            mu_vec[i]  = mu_dict[curr_state]
            sig_vec[i] = sig_dict[curr_state]
        cov_t = np.outer(sig_vec, sig_vec) * base_corr
        rets[t] = np_rng.multivariate_normal(mean=mu_vec, cov=cov_t)

    return pd.DataFrame(rets, columns=ASSETS), all_states

def simulate_3state_data(num_days, seed=None):
    np_rng = np.random.default_rng(seed)
    transmat = np.array([
        [0.9989, 0.0004, 0.0007],
        [0.0089, 0.9904, 0.0007],
        [0.0089, 0.0004, 0.9907]
    ])
    mu_list  = [0.0008, 0.0,     -0.003586]
    sig_list = [0.0070, 0.0050,  0.01897]
    base_corr = np.full((N_ASSETS, N_ASSETS), 0.185)
    np.fill_diagonal(base_corr, 1.0)

    all_states = np.zeros((num_days, N_ASSETS), dtype=int)
    for i in range(N_ASSETS):
        s = np.zeros(num_days, dtype=int)
        s[0] = np_rng.integers(3)
        for t in range(1, num_days):
            s[t] = np_rng.choice(3, p=transmat[s[t - 1]])
        all_states[:, i] = s

    rets = np.zeros((num_days, N_ASSETS))
    for t in range(num_days):
        mu_vec  = np.zeros(N_ASSETS)
        sig_vec = np.zeros(N_ASSETS)
        for i in range(N_ASSETS):
            st_i   = all_states[t, i]
            mu_vec[i]  = mu_list[st_i]
            sig_vec[i] = sig_list[st_i]
        cov_t = np.outer(sig_vec, sig_vec) * base_corr
        rets[t] = np_rng.multivariate_normal(mean=mu_vec, cov=cov_t)

    return pd.DataFrame(rets, columns=ASSETS), all_states

#### 3. Training Regime Models

#### 3.1 Hidden Markov Model

In [None]:
def run_mle(observations, n_components=2, init_type='default', seed=None):
    model = GaussianHMM(
        n_components=n_components,
        covariance_type='diag',
        n_iter=100,
        random_state=seed
    )

    if init_type == 'default':
        model.startprob_ = np.array([1.0, 0.0])
        model.transmat_  = np.array([[0.9, 0.1],
                                     [0.1, 0.9]])
        model.means_  = np.zeros((n_components, observations.shape[1]))
        model.covars_ = np.full((n_components, observations.shape[1]), 1e-10)
        model.init_params = ''
    elif init_type == 'kmeans':
        km = KMeans(n_clusters=n_components, n_init=10, random_state=seed)
        labels = km.fit_predict(observations)
        means, covars = [], []
        for i in range(n_components):
            obs_i = observations[labels == i]
            means.append(np.mean(obs_i, axis=0))
            covars.append(np.var(obs_i, axis=0) + 1e-10)
        model.startprob_ = np.ones(n_components) / n_components
        model.transmat_  = np.ones((n_components, n_components)) / n_components
        model.means_     = np.array(means)
        model.covars_    = np.array(covars)
        model.init_params = ''

    model.fit(observations)
    pred_states = model.predict(observations)
    return model, pred_states

def run_mle_default(observations, seed=None):
    return run_mle(observations, init_type='default', seed=seed)

def run_mle_kmeans(observations, seed=None):
    return run_mle(observations, init_type='kmeans', seed=seed)

def train_hmm_single_asset_default(series, n_components=2, random_state=42):
    X = series.values.reshape(-1, 1)
    model, _ = run_mle_default(X, seed=random_state)
    return model

def train_hmm_single_asset_kmeans(series, n_components=2, random_state=42):
    X = series.values.reshape(-1, 1)
    model, _ = run_mle_kmeans(X, seed=random_state)
    return model

#### 3.2 Feature selection and SJM training

In [None]:
def compute_sjm_features(factor_ser: pd.Series) -> pd.DataFrame:
    factor_price = 100.0 * (1.0 + factor_ser).cumprod()

    def ewma_return(returns, halflife):
        return returns.ewm(halflife=halflife).mean()

    def compute_rsi(price, window):
        delta = price.diff()
        gain  = delta.clip(lower=0)
        loss  = -delta.clip(upper=0)
        avg_gain = gain.rolling(window).mean()
        avg_loss = loss.rolling(window).mean()
        rs = avg_gain / avg_loss.replace(0, np.nan)
        return 100.0 - (100.0 / (1.0 + rs))

    def compute_stoch(price, window):
        rolling_min = price.rolling(window).min()
        rolling_max = price.rolling(window).max()
        return 100.0 * (price - rolling_min) / (rolling_max - rolling_min)

    def compute_macd(price, fast, slow):
        ema_fast = price.ewm(halflife=fast).mean()
        ema_slow = price.ewm(halflife=slow).mean()
        return ema_fast - ema_slow

    def compute_downside_dev_log(returns, window):
        def _downside(subarray):
            negatives = np.where(subarray < 0, subarray, 0.0)
            return np.sqrt((negatives**2).mean())
        dd = returns.rolling(window).apply(_downside, raw=True)
        return np.log(dd.replace(0, np.nan))

    feats = {}
    for hl in [8, 21, 63]:
        feats[f"FactorRet_EWMA_{hl}"] = ewma_return(factor_ser, hl)
    for w in [8, 21, 63]:
        feats[f"RSI_{w}"] = compute_rsi(factor_price, w)
    for w in [8, 21, 63]:
        feats[f"Stoch%K_{w}"] = compute_stoch(factor_price, w)
    feats["MACD_8_21"]   = compute_macd(factor_price, 8, 21)
    feats["MACD_21_63"]  = compute_macd(factor_price, 21, 63)
    feats["DownsideDev_log_21"] = compute_downside_dev_log(factor_ser, 21)
    
    return pd.DataFrame(feats)

In [None]:
def train_sjm_single_asset(series, n_components=2, max_feats=12, lam=50, random_state=42):
    feats_df = compute_sjm_features(series)
    feats_df = feats_df.replace([np.inf, -np.inf], np.nan).fillna(0.0)
    clipper = DataClipperStd(mul=3.0)
    scaler  = StandardScalerPD()
    X_clipped = clipper.fit_transform(feats_df)
    X_scaled  = scaler.fit_transform(X_clipped)
    X_arr = X_scaled.values

    sjm = SparseJumpModel(
        n_components=n_components,
        max_feats=max_feats,
        jump_penalty=lam,
        cont=False,
        max_iter=20,
        random_state=random_state
    )
    sjm.fit(X_arr)
    return sjm, clipper, scaler

#### 4. Allocation simulation

#### 4.1 Backtest a static portfolio with single allocaiton

In [None]:
def backtest_portfolio(returns, weights, transaction_cost=0.0007):
    T = len(returns)
    portfolio_vals = np.zeros(T)
    cost_init = np.sum(np.abs(weights)) * transaction_cost
    portfolio_vals[0] = 1.0 - cost_init
    for t in range(T - 1):
        ret_t = returns.iloc[t].values
        portfolio_vals[t + 1] = portfolio_vals[t] * (1.0 + np.dot(weights, ret_t))
    return portfolio_vals

#### 5.0 Performance Metric

In [None]:
def compute_performance_metrics(portfolio_vals, weight_history=None, annual_factor=250):
    pv = np.asarray(portfolio_vals)
    rets = np.diff(pv) / pv[:-1]
    ann_ret = rets.mean() * annual_factor
    cum_ret = pv[-1]/pv[0] - 1
    ann_vol = rets.std() * np.sqrt(annual_factor)

    negative_rets = rets[rets < 0]
    ddev = (negative_rets.std() * np.sqrt(annual_factor)) if len(negative_rets) > 0 else 0.0
    max_dd = (pv / np.maximum.accumulate(pv) - 1).min()
    sharpe = ann_ret / (ann_vol + 1e-12)
    sortino = ann_ret / ddev if ddev > 1e-12 else np.nan
    calmar  = ann_ret / abs(max_dd) if max_dd < 0 else np.nan

    if weight_history is not None and len(weight_history) > 1:
        turnovers = []
        for t in range(1, len(weight_history)):
            turnovers.append(np.sum(np.abs(weight_history[t] - weight_history[t-1])))
        avg_turnover = np.mean(turnovers)
    else:
        avg_turnover = 0.0

    return {
        "Annualized Return": ann_ret,
        "Cumulative Return": cum_ret,
        "Volatility": ann_vol,
        "Downside Deviation": ddev,
        "Max Drawdown": max_dd,
        "Sharpe Ratio": sharpe,
        "Sortino Ratio": sortino,
        "Calmar Ratio": calmar,
        "Turnover Rate": avg_turnover,
    }

#### 6. Helper Function: get per-regime means & std

In [None]:
def get_regime_means_stds_single_asset(asset_series, regime_assignments):
    unique_states = np.unique(regime_assignments)
    regime_means = {}
    regime_stds  = {}
    for s in unique_states:
        data_in_s = asset_series[regime_assignments == s]
        if len(data_in_s) > 0:
            regime_means[s] = data_in_s.mean()
            regime_stds[s]  = data_in_s.std()
        else:
            regime_means[s] = asset_series.mean()
            regime_stds[s]  = asset_series.std()
    return regime_means, regime_stds

#### 7. Equal Unconditional Prior

In [None]:
def build_equal_unconditional_prior(df_train):
    SIM_MEAN_1STATE = 0.000461
    SIM_SIG_1STATE  = 0.008388
    TRUE_CORR       = 0.185
    n_assets = df_train.shape[1]
    uniform_corr = np.full((n_assets, n_assets), TRUE_CORR)
    np.fill_diagonal(uniform_corr, 1.0)
    cov_flat = (SIM_SIG_1STATE**2) * uniform_corr
    assets = df_train.columns
    pi_series = pd.Series(np.full(n_assets, SIM_MEAN_1STATE), index=assets)
    cov_df = pd.DataFrame(cov_flat, index=assets, columns=assets)
    return pi_series, cov_df

#### 8.0 Regime-Based BL with the 'Equal Unconditional Prior'

In [None]:
def regime_based_bl_backtest_flatprior(
    df_test,
    states_test,
    regime_means_list,
    regime_stds_list,
    flat_pi,
    flat_cov,
    train_means_per_asset,
    train_stds_per_asset,
    transaction_cost=0.0007,
    risk_free_rate=0.02/252,
    bl_tau=0.05,
    fallback_bear_mean=-0.0005,
    fallback_bear_std=0.015
):
    T_test = len(df_test)
    n_assets = len(df_test.columns)
    assets = df_test.columns
    diag_std = np.sqrt(np.diag(flat_cov))
    corr_matrix = flat_cov / (np.outer(diag_std, diag_std) + 1e-12)
    portfolio_vals = np.zeros(T_test)
    portfolio_vals[0] = 1.0
    weight_history = np.zeros((T_test, n_assets))
    w_prev = np.ones(n_assets) / n_assets
    weight_history[0] = w_prev

    for t in range(1, T_test):
        # Identify regime from day (t-1)
        view_vector = np.zeros(n_assets)
        for i in range(n_assets):
            current_regime = states_test[t - 1, i]
            if current_regime in regime_means_list[i]:
                mean_i = regime_means_list[i][current_regime]
            else:
                mean_i = fallback_bear_mean
            view_vector[i] = mean_i

        # Build daily covariance from correlation + regime-based volatilities
        day_vols = np.zeros(n_assets)
        for i in range(n_assets):
            current_regime = states_test[t - 1, i]
            if current_regime in regime_stds_list[i]:
                day_vols[i] = regime_stds_list[i][current_regime]
            else:
                day_vols[i] = fallback_bear_std
        daily_cov = corr_matrix * np.outer(day_vols, day_vols)

        # Build Black-Litterman model
        bl = BlackLittermanModel(
            cov_matrix    = daily_cov,
            pi            = flat_pi,
            absolute_views= dict(zip(assets, view_vector)),
            tau           = bl_tau,
            risk_aversion = 1.0
        )
        bl_rets = bl.bl_returns()
        bl_cov  = bl.bl_cov()
        ef = EfficientFrontier(bl_rets, bl_cov, weight_bounds=(0, 1), solver="SCS")
        try:
            w_dict = ef.max_sharpe(risk_free_rate=risk_free_rate)
        except ValueError:
            w_dict = ef.min_volatility()
        w_array = np.array([w_dict[a] for a in assets])

        # Transaction costs & portfolio update
        ret_t_minus_1 = df_test.iloc[t - 1].values
        gross_growth  = portfolio_vals[t - 1] * (1.0 + np.dot(w_prev, ret_t_minus_1))
        traded_fraction = np.sum(np.abs(w_array - w_prev))
        cost = gross_growth * traded_fraction * transaction_cost

        portfolio_vals[t] = gross_growth - cost
        weight_history[t] = w_array
        w_prev = w_array

    return portfolio_vals, weight_history

#### 9. Wrapper to run all strategies

In [None]:
def equal_weight_allocation(n_assets):
    return np.ones(n_assets)/n_assets

def inverse_vol_weights(returns):
    stds = returns.std(axis=0).values + 1e-12
    w = 1.0/stds
    return w / w.sum()

def static_mvo_allocation(returns, risk_free_rate=0.02/252):
    mu = expected_returns.mean_historical_return(returns, frequency=250)
    raw_cov = risk_models.sample_cov(returns)
    ridge_lambda = 1e-5
    cov = raw_cov + np.eye(len(raw_cov)) * ridge_lambda
    ef = EfficientFrontier(mu, cov, weight_bounds=(0, 1), solver="SCS")
    try:
        ef_weights = ef.max_sharpe(risk_free_rate=risk_free_rate)
    except ValueError:
        ef_weights = ef.min_volatility()
    return ef.clean_weights()

In [None]:
def run_allocation_with_pvs(
    df,
    lam_sjm=50,
    risk_free_rate=0.02/252,
    transaction_cost=0.0007,
    bl_tau=0.05
):
    split_idx = int(len(df) * 0.8)
    df_train = df.iloc[:split_idx]
    df_test  = df.iloc[split_idx:]

    # Train per-asset models
    hmm_models_default = []
    hmm_models_kmeans  = []
    sjm_models         = []
    sjm_clippers       = []
    sjm_scalers        = []
    hmm_states_default_train = np.zeros((split_idx, N_ASSETS), dtype=int)
    hmm_states_kmeans_train  = np.zeros((split_idx, N_ASSETS), dtype=int)
    sjm_states_train         = np.zeros((split_idx, N_ASSETS), dtype=int)

    for i, asset in enumerate(ASSETS):
        series_train = df_train[asset]

        # HMM default
        hmm_d = train_hmm_single_asset_default(series_train)
        st_def = hmm_d.predict(series_train.values.reshape(-1, 1))
        hmm_models_default.append(hmm_d)
        hmm_states_default_train[:, i] = st_def

        # HMM kmeans
        hmm_k = train_hmm_single_asset_kmeans(series_train)
        st_km = hmm_k.predict(series_train.values.reshape(-1, 1))
        hmm_models_kmeans.append(hmm_k)
        hmm_states_kmeans_train[:, i] = st_km

        # SJM
        sjm_mod, sjm_clip, sjm_scale = train_sjm_single_asset(
            series_train, n_components=2, max_feats=12, lam=lam_sjm
        )
        feats_train = compute_sjm_features(series_train).replace([np.inf, -np.inf], np.nan).fillna(0.0)
        X_train_clip = sjm_clip.transform(feats_train)
        X_train_scl  = sjm_scale.transform(X_train_clip)
        st_sjm = sjm_mod.predict(X_train_scl)

        sjm_models.append(sjm_mod)
        sjm_clippers.append(sjm_clip)
        sjm_scalers.append(sjm_scale)
        sjm_states_train[:, i] = st_sjm

    # In-sample regime means & stds
    hmm_regime_means_default = []
    hmm_regime_stds_default  = []
    hmm_regime_means_kmeans  = []
    hmm_regime_stds_kmeans   = []
    sjm_regime_means         = []
    sjm_regime_stds          = []

    train_means_per_asset = []
    train_stds_per_asset  = []

    for i in range(N_ASSETS):
        asset_train = df_train.iloc[:, i]
        train_means_per_asset.append(asset_train.mean())
        train_stds_per_asset.append(asset_train.std())

        # HMM default
        m_def, s_def = get_regime_means_stds_single_asset(asset_train, hmm_states_default_train[:, i])
        hmm_regime_means_default.append(m_def)
        hmm_regime_stds_default.append(s_def)

        # HMM kmeans
        m_km, s_km = get_regime_means_stds_single_asset(asset_train, hmm_states_kmeans_train[:, i])
        hmm_regime_means_kmeans.append(m_km)
        hmm_regime_stds_kmeans.append(s_km)

        # SJM
        m_sjm, s_sjm = get_regime_means_stds_single_asset(asset_train, sjm_states_train[:, i])
        sjm_regime_means.append(m_sjm)
        sjm_regime_stds.append(s_sjm)

    # Predict states on test day-by-day
    T_test = len(df_test)
    hmm_states_default_test = np.zeros((T_test, N_ASSETS), dtype=int)
    hmm_states_kmeans_test  = np.zeros((T_test, N_ASSETS), dtype=int)
    sjm_states_test         = np.zeros((T_test, N_ASSETS), dtype=int)

    for i, asset in enumerate(ASSETS):
        full_series = pd.concat([df_train[asset], df_test[asset]], axis=0).reset_index(drop=True)

        # HMM default
        for t in range(T_test):
            end_idx = min(split_idx + t + 1, len(full_series))
            partial_data = full_series.iloc[:end_idx].values.reshape(-1, 1)
            partial_states = hmm_models_default[i].predict(partial_data)
            hmm_states_default_test[t, i] = partial_states[-1]

        # HMM kmeans
        for t in range(T_test):
            end_idx = min(split_idx + t + 1, len(full_series))
            partial_data = full_series.iloc[:end_idx].values.reshape(-1, 1)
            partial_states = hmm_models_kmeans[i].predict(partial_data)
            hmm_states_kmeans_test[t, i] = partial_states[-1]

        # SJM
        feats_full = compute_sjm_features(full_series).replace([np.inf, -np.inf], np.nan).fillna(0.0)
        X_full_clip = sjm_clippers[i].transform(feats_full)
        X_full_scl  = sjm_scalers[i].transform(X_full_clip)
        for t in range(T_test):
            end_idx = min(split_idx + t + 1, len(full_series))
            partial_X = X_full_scl[:end_idx]
            partial_states = sjm_models[i].predict(partial_X)
            if isinstance(partial_states, pd.Series):
                sjm_states_test[t, i] = partial_states.iloc[-1]
            else:
                sjm_states_test[t, i] = partial_states[-1]

    # Build flat unconditional prior
    flat_pi, flat_cov = build_equal_unconditional_prior(df_train)

    # -- Evaluate Strategies and store their time series
    pvs = {}
    weights = {}

    # (A) Equal Weight
    w_ew = equal_weight_allocation(N_ASSETS)
    pv_ew = backtest_portfolio(df_test, w_ew, transaction_cost=transaction_cost)
    w_hist_ew = np.tile(w_ew, (T_test, 1))
    pvs["EW"] = pv_ew
    weights["EW"] = w_hist_ew

    # (B) Inverse Vol
    w_iv = inverse_vol_weights(df_test)
    pv_iv = backtest_portfolio(df_test, w_iv, transaction_cost=transaction_cost)
    w_hist_iv = np.tile(w_iv, (T_test, 1))
    pvs["IV"] = pv_iv
    weights["IV"] = w_hist_iv

    # (C) Static MVO
    w_mvo_dict = static_mvo_allocation(df_train, risk_free_rate=risk_free_rate)
    w_mvo_arr = np.array([w_mvo_dict[a] for a in df_train.columns])
    pv_mvo = backtest_portfolio(df_test, w_mvo_arr, transaction_cost=transaction_cost)
    w_hist_mvo = np.tile(w_mvo_arr, (T_test, 1))
    pvs["MVO"] = pv_mvo
    weights["MVO"] = w_hist_mvo

    # (D) HMM-BL-Default
    pv_hmmbl_def, w_hmmbl_def = regime_based_bl_backtest_flatprior(
        df_test, hmm_states_default_test, hmm_regime_means_default, hmm_regime_stds_default,
        flat_pi, flat_cov, train_means_per_asset, train_stds_per_asset,
        transaction_cost, risk_free_rate, bl_tau
    )
    pvs["HMM-BL-Default"] = pv_hmmbl_def
    weights["HMM-BL-Default"] = w_hmmbl_def

    # (E) HMM-BL-KMeans
    pv_hmmbl_km, w_hmmbl_km = regime_based_bl_backtest_flatprior(
        df_test, hmm_states_kmeans_test, hmm_regime_means_kmeans, hmm_regime_stds_kmeans,
        flat_pi, flat_cov, train_means_per_asset, train_stds_per_asset,
        transaction_cost, risk_free_rate, bl_tau
    )
    pvs["HMM-BL-KMeans"] = pv_hmmbl_km
    weights["HMM-BL-KMeans"] = w_hmmbl_km

    # (F) SJM-BL
    pv_sjmbl, w_sjmbl = regime_based_bl_backtest_flatprior(
        df_test, sjm_states_test, sjm_regime_means, sjm_regime_stds,
        flat_pi, flat_cov, train_means_per_asset, train_stds_per_asset,
        transaction_cost, risk_free_rate, bl_tau
    )
    pvs["SJM-BL"] = pv_sjmbl
    weights["SJM-BL"] = w_sjmbl

    # Compute performance
    perf = {}
    for strat in pvs:
        perf[strat] = compute_performance_metrics(pvs[strat], weights[strat])

    # Return dictionary with performance + time series for plotting
    return {
        "perf": perf,
        "pvs": pvs,
        "data": {
            "df_test": df_test,
            "weights": weights
        },
    }

#### 10. FUll scenario 1-state, 2-state, 3-state

In [None]:
def run_scenario_123_one_run(
    T_sim=1000,
    lam_sjm=50,
    risk_free_rate=0.02/252,
    transaction_cost=0.0007,
    bl_tau=0.05,
    seed1=None,
    seed2=None,
    seed3=None
):
    # 1-state
    df1_full = simulate_1state_data(T_sim, seed=seed1)
    res1 = run_allocation_with_pvs(
        df1_full,
        lam_sjm=lam_sjm,
        risk_free_rate=risk_free_rate,
        transaction_cost=transaction_cost,
        bl_tau=bl_tau
    )

    # 2-state
    df2_full, _ = simulate_2state_data(T_sim, seed=seed2)
    res2 = run_allocation_with_pvs(
        df2_full,
        lam_sjm=lam_sjm,
        risk_free_rate=risk_free_rate,
        transaction_cost=transaction_cost,
        bl_tau=bl_tau
    )

    # 3-state
    df3_full, _ = simulate_3state_data(T_sim, seed=seed3)
    res3 = run_allocation_with_pvs(
        df3_full,
        lam_sjm=lam_sjm,
        risk_free_rate=risk_free_rate,
        transaction_cost=transaction_cost,
        bl_tau=bl_tau
    )

    # Combine them into one dictionary for easy reference
    sim_results = {
        "1state": res1,
        "2state": res2,
        "3state": res3
    }
    return sim_results

#### Main Exectution

In [None]:
if __name__ == "__main__":
    config = {
        "T_sim": 1000,
        "risk_free_rate": 0.02 / 252,
        "transaction_cost": 0.0005,
        "bl_tau": 0.05,
        "lam_sjm": 50,
    }

    # Run exactly ONE simulation instead of multiple
    sim_results = run_scenario_123_one_run(
        T_sim=config["T_sim"],
        lam_sjm=config["lam_sjm"],
        risk_free_rate=config["risk_free_rate"],
        transaction_cost=config["transaction_cost"],
        bl_tau=config["bl_tau"],
        seed1=12345,  # Example seed for 1-state
        seed2=23456,  # Example seed for 2-state
        seed3=34567   # Example seed for 3-state
    )

    # Example: Print the performance metrics from single run
    print("=== Performance Results ===")
    for scenario in sim_results:
        print(f"\nScenario: {scenario}")
        for strategy_name, metrics_dict in sim_results[scenario]["perf"].items():
            print(f"  {strategy_name}: {metrics_dict}")

detected 8 cores

==== Wilcoxon Tests (SJM-BL vs. others) ====
1state | SJM-BL vs EW: stat=3.0000, p=0.0001526
1state | SJM-BL vs IV: stat=3.0000, p=0.0001526
1state | SJM-BL vs MVO: stat=64.0000, p=0.8603
1state | SJM-BL vs HMM-BL-Default: stat=4.0000, p=0.0002136
1state | SJM-BL vs HMM-BL-KMeans: stat=0.0000, p=3.052e-05
2state | SJM-BL vs EW: stat=21.0000, p=0.01309
2state | SJM-BL vs IV: stat=18.0000, p=0.007629
2state | SJM-BL vs MVO: stat=41.0000, p=0.1754
2state | SJM-BL vs HMM-BL-Default: stat=0.0000, p=3.052e-05
2state | SJM-BL vs HMM-BL-KMeans: stat=3.0000, p=0.0001526
3state | SJM-BL vs EW: stat=10.0000, p=0.001312
3state | SJM-BL vs IV: stat=20.0000, p=0.01099
3state | SJM-BL vs MVO: stat=7.0000, p=0.0005798
3state | SJM-BL vs HMM-BL-Default: stat=16.0000, p=0.005157
3state | SJM-BL vs HMM-BL-KMeans: stat=55.0000, p=0.5282

Wilcoxon Results Table:
Scenario               Comparison  Statistic  p-value
  1state             SJM-BL vs EW        3.0 0.000153
  1state            

In [None]:
    for scenario_key in ["1state", "2state", "3state"]:
        pvs_dict = sim_results[scenario_key]["pvs"]  # dict of {strategy -> array of portfolio values}
        plt.figure(figsize=(10, 6))
        for strat, vals in pvs_dict.items():
            plt.plot(vals, label=strat)
        plt.title(f"{scenario_key.upper()} - OOS Portfolio Values")
        plt.xlabel("Time")
        plt.ylabel("Portfolio Value")
        plt.legend()
        plt.tight_layout()
        plt.show()

In [None]:
   scenarios = ["1state", "2state", "3state"]
    for scenario in scenarios:
        scenario_data = sim_results[scenario]["data"]
        weights_dict = scenario_data["weights"]  # dictionary: strategy -> weight array (T_test x N_ASSETS)
        T_test = len(scenario_data["df_test"])
        for strategy, w_hist in weights_dict.items():
            plt.figure(figsize=(9, 5))
            x_vals = np.arange(T_test)
            plt.stackplot(x_vals, *w_hist.T, labels=ASSETS)
            plt.title(f"{scenario.upper()} - {strategy} Weights Over Time")
            plt.xlabel("Days (Test Period)")
            plt.ylabel("Portfolio Weight")
            plt.legend(loc='upper left')
            plt.tight_layout()
            plt.show()