# HMM vs kmeans vs Jump vs Sparse Jump Simulation study (Gaussian)

This script illustrates how to replicate a simulation study using the "temporal features" approach from Nystrup et al. (2020). We simulate data from a 2-state Gaussian HMM (univariate), then transform it into features, and finally apply:
1) HMM (from hmmlearn)
2) Jump Model
3) Sparse Jump Model

We'll demonstrate how to tune hyperparameters (penalty parameters, etc.) for the jump-based models.
In a real scenario, you may want more robust approaches (e.g., cross-validation).


### 1.0 Loading Packages

In [15]:
#Load Packages
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM  # Import GaussianHMM
from sklearn.metrics import balanced_accuracy_score, confusion_matrix
from scipy.optimize import linear_sum_assignment
from jumpmodels.sparse_jump import SparseJumpModel
from jumpmodels.jump import JumpModel
from scipy import stats
from joblib import Parallel, delayed
import multiprocessing
from scipy.stats import wilcoxon
from jumpmodels.preprocess import StandardScalerPD, DataClipperStd


### 2. Data Simulation & Utility Functions

We simulate a univariate 2-state Gaussian HMM. We then define functions for:
  - Aligning predicted labels with true labels (Hungarian algorithm).
  - Computing balanced accuracy.

In [16]:
def simulate_data(T, random_state=None):
    """
    Simulate data from a 2-state Gaussian HMM (univariate).
    This version uses the parameters from Nystrup et al. (2020b).
    """
    rng = np.random.default_rng(random_state)

    # True parameters (Nystrup et al. 2020b)
    mu1, mu2 = 0.000615, -0.000785
    sigma1, sigma2 = 0.007759, 0.02140
    transmat = np.array([[0.9979, 0.0021],
                         [0.0120, 0.9880]])

    # Compute stationary distribution
    eigvals, eigvecs = np.linalg.eig(transmat.T)
    stat = np.real(eigvecs[:, np.isclose(eigvals, 1)])
    stat = stat[:, 0] / np.sum(stat[:, 0])

    # Generate state sequence
    states = np.zeros(T, dtype=int)
    states[0] = rng.choice([0, 1], p=stat)
    for t in range(1, T):
        states[t] = rng.choice([0, 1], p=transmat[states[t - 1]])

    # Generate observations from state-dependent Gaussians
    y = np.zeros(T)
    for t in range(T):
        if states[t] == 0:
            y[t] = rng.normal(mu1, sigma1)
        else:
            y[t] = rng.normal(mu2, sigma2)

    return y, states

### 2.1 Alligning Predicted labels using Hungarian Algorithm

In [17]:
def align_labels(true_labels, pred_labels):
    """
    Align predicted labels with true labels using the Hungarian algorithm.
    """
    cm = confusion_matrix(true_labels, pred_labels)
    row_ind, col_ind = linear_sum_assignment(-cm)
    mapping = {col: row for row, col in zip(col_ind, row_ind)}
    aligned = np.array([mapping[x] for x in pred_labels])
    return aligned

### 2.2 Function for calculating the BAC

In [18]:
def calculate_bac(true_states, pred_states):
    """
    Compute Balanced Accuracy (BAC) after aligning predicted state labels.
    """
    aligned_pred = align_labels(true_states, pred_states)
    return balanced_accuracy_score(true_states, aligned_pred)

### 2.3 Per state Accuracy function

In [19]:

def compute_per_state_accuracy(true_states, pred_states):
    """
    Returns (acc1, acc2, BAC) where:
      acc1 = fraction of true-state=0 samples predicted as 0,
      acc2 = fraction of true-state=1 samples predicted as 1,
      BAC  = 0.5*(acc1+acc2)
    """
    aligned_pred = align_labels(true_states, pred_states)
    cm = confusion_matrix(true_states, aligned_pred, labels=[0, 1])
    denom_0 = cm[0, :].sum()
    denom_1 = cm[1, :].sum()
    acc1 = cm[0, 0] / denom_0 if denom_0 > 0 else 0
    acc2 = cm[1, 1] / denom_1 if denom_1 > 0 else 0
    bac = 0.5 * (acc1 + acc2)
    return acc1, acc2, bac


def compute_state_statistics(y, true_states, pred_states):
    """
    Compute the mean return and volatility (std) of the raw observations y for each state.
    The predicted labels are first aligned with the true labels.
    Returns a dictionary with keys:
      'state0_mean', 'state0_vol', 'state1_mean', 'state1_vol'
    """
    aligned = align_labels(true_states, pred_states)
    stats_dict = {}
    for state in [0, 1]:
        idx = np.where(aligned == state)[0]
        if len(idx) > 0:
            stats_dict[f"state{state}_mean"] = np.mean(y[idx])
            stats_dict[f"state{state}_vol"] = np.std(y[idx])
        else:
            stats_dict[f"state{state}_mean"] = np.nan
            stats_dict[f"state{state}_vol"] = np.nan
    return stats_dict

## 3.0 Model Wrappers

We'll define functions to:
  - Fit a standard HMM (hmmlearn)
  - Fit Jump Model penalty
  - Fit Sparse Jump Model penalties

### 3.1 Fit a GaussianHMM with two different initilizations

In [20]:
def run_mle(observations, n_components=2, init_type='default'):
    """
    Fit a GaussianHMM to the raw observations.
    Returns (model, states_est).
    """
    model = GaussianHMM(n_components=n_components, covariance_type="diag", n_iter=100, random_state=42)
    
    if init_type == 'default':
        model.startprob_ = np.array([1.0, 0.0])
        model.transmat_ = np.array([[0.9, 0.1],
                                    [0.1, 0.9]])
        model.means_ = np.array([[0.0], [0.0]])
        model.covars_ = np.array([[0.01], [0.01]])
        model.init_params = ''
    elif init_type == 'kmeans':
        from sklearn.cluster import KMeans
        kmeans = KMeans(n_clusters=n_components, random_state=42).fit(observations.reshape(-1, 1))
        labels = kmeans.labels_
        means = []
        covars = []
        for i in range(n_components):
            obs_i = observations[labels == i]
            means.append(np.mean(obs_i))
            var_ = np.var(obs_i) if len(obs_i) > 0 else 0.01
            covars.append(var_)
        model.startprob_ = np.ones(n_components) / n_components
        model.transmat_ = np.ones((n_components, n_components)) / n_components
        model.means_ = np.array(means).reshape(-1, 1)
        model.covars_ = np.array(covars).reshape(-1, 1)
        model.init_params = 'tmc'
    
    model.fit(observations.reshape(-1, 1))
    _, states_est = model.decode(observations.reshape(-1, 1))
    return model, states_est

### 3.2 Fitting a jump model with fixed hyperparameters

In [21]:
def run_jump_model(Z, n_components=2, lambda_=100.0, random_state=None):
    """
    Fit a JumpModel to standardized feature matrix Z.
    Returns predicted state labels.
    """
    jm = JumpModel(
        n_components=n_components,
        jump_penalty=lambda_,
        cont=False,
        max_iter=10,
        random_state=random_state
    )
    jm.fit(Z)
    return jm.labels_

### 3.3 Fitting a sparse jump model with fixed hyperparameters

In [22]:
def run_sparse_jump_model(Z, n_components=2, lambda_=10.0, max_feats=10, random_state=None):
    """
    Fit a SparseJumpModel to standardized feature matrix Z.
    Returns predicted state labels.
    """
    sjm = SparseJumpModel(
        n_components=n_components,
        jump_penalty=lambda_,
        cont=False,
        max_feats=max_feats,
        max_iter=10,
        random_state=random_state
    )
    sjm.fit(Z)
    return sjm.labels_

### 3.4 Features from Algorithm 2

Given a univariate time series `y`, the function will return the following features:

1. **Observation:** \( y[t] \)  
2. **Left absolute change:** \(\left| y[t] - y[t-1] \right|\)  
3. **Right absolute change:** \(\left| y[t+1] - y[t] \right|\)  
4. **Centered local mean:** \(\text{mean}(y[t-(l-1)/2 : t+(l-1)/2])\)  
5. **Centered local std**  
6. **Left local mean**  
7. **Left local std**  
8. **Right local mean**  
9. **Right local std**

In [23]:
def compute_temporal_features(y, l):
    """
    Compute the 9 features from Algorithm 2 for window length l.
    Returns an array of shape (T, 9).
    """
    T = len(y)
    feats = np.zeros((T, 9), dtype=float)
    half = (l - 1) // 2
    for t in range(T):
        feats[t, 0] = y[t]
        feats[t, 1] = abs(y[t] - y[t-1]) if t > 0 else 0.
        feats[t, 2] = abs(y[t+1] - y[t]) if t < (T-1) else 0.
        left_c = max(0, t - half)
        right_c = min(T, t + half + 1)
        window_c = y[left_c:right_c]
        feats[t, 3] = np.mean(window_c)
        feats[t, 4] = np.std(window_c)
        left_l = max(0, t - l)
        right_l = t
        window_l = y[left_l:right_l]
        feats[t, 5] = np.mean(window_l) if len(window_l) > 0 else 0.
        feats[t, 6] = np.std(window_l) if len(window_l) > 0 else 0.
        left_r = t
        right_r = min(T, t + l)
        window_r = y[left_r:right_r]
        feats[t, 7] = np.mean(window_r) if len(window_r) > 0 else 0.
        feats[t, 8] = np.std(window_r) if len(window_r) > 0 else 0.
    return feats


def combine_features(y, l_list=[5, 13]):
    """
    For each window length in l_list, compute the 9 features and horizontally stack them.
    Returns an array of shape (T, 9 * len(l_list)).
    """
    feat_list = []
    for l in l_list:
        feat_list.append(compute_temporal_features(y, l))
    return np.hstack(feat_list)

### 4.0 Main execution

### 4.1 Workhorse funciton

In [24]:
def run_one_simulation(seed, T=250, lambda_jump=30.0, lambda_sjump=32.0, max_feats=1):
    """
    Single simulation run:
      1) Simulate data (y, true_states)
      2) Build & standardize features Z for jump-based models
      3) Fit HMM (MLE default init) on raw y, extract parameters, compute per-state stats and BAC
      4) Fit HMM (MLE k-means init) on raw y, extract parameters, compute per-state stats and BAC
      5) Fit JumpModel on standardized features and compute BAC & state stats
      6) Fit SparseJumpModel on standardized features and compute BAC & state stats
      7) Return a dictionary with all metrics.
    """
    # 1) Simulate data
    y, true_states = simulate_data(T, random_state=seed)

    # 2) Compute features and standardize for jump-based models
    Z = combine_features(y, l_list=[5, 13])
    Z_df = pd.DataFrame(Z)
    clipper = DataClipperStd(mul=3.0)
    scaler = StandardScalerPD()
    Z_clipped = clipper.fit_transform(Z_df)
    Z_scaled = scaler.fit_transform(Z_clipped)
    Z_scaled_arr = Z_scaled.values

    # --- HMM (MLE default init) on raw y ---
    model_mle, states_est_mle = run_mle(y, n_components=2, init_type='default')
    acc1_mle, acc2_mle, bac_mle = compute_per_state_accuracy(true_states, states_est_mle)
    state_stats_mle = compute_state_statistics(y, true_states, states_est_mle)
    mu1_mle = float(model_mle.means_[0, 0])
    mu2_mle = float(model_mle.means_[1, 0])
    sigma1_mle = float(np.sqrt(model_mle.covars_[0, 0]))
    sigma2_mle = float(np.sqrt(model_mle.covars_[1, 0]))
    gamma12_mle = float(model_mle.transmat_[0, 1])
    gamma21_mle = float(model_mle.transmat_[1, 0])

    # --- HMM (MLE k-means init) on raw y ---
    model_mle_k, states_est_mle_k = run_mle(y, n_components=2, init_type='kmeans')
    acc1_mlek, acc2_mlek, bac_mlek = compute_per_state_accuracy(true_states, states_est_mle_k)
    state_stats_mlek = compute_state_statistics(y, true_states, states_est_mle_k)
    mu1_mlek = float(model_mle_k.means_[0, 0])
    mu2_mlek = float(model_mle_k.means_[1, 0])
    sigma1_mlek = float(np.sqrt(model_mle_k.covars_[0, 0]))
    sigma2_mlek = float(np.sqrt(model_mle_k.covars_[1, 0]))
    gamma12_mlek = float(model_mle_k.transmat_[0, 1])
    gamma21_mlek = float(model_mle_k.transmat_[1, 0])

    # --- JumpModel on standardized features ---
    pred_jump = run_jump_model(Z_scaled_arr, n_components=2, lambda_=lambda_jump, random_state=seed)
    acc1_jump, acc2_jump, bac_jump = compute_per_state_accuracy(true_states, pred_jump)
    state_stats_jump = compute_state_statistics(y, true_states, pred_jump)

    # --- SparseJumpModel on standardized features ---
    pred_sjump = run_sparse_jump_model(Z_scaled_arr, n_components=2, lambda_=lambda_sjump,
                                       max_feats=max_feats, random_state=seed)
    acc1_sjump, acc2_sjump, bac_sjump = compute_per_state_accuracy(true_states, pred_sjump)
    state_stats_sjump = compute_state_statistics(y, true_states, pred_sjump)

    return {
        "seed": seed,
        "T": T,
        # MLE default parameters and state stats
        "MLE_mu1": mu1_mle, "MLE_mu2": mu2_mle,
        "MLE_sigma1": sigma1_mle, "MLE_sigma2": sigma2_mle,
        "MLE_gamma12": gamma12_mle, "MLE_gamma21": gamma21_mle,
        "MLE_acc1": acc1_mle, "MLE_acc2": acc2_mle, "MLE_BAC": bac_mle,
        "MLE_state0_mean": state_stats_mle["state0_mean"],
        "MLE_state0_vol": state_stats_mle["state0_vol"],
        "MLE_state1_mean": state_stats_mle["state1_mean"],
        "MLE_state1_vol": state_stats_mle["state1_vol"],
        # MLE k-means parameters and state stats
        "MLEK_mu1": mu1_mlek, "MLEK_mu2": mu2_mlek,
        "MLEK_sigma1": sigma1_mlek, "MLEK_sigma2": sigma2_mlek,
        "MLEK_gamma12": gamma12_mlek, "MLEK_gamma21": gamma21_mlek,
        "MLEK_acc1": acc1_mlek, "MLEK_acc2": acc2_mlek, "MLEK_BAC": bac_mlek,
        "MLEK_state0_mean": state_stats_mlek["state0_mean"],
        "MLEK_state0_vol": state_stats_mlek["state0_vol"],
        "MLEK_state1_mean": state_stats_mlek["state1_mean"],
        "MLEK_state1_vol": state_stats_mlek["state1_vol"],
        # JumpModel state stats
        "Jump_acc1": acc1_jump, "Jump_acc2": acc2_jump, "Jump_BAC": bac_jump,
        "Jump_state0_mean": state_stats_jump["state0_mean"],
        "Jump_state0_vol": state_stats_jump["state0_vol"],
        "Jump_state1_mean": state_stats_jump["state1_mean"],
        "Jump_state1_vol": state_stats_jump["state1_vol"],
        # SparseJumpModel state stats
        "SparseJump_acc1": acc1_sjump, "SparseJump_acc2": acc2_sjump, "SparseJump_BAC": bac_sjump,
        "SparseJump_state0_mean": state_stats_sjump["state0_mean"],
        "SparseJump_state0_vol": state_stats_sjump["state0_vol"],
        "SparseJump_state1_mean": state_stats_sjump["state1_mean"],
        "SparseJump_state1_vol": state_stats_sjump["state1_vol"]
    }

### 4.2 Main Execution

In [25]:
if __name__ == "__main__":
    # Define simulation lengths and number of simulations per T
    T_values = [250, 500, 1000, 2000]
    n_simulations = 30

    # Hyperparameters for jump-based models
    lambda_jump = 100.0
    lambda_sjump = 100.0
    max_feats = 9

    num_cores = multiprocessing.cpu_count()
    print(f"Running {n_simulations} simulations per T value on {num_cores} cores...")

    all_results = []
    for T in T_values:
        print(f"\nRunning simulations for T = {T}...")
        seeds = np.arange(n_simulations)
        results_list = Parallel(n_jobs=num_cores)(
            delayed(run_one_simulation)(
                seed=s,
                T=T,
                lambda_jump=lambda_jump,
                lambda_sjump=lambda_sjump,
                max_feats=max_feats
            ) for s in seeds
        )
        df_results_T = pd.DataFrame(results_list)
        all_results.append(df_results_T)

    final_results = pd.concat(all_results, ignore_index=True)

    # Print summary statistics for each T value
    for T in T_values:
        df_T = final_results[final_results["T"] == T]
        print(f"\n=== Results Summary for T = {T} ===")
        # MLE default parameters
        for param in ["MLE_mu1", "MLE_mu2", "MLE_sigma1", "MLE_sigma2", "MLE_gamma12", "MLE_gamma21"]:
            mean_val = df_T[param].mean()
            std_val = df_T[param].std()
            print(f"{param}: {mean_val:.4f} ± {std_val:.4f}")
        # MLE default state stats
        for stat in ["MLE_state0_mean", "MLE_state0_vol", "MLE_state1_mean", "MLE_state1_vol"]:
            mean_val = df_T[stat].mean()
            std_val = df_T[stat].std()
            print(f"{stat}: {mean_val:.4f} ± {std_val:.4f}")
        # MLE k-means parameters
        for param in ["MLEK_mu1", "MLEK_mu2", "MLEK_sigma1", "MLEK_sigma2", "MLEK_gamma12", "MLEK_gamma21"]:
            mean_val = df_T[param].mean()
            std_val = df_T[param].std()
            print(f"{param}: {mean_val:.4f} ± {std_val:.4f}")
        # MLE k-means state stats
        for stat in ["MLEK_state0_mean", "MLEK_state0_vol", "MLEK_state1_mean", "MLEK_state1_vol"]:
            mean_val = df_T[stat].mean()
            std_val = df_T[stat].std()
            print(f"{stat}: {mean_val:.4f} ± {std_val:.4f}")
        # BAC values for each method
        print(f"MLE default BAC: {df_T['MLE_BAC'].mean():.3f} ± {df_T['MLE_BAC'].std():.3f}")
        print(f"MLE k-means BAC: {df_T['MLEK_BAC'].mean():.3f} ± {df_T['MLEK_BAC'].std():.3f}")
        print(f"Jump Model BAC : {df_T['Jump_BAC'].mean():.3f} ± {df_T['Jump_BAC'].std():.3f}")
        print(f"Sparse Jump BAC: {df_T['SparseJump_BAC'].mean():.3f} ± {df_T['SparseJump_BAC'].std():.3f}")

    print("\nSample of raw results (first 10 rows):")
    print(final_results.head(10))

Running 30 simulations per T value on 8 cores...

Running simulations for T = 250...


Model is not converging.  Current: 790.2671998347232 is not greater than 790.6833352777398. Delta is -0.4161354430166284
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'
Model is not converging.  Current: 823.201991261546 is not greater than 823.2072459466822. Delta is -0.005254685136151238
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even thou


Running simulations for T = 500...


Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'
Model is not converging.  Current: 1691.769912437574 is not greater than 1691.9923235313051. Delta is -0.22241109373112522
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even 


Running simulations for T = 1000...


Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'
Model is not converging.  Current: 3248.9435969762612 is not greater than 3248.954758532962. Delta is -0.011161556700699293
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Model is not converging.  Current: 3354.182395026121 is not greater than 3354.187122556514. Delta is -0.004727530393211055
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even 


Running simulations for T = 2000...


Model is not converging.  Current: 6152.484059903614 is not greater than 6152.495489587251. Delta is -0.011429683636379195
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Model is not converging.  Current: 6629.510842619877 is not greater than 6629.58496959585. Delta is -0.07412697597374063
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'
Even though the 'transmat_' attribute is set, it will be overwritten during initialization because 'init_params' contains 't'
Even tho


=== Results Summary for T = 250 ===
MLE_mu1: -0.0004 ± 0.0041
MLE_mu2: 0.0012 ± 0.0032
MLE_sigma1: 0.0167 ± 0.0177
MLE_sigma2: 14.7716 ± 16.0323
MLE_gamma12: 0.0523 ± 0.1821
MLE_gamma21: 0.5144 ± 0.4645
MLE_state0_mean: 0.0011 ± 0.0025
MLE_state0_vol: 0.0092 ± 0.0049
MLE_state1_mean: -0.0014 ± 0.0065
MLE_state1_vol: 0.0218 ± 0.0073
MLEK_mu1: 0.0010 ± 0.0018
MLEK_mu2: 0.0004 ± 0.0034
MLEK_sigma1: 0.0144 ± 0.0059
MLEK_sigma2: 0.0163 ± 0.0079
MLEK_gamma12: 0.7520 ± 0.2417
MLEK_gamma21: 0.8017 ± 0.2521
MLEK_state0_mean: 0.0029 ± 0.0080
MLEK_state0_vol: 0.0120 ± 0.0099
MLEK_state1_mean: 0.0001 ± 0.0031
MLEK_state1_vol: 0.0120 ± 0.0061
MLE default BAC: 0.602 ± 0.162
MLE k-means BAC: 0.391 ± 0.179
Jump Model BAC : 0.663 ± 0.294
Sparse Jump BAC: 0.663 ± 0.293

=== Results Summary for T = 500 ===
MLE_mu1: 0.0002 ± 0.0011
MLE_mu2: -0.0012 ± 0.0037
MLE_sigma1: 0.0124 ± 0.0076
MLE_sigma2: 6.3404 ± 12.8573
MLE_gamma12: 0.0328 ± 0.0524
MLE_gamma21: 0.2450 ± 0.3939
MLE_state0_mean: 0.0005 ± 0.0004
M

In [None]:
# Assuming final_results is your DataFrame with all simulation outputs

T_values = final_results["T"].unique()

for T_val in sorted(T_values):
    df_T = final_results[final_results["T"] == T_val]
    # Build a summary DataFrame with methods as rows and metrics as columns
    summary = pd.DataFrame({
        "MLE default": {
            "mu1": df_T["MLE_mu1"].mean(),
            "mu2": df_T["MLE_mu2"].mean(),
            "sigma1": df_T["MLE_sigma1"].mean(),
            "sigma2": df_T["MLE_sigma2"].mean(),
            "gamma12": df_T["MLE_gamma12"].mean(),
            "gamma21": df_T["MLE_gamma21"].mean(),
            "acc1": df_T["MLE_acc1"].mean(),
            "acc2": df_T["MLE_acc2"].mean(),
            "BAC": df_T["MLE_BAC"].mean(),
            "state0_mean": df_T["MLE_state0_mean"].mean(),
            "state0_vol": df_T["MLE_state0_vol"].mean(),
            "state1_mean": df_T["MLE_state1_mean"].mean(),
            "state1_vol": df_T["MLE_state1_vol"].mean()
        },
        "MLE k-means": {
            "mu1": df_T["MLEK_mu1"].mean(),
            "mu2": df_T["MLEK_mu2"].mean(),
            "sigma1": df_T["MLEK_sigma1"].mean(),
            "sigma2": df_T["MLEK_sigma2"].mean(),
            "gamma12": df_T["MLEK_gamma12"].mean(),
            "gamma21": df_T["MLEK_gamma21"].mean(),
            "acc1": df_T["MLEK_acc1"].mean(),
            "acc2": df_T["MLEK_acc2"].mean(),
            "BAC": df_T["MLEK_BAC"].mean(),
            "state0_mean": df_T["MLEK_state0_mean"].mean(),
            "state0_vol": df_T["MLEK_state0_vol"].mean(),
            "state1_mean": df_T["MLEK_state1_mean"].mean(),
            "state1_vol": df_T["MLEK_state1_vol"].mean()
        },
        "Jump Model": {
            "acc1": df_T["Jump_acc1"].mean(),
            "acc2": df_T["Jump_acc2"].mean(),
            "BAC": df_T["Jump_BAC"].mean(),
            "state0_mean": df_T["Jump_state0_mean"].mean(),
            "state0_vol": df_T["Jump_state0_vol"].mean(),
            "state1_mean": df_T["Jump_state1_mean"].mean(),
            "state1_vol": df_T["Jump_state1_vol"].mean()
        },
        "Sparse Jump": {
            "acc1": df_T["SparseJump_acc1"].mean(),
            "acc2": df_T["SparseJump_acc2"].mean(),
            "BAC": df_T["SparseJump_BAC"].mean(),
            "state0_mean": df_T["SparseJump_state0_mean"].mean(),
            "state0_vol": df_T["SparseJump_state0_vol"].mean(),
            "state1_mean": df_T["SparseJump_state1_mean"].mean(),
            "state1_vol": df_T["SparseJump_state1_vol"].mean()
        }
    }).T  # Transpose so that methods are rows

    # Format the numbers as "mean ± std" if desired:
    # Here we simply print the mean values. You could extend this to include std.
    print(f"\nSummary for T = {T_val}")
    print(summary)


Summary for T = 250
                  mu1       mu2    sigma1     sigma2   gamma12   gamma21      acc1      acc2       BAC  state0_mean  state0_vol  state1_mean  state1_vol
MLE default -0.000408  0.001215  0.016681  14.771609  0.052346  0.514388  0.866533  0.336552  0.601543     0.001142    0.009152    -0.001371    0.021784
MLE k-means  0.000980  0.000377  0.014424   0.016332  0.752004  0.801696  0.483947  0.298371  0.391159     0.002870    0.012048     0.000065    0.012035
Jump Model        NaN       NaN       NaN        NaN       NaN       NaN  0.865258  0.461671  0.663465     0.000316    0.008152     0.000713    0.015988
Sparse Jump       NaN       NaN       NaN        NaN       NaN       NaN  0.862620  0.463230  0.662925     0.000349    0.008185     0.000698    0.015819

Summary for T = 500
                  mu1       mu2    sigma1    sigma2   gamma12   gamma21      acc1      acc2       BAC  state0_mean  state0_vol  state1_mean  state1_vol
MLE default  0.000205 -0.001179  0.012367