In [5]:
# Loading packages
import numpy as np


In [7]:
import numpy as np

def build_reversible_chain(pi, alpha=0.02):
    """
    Construct an n x n reversible Markov chain with stationary distribution pi.

    From state i the chain jumps to a different state j with probability
    alpha * pi[j] and stays in i with the remaining probability
    1 - alpha * (1 - pi[i]).  Because pi[i] * P[i, j] = alpha * pi[i] * pi[j]
    is symmetric in (i, j), detailed balance holds and pi is stationary.

    pi: (array-like) 1-D, length n, the target stationary distribution,
        e.g. [0.89, 0.04, 0.07].  Any number of states is supported.
    alpha: (float) smaller alpha => states are more "sticky" (higher diagonal
        probabilities).  Not range-checked here; values outside [0, 1] can
        produce entries that are not valid probabilities.

    Returns:
        P (2D np.array): The n x n transition matrix with pi as its
        stationary distribution.

    Raises:
        AssertionError: if pi is not one-dimensional or does not sum to 1.
    """
    pi = np.array(pi, dtype=float)
    # Assertions are kept (rather than ValueError) so existing callers that
    # rely on AssertionError keep working.
    assert pi.ndim == 1, "pi must be one-dimensional"
    assert np.allclose(pi.sum(), 1.0), "pi must sum to 1"

    n = len(pi)
    # Every row starts as alpha * pi (the off-diagonal rule P[i, j] = alpha * pi[j]).
    P = alpha * np.tile(pi, (n, 1))
    # The diagonal absorbs the remaining mass so that each row sums to 1.
    np.fill_diagonal(P, 1.0 - alpha * (1.0 - pi))
    return P


def direct_stationary_distribution(P):
    """
    Compute the stationary distribution of P by solving a linear system.

    The balance equations (P^T - I) x = 0 are combined with the
    normalisation sum(x) = 1: the last balance equation is swapped for a row
    of ones with right-hand side 1, and the system is solved by least squares.

    Returns:
        pi (1D np.array) stationary distribution
    """
    size = P.shape[0]
    balance = P.T - np.eye(size)

    # Keep all balance rows except the last, then append the normalisation row.
    system = np.vstack([balance[:-1], np.ones((1, size))])
    rhs = np.zeros(size)
    rhs[-1] = 1.0

    # lstsq returns (solution, residuals, rank, singular values); only the
    # solution is needed.
    return np.linalg.lstsq(system, rhs, rcond=None)[0]


def power_method_stationary(P, max_iter=1000, tol=1e-14):
    """
    Iterative approach: start from the uniform distribution and repeatedly
    multiply by P until successive iterates differ by less than tol (L1 norm).

    P: (2D np.array) row-stochastic transition matrix.
    max_iter: (int) maximum number of multiplications by P.
    tol: (float) L1 threshold on the change between successive iterates.

    Returns:
        pi (1D np.array) approximate stationary distribution.  If the
        tolerance is not reached within max_iter iterations, the last
        iterate is still returned, but a RuntimeWarning is emitted so the
        caller knows the result is not converged (slowly mixing, "sticky"
        chains can need far more than the default 1000 iterations).
    """
    import warnings

    n = P.shape[0]
    # Start uniformly
    pi = np.ones(n) / n
    step = float("inf")  # size of the most recent update, for the warning text

    for _ in range(max_iter):
        pi_next = pi @ P
        step = np.linalg.norm(pi_next - pi, 1)
        if step < tol:
            return pi_next
        pi = pi_next

    # Falling out of the loop means the tolerance was never met.
    warnings.warn(
        f"power_method_stationary did not converge in {max_iter} iterations "
        f"(last L1 step = {step:.3e}, tol = {tol:.3e})",
        RuntimeWarning,
        stacklevel=2,
    )
    return pi


def mixture_mean_variance(pi, mus, sigmas):
    """
    For a discrete mixture of states with probabilities pi[i],
    each having mean=mus[i] and stdev=sigmas[i], compute
    the mixture's overall mean and standard deviation.

    Uses Var = E[sigma^2 + mu^2] - (E[mu])^2, with expectations taken under pi.

    pi, mus, sigmas: (array-like) equal-length sequences; plain Python lists
        are accepted as well as numpy arrays.

    Returns (mean_mix, stdev_mix).
    """
    # Coerce so that element-wise arithmetic also works for list/tuple inputs.
    pi = np.asarray(pi, dtype=float)
    mus = np.asarray(mus, dtype=float)
    sigmas = np.asarray(sigmas, dtype=float)

    mean_mix = np.sum(pi * mus)
    second_moment = np.sum(pi * (sigmas**2 + mus**2))
    # Rounding can push a (near-)zero variance slightly below zero, which
    # would turn the square root into NaN; clamp at zero.
    var_mix = max(second_moment - mean_mix**2, 0.0)
    stdev_mix = np.sqrt(var_mix)
    return mean_mix, stdev_mix


def simulate_markov_chain(P, T=1000, start_state=0, rng=None):
    """
    Simulate a sequence of states from Markov chain P over T steps.

    P: (array-like) n x n row-stochastic transition matrix; row i is the
        distribution of the next state given current state i.
    T: (int) number of time steps to generate.  T=0 yields an empty array.
    start_state: (int) state occupied at time 0.
    rng: (optional) object with a ``choice(n, p=...)`` method, e.g. a
        ``np.random.Generator``.  When None (the default) the global
        ``np.random`` state is used, so ``np.random.seed`` controls the draw.

    Returns:
        states (np.array of length T, integer states)
    """
    P = np.asarray(P, dtype=float)  # also accept nested lists
    n = P.shape[0]
    states = np.zeros(T, dtype=int)
    if T == 0:
        # Nothing to simulate; avoids indexing into an empty array below.
        return states

    sampler = np.random if rng is None else rng
    states[0] = start_state
    for t in range(1, T):
        # Next state is drawn from the row of the current state.
        states[t] = sampler.choice(n, p=P[states[t - 1]])
    return states


def simulate_returns_from_chain(states, mus, sigmas, rng=None):
    """
    For each time t, if states[t] = i, draw returns from Normal(mus[i], sigmas[i]).
    (Adapt if you need Student-t or other distributions.)

    states: (array-like of int) state index at each time step.
    mus, sigmas: (array-like) per-state mean and standard deviation.
    rng: (optional) object with a ``normal(loc, scale)`` method, e.g. a
        ``np.random.Generator``.  When None (the default) the global
        ``np.random`` state is used, so ``np.random.seed`` controls the draw.

    Returns an array of length T with daily returns.
    """
    idx = np.asarray(states)
    if idx.size == 0:
        # An empty list becomes a float array, which cannot be used as an index.
        return np.zeros(0, dtype=float)

    mus = np.asarray(mus, dtype=float)
    sigmas = np.asarray(sigmas, dtype=float)
    sampler = np.random if rng is None else rng

    # One vectorised call: element t is drawn with the parameters of state
    # idx[t], in time order, replacing the per-step Python loop.
    return sampler.normal(mus[idx], sigmas[idx])


# Demo script: pick per-state return parameters, build a sticky 3-state chain
# with a chosen stationary distribution, verify that distribution two ways,
# then simulate states and returns and print empirical statistics.
if __name__ == "__main__":
    # 1) Desired stationary distribution
    #    (ordered as bull, flat, bear to match mus/sigmas below)
    pi_target = np.array([0.89, 0.04, 0.07])

    # 2) Desired unconditional daily mean & stdev (matching single-state stats)
    desired_mean = 0.000461    # i.e. 0.0461% per day
    desired_stdev = 0.008388   # i.e. 0.8388% per day

    # 3) Define each state's mean & stdev
    #    Chosen so that the mixture under pi_target approximates the desired
    #    unconditional stats; step 4 prints both so the match can be checked.
    mu_bull = 0.0008
    mu_flat = 0.0
    mu_bear = -0.003586

    sigma_bull = 0.0070
    sigma_flat = 0.0050
    sigma_bear = 0.01897

    mus = np.array([mu_bull, mu_flat, mu_bear])
    sigmas = np.array([sigma_bull, sigma_flat, sigma_bear])

    # 4) Check mixture stats (analytic, no simulation involved)
    mm, ss = mixture_mean_variance(pi_target, mus, sigmas)
    print(f"Mixture Mean (full decimal) = {mm:.12f} vs desired {desired_mean:.12f}")
    print(f"Mixture Std  (full decimal) = {ss:.12f} vs desired {desired_stdev:.12f}")

    # 5) Build a "sticky" Markov chain with that stationary distribution
    alpha = 0.01  # smaller => more "sticky"
    P = build_reversible_chain(pi_target, alpha=alpha)

    print("\nTransition Matrix P (alpha={:.4f}):".format(alpha))
    # Print row by row, full decimals:
    for row in P:
        print(" ".join(f"{val:.6f}" for val in row))
    # Each row of a transition matrix should sum to 1.
    print("Row sums:", [f"{x:.6f}" for x in P.sum(axis=1)])

    # 6) Verify stationarity via direct solve and power method
    #    The power method runs with its defaults (max_iter=1000, tol=1e-14).
    #    NOTE(review): for a chain this sticky that may not be enough
    #    iterations to converge - check the printed L1 difference below.
    pi_direct = direct_stationary_distribution(P)
    pi_iter = power_method_stationary(P)

    print("\nDirect-solve stationary dist:", [f"{x:.12f}" for x in pi_direct],
          "| sum =", f"{np.sum(pi_direct):.12f}")
    print("Iterative stationary dist:   ", [f"{x:.12f}" for x in pi_iter],
          "| sum =", f"{np.sum(pi_iter):.12f}")
    diff = np.linalg.norm(pi_direct - pi_iter, 1)
    print(f"Difference direct vs iterative (L1): {diff:.12f}")

    # 7) Simulate from this chain
    #    The global numpy seed is fixed so the run is reproducible; it must be
    #    set before both simulation calls, which draw from np.random.
    T = 3000
    np.random.seed(999)
    chain_states = simulate_markov_chain(P, T=T, start_state=0)
    returns = simulate_returns_from_chain(chain_states, mus, sigmas)

    # 8) Empirical stats from simulation
    emp_mean = returns.mean()
    emp_stdev = returns.std()  # numpy default ddof=0 (population std)
    # minlength=3 keeps one entry per state even if a state is never visited.
    freq = np.bincount(chain_states, minlength=3) / T

    print("\nEmpirical Station Frequencies:", [f"{fval:.12f}" for fval in freq])
    print(f"Empirical Mean (full decimal) = {emp_mean:.12f}")
    print(f"Empirical Std  (full decimal) = {emp_stdev:.12f}")

    # Compare to desired
    print(f"\nDesired daily mean= {desired_mean:.12f}, daily stdev= {desired_stdev:.12f}")
    print("Done.")


Mixture Mean (full decimal) = 0.000460980000 vs desired 0.000461000000
Mixture Std  (full decimal) = 0.008429562157 vs desired 0.008388000000

Transition Matrix P (alpha=0.0100):
0.998900 0.000400 0.000700
0.008900 0.990400 0.000700
0.008900 0.000400 0.990700
Row sums: ['1.000000', '1.000000', '1.000000']

Direct-solve stationary dist: ['0.890000000000', '0.040000000000', '0.070000000000'] | sum = 1.000000000000
Iterative stationary dist:    ['0.889975968006', '0.040012663566', '0.070011368428'] | sum = 1.000000000000
Difference direct vs iterative (L1): 0.000048063989

Empirical Station Frequencies: ['0.839000000000', '0.063000000000', '0.098000000000']
Empirical Mean (full decimal) = 0.000461244386
Empirical Std  (full decimal) = 0.008952457181

Desired daily mean= 0.000461000000, daily stdev= 0.008388000000
Done.


In [8]:
def simulate_3state_data(T):
    """
    Simulate T periods of returns for six assets from a 3-state
    regime-switching model.

    The regime path follows a fixed 3x3 transition matrix (initial regime
    drawn uniformly).  In regime s every asset has mean mu_list[s] and scale
    sig_list[s], with a constant pairwise correlation of 0.185.  Each period's
    return is mean + sqrt(dof / chi2_dof) * Gaussian draw, i.e. a multivariate
    Student-t construction with dof = 5.
    NOTE(review): sig_list is the scale of the Gaussian part; the heavy-tail
    factor makes the realised volatility larger than sig_list - confirm that
    this is intended.

    The generator is seeded with 42 inside the function, so repeated calls
    with the same T return identical data.

    Prints the full data in tabular form (assets + 'State' column)
    so you can copy-paste it afterwards.

    T: (int) number of periods; T=0 yields an empty frame.

    Returns:
        (df, states): DataFrame with one column per asset plus 'State',
        and the integer regime path as a np.array of length T.
    """
    import numpy as np
    import pandas as pd

    assets = ["Value", "Growth", "LowVol", "Size", "Momentum", "Quality"]
    n_assets = len(assets)
    rng = np.random.default_rng(42)  # fixed seed for reproducibility
    corr = np.full((n_assets, n_assets), 0.185)
    np.fill_diagonal(corr, 1.0)

    transmat = np.array([
        [0.9950, 0.004335, 0.000665],
        [0.01667, 0.95,    0.03333],
        [0.00652, 0.04348, 0.9500]
    ])

    # Regime path.  Guarded so that T=0 does not index into an empty array.
    states = np.zeros(T, dtype=int)
    if T > 0:
        states[0] = rng.integers(3)
        for t in range(1, T):
            states[t] = rng.choice(3, p=transmat[states[t - 1]])

    mu_list = [0.0005862,  0.0,       -0.0008672]
    sig_list = [0.0075313, 0.0135351,  0.0163387]

    # Mean vector and covariance depend only on the regime, so build them
    # once per regime instead of once per time step.
    state_means = [np.full(n_assets, mu) for mu in mu_list]
    state_covs = []
    for sig in sig_list:
        sig_vec = np.full(n_assets, sig)
        state_covs.append(np.outer(sig_vec, sig_vec) * corr)

    rets = np.zeros((T, n_assets))
    dof = 5
    zero_mean = np.zeros(n_assets)
    for t in range(T):
        s = states[t]
        # Draw order (Gaussian vector, then chi-square) is kept fixed so the
        # seeded output is reproducible.
        z = rng.multivariate_normal(mean=zero_mean, cov=state_covs[s])
        chi = rng.chisquare(dof)
        factor = np.sqrt(dof / chi)
        rets[t] = state_means[s] + factor * z

    df = pd.DataFrame(rets, columns=assets)
    df["State"] = states  # attach the states as a separate column

    # Print out the entire DataFrame (careful with large T!)
    # For large T, consider printing only head & tail or saving to a file.
    print(df.to_string(index=False))

    return df, states
