### Parameters:
- $λ$ = model parameters
- $O_t$ = observation at time $t$
- $q_t$ = state at time $t$
- $\pi_i$ = initial state probability
- $a_{ij}$ = transition probability
- $b_j(O_t)$ = emission probability

### Forward Alg
$α_t(i) = P(O_1, ..., O_t, q_t=S_i|λ)$
1. Initialization: $α_1(i) = π_ib_i(O_1)$
2. Recursion: $α_{t+1}(j) = \left[\sum_{i=1}^Nα_t(i)a_{ij}\right]b_j(O_{t+1})$
3. Termination: $P(O|λ) = ∑_{i=1}^Nα_T(i)$

### Backward Alg
$β_t(i) = P(O_{t+1}, ..., O_T|q_t=S_i,λ)$
1. Initialization: $β_T(i) = 1$
2. Recursion: $β_t(i) = \sum_{j=1}^Na_{ij}b_j(O_{t+1})β_{t+1}(j)$

## Training algorithm
### Baum-Welch Alg:
1. Expectation
- $γ_t(i) = P(q_t = S_i|O, λ)=\frac{α_t(i)β_t(i)}{P(O|λ)}$
- $\xi_t(i, j)=P(q_t=S_i,q_{t+1}=S_j|O,λ)=\frac{α_t(i)a_{ij}b_j(O_{t+1})β_{t+1}(j)}{P(O|λ)}$
2. Maximization
- $\pi_i = γ_1(i)$
- $a_{ij} = \frac{∑_{t=1}^{T-1}\xi_t(i,j)}{\sum_{t=1}^{T-1}γ_t(i)}$
- $μ_i = \frac{∑_{t=1}^{T}γ_t(i)O_t}{∑_{t=1}^{T}γ_t(i)}$
- $σ_i^2=\frac{∑_{t=1}^{T}γ_t(i)(O_{t}-μ_i)^2}{∑_{t=1}^{T}γ_t(i)}$

### where:
- $γ_t(i)$ - probability of being in state $i$ at time $t$
- $ξ_t(i,j)$ - probability of a transition from state $i$ to state $j$ at time $t$
- $μ_i, σ_i^2$ - parameters (mean and variance) of the emission probability distribution


In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from scipy.stats import norm
import plotly.graph_objects as go
# Seed NumPy's global RNG so the randomly initialised transition matrix
# (drawn later with np.random.rand) is reproducible.
np.random.seed(42)

# Ticker symbol and date range passed to yfinance when downloading prices.
SYMBOL = "TSLA"
START = "2020-01-01"
END = "2025-01-01"
# Number of hidden states in the HMM.
N = 3

In [None]:
# Download the daily price history for SYMBOL over [START, END).
df = yf.Ticker(SYMBOL).history(start=START, end=END)

# yfinance reports a failed download by returning an empty frame rather than
# raising, so fail here with a clear message instead of with an obscure
# indexing error once the model code touches the first observation.
if df.empty:
    raise ValueError(
        f"No price history returned for {SYMBOL} between {START} and {END}"
    )

In [None]:
# Daily simple returns; the first row has no previous close and is dropped.
df["Returns"] = df["Close"].pct_change()
df.dropna(inplace=True)

# Observation sequence fed to the HMM and its length.
x = df["Returns"].values
T = len(x)

# The number of states N is taken from the configuration cell; it is not
# redefined here so that the two cells cannot silently disagree.

# Uniform initial state distribution and a random row-stochastic transition
# matrix (each row is normalised to sum to 1).
pi = np.ones(N) / N
A = np.random.rand(N, N)
A = A / A.sum(axis=1, keepdims=True)

# Initial Gaussian emission parameters (mean and standard deviation per state).
mu = np.array([-0.01, 0, 0.01])
sigma = np.array([0.01, 0.02, 0.03])
if not (len(mu) == len(sigma) == N):
    raise ValueError(
        f"Initial mu/sigma define {len(mu)}/{len(sigma)} states but N = {N}"
    )

# EM stopping controls: iteration cap and the previous log-likelihood used
# by the convergence test.
max_iter = 100
prev_log_likelihood = -np.inf

def forward_pass(x, pi, A, mu, sigma, *, scale=True):
    """Run the HMM forward recursion with Gaussian emissions.

    Parameters
    ----------
    x : 1-D sequence of observations.
    pi : initial state probabilities, one per state.
    A : transition matrix, ``A[i, j]`` is the probability of moving from
        state ``i`` to state ``j``.
    mu, sigma : per-state mean and standard deviation of the normal
        emission density.
    scale : when True (default) the previous row is normalised to sum to 1
        before each recursion step. This keeps the values in floating-point
        range for long sequences, where the raw forward variables overflow
        or underflow. Pass ``scale=False`` to get the raw forward variables.

    Returns
    -------
    ndarray of shape ``(len(x), len(pi))``.

    With ``scale=True`` each row is proportional to the raw forward
    variables of that time step, and the sum of row ``t`` is the density of
    ``x[t]`` given the earlier observations, so
    ``np.sum(np.log(alpha.sum(axis=1)))`` is the sequence log-likelihood.
    An empty ``x`` yields an empty ``(0, len(pi))`` array.
    """
    x = np.asarray(x, dtype=float)
    pi = np.asarray(pi, dtype=float)
    A = np.asarray(A, dtype=float)
    n_obs, n_states = len(x), len(pi)
    alpha = np.zeros((n_obs, n_states))
    if n_obs == 0:
        return alpha

    # Emission densities b_j(x_t) for every (t, j) in one vectorised call
    # instead of one scipy call per scalar.
    emission = norm.pdf(x[:, np.newaxis], mu, sigma)

    current = pi * emission[0]
    alpha[0] = current
    for t in range(1, n_obs):
        if scale:
            total = current.sum()
            # A zero row cannot be normalised; leave it as is.
            if total > 0:
                current = current / total
        # sum_i alpha_{t-1}(i) * a_ij, then weight by the emission density.
        current = emission[t] * (current @ A)
        alpha[t] = current
    return alpha

def backward_pass(x, A, mu, sigma, *, scale=True):
    """Run the HMM backward recursion with Gaussian emissions.

    Parameters
    ----------
    x : 1-D sequence of observations.
    A : transition matrix, ``A[i, j]`` is the probability of moving from
        state ``i`` to state ``j``.
    mu, sigma : per-state mean and standard deviation of the normal
        emission density; ``len(mu)`` fixes the number of states.
    scale : when True (default) every computed row is divided by its sum so
        the values stay in floating-point range for long sequences, where
        the raw backward variables overflow or underflow. Pass
        ``scale=False`` to get the raw backward variables.

    Returns
    -------
    ndarray of shape ``(len(x), len(mu))``. The last row is all ones.

    With ``scale=True`` each earlier row is proportional to the raw backward
    variables of that time step, so only ratios within a row are meaningful;
    quantities that are normalised per time step (such as the state
    posteriors) are unaffected. An empty ``x`` yields an empty
    ``(0, len(mu))`` array.
    """
    x = np.asarray(x, dtype=float)
    A = np.asarray(A, dtype=float)
    n_obs, n_states = len(x), len(mu)
    beta = np.zeros((n_obs, n_states))
    if n_obs == 0:
        return beta

    # Emission densities b_j(x_t) for every (t, j) in one vectorised call.
    emission = norm.pdf(x[:, np.newaxis], mu, sigma)

    beta[-1] = 1.0
    for t in range(n_obs - 2, -1, -1):
        # sum_j a_ij * b_j(x_{t+1}) * beta_{t+1}(j) for every i at once.
        row = A @ (emission[t + 1] * beta[t + 1])
        if scale:
            total = row.sum()
            # A zero row cannot be normalised; leave it as is.
            if total > 0:
                row = row / total
        beta[t] = row
    return beta

# Baum-Welch (EM) iterations: re-estimate pi, A, mu and sigma until the
# convergence monitor stops changing or max_iter is reached.
for iteration in range(max_iter):
    # --- Expectation step -------------------------------------------------
    alpha = forward_pass(x, pi, A, mu, sigma)
    beta = backward_pass(x, A, mu, sigma)

    # State posteriors gamma[t, i]; normalising each row makes the result
    # independent of any per-time-step scaling of alpha or beta.
    gamma = alpha * beta
    gamma = gamma / gamma.sum(axis=1, keepdims=True)

    # Emission densities for every (t, j), evaluated once per iteration
    # rather than once per (t, i, j) triple.
    emission = norm.pdf(x[:, np.newaxis], mu, sigma)

    # Pairwise posteriors xi[t, i, j] proportional to
    # alpha[t, i] * A[i, j] * b_j(x[t+1]) * beta[t+1, j],
    # normalised so that each time slice sums to 1.
    xi = (
        alpha[:-1, :, np.newaxis]
        * A[np.newaxis, :, :]
        * (emission[1:] * beta[1:])[:, np.newaxis, :]
    )
    xi = xi / xi.sum(axis=(1, 2), keepdims=True)

    # --- Maximization step ------------------------------------------------
    pi = gamma[0]
    A = xi.sum(axis=0) / gamma[:-1].sum(axis=0)[:, np.newaxis]

    # Posterior-weighted mean and standard deviation of each state.
    state_weight = gamma.sum(axis=0)
    mu = gamma.T @ x / state_weight
    sigma = np.sqrt(
        (gamma * (x[:, np.newaxis] - mu) ** 2).sum(axis=0) / state_weight
    )

    # Convergence monitor.
    # NOTE(review): this sum equals log P(O|lambda) only when forward_pass
    # rescales alpha at every step so that each row sums to the one-step
    # predictive density; for unscaled alphas the exact value would be
    # np.log(alpha[-1].sum()). Confirm which convention forward_pass uses.
    log_likelihood = np.sum(np.log(alpha.sum(axis=1)))
    if abs(log_likelihood - prev_log_likelihood) < 1e-6:
        break
    prev_log_likelihood = log_likelihood

# Most likely state per day, taken from the final state posteriors.
hidden_states = np.argmax(gamma, axis=1)

# Rank the states by the empirical volatility of the returns assigned to them
# so that the labels are stable regardless of the raw state numbering.
states_volatilities = np.array([np.std(x[hidden_states == i]) for i in range(N)])
state_order = np.argsort(states_volatilities)

regime_names = ["Low Vol", "Mid Vol", "High Vol"]
regime_map = dict(zip(state_order, regime_names))
regimes = [regime_map[state] for state in hidden_states]

# Per-regime summary statistics of the daily returns.
regime_labels = np.array(regimes)
regime_stats = {}
for regime in regime_names:
    returns = df.loc[regime_labels == regime, "Returns"]
    regime_stats[regime] = {
        "mean": returns.mean(),
        "std": returns.std(),
        "count": len(returns)
    }

# A is indexed by raw state number, whereas the labels follow state_order.
# Permute rows and columns so that row/column k really is the k-th regime;
# printing A[i] next to the i-th label would mislabel the rows.
A_by_regime = A[np.ix_(state_order, state_order)]

print("Transition prob matrix:")
for state, row in zip(regime_names, A_by_regime):
    print(f"{state:15} {row[0]:.3f} {row[1]:.3f} {row[2]:.3f}")

print("\nLatent State Statistics (Daily):")
for regime, stats in regime_stats.items():
    print(f"\n{regime}:")
    print(f"Mean Return: {stats['mean']*100:.2f}%")
    print(f"Std Dev: {stats['std']*100:.2f}%")
    print(f"Number of Days: {stats['count']}")

In [None]:
# Plot, for every regime, the normal density implied by its empirical mean
# and standard deviation of daily returns.
# NOTE: this rebinds the notebook-global `x` (the returns array in the
# training cell) to the evaluation grid.
x = np.linspace(-0.1, 0.1, 1000)
fig = go.Figure()
# Iterate over the regimes actually present in regime_stats; the position in
# that dict drives the colour shade (the previous code referenced undefined
# names and parsed a digit out of a label that has none).
for shade, (regime_name, moments) in enumerate(regime_stats.items()):
    mu_r = moments["mean"]
    sigma_r = moments["std"]
    y = norm.pdf(x, mu_r, sigma_r)

    fig.add_trace(
        go.Scatter(
            x=x*100,
            y=y,
            name=regime_name,
            line=dict(color=f'rgb({50 + shade*50}, {100 + shade*50}, {150 + shade*50})')
        )
    )

fig.update_layout(
    title='Return Distributions by Latent State (HMM)',
    xaxis_title='Daily Return (%)',
    yaxis_title='Density',
    height=500,
    width=900,
    showlegend=True,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='white'),
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(128,128,128,0.2)',
        zeroline=True,
        zerolinewidth=1,
        zerolinecolor='rgba(128,128,128,0.5)'
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(128,128,128,0.2)',
        zeroline=True,
        zerolinewidth=1,
        zerolinecolor='rgba(128,128,128,0.5)'
    )
)

fig.show()
