In [1]:
import os
import sys
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from hmmlearn import hmm
from sklearn.preprocessing import StandardScaler

# --- Configuration
pd.set_option('display.max_rows', None)
np.random.seed(42)  # For reproducibility

PERIOD = 30

raw_bar = pd.read_parquet(os.path.join(os.getcwd(), 'history.parquet'))
vrun_bar = pd.read_parquet(os.path.join(os.getcwd(), 'volume_run_bar.parquet'))
# Keep only the most recent 60*24*7 bars and index them by their 'time' column.
df = vrun_bar[-(60*24*7):].copy()
df = df.set_index('time')

series = df["close"]
# 'ref': log price relative to the first bar of the slice (used only for plotting).
df['ref'] = np.log((series / series.iloc[0]).fillna(1))
# 'data': log ratio of the close to a rolling mean that is lagged by PERIOD bars.
df['data'] = np.log((series / series.rolling(PERIOD).mean().shift(PERIOD)).fillna(1))

# | Data Type          | Works with HMM? | Recommended Action                      |
# | ------------------ | --------------- | --------------------------------------- |
# | Symmetric Gaussian | Yes             | Use standard Gaussian HMM               |
# | Skewed data        | Maybe           | Transform or use non-Gaussian emissions |
# | Heavy-tailed       | Maybe           | Try Student-t or GMM-HMM                |
# | Discrete/ordinal   | Yes             | Use categorical/discrete HMM            |

# 1. Build forward-looking features
periods = [1, 2, 4, 6, 8]  # longer trend than available periods, but cleaner label
# periods = [3, 6, 12, 24] # longer trend than available periods, but cleaner label
for period in periods:
    # Forward log-return over `period` bars: log(close[t+period] / close[t]).
    # NOTE: pct_change(periods=-period) would give close[t]/close[t+period] - 1,
    # i.e. the *negated* forward return, so the ratio is written out explicitly.
    df[f'pct_fwd_{period}'] = np.log(df["close"].shift(-period) / df["close"]).fillna(0)
    # df[f'pct_bak_{period}'] = np.log1p(df["close"].pct_change(periods=period).fillna(0))

for i, period in enumerate(periods):
    # Sum of the forward log-returns of all horizons up to and including `period`.
    df[f'pct_cum_fwd_{period}'] = sum(df[f'pct_fwd_{p}'] for p in periods[:(i+1)])
    # df[f'pct_cum_bak_{period}'] = sum(df[f'pct_bak_{p}'] for p in periods[:(i+1)])
    # Split into a non-negative part and a non-positive part (vectorised clip).
    df[f'f_p_{period}'] = df[f'pct_cum_fwd_{period}'].clip(lower=0)
    df[f'f_n_{period}'] = df[f'pct_cum_fwd_{period}'].clip(upper=0)
# df['atr'] = df[f'high'] - df[f'low']
# df['bar_ratio'] = np.where(df['atr'] != 0, (df['close'] - df['open']) / df['atr'], 0)
X = df[[f'f_p_{p}' for p in periods]+[f'f_n_{p}' for p in periods]]

# 2. Standardise the features and fit a Gaussian HMM
# NOTE(review): every feature column is exactly zero on one side by construction,
# which is far from Gaussian (see the table above) - confirm the fit is meaningful.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
n_states = 2  # choose your number of regimes
model = hmm.GaussianHMM(
    n_components=n_states,
    covariance_type="full",
    n_iter=30,
    verbose=True,    # print convergence info
    random_state=0,
)
model.fit(X_scaled)
hidden_states = model.predict(X_scaled)
df["label"] = hidden_states

# hmm_stats = pd.DataFrame(index=sorted(df["label"].unique()), columns=[f"fut_return_{p}" for p in periods])
# for period in periods:
#     col_name = f"fut_return_{period}"
#     df[col_name] = df.groupby("label")["close"].transform(lambda x: x.shift(-period) / x - 1)
#     stats = df.groupby("label")[col_name].agg(['mean'])
#     hmm_stats.loc[:, col_name] = stats['mean']
# print(hmm_stats)

# 3. Plot both series with markers coloured by the decoded state.
# Each trace gets its own legend name and the second colorbar is shifted right
# so that the two colorbars do not overlap.
fig = go.Figure()
fig.add_trace(go.Scatter(y=df['ref'], mode='lines+markers', marker=dict(color=hidden_states,
              colorscale='Plasma', showscale=True, colorbar=dict(title="HMM State"), size=6), name="ref",))
fig.add_trace(go.Scatter(y=df['data'], mode='lines+markers', marker=dict(color=hidden_states,
              colorscale='Spectral', showscale=True, colorbar=dict(title="HMM State", x=1.1), size=6), name="data",))
fig.show()

         1  -54596.54059928             +nan
         2  -30723.24705090  +23873.29354837
         3  -18377.41212343  +12345.83492747
         4   -9631.21306485   +8746.19905857
         5   -3448.94773691   +6182.26532794
         6   -1673.42527336   +1775.52246355
         7     356.64269145   +2030.06796481
         8    1856.27944889   +1499.63675745
         9    6907.21651394   +5050.93706505
        10   10012.84762107   +3105.63110713
        11   14014.75678175   +4001.90916068
        12   15099.31427502   +1084.55749327
        13   15608.00216222    +508.68788720
        14   16425.06001143    +817.05784921
        15   17245.34193512    +820.28192369
        16   18168.65715936    +923.31522424
        17   21234.97873817   +3066.32157881
        18   24437.43899541   +3202.46025724
        19   30375.92855443   +5938.48955902
        20   36551.49061536   +6175.56206093
        21   48526.53690903  +11975.04629367
        22   58424.50921454   +9897.97230551
        23

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from tqdm import tqdm

# --- Configuration
pd.set_option('display.max_rows', None)
np.random.seed(42)  # For reproducibility

# Round-turn trading costs, converted below into a fraction of notional value.
exchange_fee = 0.25  # per round turn
clearing_fee = 0.10  # per round turn
nfa_fee = 0.02  # per round turn
broker_commission = 1.0  # per round turn
total_fees = exchange_fee + clearing_fee + nfa_fee + broker_commission
taker_slippage = 0.25*2
index_value = 25000  # hypothetical index level
notional_value = 2 * index_value  # MNQ multiplier is $2
fee = ((total_fees + taker_slippage) / notional_value)

print(fee)

# Label Logic A (try both long and short, see which is the better trade):
# 1. Calculate EMA-based volatility to filter potential high-vol events.
# 2. Try both trade directions (long and short).
# 3. For each entry event:
#    a. Draw a box with the following characteristics:
#       - Horizontal line at entry high/low, allowing some stop-loss tolerance.
#       - Vertical line with a maximum length of N bars (time constraint).
#    b. Determine the end point:
#       - If the price falls back to the baseline (entry high/low) (with a bit of stop loss), or
#       - If price continues in the trade direction without fallback until N bars, use that as the end.
#    c. Between entry_x and end_x:
#       - Scan for the furthest point in the trade direction, mark as `close_x` (best close time),
#         used for calculating labels and sample weights.
#    d. Between entry_x and close_x:
#       - Scan for the maximum drawdown from the dynamic price and mark the drawdown ratio.

# Label Logic B (force breakout position, because reversals, even though high PnL, are arguably too hard to predict):
# 1. Calculate EMA-based volatility to filter potential breakout events.
# 2. Use breakout direction as the trade direction (long or short).
# 3. For each breakout entry event:
#    a. Draw a box with the following characteristics:
#       - Horizontal line at (entry_price - trade_direction * ATR * m), allowing some stop-loss tolerance.
#       - Vertical line with a maximum length of N bars (time constraint).
#    b. Determine the end point:
#       - If the price falls back to the baseline (entry - trade_direction * ATR * m) (with a bit of stop loss), or
#       - If price continues in the trade direction without fallback until N bars, use that as the end.
#    c. Between entry_x and end_x:
#       - Scan for the furthest point in the trade direction, mark as `close_x` (best close time),
#         used for calculating labels and sample weights.
#    d. Between entry_x and close_x:
#       - Scan for the maximum drawdown from the dynamic price and mark the drawdown ratio.

# 2 strategies:
#    a. force trade direction to breakout direction after volatility filter (because reversals, even though high PnL, are arguably too hard to predict)
#    b. assume long and short, pick the best direction

# --- CONFIGURATION ---
H_PER_D = 23                                # trade hours per day
P_PER_B = 5                                 # equivalent bar period
EMA_VOL_SPAN = int(60/P_PER_B * H_PER_D)    # span for EMA volatility (daily)
CUSUM_FACTOR = 0.1                          # multiplier for CUSUM threshold
ATR_WINDOW = int(60/P_PER_B * H_PER_D)      # bars for ATR
# SL_MULTIPLIER = 0.1                         # stop-loss = ATR * multiplier
MAX_BARS = int(60/P_PER_B * 4)              # maximum lifetime of event

# --- LOAD DATA ---
# Assumes parquet with time, high, low, close
# vrun_bar = pd.read_parquet(os.path.join(os.path.dirname(__file__), 'volume_run_bar.parquet'))
vrun_bar = pd.read_parquet(os.path.join(os.getcwd(), 'volume_run_bar.parquet'))
df = vrun_bar # [-(60*24*1):].copy()
df = df.set_index('time').sort_index()
df['mid'] = (df['high'] + df['low']) / 2

# --- VOLATILITY AND FILTER ---
df['ref'] = np.log((df['close'] / df['close'].iloc[0]).fillna(1))
# EMA-based volatility on simple (pct_change) returns
df['return'] = df['close'].pct_change().fillna(0)
# daily vol (volume/run bars have more constant volatility thanks to their homoscedasticity)
# NOTE(review): both sides use +0.0001 as the filler for masked-out returns;
# confirm that the negative side is not meant to use -0.0001.
df['pos_return'] = df['return'].where(df['return'] > 0, 0.0001)
df['neg_return'] = df['return'].where(df['return'] < 0, 0.0001)
df['pos_vol'] = df['pos_return'].ewm(span=EMA_VOL_SPAN, adjust=False).std().replace(0, np.nan).ffill().bfill()
df['neg_vol'] = df['neg_return'].ewm(span=EMA_VOL_SPAN, adjust=False).std().replace(0, np.nan).ffill().bfill()

# CUSUM to mark breakout events and direction.
# The columns are pulled out as arrays once so the loop does not pay for a
# pandas scalar lookup on every bar; the result is written back in one go.
bar_returns = df['return'].to_numpy()
pos_thresholds = df['pos_vol'].to_numpy() * CUSUM_FACTOR
neg_thresholds = df['neg_vol'].to_numpy() * CUSUM_FACTOR
events = np.zeros(len(df))
s_pos, s_neg = 0.0, 0.0
for i in range(1, len(df)):
    # note that for time i, the label/prediction is calculated after all info of that time is known
    diff = bar_returns[i]
    s_pos = max(0.0, s_pos + diff)
    s_neg = min(0.0, s_neg + diff)
    if s_pos > pos_thresholds[i]:
        events[i] = 0.01
        s_pos = 0.0
    elif s_neg < -neg_thresholds[i]:
        events[i] = -0.01
        s_neg = 0.0
df['event'] = events

# --- ATR for stop-loss ---
high_low = df['high'] - df['low']
high_close = (df['high'] - df['close'].shift()).abs()
low_close = (df['low'] - df['close'].shift()).abs()
tr = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
df['atr'] = tr.ewm(span=ATR_WINDOW, adjust=False).mean()

# --- INITIALIZE BARRIER COLUMNS ---
# Default every event to closing on its own bar. Seeding the column from the
# index gives it the index dtype, so storing index labels below never has to
# upcast a float column.
df['best_close_time'] = df.index
df['max_return_ratio'] = 0.0
df['max_drawback_ratio'] = 0.0
# --- APPLY CUSTOM TRIPLE-BARRIER DIRECTLY ON df ---
directions = [1, -1] # try both long and short
n_bars = len(df)
for direction in directions:
    # prev_mid = None
    # enumerate() supplies the row position directly instead of an index lookup per row
    for entry_pos, (idx, row) in tqdm(enumerate(df.iterrows()), total=n_bars):
        # direction = np.sign(row['event'])
        if direction == 0:
            # prev_mid = row['mid']
            continue
        entry_price = row['close']
        # if prev_mid is None:
        #     prev_mid = entry_price

        # stop line sits at the entry bar's low (long) or high (short)
        stop_line = row['low'] if direction == 1 else row['high']
        # sl_dist = row['atr'] * SL_MULTIPLIER
        # stop_line = entry_price - direction * sl_dist

        # define window slice: the entry bar plus at most MAX_BARS following bars
        max_end_pos = min(entry_pos + MAX_BARS, n_bars - 1)
        window = df.iloc[entry_pos:max_end_pos + 1]

        # detect stop-loss trigger (first close at or beyond the stop line)
        if direction == 1:
            hits = window[window['close'] <= stop_line]
        else:
            hits = window[window['close'] >= stop_line]
        end_idx = hits.index[0] if not hits.empty else window.index[-1]

        # find best exit point: the most favourable close between entry and end
        sub = df.loc[idx:end_idx]
        best_exit = sub['close'].idxmax() if direction == 1 else sub['close'].idxmin()
        rr = df.at[best_exit, 'close'] / entry_price - 1

        # compute drawback: retracement from the running extreme, relative to
        # the gain achieved so far, between entry and the best exit
        sub2 = df.loc[idx:best_exit]
        if direction == 1:
            running_high = sub2['close'].cummax()
            drawback = (running_high - sub2['close']) / (running_high-entry_price)
        else:
            running_low = sub2['close'].cummin()
            drawback = (sub2['close'] - running_low) / (entry_price-running_low)
        # 0/0 (no gain yet) counts as no drawback; x/0 is capped at 2.0
        dr = drawback.fillna(0.0).replace([np.inf, -np.inf], 2.0).max()

        # prev_mid = entry_price

        # assign results back to entry row
        if direction == directions[0]:
            df.at[idx, 'best_close_time'] = best_exit
            df.at[idx, 'max_return_ratio'] = rr
            df.at[idx, 'max_drawback_ratio'] = dr
        else: # process at end
            # compare against the first direction's result already stored in df
            calmar_long = df.at[idx, 'max_return_ratio']*(1-df.at[idx, 'max_drawback_ratio'])
            calmar_short = -rr*(1-dr)
            if calmar_short > calmar_long: # if short is the best option
                df.at[idx, 'best_close_time'] = best_exit
                df.at[idx, 'max_return_ratio'] = rr
                df.at[idx, 'max_drawback_ratio'] = dr

# Label calculation
# Normalise by the volatility of the side that was actually traded: the sign of
# max_return_ratio is the chosen direction (>0 long, <0 short). The CUSUM event
# sign only describes the current bar, not the selected trade.
df['vol'] = np.where(df['max_return_ratio'] > 0, df['pos_vol'], df['neg_vol'])
# volatility-normalised best return, discounted by the drawback on the way there
df['label'] = df['max_return_ratio']/df['vol']*(1-df['max_drawback_ratio'])
# outlier can over-estimate std
lower = np.percentile(df['label'], 5)
upper = np.percentile(df['label'], 95)
winsorize_soft = np.clip(df['label'], lower - (upper - lower) * 0.1, upper + (upper - lower) * 0.1)
df['label'] = np.tanh(df['label']/(winsorize_soft.std()*1.5)) # 1-sig:68.3%, 2-sig:95.4%, 3-sig:99.7%

# weight calculation: count how many event spans [entry, best_close_time] cover each bar
df['counts'] = 0
for span_start, span_stop in zip(df.index, df['best_close_time']):
    df.loc[span_start:span_stop, 'counts'] += 1
# uniqueness = 1 / overlap count (0 where no span covers the bar)
df['uniqueness'] = (1.0 / df['counts'].where(df['counts'] > 0)).fillna(0.0)
# TARGET_MEAN_SAMPLE_WEIGHT = 0.6
# df['uniqueness'] = df['uniqueness'] * (TARGET_MEAN_SAMPLE_WEIGHT / df['uniqueness'].mean())

# output = df[['open', 'high', 'low', 'close', 'label', 'uniqueness']]
# output.to_parquet(os.path.join(os.getcwd(), "bar_and_label.parquet"))

# --- CLEAN UP AUXILIARY COLUMNS ---
# df.drop(columns=[''], inplace=True)

fig1 = go.Figure()
fig1.add_trace(go.Scatter(y=df['return'], mode='markers', name="return"))
fig1.add_trace(go.Scatter(y=df['event'], mode='markers', name="breakout"))
fig1.add_trace(go.Scatter(y=df['pos_vol'], mode='lines', name="pos_vol"))
fig1.add_trace(go.Scatter(y=-df['neg_vol'], mode='lines', name="neg_vol"))
fig1.show()

# Price path coloured by label, and a shifted copy coloured by sample uniqueness.
fig2 = go.Figure()
fig2.add_trace(go.Scatter(y=df['ref']*100, mode='lines+markers', marker=dict(color=df['label'], colorscale='RdBu', showscale=True, colorbar=dict(title="calmer label"), size=6), name="label",))
fig2.add_trace(go.Scatter(y=df['ref']*100-10, mode='lines+markers', marker=dict(color=df['uniqueness'], colorscale='Plasma'), name="uniqueness"))
fig2.show()

# Kernel density estimate of the final label distribution.
from scipy.stats import gaussian_kde
x = df['label']
kde = gaussian_kde(x)
x_vals = np.linspace(x.min(), x.max(), 1000)
y_vals = kde(x_vals)
fig3 = go.Figure()
fig3.add_trace(go.Scatter(x=x_vals, y=y_vals, mode='lines', name='KDE'))
fig3.update_layout(title='KDE of label', xaxis_title='Label', yaxis_title='Density')
fig3.show()


3.74e-05


1440it [00:03, 390.50it/s]
1440it [00:03, 440.24it/s]


In [None]:
# Alternative Fine-grained Trade Labels to Triple-Barrier-Method

"""
TIMESERIES TREND/BREAKOUT Label
    
this label should be "Localized Sharpe/Calmar Ratio"
high-localized sharpe = high PnL opportunities:
    1. price consolidates, test support multiple times, we go long at support level
    2. price rise quickly, very high momentum, we go long to do mid-price chasing, set SL-TP narrowly(hence being safe)
    
how "localized" should we be?
    1. too short, your ML model would go long every time it sees a giant positive bar, which would not work
       (either too little info to predict the giant bar, or not much profit left after it)
    2. if not too short? how long(x-direction)? and how to define the returns/volatility(drawdown)(y-direction)?
        1. it would require something like the famous "Triple-Barrier-Method" in ML4F textbook
        2. because trend strategies usually apply for relatively narrow SL-TP(contrary to mean-reversion that has wide SL-TP),
           by using SL-TP, we can determine the future return direction, return magnitude(when SL-TP is triggered), thus the drawdown magnitude
    3. we use adaptive super-trend as the SL-TP metric; this is a relatively stable method that is insensitive to hyperparameter tuning
    
also this engineered label should be continuous etc. thus ICIR and other factor analysis tools could work nicely, and model can learn relatively easily
    
remember the last 3 switches (a, b, c) and calculate returns/drawdowns
based on profitability of initial direction:
- For point A between a and b:
    1. going along a's direction, possibly ends with profit at price_B
    2. going against a's direction, possibly ends with profit at price_C
    
    
TIMESERIES MEAN-REVERSION Label
Background: in an efficient market, ATR is usually bounded by fees (because of mean-reversion strategies like grid trading)
    It is by nature the counterpart for trend strategies, terminating premature breakouts, leaving fake-outs for breakout traders
    If the price can't even effectively break ATR, then it is not worth trading as trend/breakouts
Consider upper/lower band around (High+Low)/2 by +-EMA_rolling(ATR, n)
for point a in ATR range:
    if the next crossover b happens at upper ATR band, and denote the 2nd crossover back to ATR band as point c
        then the point is a presumable long trade
        low = min(ohlc, a->b)
        high = max(ohlc, b->c)
        label = signed local sharpe = + abs(delta(high)/delta(low))
for point a out of ATR range:
    if the next crossover b happens at upper ATR band, and denote the 2nd crossover back to ATR band as point c
        then the point is a presumable long trade
        low = min(ohlc, a->b)
        high = max(ohlc, b->c)
        label = signed local sharpe = + abs(delta(high)/delta(low))
"""

In [3]:
# for general Trend/Directional labels (probably for longer holding periods trades):
# Trend Scan Method

import pandas as pd
import numpy as np
import statsmodels.api as sm
import plotly.graph_objects as go
from scipy.stats import t

# Function to compute the t-value of the linear trend
def tValLinR(close):
    """
    Return the t-statistic of the slope of an OLS line fitted to `close`.

    The regressors are an intercept column and the observation number
    0, 1, ..., len(close) - 1.

    Parameters:
    - close: Array-like with a `.shape` attribute, the sequence of observations.

    Returns:
    - float: The t-value of the time-trend coefficient.
    """
    n_obs = close.shape[0]
    # Design matrix: column 0 is the constant, column 1 is the time index.
    design = np.column_stack((np.ones(n_obs), np.arange(n_obs)))
    fitted = sm.OLS(close, design).fit()
    slope_position = 1
    return fitted.tvalues[slope_position]

# Modified function to get continuous labels from the trend-scanning method
def getBinsFromTrend(molecule, close, span):
    """
    Derive continuous regression labels between -1 and 1 from the t-value of linear trend.

    For every observation in `molecule`, a linear trend is fitted over each
    look-forward horizon in `range(*span)`; the horizon with the largest
    absolute t-value is kept and that t-value is mapped to (-1, 1) through
    `2 * t.cdf(tVal, dof) - 1` with `dof = horizon - 2`.

    Parameters:
    - molecule: Index of observations to label (e.g., timestamps).
    - close: pandas Series, time series of prices (unique index).
    - span: Tuple, arguments for `range` giving the look-forward horizons
      (in number of points) to evaluate.

    Returns:
    - pandas DataFrame with columns 't1' (end label of the selected window),
      'tVal' (t-value), 'label' (continuous label). Observations too close to
      the end of `close` for the longest horizon are dropped. 't1' is converted
      to datetime unless `close` has a numeric index, in which case the
      original index labels are kept as they are.
    """
    out = pd.DataFrame(index=molecule, columns=['t1', 'tVal', 'label'])
    hrzns = range(*span)
    for dt0 in molecule:
        iloc0 = close.index.get_loc(dt0)
        if iloc0 + max(hrzns) > close.shape[0]:
            continue
        # Compute t-values for each horizon.
        # Explicit float dtype: without it the empty Series is object-typed.
        t_by_hrzn = pd.Series(index=hrzns, dtype=float)
        for hrzn in hrzns:
            # Positional window of exactly `hrzn` points starting at dt0
            window = close.iloc[iloc0:iloc0 + hrzn]
            t_by_hrzn[hrzn] = tValLinR(window.values)
        t_by_hrzn = t_by_hrzn.replace([-np.inf, np.inf, np.nan], 0)
        # Select horizon with maximum absolute t-value
        hrzn_selected = t_by_hrzn.abs().idxmax()
        t_val = t_by_hrzn[hrzn_selected]
        dt1 = close.index[iloc0 + hrzn_selected - 1]
        # Compute continuous label
        n_points = int(hrzn_selected)  # Number of points in the selected window
        dof = n_points - 2  # Degrees of freedom of the two-parameter fit
        if dof > 0:  # Ensure valid degrees of freedom
            label = 2 * t.cdf(t_val, df=dof) - 1
        else:
            label = 0  # Insufficient points for a trend
        out.loc[dt0, 't1'] = dt1
        out.loc[dt0, 'tVal'] = t_val
        out.loc[dt0, 'label'] = label
    # Only parse 't1' as datetimes when the index is not numeric; integer
    # labels would otherwise be reinterpreted as nanoseconds since the epoch.
    if not pd.api.types.is_numeric_dtype(close.index.dtype):
        out['t1'] = pd.to_datetime(out['t1'])
    out['tVal'] = pd.to_numeric(out['tVal'])
    out['label'] = pd.to_numeric(out['label'])
    return out.dropna(subset=['label'])

# Read the volume-run bars and keep the most recent 60*24*7 rows
vrun_bar = pd.read_parquet(os.path.join(os.getcwd(), 'volume_run_bar.parquet'))
df = vrun_bar[-(60*24*7):].copy()
# Running 0..n-1 position of each row, plus a lookup from index label to position
df['pos'] = range(len(df))
pos_map = dict(zip(df.index, df['pos']))

# Label every row of the slice
molecule = df.index

# Look-forward horizons handed to range(): 3, 4, ..., 9 points
span = (3, 10)

# Continuous trend-scan labels for the close series
labels = getBinsFromTrend(molecule, df['close'], span)

# --- Figure: close price against position, coloured by the continuous label ---
fig = go.Figure()

labeled_marker = dict(
    size=8,
    color=labels['label'],
    colorscale='RdYlGn',  # Red (-1) to Green (1) through Yellow (0)
    showscale=True,
    colorbar=dict(title='Trend Label'),
)
labeled_positions = [pos_map[i] for i in labels.index]
fig.add_trace(go.Scatter(
    x=labeled_positions,
    y=df.loc[labels.index, 'close'],
    mode='markers',
    marker=labeled_marker,
    name='Trend Labels',
    opacity=0.7
))

# Rows that received no label are drawn in black
unlabeled_idx = df.index.difference(labels.index)
if len(unlabeled_idx) > 0:
    unlabeled_positions = [pos_map[i] for i in unlabeled_idx]
    fig.add_trace(go.Scatter(
        x=unlabeled_positions,
        y=df.loc[unlabeled_idx, 'close'],
        mode='markers',
        marker=dict(color='black'),
        name='No Label',
        opacity=0.7
    ))

# Titles, legend and canvas size
layout_options = dict(
    title='Close Prices with Continuous Trend Labels',
    xaxis_title='Position',
    yaxis_title='Close Price',
    legend=dict(title='Trend'),
    height=600,
    width=1000,
)
fig.update_layout(**layout_options)

# Render the figure
fig.show()

In [None]:
# Other HMM variants:
# Higher order HMM
# Semi HSMM

from scipy.stats import poisson, multivariate_normal
import os
import sys
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from hmmlearn import hmm
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_rows', None)

# Load data
raw_bar = pd.read_parquet(os.path.join(os.getcwd(), 'history.parquet'))
vrun_bar = pd.read_parquet(os.path.join(os.getcwd(), 'volume_run_bar.parquet'))

# Prepare features
df = vrun_bar[-(60*24*7):].copy()
periods = [1, 2, 4, 6, 8, 10]
for period in periods:
    # Forward log-return: log(close[t+period] / close[t]).
    # pct_change(periods=-period) would give close[t]/close[t+period] - 1,
    # i.e. the negated forward return, so the ratio is written out explicitly.
    df[f'pct_fwd_{period}'] = np.log(df["close"].shift(-period) / df["close"]).fillna(0)
    # Backward log-return: log(close[t] / close[t-period]).
    df[f'pct_bak_{period}'] = np.log1p(df["close"].pct_change(periods=period).fillna(0))

for i, period in enumerate(periods):
    # Sums of the per-horizon log-returns over all horizons up to `period`.
    df[f'pct_cum_fwd_{period}'] = sum(df[f'pct_fwd_{p}'] for p in periods[:(i+1)])
    df[f'pct_cum_bak_{period}'] = sum(df[f'pct_bak_{p}'] for p in periods[:(i+1)])
    # Non-negative and non-positive parts of the cumulative forward return.
    df[f'f_p_{period}'] = df[f'pct_cum_fwd_{period}'].clip(lower=0)
    df[f'f_n_{period}'] = df[f'pct_cum_fwd_{period}'].clip(upper=0)
X = df[[f'f_p_{p}' for p in periods]+[f'f_n_{p}' for p in periods]]

# Normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


def HHMM():
    """
    Fit a higher-order Gaussian HMM by expanding the state space.

    An order-`order` chain over `n_states` base states is encoded as a
    first-order chain over `n_states ** order` expanded states, where each
    expanded state is the base-`n_states` number formed by the last `order`
    base states (most recent state in the lowest digit). Transitions are only
    allowed to expanded states whose history is consistent with the current one.

    Reads the module-level `X_scaled` and writes the decoded most-recent base
    state into the module-level `df["hmm_state"]`.

    Returns:
    - numpy array with the decoded base state for every row of `X_scaled`.
    """
    # Higher-order HMM parameters
    order = 2  # Set the desired order here
    n_states = 3  # Original number of states
    new_n_components = n_states ** order  # Expanded state space

    # Create constrained transition matrix.
    # Moving one step forward drops the oldest digit of the history and appends
    # the next state as the new lowest digit; this holds for any order >= 1.
    history_size = n_states ** (order - 1)
    transmat = np.zeros((new_n_components, new_n_components))
    for i in range(new_n_components):
        kept_history = i % history_size
        next_indices = [kept_history * n_states + next_state for next_state in range(n_states)]
        transmat[i, next_indices] = 1.0 / len(next_indices)

    # Initialize model with custom transitions
    model = hmm.GaussianHMM(
        n_components=new_n_components,
        covariance_type="full",
        n_iter=30,
        verbose=True,
        random_state=0,
        init_params='smc'  # Initialize startprob, means and covars; keep our transmat
    )
    model.transmat_ = transmat  # Set our custom transition matrix

    # Fit model
    model.fit(X_scaled)

    # Predict and decode states
    hidden_states = model.predict(X_scaled)
    current_states = hidden_states % n_states  # Extract current state from expanded state
    df["hmm_state"] = current_states
    return current_states


class PoissonDuration:
    """
    Poisson duration distribution truncated to the support 1..max_duration.

    The Poisson mean is alpha / beta. Probabilities are renormalised over the
    truncated support so that they sum to one.
    """

    def __init__(self, alpha, beta, max_duration):
        """
        Parameters:
        - alpha, beta: the Poisson mean is alpha / beta.
        - max_duration: largest duration with non-zero probability (>= 1).

        Raises:
        - ValueError: if max_duration < 1 or the truncated probabilities cannot
          be normalised (e.g. a non-positive mean).
        """
        if max_duration < 1:
            raise ValueError("max_duration must be at least 1")
        # Poisson with mean = alpha/beta truncated at max_duration
        self.alpha = alpha
        self.beta = beta
        self.max_duration = max_duration
        # precompute probabilities
        self.probs = self._truncated_probs(alpha / beta)

    def _truncated_probs(self, lam):
        """Return the Poisson(lam) pmf on 1..max_duration, renormalised to sum to 1."""
        support = np.arange(1, self.max_duration + 1)
        probs = poisson.pmf(support, lam)
        total = probs.sum()
        if not np.isfinite(total) or total <= 0:
            # e.g. lam <= 0 puts no mass on durations >= 1; dividing would give NaN
            raise ValueError(f"cannot build a truncated Poisson pmf for mean {lam!r}")
        return probs / total

    def pmf(self, d):
        """Return P(duration == d); 0.0 outside 1..max_duration."""
        if 1 <= d <= self.max_duration:
            return self.probs[d-1]
        return 0.0

    def update(self, durations, weights):
        """
        Moment-matching update: set the Poisson mean to the weighted mean duration.

        Parameters:
        - durations: array-like of durations.
        - weights: array-like of their expected counts (same length).

        If the total weight is not positive, or the resulting mean is not a
        positive finite number, the current parameters are left unchanged.
        """
        durations = np.asarray(durations, dtype=float)
        weights = np.asarray(weights, dtype=float)
        total_weight = weights.sum()
        if not np.isfinite(total_weight) or total_weight <= 0:
            return  # nothing to learn from; avoids a 0/0 mean
        lam = np.sum(durations * weights) / total_weight
        if not np.isfinite(lam) or lam <= 0:
            return  # would make every probability zero or NaN
        # set new alpha/beta such that alpha/beta = lam; keep beta = 1
        self.alpha = lam
        self.beta = 1
        self.probs = self._truncated_probs(lam)


class HSMM:
    """
    Skeleton hidden semi-Markov model with Gaussian emissions and per-state
    truncated-Poisson durations.

    Only the forward pass and a duration-free Viterbi decoder are implemented;
    `fit` is a placeholder that does not update any parameter.
    """

    def __init__(self, n_states, obs_dim, max_duration):
        """
        Parameters:
        - n_states: number of hidden states K.
        - obs_dim: dimension D of each observation.
        - max_duration: longest segment length considered.
        """
        self.K = n_states
        self.D = obs_dim
        self.max_duration = max_duration
        # initialize parameters: uniform transitions/start, random means, identity covariances
        self.A = np.full((self.K, self.K), 1/self.K)
        self.pi = np.full(self.K, 1/self.K)
        self.mu = np.random.randn(self.K, self.D)
        self.Sigma = np.array([np.eye(self.D) for _ in range(self.K)])
        self.durations = [PoissonDuration(alpha=10, beta=1, max_duration=max_duration)
                          for _ in range(n_states)]

    def _compute_obs_likelihood(self, X):
        """Return B with B[k, t] = Gaussian density of X[t] under state k, shape (K, T)."""
        T = X.shape[0]
        B = np.zeros((self.K, T))
        for k in range(self.K):
            rv = multivariate_normal(mean=self.mu[k], cov=self.Sigma[k])
            B[k, :] = rv.pdf(X)
        return B

    def _compute_obs_loglikelihood(self, X):
        """Return log-densities with the same layout as `_compute_obs_likelihood`.

        Computed with `logpdf` so that observations far from every state mean
        keep finite, comparable scores instead of underflowing to log(0).
        """
        T = X.shape[0]
        log_B = np.zeros((self.K, T))
        for k in range(self.K):
            rv = multivariate_normal(mean=self.mu[k], cov=self.Sigma[k])
            log_B[k, :] = rv.logpdf(X)
        return log_B

    def _forward(self, B):
        """
        Forward pass over segment end points.

        Parameters:
        - B: emission densities, shape (K, T).

        Returns:
        - alpha, shape (K, T): alpha[k, t] accumulates the probability of the
          observations up to t with a segment of state k ending exactly at t.

        NOTE: probabilities are multiplied without rescaling, so the values can
        underflow on long sequences.
        """
        T = B.shape[1]
        # alpha: shape (K, T)
        alpha = np.zeros((self.K, T))
        # Paths whose first segment starts at t = 0 and lasts d steps
        for k in range(self.K):
            for d in range(1, min(T, self.max_duration)+1):
                alpha[k, d-1] += self.pi[k] * self.durations[k].pmf(d) * np.prod(B[k, :d])
        # Paths that reach t through a transition from a segment ending at t - d
        for t in range(1, T):
            for j in range(self.K):
                val = 0.0
                for i in range(self.K):
                    for d in range(1, min(self.max_duration, t)+1):
                        val += alpha[i, t-d] * self.A[i, j] * self.durations[j].pmf(d) * np.prod(B[j, t-d+1:t+1])
                # Accumulate: for t < max_duration the first-segment term set
                # above must be kept, not overwritten.
                alpha[j, t] += val
        return alpha

    def fit(self, X, num_iters=10):
        """
        Placeholder EM loop: runs the forward pass `num_iters` times and prints
        progress; no parameter is updated.
        """
        B = self._compute_obs_likelihood(X)
        for iteration in range(num_iters):
            # E-step
            alpha = self._forward(B)
            # (Backward and computation of gamma, xi, etc. would go here)
            # M-step (placeholder): update transitions and emissions empirically
            # For brevity, we skip full implementation details.
            # A more complete implementation would compute expected counts and update:
            # self.A, self.pi, self.mu, self.Sigma, and self.durations
            print(f"Iteration {iteration+1}/{num_iters} (E-step and M-step placeholder)")
        print("Fit complete.")

    def most_likely_states(self, X):
        """
        Decode the most likely state sequence with standard Viterbi, ignoring durations.

        Parameters:
        - X: observations, shape (T, D).

        Returns:
        - integer numpy array of length T with the decoded state per observation.
        """
        # Use simple Viterbi-like decoding ignoring durations (for brevity)
        T = X.shape[0]
        delta = np.zeros((self.K, T))
        psi = np.zeros((self.K, T), dtype=int)
        log_B = self._compute_obs_loglikelihood(X)
        # Structural zeros in pi / A legitimately map to -inf; silence the warning
        with np.errstate(divide='ignore'):
            log_pi = np.log(self.pi)
            log_A = np.log(self.A)
        delta[:, 0] = log_pi + log_B[:, 0]
        for t in range(1, T):
            for j in range(self.K):
                seq_probs = delta[:, t-1] + log_A[:, j]
                psi[j, t] = np.argmax(seq_probs)
                delta[j, t] = np.max(seq_probs) + log_B[j, t]
        # Backtrack from the best final state
        states = np.zeros(T, dtype=int)
        states[-1] = np.argmax(delta[:, -1])
        for t in range(T-2, -1, -1):
            states[t] = psi[states[t+1], t+1]
        return states


def hsmm():
    """
    Run the HSMM on the module-level `X_scaled` (shape (T, D)) and store the
    decoded state of every row in the module-level `df['label']`.

    `df` and `X_scaled` must already be defined by the surrounding notebook.
    """
    n_features = X_scaled.shape[1]
    segmenter = HSMM(n_states=2, obs_dim=n_features, max_duration=10)
    segmenter.fit(X_scaled, num_iters=30)
    df['label'] = segmenter.most_likely_states(X_scaled)

         1  -44241.13941906             +nan
         2    9608.69771630  +53849.83713536
         3   52926.28970205  +43317.59198576
         4   69803.89271136  +16877.60300931
         5   80594.81389702  +10790.92118566
         6   83711.92246035   +3117.10856333
         7   86630.50818776   +2918.58572741
         8   90455.30498917   +3824.79680141
         9  123476.43321124  +33021.12822207
        10  108784.82433439  -14691.60887686
Model is not converging.  Current: 108784.82433438509 is not greater than 123476.43321124124. Delta is -14691.608876856146
