In [None]:
# regime_inference.py
import numpy as np
import pandas as pd

# ───────────────────────── helpers ───────────────────────────────────────
def _ols_slope(y: np.ndarray) -> float:
    t = np.arange(len(y))
    X = np.vstack([t, np.ones_like(t)]).T
    m, _ = np.linalg.lstsq(X, y, rcond=None)[0]
    return m


def _slope_vol_reg(close: np.ndarray,
                   idx: int,
                   slope_win: int = 30,
                   vol_win: int = 100) -> float | int:
    """
    Slope/volatility regime of one instrument at bar ``idx``.

    Parameters
    ----------
    close : np.ndarray
        1-D price history of a single instrument.
    idx : int
        Position of the bar to evaluate.  Negative values count from the
        end, as with normal Python indexing.
    slope_win : int
        Number of log-prices used for the OLS trend slope ending at ``idx``.
    vol_win : int
        Number of log-returns used for the rolling standard deviation.
        Because the first return is undefined, ``vol_win + 1`` prices are
        needed before the volatility is available.

    Returns
    -------
    float | int
        ``np.nan`` when there is not enough (finite) history for either the
        slope or the volatility; otherwise ``2`` if the slope is positive and
        the current volatility is below the median of all volatilities up to
        and including ``idx``, else ``0``.

    Raises
    ------
    IndexError
        If ``idx`` lies outside ``close``.
    """
    logp = np.log(np.asarray(close, dtype=float))
    n_bars = len(logp)
    if not -n_bars <= idx < n_bars:
        raise IndexError(f"idx {idx} out of bounds for {n_bars} bars")
    if idx < 0:
        # Normalise so the median slice below covers bars 0..idx as intended.
        idx += n_bars

    rtn = pd.Series(logp).diff()
    vol_series = rtn.rolling(vol_win, min_periods=vol_win).std()
    vol = vol_series.iloc[idx]

    start = idx - slope_win + 1
    if np.isnan(vol) or start < 0:
        return np.nan

    # Only the slope of the window ending at `idx` is needed, so fit that
    # single window instead of a rolling fit over the whole series.
    window = logp[start: idx + 1]
    if not np.isfinite(window).all():
        # pandas rolling treats NaN/inf as missing, which fails
        # min_periods == slope_win and yields NaN; mirror that here.
        return np.nan
    slope = _ols_slope(window)
    if np.isnan(slope):
        return np.nan

    median_vol = vol_series.iloc[: idx + 1].median()
    return 2 if (slope > 0 and vol < median_vol) else 0


# ────────────────────── pipeline (no drop_last) ──────────────────────────
def compute_regime_features_window(prices_window: np.ndarray) -> np.ndarray:
    """
    Compute nine rule-based regime features per instrument at the latest bar.

    Parameters
    ----------
    prices_window : np.ndarray
        2-D array of shape (n_inst, win_len).  Each row is one instrument's
        price history ending at the timestep for which we want predictions
        (the last column is the bar being evaluated).  Prices are assumed to
        be strictly positive because ``np.log`` is applied to them.

    Returns
    -------
    np.ndarray
        Float array of shape (n_inst, 9).  Columns in training order:
        [ma, ema, slope_vol, macd, kalman, fib, psar, zscore, wret]
        Each entry is 2 (the indicator's "up" condition holds), 0 ("down"),
        1 (neutral; only fib, zscore and wret emit it) or NaN when the
        window is too short for that indicator.

    Notes
    -----
    History needed at the last bar, as implemented below:
    fib needs win_len >= 51, wret >= 46, zscore >= 90, and slope_vol (with
    the defaults of ``_slope_vol_reg``: 100 returns) needs win_len >= 101,
    so a 100-bar window always yields NaN for slope_vol.  The in-file
    caller ``infer_from_file`` uses 102 bars.  The MA feature does not
    return NaN on short windows: with fewer than 70 bars the long average
    is NaN, the comparison is False, and the feature falls through to 2.
    """
    n_inst, win_len = prices_window.shape
    idx = win_len - 1                     # evaluate at the latest bar

    out = np.full((n_inst, 9), np.nan)
    # Weights for the weighted-return feature: sqrt(1..45), normalised to
    # sum to 1, so the most recent of the 45 returns gets the largest weight.
    sqrt_weights = np.arange(1, 46, dtype=float) ** 0.5
    sqrt_weights /= sqrt_weights.sum()

    for i in range(n_inst):
        close = prices_window[i]
        logp  = np.log(close)

        # MA regime: 5-bar vs 70-bar simple moving average of log-price.
        # 0 only when the long average is strictly above the short one.
        ma_s = pd.Series(logp).rolling(5).mean().iloc[idx]
        ma_l = pd.Series(logp).rolling(70).mean().iloc[idx]
        ma_reg = 0 if ma_l > ma_s else 2

        # EMA regime: span-5 vs span-50 exponential average of log-price.
        ema_s = pd.Series(logp).ewm(span=5,  adjust=False).mean().iloc[idx]
        ema_l = pd.Series(logp).ewm(span=50, adjust=False).mean().iloc[idx]
        ema_reg = 2 if ema_s > ema_l else 0

        # Slope/Vol regime (2, 0 or NaN; see _slope_vol_reg)
        sv_reg = _slope_vol_reg(close, idx)

        # MACD regime: fast-minus-slow EMA compared with its own smoothed
        # signal line.
        # NOTE(review): the first positional argument of `ewm` is `com`
        # (centre of mass), not `span`, so 50 and 90 here are not on the same
        # scale as the `span=` values used elsewhere — confirm this matches
        # the training-time feature definition.
        macd_line = (
            pd.Series(logp).ewm(50, adjust=False).mean()
            - pd.Series(logp).ewm(90, adjust=False).mean()
        )
        signal_line = macd_line.ewm(span=40, adjust=False).mean()
        macd_reg = 2 if macd_line.iloc[idx] > signal_line.iloc[idx] else 0

        # Kalman trend regime: scalar filter whose prediction is simply the
        # previous estimate; the filter is re-run from the first bar of the
        # window, initialised at the first log-price with variance 1.
        proc_var, meas_var = 0.01, 10.0
        x_est = np.zeros(win_len)
        P     = np.zeros(win_len)
        x_est[0], P[0] = logp[0], 1.0
        for t in range(1, win_len):
            x_pred = x_est[t - 1]
            P_pred = P[t - 1] + proc_var
            K      = P_pred / (P_pred + meas_var)
            x_est[t] = x_pred + K * (logp[t] - x_pred)
            P[t]     = (1 - K) * P_pred
        # 2 when the latest log-price sits above the filtered estimate.
        kalman_reg = 2 if logp[idx] > x_est[idx] else 0

        # Fibonacci regime: position of the latest close inside the high-low
        # range of the last 50 closes (2 above the 78.6% level, 0 below the
        # 61.8% level, 1 in between).
        if idx >= 50:
            win50 = close[idx - 49 : idx + 1]
            hi, lo = win50.max(), win50.min()
            rng = hi - lo
            upper, lower = lo + 0.786 * rng, lo + 0.618 * rng
            fib_reg = 2 if close[idx] > upper else 0 if close[idx] < lower else 1
        else:
            fib_reg = np.nan

        # PSAR regime: parabolic-SAR-style stop computed from closes only.
        # The acceleration factor starts at 0.01, grows by 0.01 on each new
        # extreme and is capped at 0.10.
        psar = np.empty(win_len)
        trend_up, af, max_af = True, 0.01, 0.10
        ep = close[0]
        psar[0] = close[0]
        for t in range(1, win_len):
            psar[t] = psar[t - 1] + af * (ep - psar[t - 1])
            if trend_up:
                if close[t] < psar[t]:
                    # Reversal to downtrend: the stop jumps to the previous
                    # extreme, and the extreme/acceleration are reset.
                    trend_up, psar[t], ep, af = False, ep, close[t], 0.01
                elif close[t] > ep:
                    ep, af = close[t], min(af + 0.01, max_af)
            else:
                if close[t] > psar[t]:
                    # Reversal to uptrend (mirror of the branch above).
                    trend_up, psar[t], ep, af = True, ep, close[t], 0.01
                elif close[t] < ep:
                    ep, af = close[t], min(af + 0.01, max_af)
        psar_reg = 2 if close[idx] > psar[idx] else 0

        # Z-score regime: latest close relative to the 90-bar mean/std of the
        # raw (not log) prices; |z| <= 0.5 counts as neutral.
        ma90 = pd.Series(close).rolling(90).mean().iloc[idx]
        sd90 = pd.Series(close).rolling(90).std().iloc[idx]
        if np.isnan(ma90) or np.isnan(sd90):
            zscore_reg = np.nan
        else:
            z = (close[idx] - ma90) / sd90
            zscore_reg = 2 if z > 0.5 else 0 if z < -0.5 else 1

        # Weighted-return regime: sign of the sqrt-weighted sum of the last
        # 45 simple returns.
        if idx >= 45:
            r = pd.Series(close).pct_change().iloc[idx - 44 : idx + 1].values
            wr = np.dot(r, sqrt_weights)
            wret_reg = 2 if wr > 0 else 0 if wr < 0 else 1
        else:
            wret_reg = np.nan

        out[i] = [
            ma_reg, ema_reg, sv_reg, macd_reg, kalman_reg,
            fib_reg, psar_reg, zscore_reg, wret_reg,
        ]

    return out


# ──────────────────── I/O wrappers for prices.txt ───────────────────────
def _extract_window(price_file: str,
                    timestep: int,
                    win_len: int = 100) -> np.ndarray:
    """
    Slice the latest `win_len` bars (inclusive) ending at `timestep` from the
    price file and transpose to (n_inst, win_len).
    """
    df = pd.read_csv(price_file, sep=r"\s+", header=None)
    n_rows, n_inst = df.shape

    if not (0 <= timestep < n_rows):
        raise ValueError(f"timestep {timestep} out of range (0 … {n_rows-1})")
    if timestep < win_len - 1:
        raise ValueError("Not enough history to build a 100-bar window.")

    slice_df = df.iloc[timestep - win_len + 1 : timestep + 1, :]
    return slice_df.to_numpy().T            # (n_inst, win_len)


def infer_from_file(price_file: str,
                    timestep: int,
                    win_len: int = 102) -> np.ndarray:
    """
    High-level convenience wrapper:
    1. read the price file
    2. build the (n_inst, win_len) window ending at `timestep`
    3. run the regime-feature pipeline

    Parameters
    ----------
    price_file : str
        Path to the whitespace-separated price file (rows = timesteps,
        columns = instruments).
    timestep : int
        Zero-based row index of the bar to evaluate; must be at least
        ``win_len - 1``.
    win_len : int
        Number of bars handed to the feature pipeline.  Defaults to 102.
        NOTE(review): presumably chosen because the slope/vol feature needs
        more than 100 prices for its 100-return volatility — confirm against
        the training pipeline before changing it.

    Returns
    -------
    np.ndarray
        Feature matrix of shape (n_inst, 9) for the bar at `timestep`.

    Raises
    ------
    ValueError
        Propagated from `_extract_window` when `timestep` is out of range or
        there is not enough history.
    """
    window = _extract_window(price_file, timestep, win_len=win_len)
    return compute_regime_features_window(window)


In [None]:

# Smoke test: compute the features for the window ending at row 101, the
# earliest timestep that has a full 102-bar window.
# NOTE(review): the variable name says "t451" but the call uses timestep=101
# — confirm which timestep is intended.
features_t451 = infer_from_file("prices.txt", timestep=101)
# features_t451.shape  ->  (n_inst, 9), e.g. (50, 9) for a 50-column file
print(features_t451)  # prints the full matrix, one row per instrument


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ── Helpers ──────────────────────────────────────────────────────────────
def get_segments(reg):
    """Convert a 1D sequence of regimes into a list of (start, end, label).

    Consecutive equal labels are merged into one segment; ``start`` and
    ``end`` are inclusive positions into ``reg``.  Any array-like input is
    accepted, and an empty input yields an empty list.
    """
    reg = np.asarray(reg)
    if len(reg) == 0:
        # No bars means no segments (indexing reg[[0]] would fail below).
        return []
    # Positions i where reg[i] differs from reg[i + 1], i.e. segment ends.
    changes = np.flatnonzero(reg[:-1] != reg[1:])
    starts = np.concatenate(([0], changes + 1))
    ends = np.concatenate((changes, [len(reg) - 1]))
    return list(zip(starts, ends, reg[starts]))

# ── 1) Load raw prices ────────────────────────────────────────────────────
# ListedColormap is needed for `true_cmap` in step 4 and is not imported
# anywhere else in this cell.
from matplotlib.colors import ListedColormap

price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)
T        = len(price_df)
n_inst   = price_df.shape[1]
prices   = price_df.to_numpy()            # (T, n_inst); read once, sliced below

# ── 2) Allocate prediction buffer ─────────────────────────────────────────
# Columns before the first full window are never written and stay at 0.
preds_all = np.zeros((n_inst, T), dtype=int)

# ── 3) Inference loop ─────────────────────────────────────────────────────
#    WIN_LEN is the feature-window length (102 matches infer_from_file).
WIN_LEN    = 102
FIRST_PRED = WIN_LEN - 1                  # first row index with a full window

# `t` is the zero-based row of the bar being classified.  The window covers
# rows t-WIN_LEN+1 .. t and the prediction is stored in column t, so features
# and predictions stay aligned and the last valid row is T-1.
for t in range(FIRST_PRED, T):
    # Same window infer_from_file would build, without re-reading the file.
    window_t = prices[t - WIN_LEN + 1 : t + 1].T      # (n_inst, WIN_LEN)
    feats_t  = compute_regime_features_window(window_t)
    # choose your classifier:
    #   if you're using a sklearn lr:
    #       regs_t = lr.predict(feats_t)
    #   or if your torch LSTM:
    #       with torch.no_grad():
    #           logits = model(torch.tensor(feats_t[None,:,:], dtype=torch.float32).to(device))
    #           regs_t = logits.argmax(-1).cpu().numpy()[0]
    # NOTE(review): `lr` is not defined in this cell — presumably a fitted
    # classifier created earlier in the notebook; confirm before running.
    regs_t = lr.predict(feats_t)
    preds_all[:, t] = regs_t

# ── 4) Plot price + regime shading for each instrument ────────────────────
true_cmap = ListedColormap(["#ffcccc","#ccffcc"])  # bear, bull
for inst in range(n_inst):
    price = prices[:, inst]
    # Only shade bars that actually received a prediction; the warm-up
    # columns still hold the placeholder 0 and would otherwise show as bear.
    reg = preds_all[inst, FIRST_PRED:]
    segments = get_segments(reg) if len(reg) else []

    fig, ax = plt.subplots(1,1, figsize=(12,3))
    for s, e, lbl in segments:
        color = true_cmap(lbl//2)  # assuming {0→bear,2→bull}
        ax.axvspan(s + FIRST_PRED, e + FIRST_PRED,
                   color=color, alpha=0.4, linewidth=0)

    ax.plot(price, 'k-', lw=1)
    ax.set_title(f"Instrument {inst:02d}")
    ax.set_xlim(0, T)
    ax.set_ylabel("Price")
    ax.set_xlabel("Timestep")
    plt.tight_layout()
    plt.show()
