# In [None]:
# regime_inference_array.py

import numpy as np
import pandas as pd

def _ols_slope(y: np.ndarray) -> float:
    """Return the least-squares slope of ``y`` regressed on 0, 1, …, len(y)-1.

    A straight line ``slope * t + intercept`` is fitted with
    ``np.linalg.lstsq``; only the slope coefficient is returned.
    """
    n_obs = len(y)
    steps = np.arange(n_obs)
    # Design matrix: first column is the time index, second the intercept.
    design = np.column_stack((steps, np.ones_like(steps)))
    coeffs = np.linalg.lstsq(design, y, rcond=None)[0]
    slope, _intercept = coeffs
    return slope

def _slope_vol_reg(close: np.ndarray,
                   idx:   int,
                   slope_win: int = 30,
                   vol_win:   int = 100
                  ) -> float | int:
    """
    Slope‐and‐volatility regime:
      2 if (slope > 0 and vol < rolling_median(vol)), else 0.
    """
    logp = np.log(close)

    # 1) rolling slope
    slope_series = (
        pd.Series(logp)
          .rolling(slope_win, min_periods=slope_win)
          .apply(_ols_slope, raw=True)
    )
    # 2) rolling volatility (std of log‐returns)
    rtn        = pd.Series(logp).diff()
    vol_series = rtn.rolling(vol_win, min_periods=1).std()

    slope = slope_series.iloc[idx]
    vol   = vol_series.iloc[idx]
    if np.isnan(slope) or np.isnan(vol):
        return np.nan

    # 3) causal median of vol up to idx
    median_vol = vol_series.iloc[: idx + 1].median()

    return 2 if (slope > 0 and vol < median_vol) else 0

def compute_regime_features_window(prices_window: np.ndarray) -> np.ndarray:
    """
    Compute 9 regime indicators for each instrument at the LAST bar of a
    window of closing prices.

    Parameters
    ----------
    prices_window : np.ndarray
        shape (n_inst, win_len); natural logs of the prices are taken.

    Returns
    -------
    np.ndarray of shape (n_inst, 9), float, columns in the order
      [ma, ema, slope_vol, macd, kalman, fib, psar, zscore, wret].
    Each entry is 2 or 0 (fib, zscore and wret may also be 1), or NaN when
    the window is too short for that indicator.
    """
    n_inst, win_len = prices_window.shape
    idx = win_len - 1

    out = np.full((n_inst, 9), np.nan)

    # Square-root weights for the 45-bar weighted return, normalised to sum
    # to 1; the most recent return receives the largest weight.
    sqrt_weights = np.arange(1, 46, dtype=float) ** 0.5
    sqrt_weights /= sqrt_weights.sum()

    # Kalman and PSAR constants do not depend on the instrument.
    proc_var, meas_var = 0.01, 10.0
    af_step, max_af = 0.01, 0.10

    for i in range(n_inst):
        close = prices_window[i]
        logp  = np.log(close)
        # Build the pandas wrappers once instead of once per indicator.
        close_s = pd.Series(close)
        logp_s  = pd.Series(logp)

        # 1) MA regime: 5-bar vs 70-bar mean of log-price.
        #    NaN (instead of a silent 2) when either mean is undefined,
        #    matching how the other indicators report insufficient data.
        ma_s = logp_s.rolling(5).mean().iloc[idx]
        ma_l = logp_s.rolling(70).mean().iloc[idx]
        if np.isnan(ma_s) or np.isnan(ma_l):
            ma_reg = np.nan
        else:
            ma_reg = 0 if ma_l > ma_s else 2

        # 2) EMA regime: span-5 vs span-50 exponential means of log-price.
        ema_s = logp_s.ewm(span=5,  adjust=False).mean().iloc[idx]
        ema_l = logp_s.ewm(span=50, adjust=False).mean().iloc[idx]
        ema_reg = 2 if ema_s > ema_l else 0

        # 3) Slope/Vol regime
        sv_reg = _slope_vol_reg(close, idx)

        # 4) MACD regime
        # NOTE(review): the first positional argument of ewm() is `com`, not
        # `span`, so these are com=50 / com=90 while the signal line uses
        # span=40.  Left unchanged on purpose: switching to span would alter
        # the feature values - confirm the intent before touching it.
        macd_line   = logp_s.ewm(50, adjust=False).mean() \
                    - logp_s.ewm(90, adjust=False).mean()
        signal_line = macd_line.ewm(span=40, adjust=False).mean()
        macd_reg    = 2 if macd_line.iloc[idx] > signal_line.iloc[idx] else 0

        # 5) Kalman-trend regime: scalar Kalman filter on log-price; only the
        #    final estimate is compared, so no history arrays are kept.
        x_hat, p_var = logp[0], 1.0
        for obs in logp[1:]:
            p_pred = p_var + proc_var
            gain   = p_pred / (p_pred + meas_var)
            x_hat  = x_hat + gain * (obs - x_hat)
            p_var  = (1 - gain) * p_pred
        kalman_reg = 2 if logp[idx] > x_hat else 0

        # 6) Fibonacci regime: where the last close sits inside the range of
        #    the last 50 closes (above 78.6 % -> 2, below 61.8 % -> 0, else 1).
        # NOTE(review): the 50-bar slice is already available at idx == 49;
        # the guard below is one bar stricter.  Kept as in the original.
        if idx >= 50:
            recent = close[idx-49:idx+1]
            hi, lo = recent.max(), recent.min()
            rng = hi - lo
            upper, lower = lo + 0.786*rng, lo + 0.618*rng
            if close[idx] > upper:
                fib_reg = 2
            elif close[idx] < lower:
                fib_reg = 0
            else:
                fib_reg = 1
        else:
            fib_reg = np.nan

        # 7) PSAR regime: stop-and-reverse level tracked on closes only.
        #    Only the final level is needed, so it is kept as a scalar.
        sar = ep = close[0]
        trend_up, af = True, af_step
        for px in close[1:]:
            sar = sar + af*(ep - sar)
            if trend_up:
                if px < sar:
                    # reversal: level jumps to the old extreme point
                    trend_up, sar, ep, af = False, ep, px, af_step
                elif px > ep:
                    ep, af = px, min(af + af_step, max_af)
            else:
                if px > sar:
                    trend_up, sar, ep, af = True, ep, px, af_step
                elif px < ep:
                    ep, af = px, min(af + af_step, max_af)
        psar_reg = 2 if close[idx] > sar else 0

        # 8) Z-score regime: last close vs its 90-bar mean/std (±0.5 bands).
        ma90 = close_s.rolling(90).mean().iloc[idx]
        sd90 = close_s.rolling(90).std().iloc[idx]
        if np.isnan(ma90) or np.isnan(sd90):
            zscore_reg = np.nan
        else:
            z = (close[idx] - ma90)/sd90
            zscore_reg = 2 if z > 0.5 else 0 if z < -0.5 else 1

        # 9) Weighted-return regime: sign of the sqrt-weighted sum of the
        #    last 45 simple returns (needs 46 prices, hence idx >= 45).
        if idx >= 45:
            r = close_s.pct_change().iloc[idx-44:idx+1].values
            wr = np.dot(r, sqrt_weights)
            wret_reg = 2 if wr > 0 else 0 if wr < 0 else 1
        else:
            wret_reg = np.nan

        out[i] = [
            ma_reg, ema_reg, sv_reg, macd_reg, kalman_reg,
            fib_reg, psar_reg, zscore_reg, wret_reg,
        ]

    return out

def infer_from_array(prices: np.ndarray,
                     timestep: int,
                     win_len:   int = 100) -> np.ndarray:
    """
    Inference entry-point for an in-memory price array.

    Slices the ``win_len`` bars ending at ``timestep`` (inclusive) for every
    instrument and passes them to ``compute_regime_features_window``.

    Parameters
    ----------
    prices : np.ndarray
        shape (n_inst, T)
    timestep : int
        must satisfy win_len-1 <= timestep < T
    win_len : int
        how many bars to include in the rolling window (>= 1)

    Returns
    -------
    np.ndarray of shape (n_inst, 9)

    Raises
    ------
    ValueError
        if ``prices`` is not 2-D, ``win_len`` < 1, ``timestep`` is out of
        range, or fewer than ``win_len`` bars precede ``timestep``.
    """
    prices = np.asarray(prices)
    if prices.ndim != 2:
        raise ValueError(f"prices must be 2-D (n_inst, T), got shape {prices.shape}")
    # A non-positive window would slip past the checks below and produce an
    # empty slice that only fails deep inside the feature code.
    if win_len < 1:
        raise ValueError(f"win_len must be >= 1, got {win_len}")

    T = prices.shape[1]
    if not (0 <= timestep < T):
        raise ValueError(f"timestep {timestep} out of range (0 … {T-1})")
    if timestep < win_len - 1:
        raise ValueError(f"need at least {win_len} bars, got only {timestep+1}")

    # slice latest `win_len` bars for each instrument
    window = prices[:, (timestep - win_len + 1):(timestep + 1)]
    return compute_regime_features_window(window)


# In [None]:
# infer_stream_from_array_with_progress.py

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# ─── YOUR SAVED Bi-LSTM MODEL CLASS & LOADING ───────────────────────────────
class RegimeBiLSTM(nn.Module):
    """Bidirectional LSTM whose last time-step output feeds a linear layer.

    The attribute names ``lstm`` and ``fc`` are the keys under which the
    parameters appear in the module's ``state_dict``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super().__init__()
        lstm_kwargs = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        self.lstm = nn.LSTM(**lstm_kwargs)
        # Both directions are concatenated, so the head sees 2 * hidden_size.
        self.fc = nn.Linear(2 * hidden_size, num_classes)

    def forward(self, x):
        """Map a batch-first input ``x`` to ``num_classes`` scores per row."""
        sequence_out, _state = self.lstm(x)
        final_step = sequence_out[:, -1]
        return self.fc(final_step)


# ─── REGIME FEATURES HELPER ─────────────────────────────────────────────────
import pandas as _pd
def infer_from_array(prices: np.ndarray,
                     timestep: int,
                     win_len:   int = 100) -> np.ndarray:
    """
    Return the (n_inst, 9) regime features for the window ending at
    ``timestep``.

    Re-declares the helper of the same name so this cell can be read on its
    own; the feature computation itself lives in
    ``compute_regime_features_window``, which must already be defined.

    Parameters
    ----------
    prices : np.ndarray
        shape (n_inst, T)
    timestep : int
        must satisfy win_len-1 <= timestep < T
    win_len : int
        number of bars in the window (>= 1)

    Raises
    ------
    ValueError
        if ``prices`` is not 2-D, ``win_len`` < 1, ``timestep`` is outside
        ``0 … T-1``, or fewer than ``win_len`` bars are available.
    """
    prices = np.asarray(prices)
    if prices.ndim != 2:
        raise ValueError(f"prices must be 2-D (n_inst, T), got shape {prices.shape}")
    if win_len < 1:
        raise ValueError(f"win_len must be >= 1, got {win_len}")

    T = prices.shape[1]
    # Without this check numpy slicing silently truncates the window when
    # timestep >= T, and the features are computed on too few bars.
    if not (0 <= timestep < T):
        raise ValueError(f"timestep {timestep} out of range (0 … {T-1})")
    if timestep < win_len - 1:
        raise ValueError(f"need at least {win_len} bars, got {timestep+1}")

    window = prices[:, (timestep-win_len+1):(timestep+1)]
    return compute_regime_features_window(window)


# ─── CONFIGURATION ─────────────────────────────────────────────────────────
PRICE_FILE = "prices.txt"
MODEL_PATH = "bilstm_self2.pth"
START      = 120          # first time step to compute features for
END        = 750          # last time step (inclusive); clamped to the data below
SEQ_LEN    = 20           # number of consecutive feature rows fed to the LSTM
WIN_LEN    = 100          # bars per feature window
DEVICE     = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ─── 1) LOAD MODEL ──────────────────────────────────────────────────────────
# Positional arguments: input_size=9, hidden_size=64, num_layers=2,
# num_classes=3.  load_state_dict needs them to match the saved weights.
model = RegimeBiLSTM(9, 64, 2, 3, dropout=0.2).to(DEVICE)
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source (recent torch versions offer `weights_only=True`).
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
model.eval()

# ─── 2) LOAD PRICES ONCE ────────────────────────────────────────────────────
# Whitespace-separated, no header; transposed so that axis 0 is the
# instrument and axis 1 the time step.
prices_raw = pd.read_csv(PRICE_FILE, sep=r"\s+", header=None).values.T
n_inst, T  = prices_raw.shape

# The last valid time index is T-1.  Running past it would build truncated
# windows and later index prices_raw out of bounds, so clamp END to the data.
if END > T - 1:
    print(f"END={END} exceeds last available bar {T-1}; clamping to {T-1}")
    END = T - 1

# ─── 3) STREAMING INFERENCE WITH PROGRESS ────────────────────────────────────
cache       = []                                  # last SEQ_LEN feature matrices
predictions = {i: [] for i in range(n_inst)}      # inst -> [(t, label), …]
total_steps = END - START + 1

with torch.no_grad():
    for step, t in enumerate(range(START, END+1), 1):
        # 3.1) features at t
        feats = infer_from_array(prices_raw, t, win_len=WIN_LEN)  # (n_inst,9)

        # 3.2) update cache; no prediction until SEQ_LEN rows are collected
        cache.append(feats)
        if len(cache) < SEQ_LEN:
            print(f"[{step}/{total_steps}] warming cache ({len(cache)}/{SEQ_LEN})", end="\r")
            continue
        if len(cache) > SEQ_LEN:
            cache.pop(0)

        # 3.3) build input & inference
        stacked = np.stack(cache, axis=0)                # (SEQ_LEN,n_inst,9)
        seqs    = np.transpose(stacked, (1,0,2))         # (n_inst,SEQ_LEN,9)
        Xb      = torch.from_numpy(seqs).float().to(DEVICE)
        logits  = model(Xb)                              # (n_inst,3)
        preds   = logits.argmax(dim=1).cpu().numpy()     # (n_inst,)

        # 3.4) store & print
        for inst in range(n_inst):
            predictions[inst].append((t, int(preds[inst])))
        print(f"[{step}/{total_steps}] inferred t={t}", end="\r")

print("\nInference complete!\n")


def get_segments(label_seq):
    """Turn a 1D label sequence into runs: [(start, end, label), …].

    ``start`` and ``end`` are inclusive positions of each run of equal
    consecutive labels.  Accepts any 1D array-like; an empty sequence
    yields an empty list instead of raising.
    """
    labels = np.asarray(label_seq)
    if len(labels) == 0:
        return []
    # Positions i where labels[i] differs from labels[i+1] end a run.
    changes = np.flatnonzero(labels[:-1] != labels[1:])
    starts  = np.concatenate(([0], changes+1))
    ends    = np.concatenate((changes, [len(labels)-1]))
    return list(zip(starts, ends, labels[starts]))

# ─── 4) SMOOTH & PLOT ───────────────────────────────────────────────────────
def smooth_min_run(raw, L=4):
    """Suppress label changes that last fewer than ``L`` steps.

    Walks the runs of equal consecutive labels in ``raw``.  The first run
    sets the active label; a later run replaces it only when its label
    differs and the run is at least ``L`` long.  Every position is filled
    with the label that is active for its run.
    """
    breaks     = np.flatnonzero(raw[:-1] != raw[1:])
    run_starts = np.concatenate(([0], breaks + 1))
    run_stops  = np.concatenate((breaks + 1, [len(raw)]))  # exclusive ends
    smoothed   = np.empty_like(raw)

    active = raw[0]
    for begin, stop in zip(run_starts, run_stops):
        candidate = raw[begin]
        long_enough = (stop - begin) >= L
        if candidate != active and long_enough:
            active = candidate
        smoothed[begin:stop] = active
    return smoothed

# Colours are indexed by label 0 / 1 / 2 (red / grey / green).
true_cmap = ListedColormap(["#ffcccc","#f0f0f0","#ccffcc"])
pred_cmap = ListedColormap(["#ff6666","#b0b0b0","#66cc66"])

for inst in range(n_inst):
    # Nothing was predicted for this instrument (e.g. the run was shorter
    # than SEQ_LEN); zip(*[]) below would fail to unpack.
    if not predictions[inst]:
        print(f"Skipping instrument {inst+1}/{n_inst}: no predictions")
        continue

    times, raw = zip(*predictions[inst])
    times, raw = np.array(times), np.array(raw)
    labs       = smooth_min_run(raw, L=4)
    price      = prices_raw[inst, times]
    x          = np.arange(len(times))

    print(f"Plotting instrument {inst+1}/{n_inst}…")
    fig, ax = plt.subplots(figsize=(12,4))

    # Shade one band per run of equal smoothed labels.  Bands are widened by
    # half a step on each side so that one-bar runs stay visible and
    # neighbouring runs touch instead of leaving a one-bar gap.
    for s, e, lbl in get_segments(labs):
        color = pred_cmap.colors[lbl]
        ax.axvspan(x[s] - 0.5, x[e] + 0.5, color=color, alpha=0.3, linewidth=0)

    ax.plot(x, price, "k-", label="Price")
    ax.set_xlim(x[0] - 0.5, x[-1] + 0.5)
    ax.set_title(f"Instrument {inst:02d} — Smoothed preds {START}→{END}")
    ax.set_xlabel("Index in window")
    ax.set_ylabel("Price")
    ax.legend()
    plt.tight_layout()
    plt.show()
    # Release the figure so a long loop does not accumulate open figures.
    plt.close(fig)

