In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.tree import DecisionTreeClassifier

# ─── 1) Load & clean your features ────────────────────────────────────────────
WARMUP_STEPS = 100   # leading rows per instrument that are discarded as warm-up

# Drop the first WARMUP_STEPS rows of every instrument.
# A stable sort on "inst" followed by a per-group cumcount mask keeps the same
# rows, in the same (instrument-sorted) order, as
# groupby("inst").apply(lambda g: g.iloc[100:]) did, but it never routes the
# grouping column through `apply`.  That pattern is deprecated in recent pandas
# and, once the grouping column is excluded from `apply`, "inst" would vanish
# from the result and every later `df["inst"]` lookup would fail.
df = pd.read_csv("features_all_models4.csv").sort_values("inst", kind="stable")
df = df[df.groupby("inst").cumcount() >= WARMUP_STEPS].reset_index(drop=True)

# Feature matrix = every column except the identifiers and the label.
X_all = df.drop(["inst","time","true_regime"], axis=1)
y_all = df["true_regime"]

# ─── 2) Train one DecisionTree on everything ─────────────────────────────────
# NOTE(review): the tree is fitted on all rows and the plotting loop below
# predicts on those same rows, so the overlay shows in-sample fit only.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_all, y_all)

# ─── 3) Load raw prices ───────────────────────────────────────────────────────
# Whitespace-separated, no header row; columns are later indexed by instrument id.
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)

# ─── 4) Helper to extract contiguous segments ─────────────────────────────────
def get_segments(reg):
    """
    Split a 1-D sequence of regime labels into contiguous constant runs.

    Parameters
    ----------
    reg : array-like
        1-D sequence of labels (NumPy array, list, pandas Series, ...).

    Returns
    -------
    list of (start, end, label)
        One tuple per run.  ``start`` and ``end`` are *inclusive* indices into
        ``reg``; ``label`` is the value held over that run.  Empty input
        returns an empty list.
    """
    reg = np.asarray(reg)
    if reg.size == 0:
        # Nothing to segment; indexing reg[[0]] below would raise IndexError.
        return []
    # Index i is listed when reg[i] != reg[i+1], i.e. i is the last index of a run.
    changes = np.flatnonzero(reg[1:] != reg[:-1])
    starts  = np.concatenate(([0], changes + 1))
    ends    = np.concatenate((changes, [len(reg) - 1]))
    return list(zip(starts, ends, reg[starts]))

# ─── 5) Prepare the 10×5 grid ────────────────────────────────────────────────
n_rows, n_cols = 10, 5
n_inst         = 50    # instruments drawn, one panel each (ids 0 .. n_inst-1)
fig, axes      = plt.subplots(n_rows, n_cols, figsize=(20, 40), sharex=False)
axes = axes.flatten()

# colormaps for true vs. predicted; list position = regime label
# (0 = red, 1 = grey, 2 = green)
true_cmap = ListedColormap(["#ffcccc","#f0f0f0","#ccffcc"])
pred_cmap = ListedColormap(["#ff6666","#b0b0b0","#66cc66"])

for inst in range(n_inst):
    ax = axes[inst]
    # rows of the cleaned feature table that belong to this instrument
    mask   = (df["inst"] == inst)
    X_i    = X_all[mask]
    true_i = y_all[mask].to_numpy()
    pred_i = clf.predict(X_i)
    T_i    = len(true_i)

    # price series (dropping same 100 warmups); column index = instrument id
    price_i = price_df.iloc[100:100+T_i, inst].values

    # get_segments returns *inclusive* end indices.  Shading to e + 1 makes
    # one-step segments visible (a span from s to s has zero width) and lets
    # neighbouring segments touch instead of leaving a one-step gap.

    # 5a) plot true-regime background
    for s, e, lbl in get_segments(true_i):
        ax.axvspan(s, e + 1, color=true_cmap(lbl), alpha=0.3, linewidth=0)

    # 5b) plot predicted-regime overlay
    for s, e, lbl in get_segments(pred_i):
        ax.axvspan(s, e + 1, color=pred_cmap(lbl), alpha=0.2, linewidth=0)

    # 5c) plot the price on top
    ax.plot(price_i, color="black", linewidth=1, label="Price")
    ax.set_title(f"Inst {inst}")
    ax.set_xlim(0, T_i)
    ax.set_ylabel("Price")

    # only show legend on the very first plot
    if inst == 0:
        ax.legend(loc="upper right")

# remove any panels that were not used (none when n_inst == n_rows*n_cols)
for j in range(n_inst, n_rows*n_cols):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()


In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.tree import DecisionTreeClassifier

# ── 1) Load & clean your features ─────────────────────────────────────────────
WARMUP_STEPS = 100   # leading rows per instrument that are discarded as warm-up
TRAIN_ROWS   = 500   # number of leading rows of the cleaned table used for fitting

# Drop the first WARMUP_STEPS rows of every instrument.
# Stable sort + cumcount mask keeps the same rows in the same instrument-sorted
# order as groupby("inst").apply(lambda g: g.iloc[100:]) did, without passing
# the grouping column through `apply` (deprecated in recent pandas; once the
# grouping column is excluded, "inst" would be missing from the result).
df = pd.read_csv("features_all_models4.csv").sort_values("inst", kind="stable")
df = df[df.groupby("inst").cumcount() >= WARMUP_STEPS].reset_index(drop=True)

# Feature matrix = every column except the identifiers and the label.
X_all = df.drop(["inst","time","true_regime"], axis=1)
y_all = df["true_regime"]

# ── 2) Train on first 500 samples ─────────────────────────────────────────────
# NOTE(review): rows are ordered by instrument, so these leading rows come from
# the lowest instrument id(s) only — confirm that is the intended training set.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_all.iloc[:TRAIN_ROWS], y_all.iloc[:TRAIN_ROWS])
print(f"▶️ Trained on first {TRAIN_ROWS} rows (of {len(y_all)})\n")

# ── 3) Load raw prices ────────────────────────────────────────────────────────
# Whitespace-separated, no header row; columns are later indexed by instrument id.
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)

# ── 4) Helper to find contiguous segments ─────────────────────────────────────
def get_segments(reg):
    """
    Split a 1-D sequence of regime labels into contiguous constant runs.

    Accepts any array-like (NumPy array, list, pandas Series, ...).  Returns a
    list of ``(start, end, label)`` tuples, one per run, where ``start`` and
    ``end`` are *inclusive* indices into ``reg``.  Empty input returns ``[]``.
    """
    reg = np.asarray(reg)
    if reg.size == 0:
        # Nothing to segment; indexing reg[[0]] below would raise IndexError.
        return []
    # Index i is listed when reg[i] != reg[i+1], i.e. i is the last index of a run.
    changes = np.flatnonzero(reg[1:] != reg[:-1])
    starts  = np.concatenate(([0], changes + 1))
    ends    = np.concatenate((changes, [len(reg) - 1]))
    return list(zip(starts, ends, reg[starts]))

# vivid colour palettes
# Colour lists are indexed by regime label (0, 1, 2).
true_colors = ['#ff0000', '#808080', '#00ff00']  # red, grey, green
pred_colors = ['#cc0000', '#444444', '#00cc00']  # darker red, dark grey, dark green

# ── 5) Plot per instrument: separate “True” vs “Predicted” panels ────────────
for inst in range(50):
    # rows of the cleaned feature table that belong to this instrument
    mask   = (df["inst"] == inst)
    X_i    = X_all[mask]
    true_i = y_all[mask].to_numpy()
    pred_i = clf.predict(X_i)
    T_i    = len(true_i)

    # prices for the same steps (skipping the 100 warm-up rows); column = instrument id
    price_i = price_df.iloc[100:100+T_i, inst].values

    fig, (ax_true, ax_pred) = plt.subplots(
        2, 1, sharex=True,
        figsize=(12, 6),
        gridspec_kw={'height_ratios':[1,1]}
    )

    # get_segments returns *inclusive* end indices.  Shading to e + 1 makes
    # one-step segments visible and removes the one-step gap between
    # neighbouring segments.

    # ─ True regimes panel ─────────────────────────────────────────────────────
    for s, e, lbl in get_segments(true_i):
        ax_true.axvspan(s, e + 1, color=true_colors[lbl], alpha=0.5, linewidth=0)
    ax_true.plot(price_i, color="black", linewidth=1.5, label="Price")
    ax_true.set_title(f"Instrument {inst} — True Regimes")
    ax_true.set_ylabel("Price")
    ax_true.legend(loc="upper right")

    # ─ Predicted regimes panel ─────────────────────────────────────────────────
    for s, e, lbl in get_segments(pred_i):
        ax_pred.axvspan(s, e + 1, color=pred_colors[lbl], alpha=0.5, linewidth=0)
    ax_pred.plot(price_i, color="black", linewidth=1.5, label="Price")
    ax_pred.set_title(f"Instrument {inst} — Predicted Regimes")
    ax_pred.set_xlabel("Time Step")
    ax_pred.set_ylabel("Price")
    ax_pred.legend(loc="upper right")

    plt.tight_layout()
    plt.show()
    # Release the figure explicitly so the loop never accumulates open figures.
    plt.close(fig)


In [None]:
# ── 0) (Re)install a CPU-only PyTorch build to avoid NCCL errors ───────────────
# !pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision torchaudio --quiet

# ── 1) Imports & inline plotting ───────────────────────────────────────────────
%matplotlib inline
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# ── 2) Hyperparameters ────────────────────────────────────────────────────────
SEQ_LEN     = 20     # time steps per input window
BATCH_SIZE  = 64
HIDDEN_SIZE = 64     # LSTM hidden units
NUM_LAYERS  = 2      # stacked LSTM layers
DROPOUT     = 0.2    # dropout passed to nn.LSTM
LR          = 1e-3   # Adam learning rate
NUM_EPOCHS  = 20
TEST_SIZE   = 0.3    # fraction of windows held out by train_test_split
RANDOM_SEED = 42

# Seed the global RNGs so weight initialisation, dropout and DataLoader
# shuffling are repeatable from run to run (the later cells of this notebook
# seed the same way).
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# ── 3) Dataset & Model ─────────────────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Map-style dataset pairing each feature array with its regime label.

    ``X`` and ``y`` are NumPy arrays.  They are converted once, at
    construction, to a float32 tensor and an int64 tensor respectively.
    Indexing returns the pair ``(X[idx], y[idx])``.
    """

    def __init__(self, X, y):
        features = torch.from_numpy(X)
        labels   = torch.from_numpy(y)
        self.X = features.to(torch.float32)
        self.y = labels.to(torch.int64)

    def __len__(self):
        # one sample per label
        n_samples = len(self.y)
        return n_samples

    def __getitem__(self, idx):
        sample = (self.X[idx], self.y[idx])
        return sample

class RegimeLSTM(nn.Module):
    """Stacked LSTM followed by a linear layer on the last time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        LSTM hidden units.
    num_layers : int
        Number of stacked LSTM layers.
    num_classes : int
        Size of the output logit vector.
    dropout : float
        Dropout between stacked LSTM layers; ignored when ``num_layers == 1``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super().__init__()
        # nn.LSTM applies dropout only *between* layers.  With a single layer it
        # has no effect and PyTorch emits a warning, so pass 0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, num_classes) logits."""
        out, _ = self.lstm(x)      # (batch, seq_len, hidden)
        out = out[:, -1, :]        # keep only the last time-step
        return self.fc(out)        # (batch, num_classes)

# ── 4) Load & preprocess data ─────────────────────────────────────────────────
# a) features
# Drop the first 100 (warm-up) rows of every instrument.
# Stable sort + cumcount mask keeps the same rows in the same instrument-sorted
# order as groupby("inst").apply(lambda g: g.iloc[100:]) did, without passing
# the grouping column through `apply` (deprecated in recent pandas; once the
# grouping column is excluded, "inst" would be missing from the result).
df = pd.read_csv("features_all_models4.csv").sort_values("inst", kind="stable")
df = df[df.groupby("inst").cumcount() >= 100].reset_index(drop=True)

# b) raw prices (whitespace-separated, no header; column index = instrument id)
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)

# c) split out X, y and build sequences
X_raw = df.drop(["inst","time","true_regime"], axis=1).values
y_raw = df["true_regime"].values
NUM_CLASSES = int(y_raw.max())+1   # labels are taken to be 0 .. max

# One sample per (instrument, t) with t >= SEQ_LEN: the window holds rows
# t-SEQ_LEN .. t-1 and the target is the label at row t, so the features of
# row t itself are not part of the input.
X_seqs, y_seqs, inst_map = [], [], []
for inst in df["inst"].unique():
    mask = (df["inst"]==inst).values
    Xi, yi = X_raw[mask], y_raw[mask]
    for t in range(SEQ_LEN, len(Xi)):
        X_seqs.append(Xi[t-SEQ_LEN:t])
        y_seqs.append(yi[t])
        inst_map.append(inst)

if not X_seqs:
    # np.stack([]) would raise a far less helpful ValueError.
    raise ValueError(
        f"no instrument has more than SEQ_LEN={SEQ_LEN} rows after warm-up removal"
    )

X_seqs   = np.stack(X_seqs)  # (N, SEQ_LEN, n_feat)
y_seqs   = np.array(y_seqs)  # (N,)
inst_map = np.array(inst_map)  # (N,) instrument id of every window

# ── 5) Train/val split ─────────────────────────────────────────────────────────
from sklearn.model_selection import train_test_split

# Stratified random split of the windows into train and validation parts.
# NOTE(review): neighbouring windows overlap in all but one time step, so a
# random split places near-duplicates on both sides; validation numbers from
# this split are optimistic.  Consider splitting by time or by instrument.
_split = train_test_split(
    X_seqs,
    y_seqs,
    random_state=RANDOM_SEED,
    stratify=y_seqs,
    test_size=TEST_SIZE,
)
X_train, X_val, y_train, y_val = _split

# Training batches are reshuffled every epoch; validation keeps its order.
train_loader = DataLoader(
    RegimeDataset(X_train, y_train), shuffle=True, batch_size=BATCH_SIZE
)
val_loader = DataLoader(
    RegimeDataset(X_val, y_val), batch_size=BATCH_SIZE
)

# ── 6) Build & train the LSTM ─────────────────────────────────────────────────
# Use a GPU when one is visible, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model  = RegimeLSTM(
    input_size  = X_seqs.shape[2],
    hidden_size = HIDDEN_SIZE,
    num_layers  = NUM_LAYERS,
    num_classes = NUM_CLASSES,
    dropout     = DROPOUT
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

for epoch in range(1, NUM_EPOCHS+1):
    # ── training pass ─────────────────────────────────────────────────────────
    model.train()
    total_loss, correct, total = 0.0, 0, 0
    for Xb, yb in train_loader:
        Xb, yb = Xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(Xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; weight by batch size for a sample mean
        total_loss += loss.item()*Xb.size(0)
        preds = out.argmax(dim=1)
        correct += (preds==yb).sum().item()
        total   += yb.size(0)

    # ── validation pass ───────────────────────────────────────────────────────
    # val_loader was built alongside train_loader but never evaluated before.
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for Xb, yb in val_loader:
            Xb, yb = Xb.to(device), yb.to(device)
            out = model(Xb)
            val_loss    += criterion(out, yb).item()*Xb.size(0)
            val_correct += (out.argmax(dim=1)==yb).sum().item()
            val_total   += yb.size(0)

    # max(..., 1) keeps the report from dividing by zero on an empty loader
    n_tr, n_va = max(total, 1), max(val_total, 1)
    print(f"Epoch {epoch:02d}  Loss: {total_loss/n_tr:.4f}  Acc: {correct/n_tr:.4f}"
          f"  Val Loss: {val_loss/n_va:.4f}  Val Acc: {val_correct/n_va:.4f}")

# ── 7) Inference & plotting per instrument ────────────────────────────────────
def get_segments(reg):
    """
    Split a 1-D sequence of regime labels into contiguous constant runs.

    Accepts any array-like (NumPy array, list, pandas Series, ...).  Returns a
    list of ``(start, end, label)`` tuples, one per run, where ``start`` and
    ``end`` are *inclusive* indices into ``reg``.  Empty input returns ``[]``.
    """
    reg = np.asarray(reg)
    if reg.size == 0:
        # Nothing to segment; indexing reg[[0]] below would raise IndexError.
        return []
    # Index i is listed when reg[i] != reg[i+1], i.e. i is the last index of a run.
    changes = np.flatnonzero(reg[1:]!=reg[:-1])
    starts  = np.concatenate(([0], changes+1))
    ends    = np.concatenate((changes, [len(reg)-1]))
    return list(zip(starts, ends, reg[starts]))

# List position = regime label: 0=red, 1=grey, 2=green.
# (The previous ordering put green at index 0, contradicting its own comment
# and the palettes used by the other cells of this notebook.)
true_cmap = ListedColormap(["#ffcccc","#f0f0f0","#ccffcc"])
pred_cmap = ListedColormap(["#ff6666","#b0b0b0","#66cc66"])

model.eval()
with torch.no_grad():
    for inst in sorted(np.unique(inst_map)):
        # every window that was built from this instrument
        mask   = inst_map==inst
        Xi     = torch.from_numpy(X_seqs[mask]).float().to(device)
        true_i = y_seqs[mask]
        pred_i = model(Xi).argmax(dim=1).cpu().numpy()

        # prices for all post-warm-up steps of this instrument
        Ni      = int((df["inst"]==inst).sum())
        price_i = price_df.iloc[100:100+Ni, inst].values

        # Window k carries the label of time step SEQ_LEN + k, so segment
        # indices are shifted by SEQ_LEN onto the price axis; the first SEQ_LEN
        # steps have no label and stay unshaded.  Segment ends are inclusive,
        # hence the extra + 1 so one-step segments are visible.
        fig, ax = plt.subplots(figsize=(12,4))
        for s,e,lbl in get_segments(true_i):
            ax.axvspan(s+SEQ_LEN, e+SEQ_LEN+1, color=true_cmap(lbl), alpha=0.3, linewidth=0)
        for s,e,lbl in get_segments(pred_i):
            ax.axvspan(s+SEQ_LEN, e+SEQ_LEN+1, color=pred_cmap(lbl), alpha=0.2, linewidth=0)
        ax.plot(price_i, color="black", label="Price")
        ax.set_title(f"Instrument {inst}")
        ax.set_xlabel("Time Step")
        ax.set_ylabel("Price")
        ax.legend()
        plt.show()
        # Release the figure explicitly so the loop never accumulates open figures.
        plt.close(fig)


In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split

# ── 1) Hyperparameters ─────────────────────────────────────────────────────────
# Data layout.  SEQ_LEN is only a placeholder here: it is recomputed from the
# cleaned data right after loading.
SEQ_LEN, TRAIN_LEN = 740, 500

# Model size.
HIDDEN_SIZE, NUM_LAYERS, DROPOUT = 64, 2, 0.2

# Optimisation.
BATCH_SIZE, LR, NUM_EPOCHS = 10, 1e-3, 15

# Reproducibility: seed NumPy and PyTorch with the same value.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# ── 2) Load & clean ─────────────────────────────────────────────────────────────
# Drop the first 100 (warm-up) rows of every instrument.
# Stable sort + cumcount mask keeps the same rows in the same instrument-sorted
# order as groupby("inst").apply(lambda g: g.iloc[100:]) did, without passing
# the grouping column through `apply` (deprecated in recent pandas; once the
# grouping column is excluded, "inst" would be missing from the result).
df = pd.read_csv("features_all_models4.csv").sort_values("inst", kind="stable")
df = df[df.groupby("inst").cumcount() >= 100].reset_index(drop=True)

# The plotting section of this cell reads price_df; load it here so the cell
# does not rely on an earlier cell having left it in the kernel.
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)

# recompute SEQ_LEN from the cleaned data (longest instrument)
seq_lens = df.groupby("inst").size()
SEQ_LEN  = int(seq_lens.max())
print("Detected sequence length per instrument:", SEQ_LEN)

# now build X and Y arrays with the correct shape:
# X is (n_inst, SEQ_LEN, n_features), Y is (n_inst, SEQ_LEN)
n_inst    = df["inst"].nunique()
feat_cols = [c for c in df.columns if c not in ("inst","time","true_regime")]

X = np.zeros((n_inst, SEQ_LEN, len(feat_cols)), dtype=np.float32)
Y = np.zeros((n_inst, SEQ_LEN),               dtype=np.int64)

# Instrument ids are taken to be 0 .. n_inst-1; the check below fails loudly
# if an id is missing or an instrument has a different number of rows.
for inst in range(n_inst):
    sub = df[df["inst"]==inst].reset_index(drop=True)
    assert len(sub)==SEQ_LEN, f"inst {inst}: {len(sub)} rows, expected {SEQ_LEN}"
    X[inst,:,:] = sub[feat_cols].values
    Y[inst,:]   = sub["true_regime"].values

NUM_TAGS = int(Y.max())+1   # labels are taken to be 0 .. max


# ── 3) Train/test split along time axis ────────────────────────────────────────
# Training tensors: the first TRAIN_LEN steps of every instrument.
_train_window = slice(None, TRAIN_LEN)
X_train = torch.tensor(X[:, _train_window, :])
Y_train = torch.tensor(Y[:, _train_window])

# Full-length inputs for inference; labels stay a NumPy array for plotting.
X_full = torch.tensor(X)
Y_full = Y

# ── 4) Dataset & DataLoader ───────────────────────────────────────────────────
class SeqTagDataset(Dataset):
    """Pairs a feature tensor with a tag tensor along their first dimension.

    The tensors are stored as given (no copy); item ``i`` is ``(X[i], y[i])``
    and the length is the size of ``X`` along dimension 0.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        n_items = self.X.size(0)
        return n_items

    def __getitem__(self, i):
        features, tags = self.X[i], self.y[i]
        return features, tags

# One dataset item per row of X_train; batches are reshuffled every epoch.
train_ds = SeqTagDataset(X=X_train, y=Y_train)
train_loader = DataLoader(dataset=train_ds, shuffle=True, batch_size=BATCH_SIZE)

# ── 5) BiLSTM tagger ────────────────────────────────────────────────────────────
class BiLSTMTagger(nn.Module):
    """Bidirectional LSTM that emits one logit vector per time step.

    Parameters
    ----------
    feat_dim : int
        Number of features per time step.
    hidden_dim : int
        Hidden units per direction.
    num_layers : int
        Number of stacked LSTM layers.
    num_tags : int
        Size of the per-step logit vector.
    dropout : float
        Dropout between stacked LSTM layers; ignored when ``num_layers == 1``.
    """

    def __init__(self, feat_dim, hidden_dim, num_layers, num_tags, dropout):
        super().__init__()
        # nn.LSTM applies dropout only *between* layers.  With a single layer it
        # has no effect and PyTorch emits a warning, so pass 0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            feat_dim, hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=inter_layer_dropout
        )
        # forward and backward hidden states are concatenated → 2 * hidden_dim
        self.fc = nn.Linear(hidden_dim*2, num_tags)

    def forward(self, x):
        """Map ``x`` of shape (B, T, feat_dim) to logits of shape (B, T, num_tags)."""
        out, _ = self.lstm(x)     # (B, T, 2*H)
        return self.fc(out)       # (B, T, num_tags)

# Use a GPU when one is visible, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Constructor arguments in positional order:
# (feat_dim, hidden_dim, num_layers, num_tags, dropout).
model = BiLSTMTagger(X.shape[2], HIDDEN_SIZE, NUM_LAYERS, NUM_TAGS, DROPOUT)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# ── 6) Train ───────────────────────────────────────────────────────────────────
for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    batch_losses = []
    for feats, tags in train_loader:
        feats = feats.to(device)
        tags  = tags.to(device)
        # Flatten (B, T, C) logits and (B, T) tags so that every time step of
        # every sequence counts as one classification sample.
        step_logits = model(feats).flatten(0, 1)      # (B*T, C)
        step_tags   = tags.flatten()                  # (B*T,)
        loss = criterion(step_logits, step_tags)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    # mean of the per-batch mean losses
    print(f"Epoch {epoch:02d} — Avg Loss: {sum(batch_losses)/len(train_loader):.4f}")

# ── 7) Inference & two-panel plotting ──────────────────────────────────────────
def get_segments(reg):
    """
    Split a 1-D sequence of regime labels into contiguous constant runs.

    Accepts any array-like (NumPy array, list, pandas Series, ...).  Returns a
    list of ``(start, end, label)`` tuples, one per run, where ``start`` and
    ``end`` are *inclusive* indices into ``reg``.  Empty input returns ``[]``.
    """
    reg = np.asarray(reg)
    if reg.size == 0:
        # Nothing to segment; indexing reg[[0]] below would raise IndexError.
        return []
    # Index i is listed when reg[i] != reg[i+1], i.e. i is the last index of a run.
    changes = np.flatnonzero(reg[1:] != reg[:-1])
    starts  = np.concatenate(([0], changes+1))
    ends    = np.concatenate((changes, [len(reg)-1]))
    return list(zip(starts, ends, reg[starts]))

# List position = regime label: 0=red, 1=grey, 2=green.
true_cmap = ListedColormap(["#ff0000","#808080","#00ff00"])
pred_cmap = ListedColormap(["#cc0000","#444444","#00cc00"])

model.eval()
with torch.no_grad():
    # One forward pass over every instrument's whole sequence.
    logits_full = model(X_full.to(device))         # (n_inst, SEQ_LEN, NUM_TAGS)
    preds_full  = logits_full.argmax(dim=2).cpu().numpy()

# NOTE: only the first TRAIN_LEN steps were used for fitting; later steps are
# out-of-sample for the labels.
for inst in range(n_inst):
    true_seq = Y_full[inst]
    pred_seq = preds_full[inst]
    # prices for the same steps (skipping the 100 warm-up rows); column = instrument id
    price    = price_df.iloc[100:100+SEQ_LEN, inst].values

    fig, (ax1, ax2) = plt.subplots(2,1,
                                  sharex=True,
                                  figsize=(12,6))

    # get_segments returns *inclusive* end indices.  Shading to e + 1 makes
    # one-step segments visible and removes the gap between neighbours.

    # TRUE regimes
    for s,e,lbl in get_segments(true_seq):
        ax1.axvspan(s, e+1, color=true_cmap(lbl), alpha=0.5, linewidth=0)
    ax1.plot(price, 'k-', label='Price')
    ax1.set_title(f"Inst {inst} — TRUE regimes")
    ax1.set_ylabel("Price")
    ax1.legend(loc='upper right')

    # PREDICTED regimes
    for s,e,lbl in get_segments(pred_seq):
        ax2.axvspan(s, e+1, color=pred_cmap(lbl), alpha=0.5, linewidth=0)
    ax2.plot(price, 'k-', label='Price')
    ax2.set_title(f"Inst {inst} — PREDICTED regimes")
    ax2.set_xlabel("Time Step")
    ax2.set_ylabel("Price")
    ax2.legend(loc='upper right')

    plt.tight_layout()
    plt.show()
    # Release the figure explicitly so the loop never accumulates open figures.
    plt.close(fig)


In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# ── 1) Hyperparameters ─────────────────────────────────────────────────────────
# Data layout: steps [0 .. TRAIN_LEN-1] of every instrument are used for training.
TRAIN_LEN = 400

# Model size.
HIDDEN_SIZE, NUM_LAYERS, DROPOUT = 64, 2, 0.2

# Optimisation.
BATCH_SIZE, LR, NUM_EPOCHS = 10, 1e-3, 15

# Reproducibility: seed NumPy and PyTorch with the same value.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# ── 2) Load & clean ─────────────────────────────────────────────────────────────
# Drop the first 100 (warm-up) rows of every instrument.
# Stable sort + cumcount mask keeps the same rows in the same instrument-sorted
# order as groupby("inst").apply(lambda g: g.iloc[100:]) did, without passing
# the grouping column through `apply` (deprecated in recent pandas; once the
# grouping column is excluded, "inst" would be missing from the result).
df = pd.read_csv("features_all_models4.csv").sort_values("inst", kind="stable")
df = df[df.groupby("inst").cumcount() >= 100].reset_index(drop=True)

# raw prices (whitespace-separated, no header; column index = instrument id)
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)

# detect sequence length per inst (longest instrument)
seq_lens = df.groupby("inst").size()
SEQ_LEN  = int(seq_lens.max())
print("Sequence length per instrument:", SEQ_LEN)

# build X,Y arrays: (n_inst, SEQ_LEN, D) and (n_inst, SEQ_LEN)
n_inst    = df["inst"].nunique()
feat_cols = [c for c in df.columns if c not in ("inst","time","true_regime")]

X = np.zeros((n_inst, SEQ_LEN, len(feat_cols)), dtype=np.float32)
Y = np.zeros((n_inst, SEQ_LEN),               dtype=np.int64)
# Instrument ids are taken to be 0 .. n_inst-1; the check below gives a clear
# message (instead of a broadcasting error) if an id is missing or an
# instrument has a different number of rows.
for inst in range(n_inst):
    sub = df[df["inst"]==inst].reset_index(drop=True)
    assert len(sub)==SEQ_LEN, f"inst {inst}: {len(sub)} rows, expected {SEQ_LEN}"
    X[inst] = sub[feat_cols].values
    Y[inst] = sub["true_regime"].values

NUM_TAGS = int(Y.max()) + 1   # labels are taken to be 0 .. max

# ── 3) Split into train vs. test time windows ─────────────────────────────────
# Cut both arrays once along the time axis at TRAIN_LEN.
_X_head, _X_tail = np.split(X, [TRAIN_LEN], axis=1)
_Y_head, Y_test  = np.split(Y, [TRAIN_LEN], axis=1)   # Y_test stays NumPy for metrics & plotting

# Training window → tensors; test features → tensor.
X_train, Y_train = torch.tensor(_X_head), torch.tensor(_Y_head)
X_test = torch.tensor(_X_tail)

# Number of time steps in the held-out window.
LEN_TEST = SEQ_LEN - TRAIN_LEN

# ── 4) Dataset & DataLoader ───────────────────────────────────────────────────
class SeqTagDataset(Dataset):
    """Pairs a feature tensor with a tag tensor along their first dimension.

    The tensors are stored as given (no copy); item ``i`` is ``(X[i], y[i])``
    and the length is the size of ``X`` along dimension 0.
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        n_items = self.X.size(0)
        return n_items

    def __getitem__(self, i):
        pair = (self.X[i], self.y[i])
        return pair

# One dataset item per row of X_train; batches are reshuffled every epoch.
train_ds = SeqTagDataset(X=X_train, y=Y_train)
train_loader = DataLoader(dataset=train_ds, shuffle=True, batch_size=BATCH_SIZE)

# ── 5) BiLSTM tagger ────────────────────────────────────────────────────────────
class BiLSTMTagger(nn.Module):
    """Bidirectional LSTM that emits one logit vector per time step.

    Parameters
    ----------
    feat_dim : int
        Number of features per time step.
    hidden_dim : int
        Hidden units per direction.
    num_layers : int
        Number of stacked LSTM layers.
    num_tags : int
        Size of the per-step logit vector.
    dropout : float
        Dropout between stacked LSTM layers; ignored when ``num_layers == 1``.
    """

    def __init__(self, feat_dim, hidden_dim, num_layers, num_tags, dropout):
        super().__init__()
        # nn.LSTM applies dropout only *between* layers.  With a single layer it
        # has no effect and PyTorch emits a warning, so pass 0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(feat_dim, hidden_dim,
                            num_layers=num_layers,
                            batch_first=True,
                            bidirectional=True,
                            dropout=inter_layer_dropout)
        # forward and backward hidden states are concatenated → 2 * hidden_dim
        self.fc = nn.Linear(hidden_dim*2, num_tags)

    def forward(self, x):
        """Map ``x`` of shape (B, T, feat_dim) to logits of shape (B, T, num_tags)."""
        out, _ = self.lstm(x)     # (B, T, 2H)
        return self.fc(out)       # (B, T, num_tags)

# Use a GPU when one is visible, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

model = BiLSTMTagger(
    feat_dim=X.shape[2],
    hidden_dim=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    num_tags=NUM_TAGS,
    dropout=DROPOUT,
)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# ── 6) Train ───────────────────────────────────────────────────────────────────
n_batches = len(train_loader)
for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    running = 0.0
    for feats, tags in train_loader:
        feats = feats.to(device)
        tags  = tags.to(device)
        # Every time step of every sequence is one classification sample:
        # (B, T, C) → (B*T, C) logits against (B*T,) tags.
        step_logits = model(feats).reshape(-1, NUM_TAGS)
        step_tags   = tags.reshape(-1)
        loss = criterion(step_logits, step_tags)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running += loss.item()
    # mean of the per-batch mean losses
    print(f"Epoch {epoch:02d} — Avg Loss: {running/n_batches:.4f}")

# ── 7) Inference on test window + metrics + plotting ──────────────────────────
def get_segments(reg):
    """
    Split a 1-D sequence of regime labels into contiguous constant runs.

    Accepts any array-like (NumPy array, list, pandas Series, ...).  Returns a
    list of ``(start, end, label)`` tuples, one per run, where ``start`` and
    ``end`` are *inclusive* indices into ``reg``.  Empty input returns ``[]``.
    """
    reg = np.asarray(reg)
    if reg.size == 0:
        # Nothing to segment; indexing reg[[0]] below would raise IndexError.
        return []
    # Index i is listed when reg[i] != reg[i+1], i.e. i is the last index of a run.
    changes = np.flatnonzero(reg[1:] != reg[:-1])
    starts  = np.concatenate(([0], changes+1))
    ends    = np.concatenate((changes, [len(reg)-1]))
    return list(zip(starts, ends, reg[starts]))

# List position = regime label: 0=red, 1=grey, 2=green.
true_cmap = ListedColormap(["#ff0000","#808080","#00ff00"])
pred_cmap = ListedColormap(["#cc0000","#444444","#00cc00"])

model.eval()
with torch.no_grad():
    # The model sees only the held-out window, not the training steps before it.
    logits_test = model(X_test.to(device))          # (n_inst, LEN_TEST, NUM_TAGS)
    preds_test  = logits_test.argmax(dim=2).cpu().numpy()

for inst in range(n_inst):
    true_seq = Y_test[inst]
    pred_seq = preds_test[inst]
    # prices of the held-out window (after the 100 warm-up rows and TRAIN_LEN steps)
    price    = price_df.iloc[100+TRAIN_LEN:100+TRAIN_LEN+LEN_TEST, inst].values
    # Absolute (post-warm-up) time steps of the window, so the x-axis matches
    # the "t=TRAIN_LEN→end" titles instead of restarting at 0.
    t_axis   = np.arange(TRAIN_LEN, TRAIN_LEN + len(price))

    # compute and print test accuracy
    acc = (pred_seq == true_seq).mean()
    print(f"Inst {inst:02d} Test acc: {acc:.3f}")

    # two‐panel plot for test window only
    fig, (ax1, ax2) = plt.subplots(2,1, sharex=True, figsize=(12,6))

    # get_segments returns window-relative, *inclusive* indices: shift by
    # TRAIN_LEN onto the absolute axis and shade to e + 1 so one-step segments
    # are visible and neighbouring segments touch.

    # TRUE regimes
    for s,e,lbl in get_segments(true_seq):
        ax1.axvspan(TRAIN_LEN+s, TRAIN_LEN+e+1, color=true_cmap(lbl), alpha=0.5, linewidth=0)
    ax1.plot(t_axis, price, 'k-', label='Price')
    ax1.set_title(f"Inst {inst} — TRUE regimes (t={TRAIN_LEN}→end)")
    ax1.set_ylabel("Price")
    ax1.legend(loc='upper right')

    # PREDICTED regimes
    for s,e,lbl in get_segments(pred_seq):
        ax2.axvspan(TRAIN_LEN+s, TRAIN_LEN+e+1, color=pred_cmap(lbl), alpha=0.5, linewidth=0)
    ax2.plot(t_axis, price, 'k-', label='Price')
    ax2.set_title(f"Inst {inst} — PREDICTED regimes (t={TRAIN_LEN}→end)")
    ax2.set_xlabel("Time Step")
    ax2.set_ylabel("Price")
    ax2.legend(loc='upper right')

    plt.tight_layout()
    plt.show()
    # Release the figure explicitly so the loop never accumulates open figures.
    plt.close(fig)
