In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# 1) Load the full CSV
df = pd.read_csv('features_all_models4.csv')

# 2) Remove the first 100 warm-up rows for each instrument.
#    A cumcount mask is used instead of groupby(...).apply(...): applying over the
#    grouping column is deprecated since pandas 2.2, and future versions exclude the
#    grouping column from each group, which would remove 'inst' from the result.
#    The stable sort reproduces the instrument-by-instrument row order apply() gave.
WARMUP_ROWS = 100
past_warmup = df.groupby('inst').cumcount() >= WARMUP_ROWS
df = (
    df[past_warmup]
    .sort_values('inst', kind='stable')
    .reset_index(drop=True)
)

# 3) Build the feature matrix ('inst' and 'time' are not used as features)
#    and pull out the target
X = df.drop(['inst', 'time', 'true_regime'], axis=1)
y = df['true_regime']

# 4) Train a Decision Tree on every remaining row
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X, y)

# 5) Print a quick summary.
#    NOTE: the accuracy below is measured on the same rows the tree was fitted on,
#    so it is an in-sample figure, not an estimate of generalisation.
print("✅  Decision Tree trained on full dataset")
print(f" • training samples : {X.shape[0]}")
print(f" • features         : {X.shape[1]}")
print(f" • target classes   : {clf.classes_}")
print(f" • tree depth       : {clf.get_depth()}")
print(f" • leaf nodes       : {clf.get_n_leaves()}")
print(f" • training accuracy: {clf.score(X, y):.4f}")


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.tree import DecisionTreeClassifier

# 1) Load & clean the CSV just once
df = pd.read_csv('features_all_models4.csv')

# Drop the first 100 warm-up rows per instrument with a cumcount mask.
# groupby(...).apply(...) over the grouping column is deprecated since pandas 2.2 and
# future versions exclude that column from each group, which would remove 'inst'.
# The stable sort keeps the instrument-by-instrument order apply() produced.
WARMUP_ROWS = 100
past_warmup = df.groupby('inst').cumcount() >= WARMUP_ROWS
df = (
    df[past_warmup]
    .sort_values('inst', kind='stable')
    .reset_index(drop=True)
)

# Features & true regimes
X_all = df.drop(['inst','time','true_regime'], axis=1)
y_all = df['true_regime']

# 2) Train your tree on every instrument
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_all, y_all)

# 3) Read the raw prices once
price_df = pd.read_csv('prices.txt', sep=r'\s+', header=None)

# 4) Prepare output folder
os.makedirs('plots', exist_ok=True)

# 5) Common plotting setup
cmap = ListedColormap(['red','lightgrey','green'])  # 0→bear,1→neutral,2→bull
# Pin the colour scale to the full label range. Without vmin/vmax imshow rescales to
# the min/max of each strip, so an instrument showing only some regimes would be
# drawn with the wrong colours. 'nearest' keeps categorical colours from blending.
strip_kwargs = dict(aspect='auto', cmap=cmap, vmin=0, vmax=2,
                    interpolation='nearest')

for inst in sorted(df['inst'].unique()):
    # slice this instrument
    mask    = (df['inst'] == inst)
    X_i     = X_all[mask]
    y_true  = y_all[mask].to_numpy()
    T_i     = len(y_true)

    # inference
    y_pred  = clf.predict(X_i)

    # align price: drop the same warm-up rows.
    # 'inst' is used as a column position in prices.txt — assumes integer ids; verify.
    price_i = price_df.iloc[WARMUP_ROWS:WARMUP_ROWS + T_i, inst].values

    # 6) make the three-panel plot
    fig, (ax1, ax2, ax3) = plt.subplots(
        3,1, sharex=True, figsize=(10,4),
        gridspec_kw={'height_ratios':[2,1,1]}
    )

    ax1.plot(price_i, color='black')
    ax1.set_ylabel('Price')
    ax1.set_title(f'Instrument {inst}: Price & Regimes')

    ax2.imshow([y_true], extent=[0, T_i, 0, 1], **strip_kwargs)
    ax2.set_yticks([])
    ax2.set_ylabel('Auto-label')

    ax3.imshow([y_pred], extent=[0, T_i, 0, 1], **strip_kwargs)
    ax3.set_yticks([])
    ax3.set_ylabel('Predicted')
    ax3.set_xlabel('Time Step')

    fig.tight_layout()

    # 7) save and close (closing keeps memory flat across the loop)
    out_path = f'plots/instrument_{inst}.png'
    fig.savefig(out_path, dpi=150)
    plt.close(fig)

    print(f'Saved: {out_path}')


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# ─── 1) Hyperparameters ───────────────────────────────────────────────────────
SEQ_LEN     = 20      # timesteps in each input window
BATCH_SIZE  = 64      # samples per DataLoader batch
HIDDEN_SIZE = 64      # LSTM hidden units per layer
NUM_LAYERS  = 2       # stacked LSTM layers
DROPOUT     = 0.2     # dropout passed to nn.LSTM
LR          = 1e-3    # Adam learning rate
NUM_EPOCHS  = 20      # full passes over the training split
TEST_SIZE   = 0.3     # fraction of sequences held out for validation
RANDOM_SEED = 42      # seed for the split and for the RNGs below

# Seed NumPy and PyTorch so weight initialisation and batch shuffling repeat from
# run to run; previously RANDOM_SEED only reached train_test_split.
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# ─── 2) Dataset ───────────────────────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Tensor-backed dataset pairing feature arrays with integer labels.

    X and y are NumPy arrays; they are converted once at construction to a
    float32 tensor and an int64 tensor respectively and indexed along axis 0.
    """

    def __init__(self, X, y):
        features = torch.from_numpy(X)
        labels = torch.from_numpy(y)
        self.X = features.to(torch.float32)
        self.y = labels.to(torch.int64)

    def __len__(self):
        # One sample per label.
        return len(self.y)

    def __getitem__(self, idx):
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# ─── 3) Model ─────────────────────────────────────────────────────────────────
class RegimeLSTM(nn.Module):
    """LSTM classifier: encodes a sequence and classifies from its last timestep.

    Args:
        input_size:  number of features per timestep.
        hidden_size: LSTM hidden units per layer.
        num_layers:  number of stacked LSTM layers.
        num_classes: size of the output logit vector.
        dropout:     dropout between stacked LSTM layers; ignored (set to 0)
                     when ``num_layers`` is 1.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super().__init__()
        # nn.LSTM only applies dropout between stacked layers. With a single layer
        # a non-zero value does nothing except raise a UserWarning, so clamp it.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for x of shape (batch, seq, input_size)."""
        out, _ = self.lstm(x)
        out = out[:, -1, :]          # last timestep
        return self.fc(out)

# ─── 4) Load & preprocess ────────────────────────────────────────────────────
# a) features
df = pd.read_csv("features_all_models4.csv")

# b) drop first 100 warm-up rows per inst.
#    A cumcount mask replaces groupby(...).apply(...): applying over the grouping
#    column is deprecated since pandas 2.2 and future versions exclude that column
#    from each group, which would remove "inst". The stable sort keeps the
#    instrument-by-instrument row order apply() produced.
WARMUP_ROWS = 100
past_warmup = df.groupby("inst").cumcount() >= WARMUP_ROWS
df = (
    df[past_warmup]
    .sort_values("inst", kind="stable")
    .reset_index(drop=True)
)

# c) optionally add raw price as a feature
#    "inst" is used as a column position in prices.txt — assumes integer ids; verify.
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)
df["price"] = np.nan
for inst in df["inst"].unique():
    mask = (df["inst"] == inst)
    p = price_df.iloc[WARMUP_ROWS:WARMUP_ROWS + mask.sum(), inst].values
    df.loc[mask, "price"] = p
# every row must have received a price, otherwise NaNs would flow into the LSTM
assert not df["price"].isna().any(), "price column has unfilled rows"

# d) split out features & labels
X_raw = df.drop(["inst","time","true_regime"], axis=1).values
y_raw = df["true_regime"].values

# figure out how many classes we have
NUM_CLASSES = int(y_raw.max()) + 1   # e.g. 2 + 1 = 3

# ─── 5) Build sequences ───────────────────────────────────────────────────────
# Each sample is the SEQ_LEN rows before t, labelled with the regime at t.
X_seqs, y_seqs = [], []
for inst in df["inst"].unique():
    mask = (df["inst"] == inst).values
    Xi   = X_raw[mask]
    yi   = y_raw[mask]
    for t in range(SEQ_LEN, len(Xi)):
        X_seqs.append(Xi[t-SEQ_LEN:t])
        y_seqs.append(yi[t])

X_seqs = np.stack(X_seqs)
y_seqs = np.array(y_seqs)

# ─── 6) Train/test split ──────────────────────────────────────────────────────
# NOTE(review): consecutive windows overlap in SEQ_LEN-1 rows, so a random split puts
# near-identical sequences on both sides and validation accuracy is likely optimistic.
# Consider a chronological or per-instrument split before trusting these numbers.
X_train, X_val, y_train, y_val = train_test_split(
    X_seqs, y_seqs,
    test_size=TEST_SIZE,
    random_state=RANDOM_SEED,
    stratify=y_seqs
)

# ─── 7) DataLoaders ───────────────────────────────────────────────────────────
train_ds = RegimeDataset(X_train, y_train)
val_ds   = RegimeDataset(X_val,   y_val)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE)

# ─── 8) Model, loss, opt ──────────────────────────────────────────────────────
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegimeLSTM(
    input_size  = X_seqs.shape[2],
    hidden_size = HIDDEN_SIZE,
    num_layers  = NUM_LAYERS,
    num_classes = NUM_CLASSES,
    dropout     = DROPOUT
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# ─── 9) Training loop ─────────────────────────────────────────────────────────
for epoch in range(1, NUM_EPOCHS+1):
    # — Train —
    model.train()
    train_loss = 0.0
    correct, total = 0, 0
    for Xb, yb in train_loader:
        Xb, yb = Xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(Xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()

        # weight the batch-mean loss by batch size so the epoch figure is per-sample
        train_loss += loss.item() * Xb.size(0)
        preds = out.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total   += yb.size(0)

    train_loss /= total
    train_acc   = correct / total

    # — Validate —
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for Xb, yb in val_loader:
            Xb, yb = Xb.to(device), yb.to(device)
            out = model(Xb)
            preds = out.argmax(dim=1)
            val_correct += (preds == yb).sum().item()
            val_total   += yb.size(0)

    val_acc = val_correct / val_total

    print(f"Epoch {epoch:02d}  "
          f"Train Loss: {train_loss:.4f}  "
          f"Train Acc: {train_acc:.4f}  "
          f"Val Acc:   {val_acc:.4f}"
    )

# ─── 10) Save weights ────────────────────────────────────────────────────────
torch.save(model.state_dict(), "regime_lstm.pth")


In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.tree import DecisionTreeClassifier

# ─── 1) Load & clean your features ────────────────────────────────────────────
df = pd.read_csv("features_all_models4.csv")
# Drop the first 100 warm-up rows per inst with a cumcount mask.
# groupby(...).apply(...) over the grouping column is deprecated since pandas 2.2 and
# future versions exclude that column from each group, which would remove "inst".
# The stable sort keeps the instrument-by-instrument row order apply() produced.
past_warmup = df.groupby("inst").cumcount() >= 100
df = (
    df[past_warmup]
    .sort_values("inst", kind="stable")
    .reset_index(drop=True)
)
X_all = df.drop(["inst","time","true_regime"], axis=1)
y_all = df["true_regime"]

# ─── 2) Train one DecisionTree on everything ─────────────────────────────────
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_all, y_all)

# ─── 3) Load raw prices ───────────────────────────────────────────────────────
# whitespace-separated file without a header row
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)

# ─── 4) Helper to extract contiguous segments ─────────────────────────────────
def get_segments(reg):
    """
    Split a 1D sequence of regime labels into runs of equal consecutive values.

    Parameters
    ----------
    reg : array-like
        1D labels (NumPy array, list or pandas Series).

    Returns
    -------
    list of (start, end, label)
        One tuple per run; ``start`` and ``end`` are positional indices and
        ``end`` is inclusive. An empty input yields an empty list.
    """
    # Work on a plain array: slices of a pandas Series compare by index label and
    # Python lists compare as a whole, neither of which finds element-wise changes.
    reg = np.asarray(reg)
    if reg.size == 0:
        # nothing to segment; indexing reg[0] below would raise IndexError
        return []
    changes = np.flatnonzero(reg[1:] != reg[:-1])      # last index of each run except the final one
    starts  = np.concatenate(([0], changes + 1))
    ends    = np.concatenate((changes, [len(reg)-1]))
    return list(zip(starts, ends, reg[starts]))

# ─── 5) Prepare the 10×5 grid ────────────────────────────────────────────────
n_rows, n_cols = 10, 5
fig, axes     = plt.subplots(n_rows, n_cols, figsize=(20, 40), sharex=False)
axes = axes.flatten()

# colormaps for true vs. predicted
true_cmap = ListedColormap(["#ffcccc","#f0f0f0","#ccffcc"])
pred_cmap = ListedColormap(["#ff6666","#b0b0b0","#66cc66"])

for inst in range(50):
    ax = axes[inst]
    # slice out inst i
    mask   = (df["inst"] == inst)
    if not mask.any():
        # no rows for this instrument: clf.predict would reject an empty frame
        ax.set_title(f"Inst {inst} (no data)")
        ax.axis("off")
        continue
    X_i    = X_all[mask]
    true_i = y_all[mask].to_numpy()
    pred_i = clf.predict(X_i)
    T_i    = len(true_i)

    # price series (dropping same 100 warmups)
    price_i = price_df.iloc[100:100+T_i, inst].values

    # Segment ends are inclusive indices, so each span is drawn to e + 1; using e
    # directly gives one-step segments zero width and leaves gaps between spans.
    # int(lbl) makes the colormap treat the label as a colour index.

    # 5a) plot true-regime background
    for s, e, lbl in get_segments(true_i):
        ax.axvspan(s, e + 1, color=true_cmap(int(lbl)), alpha=0.3, linewidth=0)

    # 5b) plot predicted-regime overlay
    for s, e, lbl in get_segments(pred_i):
        ax.axvspan(s, e + 1, color=pred_cmap(int(lbl)), alpha=0.2, linewidth=0)

    # 5c) plot the price on top
    ax.plot(price_i, color="black", linewidth=1, label="Price")
    ax.set_title(f"Inst {inst}")
    ax.set_xlim(0, T_i)
    ax.set_ylabel("Price")

    # only show legend on the very first plot
    if inst == 0:
        ax.legend(loc="upper right")

# clean up any empty subplots (in case 50 < n_rows*n_cols)
for j in range(50, n_rows*n_cols):
    fig.delaxes(axes[j])

fig.tight_layout()
plt.show()


In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.tree import DecisionTreeClassifier

# ─── 1) Load & clean your features ────────────────────────────────────────────
df = pd.read_csv("features_all_models4.csv")
# Keep only rows past the first 100 of each inst. The per-group row counter avoids
# groupby(...).apply(...), whose handling of the grouping column is deprecated since
# pandas 2.2 (future versions exclude it from each group, which would remove "inst").
# Sorting stably by inst keeps the grouped row order apply() used to return.
is_past_warmup = df.groupby("inst").cumcount() >= 100
df = (
    df[is_past_warmup]
    .sort_values("inst", kind="stable")
    .reset_index(drop=True)
)
X_all = df.drop(["inst","time","true_regime"], axis=1)
y_all = df["true_regime"]

# ─── 2) Train one DecisionTree on everything ─────────────────────────────────
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_all, y_all)

# ─── 3) Load raw prices ───────────────────────────────────────────────────────
# whitespace-separated file without a header row
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)

# ─── 4) Helper to extract contiguous segments ─────────────────────────────────
def get_segments(reg):
    """
    Return the contiguous runs in a 1D sequence of regime labels.

    Accepts a NumPy array, list or pandas Series. The result is a list of
    (start, end, label) tuples where start/end are positional indices and
    end is inclusive. An empty input returns [].
    """
    # Convert first: Series slices compare by index label and lists compare as a
    # whole, so only a plain array gives the element-wise change detection below.
    reg = np.asarray(reg)
    if reg.size == 0:
        # guard: reg[starts] would otherwise index position 0 of an empty array
        return []
    changes = np.flatnonzero(reg[1:] != reg[:-1])      # positions where the next label differs
    starts  = np.concatenate(([0], changes + 1))
    ends    = np.concatenate((changes, [len(reg)-1]))
    return list(zip(starts, ends, reg[starts]))

# ─── 5) Prepare the 10×5 grid ────────────────────────────────────────────────
n_rows, n_cols = 10, 5
fig, axes     = plt.subplots(n_rows, n_cols, figsize=(20, 40), sharex=False)
axes = axes.flatten()

# colormaps for true vs. predicted
true_cmap = ListedColormap(["#ffcccc","#f0f0f0","#ccffcc"])
pred_cmap = ListedColormap(["#ff6666","#b0b0b0","#66cc66"])

for inst in range(50):
    ax = axes[inst]
    # slice out inst i
    mask   = (df["inst"] == inst)
    if not mask.any():
        # skip instruments with no rows; clf.predict cannot take an empty frame
        ax.set_title(f"Inst {inst} (no data)")
        ax.axis("off")
        continue
    X_i    = X_all[mask]
    true_i = y_all[mask].to_numpy()
    pred_i = clf.predict(X_i)
    T_i    = len(true_i)

    # price series (dropping same 100 warmups)
    price_i = price_df.iloc[100:100+T_i, inst].values

    # get_segments returns inclusive end indices; shade up to e + 1 so that a
    # one-step segment has visible width and adjacent spans touch.
    # Labels are cast to int so the colormap looks them up as colour indices.

    # 5a) plot true-regime background
    for s, e, lbl in get_segments(true_i):
        ax.axvspan(s, e + 1, color=true_cmap(int(lbl)), alpha=0.3, linewidth=0)

    # 5b) plot predicted-regime overlay
    for s, e, lbl in get_segments(pred_i):
        ax.axvspan(s, e + 1, color=pred_cmap(int(lbl)), alpha=0.2, linewidth=0)

    # 5c) plot the price on top
    ax.plot(price_i, color="black", linewidth=1, label="Price")
    ax.set_title(f"Inst {inst}")
    ax.set_xlim(0, T_i)
    ax.set_ylabel("Price")

    # only show legend on the very first plot
    if inst == 0:
        ax.legend(loc="upper right")

# clean up any empty subplots (in case 50 < n_rows*n_cols)
for j in range(50, n_rows*n_cols):
    fig.delaxes(axes[j])

fig.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import ParameterGrid, train_test_split

# ─── 1) Global settings & fixed parameters ────────────────────────────────────
SEQ_LEN      = 20    # keep constant for this gridsearch
NUM_CLASSES  = 3     # regimes {0,1,2}
NUM_EPOCHS   = 15
TEST_SIZE    = 0.2
RANDOM_SEED  = 42

# fix random seeds for reproducibility
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# ─── 2) Data preparation ───────────────────────────────────────────────────────
df = pd.read_csv("features_all_models4.csv")
# drop first-100 warmups per inst.
# A cumcount mask replaces groupby(...).apply(...): applying over the grouping column
# is deprecated since pandas 2.2 and future versions exclude that column from each
# group, which would remove "inst". The stable sort keeps the grouped row order.
WARMUP_ROWS = 100
past_warmup = df.groupby("inst").cumcount() >= WARMUP_ROWS
df = (
    df[past_warmup]
    .sort_values("inst", kind="stable")
    .reset_index(drop=True)
)
X_raw = df.drop(["inst","time","true_regime"], axis=1).values
y_raw = df["true_regime"].values

# NUM_CLASSES is hard-coded, so fail early with a readable message if a label falls
# outside it instead of erroring inside CrossEntropyLoss during training.
assert y_raw.min() >= 0 and y_raw.max() < NUM_CLASSES, "label outside [0, NUM_CLASSES)"

# build sliding-window sequences: SEQ_LEN rows before t, labelled with the regime at t
X_seqs, y_seqs = [], []
for inst in df["inst"].unique():
    mask = (df["inst"] == inst).values
    Xi   = X_raw[mask]
    yi   = y_raw[mask]
    for t in range(SEQ_LEN, len(Xi)):
        X_seqs.append(Xi[t-SEQ_LEN:t])
        y_seqs.append(yi[t])
X_seqs = np.stack(X_seqs)   # (M, SEQ_LEN, n_features)
y_seqs = np.array(y_seqs)   # (M,)

# train/val split (random stratified)
# NOTE(review): consecutive windows overlap in SEQ_LEN-1 rows, so a random split puts
# near-identical sequences in both sets and val accuracy is likely optimistic.
X_train_base, X_val_base, y_train_base, y_val_base = train_test_split(
    X_seqs, y_seqs,
    test_size=TEST_SIZE,
    random_state=RANDOM_SEED,
    stratify=y_seqs
)

# ─── 3) Dataset + Model definitions ───────────────────────────────────────────
class RegimeDataset(Dataset):
    """Wraps NumPy feature/label arrays as float32 / int64 tensors for a DataLoader."""

    def __init__(self, X, y):
        # Convert once up front so __getitem__ is a plain tensor lookup.
        self.X, self.y = (
            torch.from_numpy(X).to(dtype=torch.float32),
            torch.from_numpy(y).to(dtype=torch.int64),
        )

    def __len__(self):
        n_samples = len(self.y)
        return n_samples

    def __getitem__(self, idx):
        # (features, label) for the requested position
        return (self.X[idx], self.y[idx])

class RegimeLSTM(nn.Module):
    """LSTM classifier that emits NUM_CLASSES logits from the last timestep.

    Args:
        input_size:  number of features per timestep.
        hidden_size: LSTM hidden units per layer.
        num_layers:  number of stacked LSTM layers.
        dropout:     dropout between stacked LSTM layers; ignored (set to 0)
                     when ``num_layers`` is 1.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        # nn.LSTM only applies dropout between stacked layers. The grid includes
        # num_layers=1, where a non-zero value has no effect and raises a
        # UserWarning on every trial, so clamp it to 0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size  = input_size,
            hidden_size = hidden_size,
            num_layers  = num_layers,
            batch_first = True,
            dropout     = inter_layer_dropout
        )
        self.fc = nn.Linear(hidden_size, NUM_CLASSES)

    def forward(self, x):
        """Return logits of shape (batch, NUM_CLASSES) for x of shape (batch, seq, input_size)."""
        out, _ = self.lstm(x)
        out     = out[:, -1, :]           # last timestep
        return self.fc(out)

# ─── 4) Training + evaluation function ────────────────────────────────────────
def train_evaluate(hidden_size, num_layers, dropout, lr, batch_size):
    """Train a RegimeLSTM with the given hyperparameters and score it.

    Trains for NUM_EPOCHS on (X_train_base, y_train_base) with Adam and
    cross-entropy loss, then evaluates once on (X_val_base, y_val_base).

    Args:
        hidden_size: LSTM hidden units per layer.
        num_layers:  number of stacked LSTM layers.
        dropout:     dropout passed to the model.
        lr:          Adam learning rate.
        batch_size:  batch size for both loaders.

    Returns:
        Validation accuracy (fraction correct) after the final epoch.
    """
    # Re-seed per trial so every configuration starts from the same RNG state.
    # With only the single global seed, each trial's weight init and shuffling
    # depended on how many trials ran before it, confounding the comparison.
    torch.manual_seed(RANDOM_SEED)

    train_loader = DataLoader(
        RegimeDataset(X_train_base, y_train_base),
        batch_size = batch_size, shuffle=True
    )
    val_loader   = DataLoader(
        RegimeDataset(X_val_base,   y_val_base),
        batch_size = batch_size, shuffle=False
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model  = RegimeLSTM(
        input_size  = X_seqs.shape[2],
        hidden_size = hidden_size,
        num_layers  = num_layers,
        dropout     = dropout
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(NUM_EPOCHS):
        model.train()
        for Xb, yb in train_loader:
            Xb, yb = Xb.to(device), yb.to(device)
            optimizer.zero_grad()
            out   = model(Xb)
            loss  = criterion(out, yb)
            loss.backward()
            optimizer.step()

    # single evaluation pass with dropout disabled and no gradient tracking
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for Xb, yb in val_loader:
            Xb, yb = Xb.to(device), yb.to(device)
            preds = model(Xb).argmax(dim=1)
            correct += (preds == yb).sum().item()
            total   += yb.size(0)

    return correct / total

# ─── 5) Define your grid ───────────────────────────────────────────────────────
# Search space: every combination of these values is trained once.
param_grid = dict(
    hidden_size=[32, 64, 128],
    num_layers=[1, 2],
    dropout=[0.1, 0.2, 0.25],
    lr=[1e-3],
    batch_size=[32, 64],
)

# ─── 6) Run the grid search with progress prints ───────────────────────────────
grid = list(ParameterGrid(param_grid))
total = len(grid)
results = []

for trial_no, config in enumerate(grid, start=1):
    print(f"[{trial_no}/{total}] Testing:", config)
    # the grid keys match train_evaluate's parameter names one-to-one
    score = train_evaluate(**config)
    print(f"    → val_acc = {score:.4f}\n")
    results.append(dict(config, val_acc=score))

# ─── 7) Summarize all results, sorted by accuracy ─────────────────────────────
sorted_results = sorted(results, key=lambda row: row["val_acc"], reverse=True)

print("\nAll hyperparameters, ranked by validation accuracy:\n")
for position, row in enumerate(sorted_results, 1):
    score = row["val_acc"]
    settings = [f"{key}={value}" for key, value in row.items() if key != "val_acc"]
    joined = ", ".join(settings)
    print(f"{position:02d}. val_acc={score:.4f}  |  {joined}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# ─── 1) Hyperparameters ───────────────────────────────────────────────────────
SEQ_LEN     = 20      # timesteps in each input window
BATCH_SIZE  = 32      # samples per DataLoader batch
HIDDEN_SIZE = 128     # LSTM hidden units per layer
NUM_LAYERS  = 2       # stacked LSTM layers
DROPOUT     = 0.1     # dropout passed to nn.LSTM
LR          = 1e-3    # Adam learning rate
NUM_EPOCHS  = 20      # full passes over the training split
TEST_SIZE   = 0.3     # fraction of sequences held out for validation
RANDOM_SEED = 42      # seed for the split and for the RNGs below

# Apply the seed to NumPy and PyTorch as well; otherwise it only reaches
# train_test_split and weight init / batch order change on every run.
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# ─── 2) Dataset ───────────────────────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Map-style dataset over a feature array and a label array.

    Both inputs are NumPy arrays. Features are stored as float32 tensors and
    labels as int64 tensors; item i is the pair (X[i], y[i]).
    """

    def __init__(self, X, y):
        x_tensor = torch.from_numpy(X)
        y_tensor = torch.from_numpy(y)
        self.X = x_tensor.to(torch.float32)
        self.y = y_tensor.to(torch.int64)

    def __len__(self):
        # dataset size is defined by the label tensor
        return len(self.y)

    def __getitem__(self, idx):
        features, label = self.X[idx], self.y[idx]
        return features, label

# ─── 3) Model ─────────────────────────────────────────────────────────────────
class RegimeLSTM(nn.Module):
    """Sequence classifier: stacked LSTM followed by a linear layer on the last timestep.

    Args:
        input_size:  number of features per timestep.
        hidden_size: LSTM hidden units per layer.
        num_layers:  number of stacked LSTM layers.
        num_classes: size of the output logit vector.
        dropout:     dropout between stacked LSTM layers; ignored (set to 0)
                     when ``num_layers`` is 1.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super().__init__()
        # Dropout in nn.LSTM acts only between stacked layers; for a single layer
        # a non-zero value is a no-op that raises a UserWarning, so use 0 there.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            dropout=effective_dropout)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for x of shape (batch, seq, input_size)."""
        out, _ = self.lstm(x)
        out = out[:, -1, :]          # last timestep
        return self.fc(out)

# ─── 4) Load & preprocess ────────────────────────────────────────────────────
# a) features
df = pd.read_csv("features_all_models4.csv")

# b) drop first 100 warm-up rows per inst.
#    Uses a per-group row counter instead of groupby(...).apply(...): applying over
#    the grouping column is deprecated since pandas 2.2 and future versions exclude
#    that column from each group, which would remove "inst". The stable sort keeps
#    the instrument-by-instrument row order apply() produced.
WARMUP_ROWS = 100
past_warmup = df.groupby("inst").cumcount() >= WARMUP_ROWS
df = (
    df[past_warmup]
    .sort_values("inst", kind="stable")
    .reset_index(drop=True)
)

# c) optionally add raw price as a feature
#    "inst" is used as a column position in prices.txt — assumes integer ids; verify.
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)
df["price"] = np.nan
for inst in df["inst"].unique():
    mask = (df["inst"] == inst)
    p = price_df.iloc[WARMUP_ROWS:WARMUP_ROWS + mask.sum(), inst].values
    df.loc[mask, "price"] = p
# every row must have received a price, otherwise NaNs would flow into the LSTM
assert not df["price"].isna().any(), "price column has unfilled rows"

# d) split out features & labels
X_raw = df.drop(["inst","time","true_regime"], axis=1).values
y_raw = df["true_regime"].values

# figure out how many classes we have
NUM_CLASSES = int(y_raw.max()) + 1   # e.g. 2 + 1 = 3

# ─── 5) Build sequences ───────────────────────────────────────────────────────
# Each sample is the SEQ_LEN rows before t, labelled with the regime at t.
X_seqs, y_seqs = [], []
for inst in df["inst"].unique():
    mask = (df["inst"] == inst).values
    Xi   = X_raw[mask]
    yi   = y_raw[mask]
    for t in range(SEQ_LEN, len(Xi)):
        X_seqs.append(Xi[t-SEQ_LEN:t])
        y_seqs.append(yi[t])

X_seqs = np.stack(X_seqs)
y_seqs = np.array(y_seqs)

# ─── 6) Train/test split ──────────────────────────────────────────────────────
# NOTE(review): consecutive windows overlap in SEQ_LEN-1 rows, so a random split puts
# near-identical sequences on both sides and validation accuracy is likely optimistic.
# Consider a chronological or per-instrument split before trusting these numbers.
X_train, X_val, y_train, y_val = train_test_split(
    X_seqs, y_seqs,
    test_size=TEST_SIZE,
    random_state=RANDOM_SEED,
    stratify=y_seqs
)

# ─── 7) DataLoaders ───────────────────────────────────────────────────────────
train_ds = RegimeDataset(X_train, y_train)
val_ds   = RegimeDataset(X_val,   y_val)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE)

# ─── 8) Model, loss, opt ──────────────────────────────────────────────────────
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegimeLSTM(
    input_size  = X_seqs.shape[2],
    hidden_size = HIDDEN_SIZE,
    num_layers  = NUM_LAYERS,
    num_classes = NUM_CLASSES,
    dropout     = DROPOUT
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# ─── 9) Training loop ─────────────────────────────────────────────────────────
for epoch in range(1, NUM_EPOCHS+1):
    # — Train —
    model.train()
    train_loss = 0.0
    correct, total = 0, 0
    for Xb, yb in train_loader:
        Xb, yb = Xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(Xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()

        # weight the batch-mean loss by batch size so the epoch figure is per-sample
        train_loss += loss.item() * Xb.size(0)
        preds = out.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total   += yb.size(0)

    train_loss /= total
    train_acc   = correct / total

    # — Validate —
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for Xb, yb in val_loader:
            Xb, yb = Xb.to(device), yb.to(device)
            out = model(Xb)
            preds = out.argmax(dim=1)
            val_correct += (preds == yb).sum().item()
            val_total   += yb.size(0)

    val_acc = val_correct / val_total

    print(f"Epoch {epoch:02d}  "
          f"Train Loss: {train_loss:.4f}  "
          f"Train Acc: {train_acc:.4f}  "
          f"Val Acc:   {val_acc:.4f}"
    )

# ─── 10) Save weights ────────────────────────────────────────────────────────
# NOTE: writes to the same file name as the earlier LSTM training cell.
torch.save(model.state_dict(), "regime_lstm.pth")
