In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.pipeline import make_pipeline

In [None]:
from pre_proc_pipeline import pipeline_regiem
from pre_proc_labelling_long import plot_all_regimes_long

In [None]:
# Regime labels: 0 = bear market, 1 = neutral, 2 = bull market.
# Tally how often each label occurs over the first 50 instruments
# (500 timesteps requested per instrument).
count = 0        # neutral (label 1)
count_bull = 0   # anything that is neither 0 nor 1, i.e. label 2
count_bear = 0   # label 0
for i in range(50):
    labels = plot_all_regimes_long(500, False, i)
    for j in labels:
        if j == 1:
            count += 1
        elif j == 0:
            count_bear += 1
        else:
            count_bull += 1

print(f"Count of label Neutral: {count}")
print(f"Count of label Bear: {count_bear}")
print(f"Count of label Bull: {count_bull}")

In [None]:
df = pipeline_regiem()

In [None]:
df

In [None]:
pd.set_option('display.max_columns', None)
df.describe()


In [None]:
pd.reset_option('display.max_columns')

In [None]:
import pandas as pd
from pre_proc_labelling_long import plot_all_regimes_long
from pre_proc_pipeline import pipeline_regiem

df = pipeline_regiem()

# Build one "regime" Series per instrument, indexed like that instrument's rows.
label_frames = []
for inst, inst_df in df.groupby(level="inst", sort=False):
    labels = plot_all_regimes_long(len(inst_df), False, inst)

    # Align on the common prefix: the labeller may return fewer labels than
    # rows (handled before) or, defensively, more (which previously raised a
    # length-mismatch error inside pd.Series).
    n_aligned = min(len(labels), len(inst_df))
    label_frames.append(
        pd.Series(labels[:n_aligned], index=inst_df.index[:n_aligned], name="regime")
    )

regimes = pd.concat(label_frames)

df2 = df.copy()
df2["regime"] = regimes  # index-aligned; rows without a label become NaN
# Target = the regime of the NEXT timestep within the same instrument.
df2["target"] = df2.groupby(level="inst")["regime"].shift(-1)

# Drops the last row of every instrument (no next step) and any unlabelled rows.
df2 = df2.dropna(subset=["target"])

X = df2.drop(columns=["regime", "target"])
y = df2["target"].astype(int)

print("X:", X.shape, "y:", y.shape)


In [None]:
# Chronological split per instrument: the first 500 rows train, the rest test.
inst_indexes = [rows.index for _, rows in X.groupby(level='inst', sort=False)]

train_X_parts = [X.loc[ix[:500]] for ix in inst_indexes]
train_y_parts = [y.loc[ix[:500]] for ix in inst_indexes]
test_X_parts  = [X.loc[ix[500:]] for ix in inst_indexes]
test_y_parts  = [y.loc[ix[500:]] for ix in inst_indexes]

X_train, X_test = pd.concat(train_X_parts), pd.concat(test_X_parts)
y_train, y_test = pd.concat(train_y_parts), pd.concat(test_y_parts)

# Binarise: label 2 becomes 1, every other label (0 and 1) becomes 0.
y_train = y_train.eq(2).astype(int)
y_test  = y_test.eq(2).astype(int)

# Only the training rows are shuffled; the test rows keep their order.
X_train, y_train = shuffle(X_train, y_train, random_state=42)

print("Before scaling →", "X_train:", X_train.shape, "X_test:", X_test.shape)

# Fit mean/std on the training rows only, then reuse them for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

In [None]:
'''
Dataset Context:

X_train: (25000, 59) X_test: (6950, 59)
59 features,
Labels: 0 -> Bear, 1 -> Bull
Derived from only close price data over 50 instruments, we train on 500 timesteps of data per instrument
Model's aim: we want to maximise consistency in regime identification; regimes often last around 150 timesteps, but can range from 30 to 500.

Access data by (training rows are shuffled, test rows are not; each instrument is split at timestep 500):
X_train_scaled : Training set first 500 timesteps of each instrument
y_train : Labelled data for X_train_scaled already aligned


X_test_scaled : Test set, the remaining timesteps (index 500 onward) of each instrument
y_test : Labelled data for X_test_scaled already aligned

'''

In [None]:
import lightgbm as lgb
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score, f1_score
from lightgbm.callback import early_stopping, log_evaluation


dtrain = lgb.Dataset(X_train, label=y_train)
dvalid = lgb.Dataset(X_test,  label=y_test, reference=dtrain)

# Training parameters
params = {
    "objective":       "binary",
    "metric":          ["binary_logloss","binary_error"],
    "learning_rate":   0.05,
    "num_leaves":      31,
    "min_data_in_leaf":20,
    "feature_fraction":0.8,
    "bagging_fraction":0.8,
    "bagging_freq":     5,
    "verbose":         -1,
}

num_round = 500
pbar = tqdm(total=num_round, desc="LightGBM boosting rounds")

def lgb_tqdm_callback(env):
    """Advance the progress bar by one boosting round."""
    pbar.update(1)

# Built-in LightGBM callbacks carry an `order` attribute that lgb.train uses to
# sort callbacks; give the custom one a value as well so that sort cannot fail.
lgb_tqdm_callback.order = 20

# NOTE(review): the test set doubles as the early-stopping validation set, so
# the metrics printed below are optimistically biased. Carve a validation
# split out of the training data for an unbiased test estimate.
try:
    bst = lgb.train(
        params,
        train_set=dtrain,
        num_boost_round=num_round,
        valid_sets=[dtrain, dvalid],
        valid_names=["train","valid"],
        callbacks=[
            # stop after 50 rounds without improvement on the validation set
            early_stopping(stopping_rounds=50),
            # log train/valid metrics every 10 iterations
            log_evaluation(period=10),
            # progress bar
            lgb_tqdm_callback,
        ]
    )
finally:
    # Early stopping ends training before `num_round` is reached, so close the
    # bar here instead of waiting for the last iteration inside the callback.
    pbar.close()

y_pred_prob = bst.predict(X_test, num_iteration=bst.best_iteration)
y_pred      = (y_pred_prob > 0.5).astype(int)

print("Test accuracy:", accuracy_score(y_test, y_pred))
print("Test  F1     :", f1_score(y_test, y_pred))


In [None]:
from xgboost import XGBClassifier
# Gradient-boosted trees (XGBoost) on the scaled features.
xgb_settings = {
    "n_estimators": 500,
    "max_depth": 6,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "use_label_encoder": False,
    "eval_metric": 'logloss',
    "random_state": 42,
}
xgb = XGBClassifier(**xgb_settings)
xgb.fit(X_train_scaled, y_train)

xgb_accuracy = xgb.score(X_test_scaled, y_test)
print("XGBoost accuracy:", xgb_accuracy)


In [None]:
from catboost import CatBoostClassifier
# CatBoost on the scaled features.
cb_settings = dict(
    iterations=500,
    depth=6,
    learning_rate=0.05,
    eval_metric='Accuracy',
    random_seed=42,
    verbose=100,
)
cb = CatBoostClassifier(**cb_settings)

# The test set is passed as eval_set and drives use_best_model, so the
# accuracy printed below is measured on the same data used for model selection.
cb.fit(
    X_train_scaled,
    y_train,
    eval_set=(X_test_scaled, y_test),
    use_best_model=True,
)
cb_accuracy = cb.score(X_test_scaled, y_test)
print("CatBoost accuracy:", cb_accuracy)


In [None]:
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model  import LogisticRegression

# Base learners for the stack; a logistic regression combines their outputs.
xgb_base = XGBClassifier(n_estimators=300, max_depth=6, learning_rate=0.05, random_state=42)
rf_base  = RandomForestClassifier(n_estimators=200, max_depth=None, random_state=42)
cb_base  = CatBoostClassifier(iterations=300, depth=5, learning_rate=0.05, verbose=0, random_seed=42)

estimators = [('xgb', xgb_base), ('rf', rf_base), ('cb', cb_base)]

stack = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
    cv=5,        # 5-fold cross-validation, as configured by the original call
    n_jobs=-1,
)
stack.fit(X_train_scaled, y_train)

stack_accuracy = stack.score(X_test_scaled, y_test)
print("Stacked model accuracy:", stack_accuracy)


In [None]:
# Rebuild the per-instrument split WITHOUT shuffling, so rows stay in
# instrument/time order.
train_X_parts, train_y_parts = [], []
test_X_parts,  test_y_parts  = [], []

for _, inst_rows in X.groupby(level='inst', sort=False):
    head = inst_rows.index[:500]   # first 500 rows -> train
    tail = inst_rows.index[500:]   # remainder      -> test
    train_X_parts.append(X.loc[head])
    train_y_parts.append(y.loc[head])
    test_X_parts.append(X.loc[tail])
    test_y_parts.append(y.loc[tail])

X_train, y_train = pd.concat(train_X_parts), pd.concat(train_y_parts)
X_test,  y_test  = pd.concat(test_X_parts),  pd.concat(test_y_parts)

# Binarise: label 2 becomes 1, every other label becomes 0.
y_train = y_train.eq(2).astype(int)
y_test  = y_test.eq(2).astype(int)

print("Before scaling →", "X_train:", X_train.shape, "X_test:", X_test.shape)

# Standardise using statistics learned on the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

In [None]:
# pip install sklearn-crfsuite

import numpy as np
import pandas as pd
from sklearn_crfsuite import CRF
from sklearn.preprocessing import StandardScaler

# 1) Split X, y per instrument into train/test lists.
#    Labels are the raw integer targets rendered as strings (not binarised in
#    this cell), one list per instrument sequence.
train_dfs, train_lbls = [], []
test_dfs,  test_lbls  = [], []

for inst, grp in X.groupby(level='inst', sort=False):
    idx = grp.index
    train_dfs.append(X.loc[idx[:500]])
    train_lbls.append([str(v) for v in y.loc[idx[:500]].values])
    test_dfs .append(X.loc[idx[500:]])
    test_lbls .append([str(v) for v in y.loc[idx[500:]].values])

# 2) Scaling: fit on the TRAINING rows only. Fitting on train + test, as this
#    cell did before, leaks test-set statistics into the features and is
#    inconsistent with the other cells of this notebook.
scaler = StandardScaler()
scaler.fit(pd.concat(train_dfs))

def _scale_frame(frame):
    """Return `frame` standardised by `scaler`, keeping its index and columns."""
    return pd.DataFrame(
        scaler.transform(frame),
        index=frame.index,
        columns=frame.columns
    )

train_dfs = [_scale_frame(frame) for frame in train_dfs]
test_dfs  = [_scale_frame(frame) for frame in test_dfs]

# 3) Convert each row to a feature dict
def df_to_feats(df):
    """Turn each row of `df` into a dict mapping "f<column position>" to a float.

    Returns a list with one dict per row, in row order.
    """
    sequence = []
    for row_values in df.values:
        feats = {}
        for pos, raw in enumerate(row_values):
            feats[f"f{pos}"] = float(raw)
        sequence.append(feats)
    return sequence

# One feature-dict sequence per instrument.
X_seq_train = list(map(df_to_feats, train_dfs))
X_seq_test  = list(map(df_to_feats, test_dfs))

# 4) Train CRF
crf_settings = dict(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=200,
    all_possible_transitions=True,
)
crf = CRF(**crf_settings)
crf.fit(X_seq_train, train_lbls)

# 5) Predict & evaluate: flatten the per-instrument label sequences back to ints
def _flatten_labels(sequences):
    """Concatenate string-label sequences into one integer array."""
    return np.concatenate([[int(lbl) for lbl in seq] for seq in sequences])

pred_seq = crf.predict(X_seq_test)
y_pred   = _flatten_labels(pred_seq)
y_true   = _flatten_labels(test_lbls)

accuracy = (y_pred == y_true).mean()
print(f"CRF test accuracy: {accuracy:.4f}")



In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Split & scale exactly as you already have ───────────────────────────
# Per-instrument chronological split (first 500 rows train, remainder test).
inst_indexes = [rows.index for _, rows in X.groupby(level='inst', sort=False)]

train_X_parts = [X.loc[ix[:500]] for ix in inst_indexes]
train_y_parts = [y.loc[ix[:500]] for ix in inst_indexes]
test_X_parts  = [X.loc[ix[500:]] for ix in inst_indexes]
test_y_parts  = [y.loc[ix[500:]] for ix in inst_indexes]

X_train, y_train = pd.concat(train_X_parts), pd.concat(train_y_parts)
X_test,  y_test  = pd.concat(test_X_parts),  pd.concat(test_y_parts)

# Label 2 becomes 1, every other label becomes 0.
y_train = y_train.eq(2).astype(int)
y_test  = y_test.eq(2).astype(int)

print("Before scaling →", "X_train:", X_train.shape, "X_test:", X_test.shape)

# Standardise with statistics from the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── 1) Build sliding windows ───────────────────────────────────────────────
L = 50  # window length (you can tune this)

def make_windows(X_arr, y_arr, idx_index, window=None):
    """Cut per-instrument sliding windows out of a row-aligned feature matrix.

    Parameters
    ----------
    X_arr : array of shape (n_rows, n_features), row-aligned with `idx_index`.
    y_arr : array of shape (n_rows,), labels row-aligned with `X_arr`.
    idx_index : index whose level 0 identifies the instrument of each row.
    window : int, optional
        Window length; defaults to the module-level `L`.

    Returns
    -------
    (Xs, ys) where Xs has shape (n_windows, window, n_features) and ys holds
    the label of the LAST row of each window. Windows never span two
    instruments. Both arrays are empty when no instrument has `window` rows.

    Raises
    ------
    ValueError if `window` is smaller than 1.
    """
    if window is None:
        window = L
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    Xs, ys = [], []
    inst_ids = idx_index.get_level_values(0)
    for inst in np.unique(inst_ids):
        mask = inst_ids == inst
        arr = X_arr[mask]       # (T_inst, n_features)
        labs = y_arr[mask]
        # "+ 1" keeps the final full window, the one ending on the last row;
        # range(len(arr) - window) silently dropped it.
        for start in range(len(arr) - window + 1):
            Xs.append(arr[start : start + window])
            ys.append(labs[start + window - 1])

    if not Xs:
        # np.stack raises on an empty list; return correctly shaped empties.
        return (
            np.empty((0, window, X_arr.shape[1]), dtype=X_arr.dtype),
            np.empty((0,), dtype=y_arr.dtype),
        )
    return np.stack(Xs), np.array(ys)

# Window the scaled train and test matrices, instrument by instrument.
X_seq_train, y_seq_train = make_windows(X_train_scaled, y_train.values, X_train.index)
X_seq_test,  y_seq_test  = make_windows(X_test_scaled,  y_test.values,  X_test.index)

print("Seq shapes →", X_seq_train.shape, y_seq_train.shape)

# ─── 2) Dataset & DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Pairs of (window, label) stored as float32 tensors."""

    def __init__(self, X, y):
        # X: (N, L, F) windows; y: (N,) labels. Both are NumPy arrays.
        self.X = torch.from_numpy(X).to(torch.float32)
        self.y = torch.from_numpy(y).to(torch.float32)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, i):
        window, label = self.X[i], self.y[i]
        return window, label

batch_size = 256

# Training batches are reshuffled every epoch; evaluation keeps window order.
train_dataset = RegimeDataset(X_seq_train, y_seq_train)
test_dataset  = RegimeDataset(X_seq_test,  y_seq_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,  num_workers=0)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False, num_workers=0)


# ─── 3) Pure‐LSTM model ─────────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM encoder followed by a single sigmoid unit.

    Takes windows of shape (batch, L, F) and returns one probability per
    window, shape (batch,).
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, drop=0.3):
        super().__init__()
        # Dropout between stacked layers only applies when there is a stack.
        inter_layer_drop = drop if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_drop,
        )
        self.drop = nn.Dropout(drop)
        self.fc   = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # hidden: (num_layers, batch, hidden_dim); keep the top layer's state.
        _, (hidden, _cell) = self.lstm(x)
        top_state = self.drop(hidden[-1])          # (batch, hidden_dim)
        prob = torch.sigmoid(self.fc(top_state))   # (batch, 1)
        return prob.squeeze(1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model  = LSTMRegime(n_features=X_seq_train.shape[2]).to(device)
opt    = torch.optim.Adam(model.parameters(), lr=1e-3)
crit   = nn.BCELoss()

# ─── 4) Train & evaluate with tqdm ─────────────────────────────────────────
num_epochs = 6
n_train_windows = len(train_loader.dataset)
n_test_windows  = len(test_loader.dataset)

for epoch in range(1, num_epochs + 1):
    # Training pass: accumulate the sample-weighted loss over the epoch.
    model.train()
    total_loss = 0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]", leave=False)
    for xb, yb in train_bar:
        xb = xb.to(device)
        yb = yb.to(device)

        opt.zero_grad()
        loss = crit(model(xb), yb)
        loss.backward()
        opt.step()

        batch_loss = loss.item()
        total_loss += batch_loss * xb.size(0)
        train_bar.set_postfix(loss=f"{batch_loss:.4f}")

    avg_loss = total_loss / n_train_windows

    # Evaluation pass: threshold the probabilities at 0.5 and count hits.
    model.eval()
    correct = 0
    eval_bar = tqdm(test_loader, desc=f"Epoch {epoch}/{num_epochs} [Eval ]", leave=False)
    with torch.no_grad():
        for xb, yb in eval_bar:
            xb = xb.to(device)
            yb = yb.to(device)
            hits = (model(xb) > 0.5).float() == yb
            correct += hits.sum().item()
            eval_bar.set_postfix(acc=f"{hits.float().mean().item():.4f}")

    epoch_acc = correct / n_test_windows
    print(f"Epoch {epoch:02d} — Train loss: {avg_loss:.4f},  Test acc: {epoch_acc:.4f}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Split & scale ───────────────────────────────────────────────────────
# Split each instrument at row 500: earlier rows train, later rows test.
train_X_parts, train_y_parts = [], []
test_X_parts,  test_y_parts  = [], []

for _, inst_rows in X.groupby(level='inst', sort=False):
    head, tail = inst_rows.index[:500], inst_rows.index[500:]
    train_X_parts.append(X.loc[head])
    train_y_parts.append(y.loc[head])
    test_X_parts.append(X.loc[tail])
    test_y_parts.append(y.loc[tail])

X_train, y_train = pd.concat(train_X_parts), pd.concat(train_y_parts)
X_test,  y_test  = pd.concat(test_X_parts),  pd.concat(test_y_parts)

# Label 2 becomes 1, every other label becomes 0.
y_train = y_train.eq(2).astype(int)
y_test  = y_test.eq(2).astype(int)

# Standardise with training-set statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── 1) Sliding windows ──────────────────────────────────────────────────────
L = 50  # sequence length

def make_windows(X_arr, y_arr, idx_index, window=None):
    """Build per-instrument sliding windows and the label of each window's last row.

    Parameters
    ----------
    X_arr : array (n_rows, n_features), row-aligned with `idx_index`.
    y_arr : array (n_rows,), labels row-aligned with `X_arr`.
    idx_index : index whose level 0 is the instrument id of each row.
    window : int, optional
        Sequence length; falls back to the module-level `L` when omitted.

    Returns
    -------
    Xs of shape (n_windows, window, n_features) and ys of shape (n_windows,).
    Empty arrays are returned when no instrument has at least `window` rows.

    Raises
    ------
    ValueError if `window` is smaller than 1.
    """
    if window is None:
        window = L
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    Xs, ys = [], []
    inst_ids = idx_index.get_level_values(0)
    for inst in np.unique(inst_ids):
        mask = (inst_ids == inst)
        arr = X_arr[mask]
        labs = y_arr[mask]
        # Include the last full window: the previous range(len(arr) - L)
        # stopped one short and never used the final row.
        for start in range(len(arr) - window + 1):
            Xs.append(arr[start:start + window])
            ys.append(labs[start + window - 1])

    if not Xs:
        # np.stack cannot handle an empty list.
        return (
            np.empty((0, window, X_arr.shape[1]), dtype=X_arr.dtype),
            np.empty((0,), dtype=y_arr.dtype),
        )
    return np.stack(Xs), np.array(ys)

# Sliding windows over the scaled features, built separately for train and test.
X_seq_train, y_seq_train = make_windows(
    X_train_scaled, y_train.values, X_train.index
)
X_seq_test, y_seq_test = make_windows(
    X_test_scaled, y_test.values, X_test.index
)

# ─── 2) Dataset & DataLoader ────────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Windowed samples: float32 tensors of windows (N, L, F) and labels (N,)."""

    def __init__(self, X, y):
        self.X = torch.from_numpy(X).to(torch.float32)
        self.y = torch.from_numpy(y).to(torch.float32)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, i):
        window, label = self.X[i], self.y[i]
        return window, label

batch_size = 256

# num_workers=0 keeps loading in the main process (avoids multiprocessing
# pickling issues).
train_dataset = RegimeDataset(X_seq_train, y_seq_train)
test_dataset  = RegimeDataset(X_seq_test,  y_seq_test)

train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=0
)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=0
)

# ─── 3) Vanilla RNN model ───────────────────────────────────────────────────
class RNNRegime(nn.Module):
    """tanh RNN encoder followed by a single sigmoid unit.

    Maps windows of shape (batch, L, F) to one probability per window,
    shape (batch,).
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, drop=0.3):
        super().__init__()
        # Inter-layer dropout is only meaningful for stacked RNNs.
        inter_layer_drop = drop if num_layers > 1 else 0.0
        self.rnn = nn.RNN(
            input_size=n_features,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            nonlinearity='tanh',
            dropout=inter_layer_drop,
        )
        self.drop = nn.Dropout(drop)
        self.fc   = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # hidden: (num_layers, batch, hidden_dim); use the top layer's state.
        _, hidden = self.rnn(x)
        top_state = self.drop(hidden[-1])          # (batch, hidden_dim)
        prob = torch.sigmoid(self.fc(top_state))   # (batch, 1)
        return prob.squeeze(1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model  = RNNRegime(n_features=X_seq_train.shape[2]).to(device)
opt    = torch.optim.Adam(model.parameters(), lr=1e-3)
crit   = nn.BCELoss()

# ─── 4) Train & evaluate ───────────────────────────────────────────────────
num_epochs = 20
n_train_windows = len(train_loader.dataset)
n_test_windows  = len(test_loader.dataset)

for epoch in range(1, num_epochs + 1):
    # Training pass: accumulate the sample-weighted loss.
    model.train()
    total_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]", leave=False)
    for xb, yb in train_bar:
        xb = xb.to(device)
        yb = yb.to(device)

        opt.zero_grad()
        loss = crit(model(xb), yb)
        loss.backward()
        opt.step()

        batch_loss = loss.item()
        total_loss += batch_loss * xb.size(0)
        train_bar.set_postfix(loss=f"{batch_loss:.4f}")

    avg_loss = total_loss / n_train_windows

    # Evaluation pass: probabilities above 0.5 count as class 1.
    model.eval()
    correct = 0
    eval_bar = tqdm(test_loader, desc=f"Epoch {epoch}/{num_epochs} [Eval ]", leave=False)
    with torch.no_grad():
        for xb, yb in eval_bar:
            xb = xb.to(device)
            yb = yb.to(device)
            hits = (model(xb) > 0.5).float() == yb
            correct += hits.sum().item()
            eval_bar.set_postfix(acc=f"{hits.float().mean().item():.4f}")

    epoch_acc = correct / n_test_windows
    print(f"Epoch {epoch:02d} — Train loss: {avg_loss:.4f},  Test acc: {epoch_acc:.4f}")


In [None]:
# ─── 0) Installs (if you haven’t already) ───────────────────────────────────
# In a notebook cell:
# %pip install --upgrade pip
# %pip install scikit-learn sktime numba

# ─── 1) Imports & assume X, y are already in memory ─────────────────────────
import time
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sktime.transformations.panel.rocket import MiniRocket
from sklearn.linear_model import SGDClassifier

# ─── 2) Split into train/test by inst, then scale ───────────────────────────
# Per-instrument split at row 500; labels end up as 0/1 NumPy arrays.
inst_indexes = [grp.index for _, grp in X.groupby(level='inst', sort=False)]

train_parts  = [X.loc[ix[:500]] for ix in inst_indexes]
train_labels = [y.loc[ix[:500]] for ix in inst_indexes]
test_parts   = [X.loc[ix[500:]] for ix in inst_indexes]
test_labels  = [y.loc[ix[500:]] for ix in inst_indexes]

X_train = pd.concat(train_parts)
X_test  = pd.concat(test_parts)
# Label 2 becomes 1, every other label becomes 0.
y_train = pd.concat(train_labels).eq(2).astype(int).values
y_test  = pd.concat(test_labels).eq(2).astype(int).values

# Standardise with training-set statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── 3) Build sliding‐window arrays ─────────────────────────────────────────
L = 50
def make_windows(X_arr, y_arr, idx, window=None):
    """Build per-instrument sliding windows and the label of each window's last row.

    Parameters
    ----------
    X_arr : array (n_rows, n_features), row-aligned with `idx`.
    y_arr : array (n_rows,), labels row-aligned with `X_arr`.
    idx : index whose level 0 is the instrument id of each row.
    window : int, optional
        Window length; falls back to the module-level `L` when omitted.

    Returns
    -------
    Xs of shape (n_windows, window, n_features) and ys of shape (n_windows,).
    Empty arrays are returned when no instrument has at least `window` rows.

    Raises
    ------
    ValueError if `window` is smaller than 1.
    """
    if window is None:
        window = L
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    Xs, ys = [], []
    insts = idx.get_level_values(0)
    for inst in np.unique(insts):
        mask = insts == inst
        arr  = X_arr[mask]
        labs = y_arr[mask]
        # "+ 1" so the window ending on the final row is not dropped.
        for start in range(len(arr) - window + 1):
            Xs.append(arr[start : start + window])
            ys.append(labs[start + window - 1])

    if not Xs:
        # np.stack cannot handle an empty list.
        return (
            np.empty((0, window, X_arr.shape[1]), dtype=X_arr.dtype),
            np.empty((0,), dtype=y_arr.dtype),
        )
    return np.stack(Xs), np.array(ys)

# y_train / y_test are already NumPy arrays in this cell, so no `.values`.
X_seq_train, y_seq_train = make_windows(
    X_train_scaled, y_train, X_train.index
)
X_seq_test, y_seq_test = make_windows(
    X_test_scaled, y_test, X_test.index
)

print("Windowed shapes:", X_seq_train.shape, y_seq_train.shape)

# ─── 4) Convert to nested DataFrame for sktime ──────────────────────────────
def to_nested(arr3d):
    """Convert a (n_windows, length, n_features) array into a nested DataFrame.

    The result has one row per window and one column "f<i>" per feature; each
    cell holds a pd.Series with that feature's values along the window.
    """
    n_windows, _, n_feats = arr3d.shape
    columns = {}
    for feat in range(n_feats):
        cells = []
        for row in range(n_windows):
            cells.append(pd.Series(arr3d[row, :, feat]))
        columns[f"f{feat}"] = cells
    return pd.DataFrame(columns)

X_train_nested = to_nested(X_seq_train)
X_test_nested  = to_nested(X_seq_test)
print("Nested shape:", X_train_nested.shape)

# ─── 5) Fit MiniRocket & transform ──────────────────────────────────────────
# `MiniRocket` is the univariate transformer. Each window here has one channel
# per feature column, so use the multivariate variant whenever there is more
# than one channel.
from sktime.transformations.panel.rocket import MiniRocketMultivariate

rocket_cls = MiniRocketMultivariate if X_seq_train.shape[2] > 1 else MiniRocket

print("Fitting MiniRocket…")
mrocket = rocket_cls(random_state=42, num_kernels=500, n_jobs=1)
t0 = time.time()
mrocket.fit(X_train_nested)
print(f" → done in {time.time()-t0:.1f}s, kernels={mrocket.num_kernels}")

print("Transforming train…")
t1 = time.time()
X_train_feat = mrocket.transform(X_train_nested)
print(f"  Train → {X_train_feat.shape} in {time.time()-t1:.1f}s")

print("Transforming test…")
t2 = time.time()
X_test_feat  = mrocket.transform(X_test_nested)
print(f"  Test  → {X_test_feat.shape} in {time.time()-t2:.1f}s")

# ─── 6) Train & evaluate a logistic-loss linear model (SGDClassifier) ───────
print("Training SGDClassifier…")
clf = SGDClassifier(
    loss="log_loss",
    penalty="l2",
    max_iter=1000,
    tol=1e-3,
    random_state=42,
    n_jobs=1
)
t0 = time.time()
clf.fit(X_train_feat, y_seq_train)
print(f" → trained in {time.time()-t0:.1f}s")

print("Final test accuracy:")
print(f"  MiniRocket + SGDClassifier → {clf.score(X_test_feat, y_seq_test):.4f}")


In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# ─── 1) ASSUME YOU ALREADY HAVE:
#    X_train_scaled, X_test_scaled: numpy arrays of shape (N_train, 59) and (N_test, 59)
#    y_train, y_test: arrays or Series of 0/1 labels of length N_train and N_test

# ─── 2) DYNAMIC RESHAPING ──────────────────────────────────────────────────────
# Reshape the flat, instrument-ordered matrices into one sequence per instrument.
# NOTE(review): this relies on X_train_scaled / y_train coming from an
# UNSHUFFLED split cell, with rows grouped by instrument in time order.
n_features      = X_train_scaled.shape[1]
train_timesteps = 500

# how many instruments in train?
n_instruments = X_train_scaled.shape[0] // train_timesteps
if n_instruments == 0 or n_instruments * train_timesteps != X_train_scaled.shape[0]:
    raise ValueError(
        f"Expected a positive multiple of {train_timesteps} training rows, "
        f"got {X_train_scaled.shape[0]}"
    )

# infer test_timesteps so the number of test rows used is divisible by n_instruments
total_test_rows = X_test_scaled.shape[0]
test_timesteps  = total_test_rows // n_instruments
needed = n_instruments * test_timesteps

# NOTE(review): trimming trailing rows only yields clean per-instrument
# sequences if every instrument contributes the same number of test rows.
drop = total_test_rows - needed
if drop:
    print(f"Dropping {drop} extra test rows → {total_test_rows} → {needed}")

# Slice into a new name instead of rebinding X_test_scaled, so re-running this
# cell (or running later cells) still sees the full scaled test matrix.
X_test_used = X_test_scaled[:needed]

# convert y to numpy and trim the test labels to the same rows
y_train_arr = y_train.values if hasattr(y_train, "values") else y_train
y_test_arr  = (y_test.values if hasattr(y_test, "values") else y_test)[:needed]

# reshape into (n_instruments, timesteps, n_features) / (n_instruments, timesteps)
X_train_seq = X_train_scaled.reshape(n_instruments, train_timesteps, n_features)
X_test_seq  = X_test_used.reshape(n_instruments, test_timesteps, n_features)

y_train_seq = y_train_arr.reshape(n_instruments, train_timesteps)
y_test_seq  = y_test_arr.reshape(n_instruments, test_timesteps)

print("X_train_seq:", X_train_seq.shape)
print("X_test_seq: ", X_test_seq.shape)
print("y_train_seq:", y_train_seq.shape)
print("y_test_seq: ", y_test_seq.shape)


# ─── 3) DATASET & DATALOADER ─────────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Whole-sequence samples: one (features, per-timestep labels) pair per item."""

    def __init__(self, X, y):
        # X: (n_seq, seq_len, n_features), y: (n_seq, seq_len), both NumPy arrays.
        self.X = torch.from_numpy(X).to(torch.float32)
        self.y = torch.from_numpy(y).to(torch.float32)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        features, labels = self.X[idx], self.y[idx]
        return features, labels

# Each dataset item is a full instrument sequence, hence the small batch size.
batch_size = 8

train_ds = RegimeDataset(X_train_seq, y_train_seq)
test_ds  = RegimeDataset(X_test_seq,  y_test_seq)

train_loader = DataLoader(
    train_ds, batch_size=batch_size, shuffle=True
)
test_loader = DataLoader(
    test_ds, batch_size=batch_size
)


# ─── 4) MODEL DEFINITION ─────────────────────────────────────────────────────
class BiRNN(nn.Module):
    """Two stacked bidirectional recurrent layers with a per-timestep sigmoid output.

    Parameters
    ----------
    rnn_type : "LSTM" selects nn.LSTM; any other value selects nn.GRU.
    hidden1, hidden2 : hidden sizes of the first and second recurrent layer.
    dropout : dropout probability applied after each recurrent layer.
    input_size : number of input features per timestep. Defaults to the
        module-level `n_features`, which was previously the only option.

    forward() maps (batch, seq_len, input_size) to probabilities of shape
    (batch, seq_len).

    NOTE(review): both layers are bidirectional, so the output at timestep t
    also depends on inputs after t — confirm this is acceptable for the task.
    """

    def __init__(self, rnn_type="LSTM", hidden1=64, hidden2=32, dropout=0.2, input_size=None):
        super().__init__()
        if input_size is None:
            input_size = n_features  # backward-compatible fallback to the global
        rnn_cls = nn.LSTM if rnn_type == "LSTM" else nn.GRU

        # first bidirectional layer
        self.rnn1 = rnn_cls(
            input_size=input_size, hidden_size=hidden1,
            num_layers=1, batch_first=True, bidirectional=True
        )
        self.do1  = nn.Dropout(dropout)

        # second bidirectional layer; its input is forward + backward states
        self.rnn2 = rnn_cls(
            input_size=hidden1*2, hidden_size=hidden2,
            num_layers=1, batch_first=True, bidirectional=True
        )
        self.do2  = nn.Dropout(dropout)

        # per-timestep binary output
        self.fc   = nn.Linear(hidden2*2, 1)

    def forward(self, x):
        # x: (batch, seq_len, input_size)
        out, _ = self.rnn1(x)      # → (batch, seq_len, hidden1*2)
        out     = self.do1(out)
        out, _ = self.rnn2(out)    # → (batch, seq_len, hidden2*2)
        out     = self.do2(out)
        logits  = self.fc(out)     # → (batch, seq_len, 1)
        return torch.sigmoid(logits).squeeze(-1)  # → (batch, seq_len)


# instantiate model — choose "LSTM" or "GRU"
# Pick the recurrent cell: "LSTM" or "GRU".
model = BiRNN(rnn_type="LSTM")
# model = BiRNN(rnn_type="GRU")


# ─── 5) TRAINING SETUP ────────────────────────────────────────────────────────
device    = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


def _count_correct(net, loader):
    """Return (correct timesteps, total timesteps) for `net` over `loader`."""
    net.eval()
    n_hit, n_all = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            preds = (net(X_batch) > 0.5).float()
            n_hit += (preds == y_batch).sum().item()
            n_all += y_batch.numel()
    return n_hit, n_all


num_epochs = 20
for epoch in range(1, num_epochs + 1):
    model.train()
    running_loss = 0.0

    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        y_pred = model(X_batch)             # (batch, seq_len)
        loss   = criterion(y_pred, y_batch) # BCELoss averaged over all timesteps
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * X_batch.size(0)

    avg_loss = running_loss / len(train_loader.dataset)

    # accuracy on the test loader after this epoch
    correct, total = _count_correct(model, test_loader)
    val_acc = correct / total

    print(f"Epoch {epoch:02d} — Train Loss: {avg_loss:.4f}, Val Acc: {val_acc:.4f}")


# ─── 6) FINAL EVALUATION ─────────────────────────────────────────────────────
correct, total = _count_correct(model, test_loader)

print(f"\nTest Accuracy: {correct/total:.4f}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Split & scale exactly as you already have ───────────────────────────
# Per-instrument chronological split (first 500 rows train, remainder test).
inst_indexes = [rows.index for _, rows in X.groupby(level='inst', sort=False)]

train_X_parts = [X.loc[ix[:500]] for ix in inst_indexes]
train_y_parts = [y.loc[ix[:500]] for ix in inst_indexes]
test_X_parts  = [X.loc[ix[500:]] for ix in inst_indexes]
test_y_parts  = [y.loc[ix[500:]] for ix in inst_indexes]

X_train, y_train = pd.concat(train_X_parts), pd.concat(train_y_parts)
X_test,  y_test  = pd.concat(test_X_parts),  pd.concat(test_y_parts)

# Label 2 becomes 1, every other label becomes 0.
y_train = y_train.eq(2).astype(int)
y_test  = y_test.eq(2).astype(int)

print("Before scaling →", "X_train:", X_train.shape, "X_test:", X_test.shape)

# Standardise with statistics from the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── 1) Build sliding windows ───────────────────────────────────────────────
L = 60  # window length (you can tune this)

def make_windows(X_arr, y_arr, idx_index, window=None):
    """Cut per-instrument sliding windows out of a row-aligned feature matrix.

    Parameters
    ----------
    X_arr : array of shape (n_rows, n_features), row-aligned with `idx_index`.
    y_arr : array of shape (n_rows,), labels row-aligned with `X_arr`.
    idx_index : index whose level 0 identifies the instrument of each row.
    window : int, optional
        Window length; defaults to the module-level `L`.

    Returns
    -------
    (Xs, ys) where Xs has shape (n_windows, window, n_features) and ys holds
    the label of the LAST row of each window. Windows never span two
    instruments. Both arrays are empty when no instrument has `window` rows.

    Raises
    ------
    ValueError if `window` is smaller than 1.
    """
    if window is None:
        window = L
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    Xs, ys = [], []
    inst_ids = idx_index.get_level_values(0)
    for inst in np.unique(inst_ids):
        mask = inst_ids == inst
        arr = X_arr[mask]       # (T_inst, n_features)
        labs = y_arr[mask]
        # "+ 1" keeps the final full window, the one ending on the last row;
        # range(len(arr) - window) silently dropped it.
        for start in range(len(arr) - window + 1):
            Xs.append(arr[start : start + window])
            ys.append(labs[start + window - 1])

    if not Xs:
        # np.stack raises on an empty list; return correctly shaped empties.
        return (
            np.empty((0, window, X_arr.shape[1]), dtype=X_arr.dtype),
            np.empty((0,), dtype=y_arr.dtype),
        )
    return np.stack(Xs), np.array(ys)

# Window the scaled train and test matrices, instrument by instrument.
X_seq_train, y_seq_train = make_windows(X_train_scaled, y_train.values, X_train.index)
X_seq_test,  y_seq_test  = make_windows(X_test_scaled,  y_test.values,  X_test.index)

print("Seq shapes →", X_seq_train.shape, y_seq_train.shape)

# ─── 2) Dataset & DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Pairs of (window, label) stored as float32 tensors."""

    def __init__(self, X, y):
        # X: (N, L, F) windows; y: (N,) labels. Both are NumPy arrays.
        self.X = torch.from_numpy(X).to(torch.float32)
        self.y = torch.from_numpy(y).to(torch.float32)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, i):
        window, label = self.X[i], self.y[i]
        return window, label

batch_size = 256

# Training batches are reshuffled every epoch; evaluation keeps window order.
train_dataset = RegimeDataset(X_seq_train, y_seq_train)
test_dataset  = RegimeDataset(X_seq_test,  y_seq_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,  num_workers=0)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False, num_workers=0)


# ─── 3) Pure‐LSTM model ─────────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM encoder followed by a single sigmoid unit.

    Takes windows of shape (batch, L, F) and returns one probability per
    window, shape (batch,).
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, drop=0.2):
        super().__init__()
        # Dropout between stacked layers only applies when there is a stack.
        inter_layer_drop = drop if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_drop,
        )
        self.drop = nn.Dropout(drop)
        self.fc   = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # hidden: (num_layers, batch, hidden_dim); keep the top layer's state.
        _, (hidden, _cell) = self.lstm(x)
        top_state = self.drop(hidden[-1])          # (batch, hidden_dim)
        prob = torch.sigmoid(self.fc(top_state))   # (batch, 1)
        return prob.squeeze(1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model  = LSTMRegime(n_features=X_seq_train.shape[2]).to(device)
opt    = torch.optim.Adam(model.parameters(), lr=5e-4)
crit   = nn.BCELoss()

# ─── 4) Train & evaluate with tqdm ─────────────────────────────────────────
num_epochs = 10
n_train_windows = len(train_loader.dataset)
n_test_windows  = len(test_loader.dataset)

for epoch in range(1, num_epochs + 1):
    # Training pass: accumulate the sample-weighted loss over the epoch.
    model.train()
    total_loss = 0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]", leave=False)
    for xb, yb in train_bar:
        xb = xb.to(device)
        yb = yb.to(device)

        opt.zero_grad()
        loss = crit(model(xb), yb)
        loss.backward()
        opt.step()

        batch_loss = loss.item()
        total_loss += batch_loss * xb.size(0)
        train_bar.set_postfix(loss=f"{batch_loss:.4f}")

    avg_loss = total_loss / n_train_windows

    # Evaluation pass: threshold the probabilities at 0.5 and count hits.
    model.eval()
    correct = 0
    eval_bar = tqdm(test_loader, desc=f"Epoch {epoch}/{num_epochs} [Eval ]", leave=False)
    with torch.no_grad():
        for xb, yb in eval_bar:
            xb = xb.to(device)
            yb = yb.to(device)
            hits = (model(xb) > 0.5).float() == yb
            correct += hits.sum().item()
            eval_bar.set_postfix(acc=f"{hits.float().mean().item():.4f}")

    epoch_acc = correct / n_test_windows
    print(f"Epoch {epoch:02d} — Train loss: {avg_loss:.4f},  Test acc: {epoch_acc:.4f}")


In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# You’ll need: pip install pytorch-crf  (the package that provides the `torchcrf` module)
from torchcrf import CRF

# ─── 1) ASSUME YOU ALREADY HAVE:
#    X_train_scaled, X_test_scaled: numpy arrays of shape (N_train, 59) & (N_test, 59)
#    y_train, y_test: arrays or pandas Series of 0/1 labels of length N_train & N_test

# ─── 2) DYNAMIC RESHAPING INTO SEQUENCES ──────────────────────────────────────
# Reshape the flat (rows, features) arrays into one fixed-length sequence per instrument.
# NOTE(review): this assumes rows are grouped instrument by instrument and that every
# instrument contributes exactly `train_timesteps` train rows and an equal number of
# test rows — confirm against the split cell that produced X_train_scaled / X_test_scaled.
n_features      = X_train_scaled.shape[1]
train_timesteps = 500

# compute how many instruments in train
n_instruments   = X_train_scaled.shape[0] // train_timesteps

# infer test_timesteps so total_test_rows is divisible by n_instruments
total_test_rows = X_test_scaled.shape[0]
test_timesteps  = total_test_rows // n_instruments

# drop any leftover rows
# NOTE(review): rows are cut from the END of the concatenated array; if instruments
# have unequal test lengths the reshape below would mix instruments within a sequence.
needed = n_instruments * test_timesteps
if total_test_rows != needed:
    print(f"Dropping {total_test_rows - needed} extra test rows")
    X_test_scaled = X_test_scaled[:needed]

# convert y to numpy and drop same extras
y_train_arr = y_train.values if hasattr(y_train, "values") else y_train
y_test_arr  = y_test .values if hasattr(y_test,  "values") else y_test
if total_test_rows != needed:
    y_test_arr = y_test_arr[:needed]

# reshape to (n_instruments, timesteps, features) and (n_instruments, timesteps)
# The train reshapes raise if the train row count is not a multiple of train_timesteps.
X_train_seq = X_train_scaled.reshape(n_instruments, train_timesteps, n_features)
X_test_seq  = X_test_scaled .reshape(n_instruments, test_timesteps,  n_features)
y_train_seq = y_train_arr.reshape(  n_instruments, train_timesteps)
y_test_seq  = y_test_arr.reshape(   n_instruments, test_timesteps)

print("X_train_seq:", X_train_seq.shape)  # → (n_instruments, 500, 59)
print("X_test_seq: ", X_test_seq.shape)   # → (n_instruments, test_timesteps, 59)

# ─── 3) DATASET & DATALOADER ─────────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Pairs each feature sequence with its per-timestep tag sequence.

    ``X`` and ``y`` are NumPy arrays; the caller passes X as
    (n_seq, seq_len, n_features) and y as (n_seq, seq_len). Features are held
    as float32 and labels as int64 (the CRF layer consumes integer tag ids).
    """

    def __init__(self, X, y):
        features = torch.from_numpy(X)
        tags = torch.from_numpy(y)
        self.X = features.to(torch.float32)
        self.y = tags.to(torch.int64)

    def __len__(self):
        # Number of sequences (first axis of the feature tensor).
        return self.X.shape[0]

    def __getitem__(self, idx):
        return (self.X[idx], self.y[idx])

# Each dataset item is one whole per-instrument sequence, so a batch holds 8 instruments.
# Only the training loader shuffles; the test loader keeps dataset order.
batch_size   = 8
train_loader = DataLoader(RegimeDataset(X_train_seq, y_train_seq),
                          batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(RegimeDataset(X_test_seq,  y_test_seq),
                          batch_size=batch_size)


# ─── 4) BI-LSTM + CRF MODEL ──────────────────────────────────────────────────
class BiLSTM_CRF(nn.Module):
    """Bidirectional LSTM encoder, linear emission layer and a CRF on top.

    ``forward`` returns the CRF-decoded tag sequences, while
    ``neg_log_likelihood`` returns the negated CRF score of the gold tags and
    is what the training loop minimises.
    """

    def __init__(self,
                 in_features,
                 lstm_hidden=64,
                 dropout=0.2,
                 num_tags=2  # Bear, Bull
                 ):
        super().__init__()
        # Single bidirectional layer: every timestep yields 2 * lstm_hidden features.
        self.lstm = nn.LSTM(
            input_size=in_features,
            hidden_size=lstm_hidden,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(dropout)
        # Turns the concatenated forward/backward states into one score per tag.
        self.hidden2tag = nn.Linear(2 * lstm_hidden, num_tags)
        self.crf = CRF(num_tags, batch_first=True)

    def _emissions(self, x):
        """Per-timestep tag scores for ``x``: LSTM -> dropout -> linear."""
        encoded, _ = self.lstm(x)
        return self.hidden2tag(self.dropout(encoded))

    def forward(self, x):
        """Decode a tag sequence for every item of the batch ``x``."""
        return self.crf.decode(self._emissions(x))

    def neg_log_likelihood(self, x, tags):
        """Negated CRF log-likelihood of ``tags`` given the inputs ``x``."""
        return -self.crf(self._emissions(x), tags)


# instantiate
# Build the BiLSTM-CRF tagger on the GPU when one is available.
model = BiLSTM_CRF(in_features=n_features).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# ─── 5) TRAIN/VALID LOOP ─────────────────────────────────────────────────────
device    = next(model.parameters()).device
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 15

for epoch in range(1, num_epochs+1):
    model.train()
    total_loss = 0.0
    for Xb, yb in train_loader:
        Xb, yb = Xb.to(device), yb.to(device)
        optimizer.zero_grad()
        loss = model.neg_log_likelihood(Xb, yb)
        loss.backward()
        optimizer.step()
        # The CRF call uses its default reduction ('sum'), so `loss` is already the
        # NLL summed over the sequences of this batch. Accumulate it as is; multiplying
        # by the batch size again would inflate the reported value.
        total_loss += loss.item()
    # Mean NLL per training sequence.
    train_loss = total_loss / len(train_loader.dataset)

    # validation
    # NOTE(review): the "validation" data is test_loader; there is no separate split here.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for Xb, yb in test_loader:
            Xb, yb = Xb.to(device), yb.to(device)
            preds = model(Xb)  # list of lists, length=batch
            # convert to tensor
            preds_tensor = torch.stack([torch.tensor(p, device=device) for p in preds])
            # Accuracy is counted per timestep, not per sequence.
            correct += (preds_tensor == yb).sum().item()
            total   += yb.numel()
    val_acc = correct / total

    print(f"Epoch {epoch:02d} — Train NLL: {train_loss:.4f}, Val Acc: {val_acc:.4f}")


# ─── 6) FINAL EVALUATION ─────────────────────────────────────────────────────
# Per-timestep accuracy of the decoded tag sequences over the whole test loader.
# This repeats the validation pass of the loop above on the same loader.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for Xb, yb in test_loader:
        Xb, yb = Xb.to(device), yb.to(device)
        preds = model(Xb)
        # Stack the decoded Python lists into one (batch, seq_len) tensor for comparison.
        preds_tensor = torch.stack([torch.tensor(p, device=device) for p in preds])
        correct += (preds_tensor == yb).sum().item()
        total   += yb.numel()
print(f"\nTest Accuracy: {correct/total:.4f}")


In [None]:
# Leftover scratch cell: prints a fixed string and defines nothing.
print('Hello')

In [None]:
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm

# ─── 0) Your existing data split & scaling ──────────────────────────────────
# Per-instrument chronological split: the first 500 rows of each instrument go to
# train, everything after that to test (groupby keeps the instruments' original order).
train_X_parts, train_y_parts = [], []
test_X_parts,  test_y_parts  = [], []

for inst, X_inst in X.groupby(level='inst', sort=False):
    idx = X_inst.index
    train_idx, test_idx = idx[:500], idx[500:]
    train_X_parts.append(X.loc[train_idx])
    train_y_parts.append(y.loc[train_idx])
    test_X_parts .append(X.loc[test_idx])
    test_y_parts .append(y.loc[test_idx])

X_train = pd.concat(train_X_parts)
y_train = pd.concat(train_y_parts)
X_test  = pd.concat(test_X_parts)
y_test  = pd.concat(test_y_parts)

# Map labels 2→1, else 0
y_train = (y_train == 2).astype(int)
y_test  = (y_test  == 2).astype(int)

# Scaling statistics come from the training rows only and are re-used for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── 1) Build sliding windows ───────────────────────────────────────────────
L = 60  # window length

def make_windows(X_arr, y_arr, idx_index):
    """Cut per-instrument sliding windows of length ``L`` out of flat arrays.

    Parameters
    ----------
    X_arr : np.ndarray
        Feature rows, one row per entry of ``idx_index``.
    y_arr : np.ndarray
        Labels aligned row-for-row with ``X_arr``.
    idx_index : pandas Index / MultiIndex
        Row index whose level 0 identifies the instrument.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Stacked windows of shape (n_windows, L, n_features) and, for each
        window, the label of its last row. Instruments are visited in sorted
        order (``np.unique``); windows never span two instruments.

    Raises
    ------
    ValueError
        If no instrument has at least ``L`` rows.
    """
    Xs, ys = [], []
    inst_ids = idx_index.get_level_values(0)
    for inst in np.unique(inst_ids):
        mask = inst_ids == inst
        arr = X_arr[mask]
        labs = y_arr[mask]
        # A series of n rows holds n - L + 1 windows; the "+ 1" keeps the final
        # window (ending on the last row), which the previous bound dropped.
        for i in range(len(arr) - L + 1):
            Xs.append(arr[i : i + L])
            ys.append(labs[i + L - 1])
    if not Xs:
        raise ValueError(f"no instrument has at least {L} rows; cannot build windows")
    return np.stack(Xs), np.array(ys)

# Window the train and test rows separately, so no window straddles the split.
X_seq_train, y_seq_train = make_windows(
    X_train_scaled, y_train.values, X_train.index
)
X_seq_test, y_seq_test = make_windows(
    X_test_scaled,  y_test.values,  X_test.index
)

# ─── 2) Dataset & DataLoader wrapper ────────────────────────────────────────
class RegimeDataset(Dataset):
    """Tensor-backed dataset of (window, label) pairs, both stored as float32.

    ``X`` and ``y`` must be NumPy arrays; indexing returns the matching
    window and its scalar label.
    """

    def __init__(self, X, y):
        windows = torch.from_numpy(X)
        labels = torch.from_numpy(y)
        self.X = windows.to(torch.float32)  # (N, L, F)
        self.y = labels.to(torch.float32)   # (N,)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, i):
        return (self.X[i], self.y[i])

# ─── 3) LSTM model definition ────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM binary classifier: last hidden state -> dropout -> linear -> sigmoid.

    ``drop`` is used for the head dropout and, when ``num_layers > 1``, also
    for the dropout between stacked LSTM layers.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, drop=0.3):
        super().__init__()
        # Inter-layer dropout is only requested when the LSTM is actually stacked.
        stacked = num_layers > 1
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=drop if stacked else 0.0,
        )
        self.drop = nn.Dropout(p=drop)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        """Return one probability per sequence in the batch ``x``."""
        _, (h_n, _) = self.lstm(x)
        pooled = self.drop(h_n[-1])  # final hidden state of the last layer
        return torch.sigmoid(self.fc(pooled)).squeeze(1)

# ─── 4) Hyperparameter grid ──────────────────────────────────────────────────
# Search space: 2 values for each of 5 hyper-parameters -> 32 combinations.
# "batch_size" is consumed by the DataLoaders, "lr" by the optimiser, and the
# remaining keys by the LSTMRegime constructor in the search loop.
param_grid = {
    "hidden_dim":  [64, 128],
    "num_layers":  [1, 2],
    "dropout":     [0.2, 0.4],
    "lr":          [1e-3, 5e-4],
    "batch_size":  [128, 256]
}
grid = list(ParameterGrid(param_grid))

# ─── 5) Training settings with time limit ───────────────────────────────────
device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 10
time_limit = 30 * 60  # 30 minutes
start_time = time.time()

results = []

# The tensor datasets do not depend on the hyper-parameters, so build them once
# instead of re-converting the NumPy arrays for every configuration.
train_ds = RegimeDataset(X_seq_train, y_seq_train)
test_ds  = RegimeDataset(X_seq_test,  y_seq_test)

for i, params in enumerate(grid, 1):
    # The time budget is only checked between configurations; a configuration
    # that has started always runs all of its epochs.
    elapsed = time.time() - start_time
    if elapsed > time_limit:
        print(f"Time limit reached ({elapsed:.0f}s), stopping grid search.")
        break

    print(f"\n[{i}/{len(grid)}] Testing params: {params}")
    # DataLoaders for this batch size
    bs = params["batch_size"]
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=bs, shuffle=False)

    # Model, optimizer, criterion
    model = LSTMRegime(
        n_features = X_seq_train.shape[2],
        hidden_dim = params["hidden_dim"],
        num_layers = params["num_layers"],
        drop       = params["dropout"]
    ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
    criterion = nn.BCELoss()

    # Training loop
    for epoch in range(1, num_epochs+1):
        model.train()
        total_loss = 0.0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss  = criterion(preds, yb)
            loss.backward()
            optimizer.step()
            # Batch-mean loss weighted by batch size -> per-sample epoch average below.
            total_loss += loss.item() * xb.size(0)
        avg_loss = total_loss / len(train_loader.dataset)

        print(f"  Epoch {epoch}/{num_epochs} — loss: {avg_loss:.4f}")

    # Evaluation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in test_loader:
            xb, yb = xb.to(device), yb.to(device)
            preds = (model(xb) > 0.5).float()
            correct += (preds == yb).sum().item()
            total   += yb.numel()
    test_acc = correct / total
    print(f"  → Test Accuracy: {test_acc:.4f}")

    results.append({**params, "test_acc": test_acc})

# ─── 6) Best hyperparameters ────────────────────────────────────────────────
# NOTE(review): the winner is chosen by accuracy on the test windows, so the
# reported figure is an optimistic estimate; a separate validation split would avoid that.
if results:
    best = max(results, key=lambda x: x["test_acc"])
    print("\nBest hyperparameters and accuracy:")
    print(best)
else:
    # Nothing finished (e.g. empty grid or budget exhausted before the first run).
    print("\nNo configuration was evaluated; no best hyperparameters to report.")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Data split & scaling ─────────────────────────────────────────────────
# Chronological split per instrument: rows 0..499 -> train, the remainder -> test.
train_X_parts, train_y_parts = [], []
test_X_parts,  test_y_parts  = [], []

for inst, X_inst in X.groupby(level='inst', sort=False):
    idx = X_inst.index
    train_idx, test_idx = idx[:500], idx[500:]
    train_X_parts.append(X.loc[train_idx])
    train_y_parts.append(y.loc[train_idx])
    test_X_parts .append(X.loc[test_idx])
    test_y_parts .append(y.loc[test_idx])

X_train = pd.concat(train_X_parts); y_train = pd.concat(train_y_parts)
X_test  = pd.concat(test_X_parts);  y_test  = pd.concat(test_y_parts)

# Map labels 2→1, else 0
y_train = (y_train == 2).astype(int)
y_test  = (y_test  == 2).astype(int)

# Fit the scaler on train rows only, then apply the same statistics to test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── 1) Sliding windows (L=60) ───────────────────────────────────────────────
L = 60
def make_windows(X_arr, y_arr, idx_index):
    """Build per-instrument sliding windows of length ``L``.

    Parameters
    ----------
    X_arr : np.ndarray
        Feature rows aligned with ``idx_index``.
    y_arr : np.ndarray
        Labels aligned row-for-row with ``X_arr``.
    idx_index : pandas Index / MultiIndex
        Row index; level 0 is the instrument id.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Windows stacked to (n_windows, L, n_features) plus the label of each
        window's last row. Instruments are processed in sorted id order and
        windows never cross an instrument boundary.

    Raises
    ------
    ValueError
        If no instrument has at least ``L`` rows.
    """
    Xs, ys = [], []
    inst_ids = idx_index.get_level_values(0)
    for inst in np.unique(inst_ids):
        mask = inst_ids == inst
        arr, labs = X_arr[mask], y_arr[mask]
        # n rows give n - L + 1 windows; without the "+ 1" the window that ends
        # on the instrument's last row was silently skipped.
        for i in range(len(arr) - L + 1):
            Xs.append(arr[i : i + L])
            ys.append(labs[i + L - 1])
    if not Xs:
        raise ValueError(f"no instrument has at least {L} rows; cannot build windows")
    return np.stack(Xs), np.array(ys)

# Train and test rows are windowed independently, so no window mixes the two splits.
X_seq_train, y_seq_train = make_windows(X_train_scaled, y_train.values, X_train.index)
X_seq_test,  y_seq_test  = make_windows(X_test_scaled,  y_test.values,  X_test.index)

# ─── 2) Dataset & DataLoader ─────────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Holds windows and their labels as float32 tensors for use with a DataLoader."""

    def __init__(self, X, y):
        # Both inputs must be NumPy arrays (torch.from_numpy requirement).
        self.X = torch.from_numpy(X).to(torch.float32)  # (N, L, F)
        self.y = torch.from_numpy(y).to(torch.float32)  # (N,)

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, i):
        window = self.X[i]
        label = self.y[i]
        return window, label

# Shuffle training windows each epoch; keep test windows in their original order.
batch_size = 256  # from optimal grid search
train_loader = DataLoader(RegimeDataset(X_seq_train, y_seq_train),
                          batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(RegimeDataset(X_seq_test,  y_seq_test),
                          batch_size=batch_size, shuffle=False)

# ─── 3) Model definition ─────────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM binary classifier: last hidden state -> dropout -> linear -> sigmoid.

    Parameters
    ----------
    n_features : int
        Size of each timestep's feature vector.
    hidden_dim : int
        LSTM hidden size.
    num_layers : int
        Number of stacked LSTM layers.
    drop : float
        Dropout probability applied to the final hidden state and, when
        ``num_layers > 1``, between LSTM layers. Previously this argument was
        accepted but ignored (0.2 / 0.0 were hard-coded); the default of 0.2
        keeps existing callers' behaviour unchanged.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, drop=0.2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size   = n_features,
            hidden_size  = hidden_dim,
            num_layers   = num_layers,
            batch_first  = True,
            # nn.LSTM dropout acts between layers, so it only applies if num_layers > 1
            dropout      = drop if num_layers > 1 else 0.0
        )
        self.drop = nn.Dropout(drop)
        self.fc   = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one probability per sequence; ``x`` is (batch, L, features)."""
        out, (h_n, _) = self.lstm(x)
        h_last = h_n[-1]                    # (batch, hidden_dim)
        h_last = self.drop(h_last)
        return torch.sigmoid(self.fc(h_last)).squeeze(1)

# ─── 4) Training setup ───────────────────────────────────────────────────────
# Build the model with the hyper-parameters labelled "optimal" below and train it
# with Adam + BCELoss (the model's forward already applies the sigmoid).
device    = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model     = LSTMRegime(
    n_features = X_seq_train.shape[2],
    hidden_dim = 64,   # optimal
    num_layers = 1,    # optimal
    drop       = 0.2   # optimal
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)  # optimal LR
criterion = nn.BCELoss()

num_epochs = 10
for epoch in range(1, num_epochs+1):
    # — train —
    model.train()
    total_loss = 0.0
    for xb, yb in tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]"):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = model(xb)
        loss  = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        # Batch-mean loss weighted by batch size, so train_loss is a per-sample mean.
        total_loss += loss.item() * xb.size(0)
    train_loss = total_loss / len(train_loader.dataset)

    # — evaluate —
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in tqdm(test_loader, desc=f"Epoch {epoch}/{num_epochs} [Eval ]"):
            xb, yb = xb.to(device), yb.to(device)
            # Hard 0/1 prediction: probability above 0.5 counts as class 1.
            pred_labels = (model(xb) > 0.5).float()
            correct     += (pred_labels == yb).sum().item()
            total       += yb.numel()
    test_acc = correct / total

    print(f"\nEpoch {epoch:02d} — Train Loss: {train_loss:.4f}, Test Acc: {test_acc:.4f}\n")

# ─── 5) Final evaluation ─────────────────────────────────────────────────────
# Same computation as the last epoch's evaluation pass, run once more without tqdm.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred_labels = (model(xb) > 0.5).float()
        correct     += (pred_labels == yb).sum().item()
        total       += yb.numel()
print(f"Final Test Accuracy: {correct/total:.4f}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Split & scale exactly as you already have ───────────────────────────
# Per-instrument chronological split: first 500 rows -> train, the rest -> test.
train_X_parts, train_y_parts = [], []
test_X_parts,  test_y_parts  = [], []

for inst, X_inst in X.groupby(level='inst', sort=False):
    idx = X_inst.index
    train_idx, test_idx = idx[:500], idx[500:]
    train_X_parts.append(X.loc[train_idx])
    train_y_parts.append(y.loc[train_idx])
    test_X_parts .append(X.loc[test_idx])
    test_y_parts .append(y.loc[test_idx])

X_train = pd.concat(train_X_parts)
y_train = pd.concat(train_y_parts)
X_test  = pd.concat(test_X_parts)
y_test  = pd.concat(test_y_parts)

# Map labels 2→1, else 0
y_train = (y_train == 2).astype(int)
y_test  = (y_test  == 2).astype(int)

print("Before scaling →", "X_train:", X_train.shape, "X_test:", X_test.shape)

# The scaler only ever sees training rows; test rows re-use its mean / std.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── 1) Build sliding windows ───────────────────────────────────────────────
L = 60  # window length (you can tune this)

def make_windows(X_arr, y_arr, idx_index):
    """Slice each instrument's rows into overlapping windows of length ``L``.

    Parameters
    ----------
    X_arr : np.ndarray
        Feature rows aligned with ``idx_index``.
    y_arr : np.ndarray
        Labels aligned row-for-row with ``X_arr``.
    idx_index : pandas Index / MultiIndex
        Row index; level 0 identifies the instrument.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Windows of shape (n_windows, L, n_features) and one label per window,
        taken from the window's last row. Instruments are visited in sorted
        order; a window never contains rows from two instruments.

    Raises
    ------
    ValueError
        If no instrument has at least ``L`` rows.
    """
    Xs, ys = [], []
    inst_ids = idx_index.get_level_values(0)
    for inst in np.unique(inst_ids):
        mask = inst_ids == inst
        arr = X_arr[mask]       # (T_inst, n_features)
        labs = y_arr[mask]
        # T_inst rows yield T_inst - L + 1 windows; the former bound of
        # T_inst - L left out the window ending on the final row.
        for i in range(len(arr) - L + 1):
            Xs.append(arr[i : i + L])
            ys.append(labs[i + L - 1])
    if not Xs:
        raise ValueError(f"no instrument has at least {L} rows; cannot build windows")
    return np.stack(Xs), np.array(ys)

# Window each split on its own so that no window spans the train/test boundary.
X_seq_train, y_seq_train = make_windows(
    X_train_scaled, y_train.values, X_train.index
)
X_seq_test, y_seq_test = make_windows(
    X_test_scaled,  y_test.values,  X_test.index
)

print("Seq shapes →", X_seq_train.shape, y_seq_train.shape)

# ─── 2) Dataset & DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Float32 tensor view over NumPy windows ``X`` and labels ``y``."""

    def __init__(self, X, y):
        as_float = lambda arr: torch.from_numpy(arr).to(torch.float32)
        self.X = as_float(X)  # (N, L, F)
        self.y = as_float(y)  # (N,)

    def __len__(self):
        # One item per window.
        return self.X.shape[0]

    def __getitem__(self, i):
        return (self.X[i], self.y[i])

# num_workers=0 loads batches in the main process; only the training order is shuffled.
batch_size = 256
train_loader = DataLoader(
    RegimeDataset(X_seq_train, y_seq_train),
    batch_size=batch_size, shuffle=True,  num_workers=0
)
test_loader  = DataLoader(
    RegimeDataset(X_seq_test,  y_seq_test),
    batch_size=batch_size, shuffle=False, num_workers=0
)


# ─── 3) Pure‐LSTM model ─────────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """Sequence classifier built from an LSTM and a one-unit sigmoid head.

    The final hidden state of the last LSTM layer is passed through dropout
    and a linear layer; the output is one probability per input sequence.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, drop=0.2):
        super().__init__()
        # Dropout between LSTM layers is only meaningful for a stacked LSTM.
        inter_layer_drop = 0.0 if not num_layers > 1 else drop
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_drop,
        )
        self.drop = nn.Dropout(p=drop)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """``x`` is batch-first: (batch, L, F). Returns a (batch,) tensor."""
        _, (h_n, _) = self.lstm(x)
        # h_n is (num_layers, batch, hidden_dim); index -1 picks the last layer.
        summary = self.drop(h_n[-1])
        prob = torch.sigmoid(self.fc(summary))
        return prob.squeeze(1)

# Default-configured model on the available device, trained with Adam and
# BCELoss on the sigmoid probabilities returned by LSTMRegime.forward.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model  = LSTMRegime(n_features=X_seq_train.shape[2]).to(device)
opt    = torch.optim.Adam(model.parameters(), lr=5e-4)
crit   = nn.BCELoss()

# ─── 4) Train & evaluate with tqdm ─────────────────────────────────────────
# NOTE(review): per-epoch accuracy is measured on test_loader, i.e. the test
# split is also the monitoring set in this cell.
num_epochs = 10
for epoch in range(1, num_epochs+1):
    # — train —
    model.train()
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]", leave=False)
    total_loss = 0
    for xb, yb in train_bar:
        xb, yb = xb.to(device), yb.to(device)
        preds  = model(xb)
        loss   = crit(preds, yb)
        opt.zero_grad()
        loss.backward()
        opt.step()

        batch_loss = loss.item()
        # Scale by batch size so that dividing by the dataset size gives a per-sample mean.
        total_loss += batch_loss * xb.size(0)
        train_bar.set_postfix(loss=f"{batch_loss:.4f}")

    avg_loss = total_loss / len(train_loader.dataset)

    # — eval —
    model.eval()
    correct = 0
    eval_bar = tqdm(test_loader, desc=f"Epoch {epoch}/{num_epochs} [Eval ]", leave=False)
    with torch.no_grad():
        for xb, yb in eval_bar:
            xb, yb = xb.to(device), yb.to(device)
            # Probabilities above 0.5 are counted as class 1.
            preds = (model(xb) > 0.5).float()
            correct += (preds == yb).sum().item()
            acc_batch = (preds == yb).float().mean().item()
            eval_bar.set_postfix(acc=f"{acc_batch:.4f}")

    epoch_acc = correct / len(test_loader.dataset)
    print(f"Epoch {epoch:02d} — Train loss: {avg_loss:.4f},  Test acc: {epoch_acc:.4f}")


In [None]:
import torch
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from tqdm import tqdm

# 1) Evaluation helper
def eval_accuracy(model, X_seq, y_seq, batch_size=256):
    """Share of windows whose thresholded (> 0.5) model output equals the label.

    Wraps the arrays in the notebook-level ``RegimeDataset``, moves batches to
    the notebook-level ``device`` and leaves ``model`` in eval mode.
    """
    batches = DataLoader(RegimeDataset(X_seq, y_seq), batch_size=batch_size, shuffle=False)
    model.eval()
    n_hits = 0
    n_seen = 0
    with torch.no_grad():
        for features, targets in batches:
            features = features.to(device)
            targets = targets.to(device)
            hard_preds = (model(features) > 0.5).float()
            n_hits += (hard_preds == targets).sum().item()
            n_seen += targets.numel()
    return n_hits / n_seen

# 2) Compute baseline accuracy
# Reference accuracy with every feature intact.
baseline_acc = eval_accuracy(model, X_seq_test, y_seq_test)
print(f"Baseline Test Accuracy: {baseline_acc:.4f}")

# 3) Ablate each feature
# Importance of feature f = accuracy lost when column f of every test window is
# set to 0.0; a negative value means accuracy went UP without the feature.
# NOTE(review): rebinding `F` replaces the `torch.nn.functional as F` alias that an
# earlier cell imports.
F = X_seq_test.shape[2]
importances = []
for f in tqdm(range(F), desc="Ablating features"):
    X_abl = X_seq_test.copy()
    X_abl[..., f] = 0.0
    acc = eval_accuracy(model, X_abl, y_seq_test)
    importances.append(baseline_acc - acc)

# 4) Build & display a sorted DataFrame
df_imp = pd.DataFrame({
    "feature_idx": np.arange(F),
    "importance": importances
}).sort_values("importance", ascending=False).reset_index(drop=True)

print(df_imp)


In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader

# ─── assume df is your original DataFrame with MultiIndex (inst, time),
#     and its columns are exactly the features in X_seq_* order ──────────────
#     e.g. df.columns = ["close", "log_price", "roll_pct_20", …, "vol_ratio_30"]

# 1) grab the feature names
# NOTE(review): names are matched to window columns purely by position, so `df`
# must have the same column order as the X used to build X_seq_* — confirm.
feature_names = list(df.columns)   # length should be F=59

# 2) (re-)compute importances if you don’t already have them:
def eval_accuracy(model, X_seq, y_seq, batch_size=256):
    """Accuracy of ``model`` on the given windows, thresholding outputs at 0.5.

    Uses the notebook-level ``RegimeDataset`` and ``device``; the model is
    switched to eval mode and stays there after the call.
    """
    dataset = RegimeDataset(X_seq, y_seq)
    batches = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for inputs, labels in batches:
            inputs, labels = inputs.to(device), labels.to(device)
            decided = (model(inputs) > 0.5).float()
            matches = decided == labels
            hits += matches.sum().item()
            seen += labels.numel()
    return hits / seen

# Reference accuracy with all features present.
baseline_acc = eval_accuracy(model, X_seq_test, y_seq_test)

# Importance of feature f = baseline accuracy minus the accuracy obtained when
# column f of every test window is zeroed; negative means zeroing helped.
importances = []
F = X_seq_test.shape[2]
for f in tqdm(range(F), desc="Ablating features"):
    X_abl = X_seq_test.copy()
    X_abl[..., f] = 0.0
    acc = eval_accuracy(model, X_abl, y_seq_test)
    importances.append(baseline_acc - acc)

# 3) build a DataFrame and map names
df_imp = pd.DataFrame({
    "feature_idx": np.arange(F),
    "importance": importances
})
# Positional lookup: raises IndexError if feature_names has fewer than F entries.
df_imp["feature_name"] = df_imp["feature_idx"].map(lambda i: feature_names[i])

# 4) sort by importance descending
df_imp = df_imp.sort_values("importance", ascending=False).reset_index(drop=True)

# 5) show
print(df_imp[["feature_idx","feature_name","importance"]])


In [None]:
# 1) Identify negatively‐important features
# Features whose ablation INCREASED test accuracy (importance < 0).
# NOTE(review): this selection is driven by test-set accuracy, so pruning on it
# tunes the feature set to the test data.
neg_feats = df_imp.loc[df_imp["importance"] < 0, "feature_name"].tolist()
print("Removing these negatively‐important features:", neg_feats)

# 2) Drop them from your original DataFrame
# drop() returns a new frame; `df` itself is left untouched.
df_pruned = df.drop(columns=neg_feats)

# 3) (Optional) Verify
print("Original df columns:", len(df.columns))
print("Pruned df columns:  ", len(df_pruned.columns))
print("Remaining features:", df_pruned.columns.tolist())


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Split & scale exactly as you already have ───────────────────────────
# For every instrument, rows 0..499 form the training part and later rows the test part.
train_X_parts, train_y_parts = [], []
test_X_parts,  test_y_parts  = [], []

for inst, X_inst in X.groupby(level='inst', sort=False):
    idx = X_inst.index
    train_idx, test_idx = idx[:500], idx[500:]
    train_X_parts.append(X.loc[train_idx])
    train_y_parts.append(y.loc[train_idx])
    test_X_parts .append(X.loc[test_idx])
    test_y_parts .append(y.loc[test_idx])

X_train = pd.concat(train_X_parts)
y_train = pd.concat(train_y_parts)
X_test  = pd.concat(test_X_parts)
y_test  = pd.concat(test_y_parts)

# Map labels 2→1, else 0
y_train = (y_train == 2).astype(int)
y_test  = (y_test  == 2).astype(int)

print("Before scaling →", "X_train:", X_train.shape, "X_test:", X_test.shape)

# Standardise with training statistics only; the test rows are transformed, never fitted.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── 1) Build sliding windows ───────────────────────────────────────────────
L = 60  # window length

def make_windows(X_arr, y_arr, idx_index):
    """Produce overlapping length-``L`` windows separately for every instrument.

    Parameters
    ----------
    X_arr : np.ndarray
        Feature rows aligned with ``idx_index``.
    y_arr : np.ndarray
        Labels aligned row-for-row with ``X_arr``.
    idx_index : pandas Index / MultiIndex
        Row index whose level 0 is the instrument id.

    Returns
    -------
    (np.ndarray, np.ndarray)
        (n_windows, L, n_features) windows and the label of each window's last
        row. Instruments are handled in sorted id order and windows stay
        within a single instrument.

    Raises
    ------
    ValueError
        If no instrument has at least ``L`` rows.
    """
    Xs, ys = [], []
    inst_ids = idx_index.get_level_values(0)
    for inst in np.unique(inst_ids):
        mask = inst_ids == inst
        arr  = X_arr[mask]       # (T_inst, n_features)
        labs = y_arr[mask]
        # Inclusive upper start index is T_inst - L, hence the "+ 1"; the old
        # bound skipped the window that ends on the instrument's last row.
        for i in range(len(arr) - L + 1):
            Xs.append(arr[i : i + L])
            ys.append(labs[i + L - 1])
    if not Xs:
        raise ValueError(f"no instrument has at least {L} rows; cannot build windows")
    return np.stack(Xs), np.array(ys)

# Build train and test windows from their own rows only (no cross-split windows).
X_seq_train, y_seq_train = make_windows(
    X_train_scaled, y_train.values, X_train.index
)
X_seq_test, y_seq_test = make_windows(
    X_test_scaled,  y_test.values,  X_test.index
)

print("Seq shapes →", X_seq_train.shape, y_seq_train.shape)

# ─── 2) Dataset & DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Dataset of (window, label) pairs backed by two float32 tensors."""

    def __init__(self, X, y):
        # torch.from_numpy requires NumPy inputs; both are converted to float32.
        x_tensor = torch.from_numpy(X)
        y_tensor = torch.from_numpy(y)
        self.X = x_tensor.to(torch.float32)  # (N, L, F)
        self.y = y_tensor.to(torch.float32)  # (N,)

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, i):
        return (self.X[i], self.y[i])

# Training batches are reshuffled every epoch; test batches keep window order.
batch_size = 256
train_loader = DataLoader(
    RegimeDataset(X_seq_train, y_seq_train),
    batch_size=batch_size, shuffle=True,  num_workers=0
)
test_loader  = DataLoader(
    RegimeDataset(X_seq_test,  y_seq_test),
    batch_size=batch_size, shuffle=False, num_workers=0
)

# ─── 3) Pure‐LSTM model ─────────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM encoder with a dropout + linear + sigmoid head for binary labels.

    ``drop`` sets the head dropout and, for ``num_layers > 1``, the dropout
    between stacked LSTM layers.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, drop=0.2):
        super().__init__()
        lstm_kwargs = dict(
            input_size=n_features,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            # Inter-layer dropout is only requested for a stacked LSTM.
            dropout=(drop if num_layers > 1 else 0.0),
        )
        self.lstm = nn.LSTM(**lstm_kwargs)
        self.drop = nn.Dropout(drop)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map a batch-first (batch, L, F) input to a (batch,) probability tensor."""
        _, (h_n, _) = self.lstm(x)
        last_layer_state = h_n[-1]            # (batch, hidden_dim)
        score = self.fc(self.drop(last_layer_state))
        return torch.sigmoid(score).squeeze(1)

# Model with default hyper-parameters, Adam optimiser and BCELoss on the sigmoid
# probabilities produced by LSTMRegime.forward.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model  = LSTMRegime(n_features=X_seq_train.shape[2]).to(device)
opt    = torch.optim.Adam(model.parameters(), lr=5e-4)
crit   = nn.BCELoss()

# ─── 4) Train ───────────────────────────────────────────────────────────────
# Training only; evaluation happens after the loop via eval_accuracy.
num_epochs = 10
for epoch in range(1, num_epochs+1):
    model.train()
    total_loss = 0.0
    for xb, yb in tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]"):
        xb, yb = xb.to(device), yb.to(device)
        preds  = model(xb)
        loss   = crit(preds, yb)
        opt.zero_grad()
        loss.backward()
        opt.step()
        # Batch-mean loss times batch size, so avg_loss below is a per-sample mean.
        total_loss += loss.item() * xb.size(0)
    avg_loss = total_loss / len(train_loader.dataset)
    print(f"Epoch {epoch:02d} Train loss: {avg_loss:.4f}")

# ─── 5) Evaluation helper ───────────────────────────────────────────────────
def eval_accuracy(model, X_seq, y_seq, batch_size=256):
    """Return the fraction of windows classified correctly at a 0.5 threshold.

    Depends on the notebook-level ``RegimeDataset`` and ``device``. The model
    is put into eval mode (and left in it); no gradients are recorded.
    """
    window_loader = DataLoader(
        RegimeDataset(X_seq, y_seq), batch_size=batch_size, shuffle=False
    )
    model.eval()
    right = 0
    count = 0
    with torch.no_grad():
        for batch_x, batch_y in window_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            labels_hat = (model(batch_x) > 0.5).float()
            right += (labels_hat == batch_y).sum().item()
            count += batch_y.numel()
    return right / count

# ─── 6) Baseline full‐59 evaluation ─────────────────────────────────────────
# Accuracy of the freshly trained model on the unmodified test windows.
base_acc = eval_accuracy(model, X_seq_test, y_seq_test)
print(f"Baseline Test Accuracy (59 features): {base_acc:.4f}")

# ─── 7) Ablate the 15 negative features at inference ───────────────────────
# Hard-coded list of feature names to zero out together.
# NOTE(review): presumably copied from an earlier ablation run's output — confirm
# it still matches the current df_imp.
negative_feats = [
    'donch_pct_50','macd_line','streak_down','velocity','adx_14',
    'log_price','close','price_minus_sma_50','sma_12_26_diff',
    'lr_std_100','bb_width_30','percent_b_100','std_20','std_10','lr_std_30'
]
all_features = list(X.columns)  # original 59 feature names
# list.index raises ValueError if any listed name is not a column of X.
neg_idx = [all_features.index(f) for f in negative_feats]

# zero‐out those dims in the test windows
# The model is NOT retrained: only its inputs change at inference time.
X_seq_test_ablate = X_seq_test.copy()
X_seq_test_ablate[..., neg_idx] = 0.0

ablate_acc = eval_accuracy(model, X_seq_test_ablate, y_seq_test)
print(f"Ablated Test Accuracy (zeroed 15 negatives): {ablate_acc:.4f}")
print(f"Δ Accuracy: {ablate_acc - base_acc:+.4f}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Assume you have:
#    X: DataFrame with MultiIndex (inst, time) and 59 feature columns
#    y: Series  with the same MultiIndex, values in {0,1,2} (only label 2 is mapped to the positive class below)

# ─── 1) Build per‐instrument arrays of exactly 750 timesteps ──────────────
# Collect, per instrument, the first 750 feature rows and their labels.
# NOTE(review): np.stack below needs every instrument to contribute the same
# number of rows, i.e. at least 750 each — confirm for the current data.
raw_X_parts = []
raw_y_parts = []
for inst, grp in X.groupby(level='inst', sort=False):
    arr  = grp.iloc[:750].values                # shape (750, 59)
    labs = y.loc[grp.index[:750]].values        # shape (750,)
    raw_X_parts.append(arr)
    raw_y_parts.append(labs)

X_all = np.stack(raw_X_parts)  # (n_inst, 750, 59)
y_all = np.stack(raw_y_parts)  # (n_inst, 750)

# ─── 2) Binarise labels: 2→1, every other value→0 ─────────────────────────
y_all = (y_all == 2).astype(np.float32)  # now in {0.0,1.0}

# NOTE(review): binding `F` here replaces the `torch.nn.functional as F` alias
# that an earlier cell imports.
n_inst, T, F = X_all.shape

# ─── 3) Fit scaler on train portion (first 450 steps) and transform all ───
scaler = StandardScaler()
flat_train = X_all[:, :450, :].reshape(-1, F)  # (n_inst*450, 59)
scaler.fit(flat_train)

# Flatten to 2-D for the scaler, then restore the (n_inst, T, F) layout.
X_all_scaled = scaler.transform(X_all.reshape(-1, F)).reshape(n_inst, T, F)

# ─── 4) Create sliding windows (L=60) and split by end‐index ─────────────
# A window is assigned to train / val / test by the time index of its LAST row:
# < 450 → train, 450..599 → val, ≥ 600 → test. Early val/test windows therefore
# still contain input rows from the preceding period.
L = 60
Xw_train, yw_train = [], []
Xw_val,   yw_val   = [], []
Xw_test,  yw_test  = [], []

for inst_idx in range(n_inst):
    series = X_all_scaled[inst_idx]  # (750,59)
    labels = y_all[inst_idx]         # (750,)
    for i in range(T - L + 1):       # 0 .. 690
        window = series[i : i+L]     # (60,59)
        lab    = labels[i + L - 1]   # float 0.0/1.0
        end_t  = i + L - 1
        if end_t < 450:
            Xw_train.append(window); yw_train.append(lab)
        elif end_t < 600:
            Xw_val.append(window);   yw_val.append(lab)
        else:
            Xw_test.append(window);  yw_test.append(lab)

# Labels are cast to float32 here; the window arrays keep the scaler's output dtype.
X_seq_train = np.stack(Xw_train)
y_seq_train = np.array(yw_train, dtype=np.float32)
X_seq_val   = np.stack(Xw_val)
y_seq_val   = np.array(yw_val,   dtype=np.float32)
X_seq_test  = np.stack(Xw_test)
y_seq_test  = np.array(yw_test,  dtype=np.float32)

print("Train windows:", X_seq_train.shape, y_seq_train.shape)
print(" Val windows:", X_seq_val.shape,   y_seq_val.shape)
print("Test windows:", X_seq_test.shape,  y_seq_test.shape)

# ─── 5) Dataset + DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Map-style dataset that pairs each feature window with its label.

    Both inputs must be NumPy arrays; they are wrapped with
    ``torch.from_numpy``, so the tensors keep the arrays' dtype.
    ``X`` is indexed along its first axis in step with ``y``.
    """

    def __init__(self, X, y):
        self.X, self.y = torch.from_numpy(X), torch.from_numpy(y)

    def __len__(self):
        # Number of samples = size of the leading axis of X.
        return self.X.shape[0]

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

batch_size = 256
# One loader per split. Only the training loader reshuffles its samples;
# validation and test keep their construction order.
train_loader, val_loader, test_loader = (
    DataLoader(RegimeDataset(feats, targs), batch_size=batch_size, shuffle=do_shuffle)
    for feats, targs, do_shuffle in (
        (X_seq_train, y_seq_train, True),
        (X_seq_val,   y_seq_val,   False),
        (X_seq_test,  y_seq_test,  False),
    )
)

# ─── 6) Define your LSTM regime‐classifier ─────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM encoder followed by dropout and a one-unit sigmoid head.

    ``forward`` takes a batch-first tensor ``(batch, seq_len, n_features)``
    and returns one probability per sample, shape ``(batch,)``.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, dropout=0.2):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers, so it is
        # switched off for a single-layer network.
        between_layers = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            n_features,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=between_layers,
        )
        self.drop = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # Only the final hidden state is used; per-step outputs are discarded.
        _, (hidden, _) = self.lstm(x)
        summary = self.drop(hidden[-1])      # top layer: (batch, hidden_dim)
        logit = self.fc(summary)             # (batch, 1)
        return torch.sigmoid(logit).squeeze(1)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = LSTMRegime(n_features=F)
model = model.to(device)
opt = torch.optim.Adam(model.parameters(), lr=5e-4)
crit = nn.BCELoss()  # the model already outputs probabilities

# --- 7) Fit on the training windows; report validation accuracy every epoch
num_epochs = 10
for epoch in range(1, num_epochs+1):
    # training pass
    model.train()
    train_loss = 0.0
    progress = tqdm(train_loader, desc=f"[Train] Epoch {epoch}/{num_epochs}")
    for xb, yb in progress:
        xb = xb.to(device)
        yb = yb.to(device)
        opt.zero_grad()
        loss = crit(model(xb), yb)
        loss.backward()
        opt.step()
        # Weight the batch-mean loss by batch size so the epoch figure is a
        # per-sample mean even when the last batch is smaller.
        train_loss += xb.size(0) * loss.item()
    train_loss = train_loss / len(train_loader.dataset)

    # validation pass (no gradient tracking, dropout disabled)
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            plabels = model(xb).gt(0.5).float()   # threshold at 0.5
            correct += plabels.eq(yb).sum().item()
            total += yb.numel()
    val_acc = correct / total

    print(f"Epoch {epoch:02d} — Train Loss: {train_loss:.4f}, Val Acc: {val_acc:.4f}")

# --- 8) Accuracy on the held-out test windows
model.eval()
correct = total = 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        plabels = model(xb).gt(0.5).float()
        correct += plabels.eq(yb).sum().item()
        total += yb.numel()
test_acc = correct / total
print(f"Final Test Accuracy: {test_acc:.4f}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Assume you have:
#    X: DataFrame with MultiIndex (inst, time) and 59 feature columns
#    y: Series  with the same MultiIndex, values in {1,2}

# ─── 1) Build per‐instrument arrays of exactly 750 timesteps ──────────────
raw_X_parts, raw_y_parts = [], []
for inst, grp in X.groupby(level='inst', sort=False):
    # First 750 rows of this instrument and the labels at the same index.
    # np.stack below needs every instrument to supply the same row count.
    raw_X_parts.append(grp.iloc[:750].values)
    raw_y_parts.append(y.loc[grp.index[:750]].values)

X_all = np.stack(raw_X_parts)  # (n_inst, rows, n_features)
y_all = np.stack(raw_y_parts)  # (n_inst, rows)

n_inst, T, F = X_all.shape

# --- 2) Binarise labels: 1.0 where the label equals 2, 0.0 for any other value
y_all = (y_all == 2).astype(np.float32)

# --- 3) Standardise with statistics taken from the first 450 steps only
flat_train = X_all[:, :450, :].reshape(-1, F)   # (n_inst*450, n_features)
scaler = StandardScaler()
scaler.fit(flat_train)

# Scale every step, restore the 3-D layout, and store as float32 for torch.
X_all_scaled = scaler.transform(X_all.reshape(-1, F))
X_all_scaled = X_all_scaled.reshape(n_inst, T, F).astype(np.float32)

# --- 4) Sliding windows of length L; each window goes to the split that
#        contains its last step, and carries the label of that step.
L = 60
Xw_train, yw_train = [], []
Xw_val,   yw_val   = [], []
Xw_test,  yw_test  = [], []

for series, labels in zip(X_all_scaled, y_all):
    for end_t in range(L - 1, T):
        if end_t < 450:
            dest_X, dest_y = Xw_train, yw_train
        elif end_t < 600:
            dest_X, dest_y = Xw_val, yw_val
        else:
            dest_X, dest_y = Xw_test, yw_test
        dest_X.append(series[end_t - L + 1 : end_t + 1])   # (L, F)
        dest_y.append(labels[end_t])

# Stack the per-window lists into float32 arrays.
X_seq_train, y_seq_train = np.stack(Xw_train).astype(np.float32), np.array(yw_train, dtype=np.float32)
X_seq_val,   y_seq_val   = np.stack(Xw_val).astype(np.float32),   np.array(yw_val,   dtype=np.float32)
X_seq_test,  y_seq_test  = np.stack(Xw_test).astype(np.float32),  np.array(yw_test,  dtype=np.float32)

print("Train windows:", X_seq_train.shape, y_seq_train.shape)
print(" Val windows:", X_seq_val.shape,   y_seq_val.shape)
print("Test windows:", X_seq_test.shape,  y_seq_test.shape)

# ─── 5) Dataset + DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Map-style dataset that pairs each feature window with its label.

    Both inputs must be NumPy arrays; they are wrapped with
    ``torch.from_numpy``, so the tensors keep the arrays' dtype
    (float32 arrays give float32 tensors).
    """

    def __init__(self, X, y):
        self.X, self.y = torch.from_numpy(X), torch.from_numpy(y)

    def __len__(self):
        # Number of samples = size of the leading axis of X.
        return self.X.shape[0]

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

batch_size = 256
# One loader per split. Only the training loader reshuffles its samples;
# validation and test keep their construction order.
train_loader, val_loader, test_loader = (
    DataLoader(RegimeDataset(feats, targs), batch_size=batch_size, shuffle=do_shuffle)
    for feats, targs, do_shuffle in (
        (X_seq_train, y_seq_train, True),
        (X_seq_val,   y_seq_val,   False),
        (X_seq_test,  y_seq_test,  False),
    )
)

# ─── 6) LSTM model ─────────────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM encoder followed by dropout and a one-unit sigmoid head.

    ``forward`` takes a batch-first float tensor ``(batch, seq_len, n_features)``
    and returns one probability per sample, shape ``(batch,)``.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, dropout=0.2):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers, so it is
        # switched off for a single-layer network.
        between_layers = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            n_features,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=between_layers,
        )
        self.drop = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # Only the final hidden state is used; per-step outputs are discarded.
        _, (hidden, _) = self.lstm(x)
        summary = self.drop(hidden[-1])      # top layer: (batch, hidden_dim)
        logit = self.fc(summary)             # (batch, 1)
        return torch.sigmoid(logit).squeeze(1)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = LSTMRegime(n_features=F)
model = model.to(device)
opt = torch.optim.Adam(model.parameters(), lr=5e-4)
crit = nn.BCELoss()  # the model already outputs probabilities

# --- 7) Fit on the training windows; report validation accuracy every epoch
num_epochs = 10
for epoch in range(1, num_epochs+1):
    # training pass
    model.train()
    train_loss = 0.0
    progress = tqdm(train_loader, desc=f"[Train] Epoch {epoch}/{num_epochs}")
    for xb, yb in progress:
        xb = xb.to(device)
        yb = yb.to(device)
        opt.zero_grad()
        loss = crit(model(xb), yb)
        loss.backward()
        opt.step()
        # Weight the batch-mean loss by batch size so the epoch figure is a
        # per-sample mean even when the last batch is smaller.
        train_loss += xb.size(0) * loss.item()
    train_loss = train_loss / len(train_loader.dataset)

    # validation pass (no gradient tracking, dropout disabled)
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            plabels = model(xb).gt(0.5).float()   # threshold at 0.5
            correct += plabels.eq(yb).sum().item()
            total += yb.numel()
    val_acc = correct / total

    print(f"Epoch {epoch:02d} — Train Loss: {train_loss:.4f}, Val Acc: {val_acc:.4f}")

# --- 8) Accuracy on the held-out test windows
model.eval()
correct = total = 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        plabels = model(xb).gt(0.5).float()
        correct += plabels.eq(yb).sum().item()
        total += yb.numel()
test_acc = correct / total
print(f"Final Test Accuracy: {test_acc:.4f}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Assume:
#     X: DataFrame with MultiIndex (inst, time) and 59 feature columns
#     y: Series  with same MultiIndex, values in {1,2}

# ─── 1) Build per‐instrument arrays (750 timesteps each) ───────────────────
raw_X_parts, raw_y_parts = [], []
for inst, grp in X.groupby(level='inst', sort=False):
    # First 750 rows of this instrument. np.stack below needs every
    # instrument to supply the same row count.
    head_index = grp.index[:750]
    raw_X_parts.append(grp.iloc[:750].values)
    # Binary target: 1.0 where the label equals 2, 0.0 for any other value.
    raw_y_parts.append((y.loc[head_index] == 2).astype(np.float32).values)

X_all = np.stack(raw_X_parts).astype(np.float32)  # (n_inst, rows, n_features)
y_all = np.stack(raw_y_parts).astype(np.float32)  # (n_inst, rows)

n_inst, T, F = X_all.shape

# --- 2) Standardise with statistics taken from the first 600 steps only
flat_train = X_all[:, :600, :].reshape(-1, F)    # (n_inst*600, n_features)
scaler = StandardScaler()
scaler.fit(flat_train)

X_all_scaled = scaler.transform(X_all.reshape(-1, F))
X_all_scaled = X_all_scaled.reshape(n_inst, T, F).astype(np.float32)

# --- 3) Sliding windows of length L. A window whose last step falls before
#        step 600 is a training sample; all later windows are test samples.
L = 60
Xw_train, yw_train = [], []
Xw_test,  yw_test  = [], []

for series, labels in zip(X_all_scaled, y_all):
    for end_t in range(L - 1, T):
        if end_t < 600:
            dest_X, dest_y = Xw_train, yw_train
        else:
            dest_X, dest_y = Xw_test, yw_test
        dest_X.append(series[end_t - L + 1 : end_t + 1])   # (L, F)
        dest_y.append(labels[end_t])                       # label of last step

# Stack the per-window lists into arrays.
X_seq_train, y_seq_train = np.stack(Xw_train), np.array(yw_train, dtype=np.float32)
X_seq_test,  y_seq_test  = np.stack(Xw_test),  np.array(yw_test,  dtype=np.float32)

print("Train windows:", X_seq_train.shape, y_seq_train.shape)
print("Test  windows:", X_seq_test.shape,  y_seq_test.shape)

# ─── 4) Dataset & DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Map-style dataset that pairs each feature window with its label.

    Both inputs must be NumPy arrays; they are wrapped with
    ``torch.from_numpy``, so the tensors keep the arrays' dtype
    (float32 arrays give float32 tensors).
    """

    def __init__(self, X, y):
        self.X, self.y = torch.from_numpy(X), torch.from_numpy(y)

    def __len__(self):
        # Number of samples = size of the leading axis of X.
        return self.X.shape[0]

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

batch_size = 256
# The training loader reshuffles its samples; the test loader keeps the
# construction order.
train_loader, test_loader = (
    DataLoader(RegimeDataset(feats, targs), batch_size=batch_size, shuffle=do_shuffle)
    for feats, targs, do_shuffle in (
        (X_seq_train, y_seq_train, True),
        (X_seq_test,  y_seq_test,  False),
    )
)

# ─── 5) Define LSTM model ───────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM encoder followed by dropout and a one-unit sigmoid head.

    ``forward`` takes a batch-first tensor ``(batch, seq_len, n_features)``
    and returns one probability per sample, shape ``(batch,)``.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, dropout=0.2):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers, so it is
        # switched off for a single-layer network.
        between_layers = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            n_features,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=between_layers,
        )
        self.drop = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # Only the final hidden state is used; per-step outputs are discarded.
        _, (hidden, _) = self.lstm(x)
        summary = self.drop(hidden[-1])      # top layer: (batch, hidden_dim)
        logit = self.fc(summary)             # (batch, 1)
        return torch.sigmoid(logit).squeeze(1)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = LSTMRegime(n_features=F)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.BCELoss()  # the model already outputs probabilities

# --- 6) Fit on the training windows (last step < 600)
num_epochs = 10
for epoch in range(1, num_epochs+1):
    model.train()
    total_loss = 0.0
    progress = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]")
    for xb, yb in progress:
        xb = xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by batch size so the epoch figure is a
        # per-sample mean even when the last batch is smaller.
        total_loss += xb.size(0) * loss.item()
    avg_loss = total_loss / len(train_loader.dataset)
    print(f"Epoch {epoch:02d} — Train Loss: {avg_loss:.4f}")

# --- 7) Accuracy on the test windows (no gradient tracking, dropout disabled)
model.eval()
correct = total = 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        pred_labels = model(xb).gt(0.5).float()   # threshold at 0.5
        correct += pred_labels.eq(yb).sum().item()
        total += yb.numel()
test_acc = correct / total
print(f"\nTest Accuracy: {test_acc:.4f}")

# ─── 8) Ablation at inference ──────────────────────────────────────────────
# Features to neutralise at inference time.
negative_feats = [
    'donch_pct_50', 'macd_line', 'streak_down', 'velocity', 'adx_14',
    'log_price', 'close', 'price_minus_sma_50', 'sma_12_26_diff',
    'lr_std_100', 'bb_width_30', 'percent_b_100', 'std_20', 'std_10',
    'lr_std_30',
]
# Column position of each feature; raises ValueError if a name is absent.
all_feats = list(X.columns)
neg_idx = list(map(all_feats.index, negative_feats))

# Zero the selected columns in a copy of the test windows. The model is not
# retrained; the inputs were standardised, so 0.0 is the scaler's fitted mean.
X_seq_test_ablate = X_seq_test.copy()
X_seq_test_ablate[:, :, neg_idx] = 0.0

ablate_loader = DataLoader(
    RegimeDataset(X_seq_test_ablate, y_seq_test),
    batch_size=batch_size,
    shuffle=False,
)

model.eval()
correct_ab = total_ab = 0
with torch.no_grad():
    for xb, yb in ablate_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        pred_labels = model(xb).gt(0.5).float()
        correct_ab += pred_labels.eq(yb).sum().item()
        total_ab += yb.numel()
ablate_acc = correct_ab / total_ab

# A positive delta means accuracy went up once the features were zeroed.
print(f"Ablated Test Accuracy: {ablate_acc:.4f}")
print(f"Δ Accuracy: {ablate_acc - test_acc:+.4f}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Assume:
#     X: DataFrame with MultiIndex (inst, time) and 59 feature columns
#     y: Series  with same MultiIndex, values in {1,2}

# ─── 1) Build per‐instrument arrays (750 timesteps each) ───────────────────
raw_X_parts = []
raw_y_parts = []
for inst, grp in X.groupby(level='inst', sort=False):
    # First 750 rows of this instrument. np.stack below needs every
    # instrument to supply the same row count.
    arr  = grp.iloc[:750].values
    # Binary target: 1.0 where the label equals 2, 0.0 for any other value.
    labs = (y.loc[grp.index[:750]] == 2).astype(np.float32).values
    raw_X_parts.append(arr)
    raw_y_parts.append(labs)

X_all = np.stack(raw_X_parts).astype(np.float32)  # (n_inst, rows, n_features)
y_all = np.stack(raw_y_parts).astype(np.float32)  # (n_inst, rows)

n_inst, T, F = X_all.shape

# --- Split point: windows ending before this step train, the rest test
train_steps = 700

# --- 2) Standardise with statistics taken from the training span only
scaler = StandardScaler()
flat_train = X_all[:, :train_steps, :].reshape(-1, F)    # (n_inst*train_steps, n_features)
scaler.fit(flat_train)

X_all_scaled = scaler.transform(X_all.reshape(-1, F)) \
                   .reshape(n_inst, T, F) \
                   .astype(np.float32)

# --- 3) Sliding windows of length L, split by the index of the last step
L = 60
Xw_train, yw_train = [], []
Xw_test,  yw_test  = [], []

for inst_idx in range(n_inst):
    series = X_all_scaled[inst_idx]
    labels = y_all[inst_idx]
    for i in range(T - L + 1):
        end_t  = i + L - 1            # last step covered; its label is the target
        window = series[i : i + L]    # (L, F)
        lab    = labels[end_t]

        if end_t < train_steps:
            Xw_train.append(window); yw_train.append(lab)
        else:
            Xw_test.append(window);  yw_test.append(lab)

# FIX: stack the freshly built window lists. Without this the loaders below
# either fail with NameError on a clean kernel or silently reuse arrays left
# behind by an earlier cell (a different split), so train_steps had no effect.
X_seq_train = np.stack(Xw_train)
y_seq_train = np.array(yw_train, dtype=np.float32)
X_seq_test  = np.stack(Xw_test)
y_seq_test  = np.array(yw_test,  dtype=np.float32)

# sanity check
print(f"T={T}, L={L}, train_steps={train_steps}")
print(f"→ collected {len(Xw_train)} train windows")
print(f"→ collected {len(Xw_test)} test  windows")


# ─── 4) Dataset & DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Map-style dataset that pairs each feature window with its label.

    Both inputs must be NumPy arrays; they are wrapped with
    ``torch.from_numpy``, so the tensors keep the arrays' dtype
    (float32 arrays give float32 tensors).
    """

    def __init__(self, X, y):
        self.X, self.y = torch.from_numpy(X), torch.from_numpy(y)

    def __len__(self):
        # Number of samples = size of the leading axis of X.
        return self.X.shape[0]

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

batch_size = 256
# The training loader reshuffles its samples; the test loader keeps the
# construction order.
train_loader, test_loader = (
    DataLoader(RegimeDataset(feats, targs), batch_size=batch_size, shuffle=do_shuffle)
    for feats, targs, do_shuffle in (
        (X_seq_train, y_seq_train, True),
        (X_seq_test,  y_seq_test,  False),
    )
)

# ─── 5) Define LSTM model ───────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM encoder followed by dropout and a one-unit sigmoid head.

    ``forward`` takes a batch-first tensor ``(batch, seq_len, n_features)``
    and returns one probability per sample, shape ``(batch,)``.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, dropout=0.2):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers, so it is
        # switched off for a single-layer network.
        between_layers = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            n_features,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=between_layers,
        )
        self.drop = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # Only the final hidden state is used; per-step outputs are discarded.
        _, (hidden, _) = self.lstm(x)
        summary = self.drop(hidden[-1])      # top layer: (batch, hidden_dim)
        logit = self.fc(summary)             # (batch, 1)
        return torch.sigmoid(logit).squeeze(1)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = LSTMRegime(n_features=F)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.BCELoss()  # the model already outputs probabilities

# --- 6) Fit on whatever train_loader holds (intended: windows ending before
#        train_steps), then evaluate on test_loader
num_epochs = 20
for epoch in range(1, num_epochs+1):
    model.train()
    total_loss = 0.0
    progress = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]")
    for xb, yb in progress:
        xb = xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by batch size so the epoch figure is a
        # per-sample mean even when the last batch is smaller.
        total_loss += xb.size(0) * loss.item()
    avg_loss = total_loss / len(train_loader.dataset)
    print(f"Epoch {epoch:02d} — Train Loss: {avg_loss:.4f}")

# --- 7) Accuracy on the test windows (no gradient tracking, dropout disabled)
model.eval()
correct = total = 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        pred_labels = model(xb).gt(0.5).float()   # threshold at 0.5
        correct += pred_labels.eq(yb).sum().item()
        total += yb.numel()
test_acc = correct / total
print(f"\nTest Accuracy: {test_acc:.4f}")

# ─── 8) Ablation at inference ──────────────────────────────────────────────
# Features to neutralise at inference time.
negative_feats = [
    'donch_pct_50', 'macd_line', 'streak_down', 'velocity', 'adx_14',
    'log_price', 'close', 'price_minus_sma_50', 'sma_12_26_diff',
    'lr_std_100', 'bb_width_30', 'percent_b_100', 'std_20', 'std_10',
    'lr_std_30',
]
# Column position of each feature; raises ValueError if a name is absent.
all_feats = list(X.columns)
neg_idx = list(map(all_feats.index, negative_feats))

# Zero the selected columns in a copy of the test windows. The model is not
# retrained; the inputs were standardised, so 0.0 is the scaler's fitted mean.
X_seq_test_ablate = X_seq_test.copy()
X_seq_test_ablate[:, :, neg_idx] = 0.0

ablate_loader = DataLoader(
    RegimeDataset(X_seq_test_ablate, y_seq_test),
    batch_size=batch_size,
    shuffle=False,
)

model.eval()
correct_ab = total_ab = 0
with torch.no_grad():
    for xb, yb in ablate_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        pred_labels = model(xb).gt(0.5).float()
        correct_ab += pred_labels.eq(yb).sum().item()
        total_ab += yb.numel()
ablate_acc = correct_ab / total_ab

# A positive delta means accuracy went up once the features were zeroed.
print(f"Ablated Test Accuracy: {ablate_acc:.4f}")
print(f"Δ Accuracy: {ablate_acc - test_acc:+.4f}")


In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# ─── 0) Assume:
#     X: DataFrame with MultiIndex (inst, time) and 59 feature columns
#     y: Series  with same MultiIndex, values in {1,2}

# ─── 1) Build per‐instrument arrays (750 timesteps each) ───────────────────
raw_X_parts = []
raw_y_parts = []
for inst, grp in X.groupby(level='inst', sort=False):
    # First 750 rows of this instrument. np.stack below needs every
    # instrument to supply the same row count.
    arr  = grp.iloc[:750].values
    # Binary target: 1.0 where the label equals 2, 0.0 for any other value.
    labs = (y.loc[grp.index[:750]] == 2).astype(np.float32).values
    raw_X_parts.append(arr)
    raw_y_parts.append(labs)

X_all = np.stack(raw_X_parts).astype(np.float32)  # (n_inst, rows, n_features)
y_all = np.stack(raw_y_parts).astype(np.float32)  # (n_inst, rows)

n_inst, T, F = X_all.shape

# --- Split point: windows ending before this step train, the rest test
train_steps = 700

# --- 2) Standardise with statistics taken from the training span only
scaler = StandardScaler()
flat_train = X_all[:, :train_steps, :].reshape(-1, F)    # (n_inst*train_steps, n_features)
scaler.fit(flat_train)

X_all_scaled = scaler.transform(X_all.reshape(-1, F)) \
                   .reshape(n_inst, T, F) \
                   .astype(np.float32)

# --- 3) Sliding windows of length L, split by the index of the last step
L = 60
Xw_train, yw_train = [], []
Xw_test,  yw_test  = [], []

for inst_idx in range(n_inst):
    series = X_all_scaled[inst_idx]
    labels = y_all[inst_idx]
    for i in range(T - L + 1):
        end_t  = i + L - 1            # last step covered; its label is the target
        window = series[i : i + L]    # (L, F)
        lab    = labels[end_t]

        if end_t < train_steps:
            Xw_train.append(window); yw_train.append(lab)
        else:
            Xw_test.append(window);  yw_test.append(lab)

# FIX: stack the freshly built window lists. Without this the loaders below
# either fail with NameError on a clean kernel or silently reuse arrays left
# behind by an earlier cell (a different split), so train_steps had no effect.
X_seq_train = np.stack(Xw_train)
y_seq_train = np.array(yw_train, dtype=np.float32)
X_seq_test  = np.stack(Xw_test)
y_seq_test  = np.array(yw_test,  dtype=np.float32)

# sanity check
print(f"T={T}, L={L}, train_steps={train_steps}")
print(f"→ collected {len(Xw_train)} train windows")
print(f"→ collected {len(Xw_test)} test  windows")


# ─── 4) Dataset & DataLoader ───────────────────────────────────────────────
class RegimeDataset(Dataset):
    """Map-style dataset that pairs each feature window with its label.

    Both inputs must be NumPy arrays; they are wrapped with
    ``torch.from_numpy``, so the tensors keep the arrays' dtype
    (float32 arrays give float32 tensors).
    """

    def __init__(self, X, y):
        self.X, self.y = torch.from_numpy(X), torch.from_numpy(y)

    def __len__(self):
        # Number of samples = size of the leading axis of X.
        return self.X.shape[0]

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

batch_size = 256
# The training loader reshuffles its samples; the test loader keeps the
# construction order.
train_loader, test_loader = (
    DataLoader(RegimeDataset(feats, targs), batch_size=batch_size, shuffle=do_shuffle)
    for feats, targs, do_shuffle in (
        (X_seq_train, y_seq_train, True),
        (X_seq_test,  y_seq_test,  False),
    )
)

# ─── 5) Define LSTM model ───────────────────────────────────────────────────
class LSTMRegime(nn.Module):
    """LSTM encoder followed by dropout and a one-unit sigmoid head.

    ``forward`` takes a batch-first tensor ``(batch, seq_len, n_features)``
    and returns one probability per sample, shape ``(batch,)``.
    """

    def __init__(self, n_features, hidden_dim=64, num_layers=1, dropout=0.2):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers, so it is
        # switched off for a single-layer network.
        between_layers = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            n_features,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=between_layers,
        )
        self.drop = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # Only the final hidden state is used; per-step outputs are discarded.
        _, (hidden, _) = self.lstm(x)
        summary = self.drop(hidden[-1])      # top layer: (batch, hidden_dim)
        logit = self.fc(summary)             # (batch, 1)
        return torch.sigmoid(logit).squeeze(1)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = LSTMRegime(n_features=F)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.BCELoss()  # the model already outputs probabilities

# --- 6) Fit on whatever train_loader holds (intended: windows ending before
#        train_steps), then evaluate on test_loader
num_epochs = 30
for epoch in range(1, num_epochs+1):
    model.train()
    total_loss = 0.0
    progress = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Train]")
    for xb, yb in progress:
        xb = xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by batch size so the epoch figure is a
        # per-sample mean even when the last batch is smaller.
        total_loss += xb.size(0) * loss.item()
    avg_loss = total_loss / len(train_loader.dataset)
    print(f"Epoch {epoch:02d} — Train Loss: {avg_loss:.4f}")

# --- 7) Accuracy on the test windows (no gradient tracking, dropout disabled)
model.eval()
correct = total = 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        pred_labels = model(xb).gt(0.5).float()   # threshold at 0.5
        correct += pred_labels.eq(yb).sum().item()
        total += yb.numel()
test_acc = correct / total
print(f"\nTest Accuracy: {test_acc:.4f}")

# ─── 8) Ablation at inference ──────────────────────────────────────────────
# Features to neutralise at inference time.
negative_feats = [
    'donch_pct_50', 'macd_line', 'streak_down', 'velocity', 'adx_14',
    'log_price', 'close', 'price_minus_sma_50', 'sma_12_26_diff',
    'lr_std_100', 'bb_width_30', 'percent_b_100', 'std_20', 'std_10',
    'lr_std_30',
]
# Column position of each feature; raises ValueError if a name is absent.
all_feats = list(X.columns)
neg_idx = list(map(all_feats.index, negative_feats))

# Zero the selected columns in a copy of the test windows. The model is not
# retrained; the inputs were standardised, so 0.0 is the scaler's fitted mean.
X_seq_test_ablate = X_seq_test.copy()
X_seq_test_ablate[:, :, neg_idx] = 0.0

ablate_loader = DataLoader(
    RegimeDataset(X_seq_test_ablate, y_seq_test),
    batch_size=batch_size,
    shuffle=False,
)

model.eval()
correct_ab = total_ab = 0
with torch.no_grad():
    for xb, yb in ablate_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        pred_labels = model(xb).gt(0.5).float()
        correct_ab += pred_labels.eq(yb).sum().item()
        total_ab += yb.numel()
ablate_acc = correct_ab / total_ab

# A positive delta means accuracy went up once the features were zeroed.
print(f"Ablated Test Accuracy: {ablate_acc:.4f}")
print(f"Δ Accuracy: {ablate_acc - test_acc:+.4f}")
