In [None]:
import os, sys

import os
import copy
import time
import random
import typing as T
from collections import deque, defaultdict

import numpy as np
import pandas as pd

# ML libs
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score
import xgboost as xgb

## --- CLASS / FUNC from file --- ##
from bAE import AdvancedDimReducerAE
from DeepAE import DeepAnomalyAE
from EWC import EWC
from train_model import train_model, plot_evaluation, calculate_metrics

# ----------------------------
# Config
# ----------------------------
# Prefer the GPU when CUDA is available; otherwise run on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed Python's, NumPy's and PyTorch's global RNGs with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Hyperparameters
# NOTE(review): none of the constants below are referenced inside this cell;
# a later cell assigns the same names again — confirm which cell is meant to
# be authoritative before tuning values here.
LATENT_DIM = 16
AE_HIDDEN = 64
BATCH_SIZE = 128
AE_LR = 1e-3
AE_EPOCHS = 30
BATCH_KMEANS = 1000

BUFFER_MAX = 10000
CLUSTER_PERIOD = 300  # seconds or you can make it call-based
CLUSTER_MIN_SAMPLES = 50  # minimum buffered samples to run clustering
NUM_CLUSTERS = 8  # fallback K means clusters
EWC_LAMBDA = 100.0  # regularization weight (tweak)
REPLAY_SIZE = 1000

# ----------------------------
# Utils
# ----------------------------
def to_device(x):
    """Move a tensor onto ``DEVICE``.

    Returns ``x.to(DEVICE)`` when *x* is a ``torch.Tensor``; every other
    kind of input yields ``None``.
    """
    return x.to(DEVICE) if isinstance(x, torch.Tensor) else None



In [None]:

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM
from sklearn.cluster import MiniBatchKMeans
from collections import deque
import xgboost as xgb
import time
import random
import os

# Optional: HDBSCAN for clustering
# hdbscan is an optional dependency; HAS_HDBSCAN records whether it can be used.
try:
    import hdbscan
    HAS_HDBSCAN = True
except Exception:
    # Deliberately broader than ImportError: a broken binary install can fail
    # with other exception types at import time.  Unlike a bare ``except:``
    # this does not swallow KeyboardInterrupt / SystemExit.
    HAS_HDBSCAN = False

# -----------------------------------------------------
# CONFIG
# -----------------------------------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPU when CUDA is available, else CPU
AE_HIDDEN = 64  # default width of the AE hidden layers
LATENT_DIM = 16  # default size of the AE latent code
BATCH_SIZE = 128  # mini-batch size used by train_autoencoder
AE_LR = 1e-3  # Adam learning rate for AE training (fine-tuning uses AE_LR/10)
AE_EPOCHS = 25  # default number of epochs in train_autoencoder
EWC_LAMBDA = 50.0  # weight of the EWC penalty added to the fine-tuning loss
CLUSTER_MIN = 30  # cluster_unknowns returns early until the buffer holds this many samples
NUM_CLUSTERS = 6  # n_clusters for the MiniBatchKMeans fallback (used when hdbscan is unavailable)

BUFFER_MAX = 2000  # maxlen of the unknown-sample buffer deque
REPLAY_SIZE = 1000  # maxlen of the replay deque

# -----------------------------------------------------
# MODEL DEFINITIONS
# -----------------------------------------------------
class AE(nn.Module):
    """Fully connected autoencoder with a single hidden layer per side.

    The encoder maps ``input_dim -> hidden -> latent_dim`` and the decoder
    maps ``latent_dim -> hidden -> input_dim``; a ReLU sits between the two
    linear layers of each half.
    """

    def __init__(self, input_dim, hidden=AE_HIDDEN, latent_dim=LATENT_DIM):
        super().__init__()
        # Encoder layers are instantiated before decoder layers.
        encoder_layers = [
            nn.Linear(input_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, latent_dim),
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        decoder_layers = [
            nn.Linear(latent_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, input_dim),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the reconstruction of *x*."""
        return self.decoder(self.encoder(x))

    def encode(self, x):
        """Return the latent code of *x* (encoder output only)."""
        return self.encoder(x)


class EWC:
    """Elastic Weight Consolidation regularizer.

    At construction time the trainable parameters of *model* are snapshotted
    and a per-parameter importance estimate is accumulated from squared
    gradients of the MSE reconstruction loss over *dataloader*.  Note that
    the given model object itself (not a copy) is used for that pass.
    """

    def __init__(self, model, dataloader, device=DEVICE):
        self.model = model
        trainable = [(name, param) for name, param in model.named_parameters()
                     if param.requires_grad]
        self.params = {name: param.clone().detach() for name, param in trainable}
        self.fisher = {name: torch.zeros_like(param) for name, param in trainable}
        self.device = device
        self._compute_fisher(dataloader)

    def _compute_fisher(self, dataloader):
        """Accumulate squared gradients per batch, then average over batches."""
        criterion = nn.MSELoss()
        self.model.eval()
        for (inputs,) in dataloader:
            inputs = inputs.to(self.device)
            reconstruction_loss = criterion(self.model(inputs), inputs)
            self.model.zero_grad()
            reconstruction_loss.backward()
            for name, param in self.model.named_parameters():
                if param.grad is None:
                    continue
                self.fisher[name] += param.grad.detach() ** 2
        for accumulated in self.fisher.values():
            # In-place division keeps the tensors stored in self.fisher.
            accumulated /= len(dataloader)

    def penalty(self, model):
        """Return the importance-weighted squared drift of *model* from the snapshot."""
        return sum(
            ((self.fisher[name] * (param - self.params[name]) ** 2).sum()
             for name, param in model.named_parameters()
             if name in self.fisher),
            0.0,
        )

# -----------------------------------------------------
# MAIN PIPELINE
# -----------------------------------------------------
class IncrementalIDS:
    """Intrusion-detection pipeline with offline training and online updates.

    Components:
      * ``bae`` - autoencoder whose latent codes feed a One-Class SVM;
      * ``ae``  - second autoencoder scored by reconstruction error;
      * ``xgb`` - XGBoost classifier over the scaled features.

    Samples that :meth:`detect` flags as unknown are buffered, grouped by
    :meth:`cluster_unknowns` and fed back through :meth:`incremental_update`.
    """

    def __init__(self, input_dim):
        """Create untrained models for feature vectors of width *input_dim*."""
        self.scaler = StandardScaler()
        self.bae = AE(input_dim).to(DEVICE)
        self.ae = AE(input_dim).to(DEVICE)
        self.ocsvm = None
        self.xgb = None
        self.ewc_bae = None
        self.ewc_ae = None
        self.buffer = deque(maxlen=BUFFER_MAX)
        self.replay = deque(maxlen=REPLAY_SIZE)
        # Scaled, labelled rows the XGBoost model has been fitted on.  Kept so
        # new classes can be added by refitting on old + new data.
        self._xgb_X = None
        self._xgb_y = None
        # Scaled rows the One-Class SVM has been fitted on (before encoding).
        self._ocsvm_X = None

    # ---------------- Helpers ----------------
    @staticmethod
    def _new_xgb():
        """Return an unfitted XGBoost classifier with the pipeline's settings."""
        return xgb.XGBClassifier(
            n_estimators=150,
            max_depth=5,
            learning_rate=0.1,
            use_label_encoder=False,
            eval_metric='logloss'
        )

    def _encode(self, Xs):
        """Return the bAE latent codes of already-scaled rows *Xs* as a NumPy array."""
        with torch.no_grad():
            xt = torch.tensor(Xs, dtype=torch.float32).to(DEVICE)
            return self.bae.encode(xt).cpu().numpy()

    # ---------------- Offline ----------------
    def train_autoencoder(self, model, X, epochs=AE_EPOCHS):
        """Train *model* in place to reconstruct the rows of *X* (MSE loss, Adam).

        Prints the mean per-batch loss every 10th epoch and on the last one.
        """
        ds = DataLoader(TensorDataset(torch.tensor(X, dtype=torch.float32)),
                        batch_size=BATCH_SIZE, shuffle=True)
        opt = optim.Adam(model.parameters(), lr=AE_LR)
        criterion = nn.MSELoss()
        n_batches = max(len(ds), 1)  # avoid dividing by zero on empty input
        model.train()
        for ep in range(epochs):
            total_loss = 0
            for (xb,) in ds:
                xb = xb.to(DEVICE)
                rec = model(xb)
                loss = criterion(rec, xb)
                opt.zero_grad()
                loss.backward()
                opt.step()
                total_loss += loss.item()
            if ep % 10 == 0 or ep == epochs - 1:
                print(f"[AE] epoch {ep}/{epochs} loss={total_loss/n_batches:.6f}")

    def offline_train(self, X, y):
        """Fit the scaler and every model on the labelled data ``(X, y)``.

        The One-Class SVM is fitted on latent codes of the rows with
        ``y == 0`` when there are more than 10 of them, otherwise on all
        rows.  XGBoost is fitted on all scaled rows with ``y`` cast to int.
        """
        y = np.asarray(y)
        print("→ Scaling data...")
        Xs = self.scaler.fit_transform(X)

        print("→ Training bAE & AE ...")
        self.train_autoencoder(self.bae, Xs)
        self.train_autoencoder(self.ae, Xs)

        print("→ Latent features ...")
        z = self._encode(Xs)

        print("→ Training OCSVM (on benign samples)...")
        benign_idx = (y == 0)
        use_benign = benign_idx.sum() > 10
        self._ocsvm_X = Xs[benign_idx] if use_benign else Xs
        train_z = z[benign_idx] if use_benign else z
        self.ocsvm = OneClassSVM(nu=0.05, gamma='scale').fit(train_z)

        print("→ Training XGBoost...")
        self._xgb_X = Xs
        self._xgb_y = y.astype(int)
        self.xgb = self._new_xgb()
        self.xgb.fit(self._xgb_X, self._xgb_y)

        # Estimate Fisher info for EWC
        print("→ Estimating Fisher for EWC...")
        ds = DataLoader(TensorDataset(torch.tensor(Xs[:2000], dtype=torch.float32)), batch_size=128, shuffle=True)
        self.ewc_bae = EWC(self.bae, ds)
        self.ewc_ae = EWC(self.ae, ds)
        print("✔ Offline training complete.")

    # ---------------- Online Detection ----------------
    def detect(self, x, recon_th=0.5, xgb_conf=0.7):
        """Score one raw sample *x* and buffer it when it looks unknown.

        A sample is unknown when the OCSVM calls it an outlier, the AE
        reconstruction error exceeds *recon_th*, or the highest XGBoost
        class probability is below *xgb_conf*.

        Returns a dict with keys ``ocsvm``, ``recon``, ``xgb_pred``,
        ``xgb_conf`` and ``unknown``.
        """
        xs = self.scaler.transform(x.reshape(1, -1))
        xt = torch.tensor(xs, dtype=torch.float32).to(DEVICE)

        with torch.no_grad():
            z = self.bae.encode(xt).cpu().numpy()
            xrec = self.ae(xt)
            rec_err = float(((xrec - xt)**2).mean().cpu())

        oc = int(self.ocsvm.predict(z)[0])
        proba = self.xgb.predict_proba(xs)[0]
        conf = float(np.max(proba))
        # predict_proba columns follow classes_, so map the index to a label.
        pred = int(self.xgb.classes_[int(np.argmax(proba))])

        unknown = (oc == -1 or rec_err > recon_th or conf < xgb_conf)
        if unknown:
            self.buffer.append({'x': xs[0], 'latent': z[0], 'label_pred': pred})
            self.replay.append(xs[0])

        return {
            "ocsvm": oc, "recon": rec_err, "xgb_pred": pred,
            "xgb_conf": conf, "unknown": unknown
        }

    # ---------------- Clustering & Labeling ----------------
    def cluster_unknowns(self):
        """Cluster the buffered unknowns and return them as labelled samples.

        Returns a list of ``{'x': scaled_row, 'label': int}`` dicts, or an
        empty list when fewer than ``CLUSTER_MIN`` samples are buffered.
        The buffer is emptied afterwards (noise points included).
        """
        if len(self.buffer) < CLUSTER_MIN:
            print(f"[Cluster] Buffer too small ({len(self.buffer)}/{CLUSTER_MIN})")
            return []
        buffered = list(self.buffer)  # list gives O(1) indexing below
        X_latent = np.stack([b['latent'] for b in buffered])
        if HAS_HDBSCAN:
            clt = hdbscan.HDBSCAN(min_cluster_size=10)
        else:
            clt = MiniBatchKMeans(n_clusters=NUM_CLUSTERS)
        labels = clt.fit_predict(X_latent)

        # Simulate analyst: assign new label.
        # NOTE(review): every cluster of one round receives the same new
        # label (max known class + 1).  If clusters are meant to become
        # distinct classes this needs a per-cluster counter - confirm intent.
        new_label = int(self.xgb.classes_.max()) + 1
        clusters = []
        for cid in np.unique(labels):
            if cid == -1:  # noise
                continue
            for i in np.where(labels == cid)[0]:
                clusters.append({'x': buffered[i]['x'], 'label': new_label})
        self.buffer.clear()
        print(f"[Cluster] labeled {len(clusters)} samples for incremental learning.")
        return clusters

    # ---------------- Incremental update ----------------
    def incremental_update(self, new_samples):
        """Update all models with newly labelled samples.

        *new_samples* is a list of ``{'x': scaled_row, 'label': int}`` dicts
        as produced by :meth:`cluster_unknowns`; an empty list is a no-op.

        Raises:
            RuntimeError: if :meth:`offline_train` has not been run yet.
        """
        if len(new_samples) == 0:
            return
        if (self.xgb is None or self.ocsvm is None or self._xgb_X is None
                or self.ewc_bae is None or self.ewc_ae is None):
            raise RuntimeError("offline_train() must be called before incremental_update()")

        X_new = np.stack([s['x'] for s in new_samples])
        y_new = np.array([s['label'] for s in new_samples]).astype(int)

        # update XGBoost: a warm start cannot add classes, and XGBClassifier
        # requires the labels it is fitted on to be exactly 0..k-1, so fitting
        # on the new-class samples alone fails.  Refit on old + new data.
        self._xgb_X = np.vstack([self._xgb_X, X_new])
        self._xgb_y = np.concatenate([self._xgb_y, y_new])
        self.xgb = self._new_xgb()
        self.xgb.fit(self._xgb_X, self._xgb_y)

        # fine-tune AE/bAE with EWC + replay
        replay_data = list(self.replay)
        if len(replay_data) > 0:
            X_comb = np.vstack([np.stack(replay_data), X_new])
        else:
            X_comb = X_new
        ds = DataLoader(TensorDataset(torch.tensor(X_comb, dtype=torch.float32)), batch_size=128, shuffle=True)

        criterion = nn.MSELoss()
        for model, ewc_obj, name in [(self.bae, self.ewc_bae, "bAE"), (self.ae, self.ewc_ae, "AE")]:
            opt = optim.Adam(model.parameters(), lr=AE_LR/10)
            model.train()
            for ep in range(5):
                tot = 0
                for (xb,) in ds:
                    xb = xb.to(DEVICE)
                    rec = model(xb)
                    loss = criterion(rec, xb)
                    loss += EWC_LAMBDA * ewc_obj.penalty(model)
                    opt.zero_grad()
                    loss.backward()
                    opt.step()
                    tot += loss.item()
                print(f"[{name} fine-tune] epoch {ep}: {tot/len(ds):.6f}")
            model.eval()

        # Refit the OCSVM after fine-tuning so it sees latent codes from the
        # encoder that detect() will use, and on the earlier reference rows
        # plus the new ones so the previously learned region is not discarded.
        self._ocsvm_X = np.vstack([self._ocsvm_X, X_new])
        self.ocsvm.fit(self._encode(self._ocsvm_X))

        print("✔ Incremental fine-tune complete.")

# -----------------------------------------------------
# LOAD CSV + DEMO
# -----------------------------------------------------
def main():
    """Train on ``dataset.csv`` and replay the same rows as an online stream.

    The last CSV column is used as the label; all other columns are
    features.  Clustering plus incremental update runs after every 1000th
    sample and once more when the stream ends.
    """
    # --- Load dataset ---
    frame = pd.read_csv("dataset.csv")
    print("Loaded:", frame.shape)
    features = frame.iloc[:, :-1].values
    targets = frame.iloc[:, -1].values

    # --- Offline training ---
    detector = IncrementalIDS(input_dim=features.shape[1])
    detector.offline_train(features, targets)

    # --- Online simulation ---
    print("\n[Simulation] Online detection ...")
    for step, sample in enumerate(features):
        detector.detect(sample)
        if step % 500 == 0:
            print(f"→ Processed {step} samples | Unknown buffer: {len(detector.buffer)}")
        if (step + 1) % 1000 == 0:
            detector.incremental_update(detector.cluster_unknowns())

    # Final cluster
    detector.incremental_update(detector.cluster_unknowns())

    print("✅ Pipeline complete.")


if __name__ == "__main__":
    main()


In [None]:

import os
import copy
import time
import random
import typing as T
from collections import deque, defaultdict

import numpy as np
import pandas as pd

# ML libs
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score
import xgboost as xgb

# Optional dependency: HAS_HDBSCAN tells the clustering step whether the
# hdbscan package could be imported.
try:
    import hdbscan
except Exception:
    HAS_HDBSCAN = False
else:
    HAS_HDBSCAN = True

# ----------------------------
# Config (tweak as needed)
# ----------------------------
# Prefer the GPU when CUDA is available; otherwise run on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed Python's, NumPy's and PyTorch's global RNGs with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Hyperparameters
LATENT_DIM = 16  # default size of the AE latent code
AE_HIDDEN = 64  # default width of the AE hidden layers
BATCH_SIZE = 128  # mini-batch size for AE training, fine-tuning and Fisher estimation
AE_LR = 1e-3  # default Adam learning rate for AE training (fine-tuning uses AE_LR/5)
AE_EPOCHS = 30  # default number of epochs in train_autoencoder
BATCH_KMEANS = 1000  # batch_size passed to the MiniBatchKMeans fallback

BUFFER_MAX = 10000  # maxlen of the unknown-sample buffer deque
CLUSTER_PERIOD = 300  # seconds or you can make it call-based (not referenced by the code in this cell)
CLUSTER_MIN_SAMPLES = 50  # minimum buffered samples to run clustering
NUM_CLUSTERS = 8  # fallback K means clusters (upper bound on k)
EWC_LAMBDA = 100.0  # regularization weight (tweak)
REPLAY_SIZE = 1000  # maxlen of the replay buffer deque

# ----------------------------
# Utils
# ----------------------------
def to_device(x):
    """Return *x* moved to ``DEVICE`` if it is a tensor, else ``None``."""
    if not isinstance(x, torch.Tensor):
        return None
    return x.to(DEVICE)

# ----------------------------
# Simple Autoencoder (bottleneck AE)
# ----------------------------
class AE(nn.Module):
    """Bottleneck autoencoder: ``input_dim -> hidden -> latent_dim`` and back.

    Both halves are two linear layers separated by a ReLU.
    """

    def __init__(self, input_dim, hidden=AE_HIDDEN, latent_dim=LATENT_DIM):
        super().__init__()
        # Build the encoder first, then the decoder.
        to_hidden = nn.Linear(input_dim, hidden)
        to_latent = nn.Linear(hidden, latent_dim)
        self.encoder = nn.Sequential(to_hidden, nn.ReLU(), to_latent)

        from_latent = nn.Linear(latent_dim, hidden)
        to_output = nn.Linear(hidden, input_dim)
        self.decoder = nn.Sequential(from_latent, nn.ReLU(), to_output)

    def forward(self, x):
        """Encode then decode *x*, returning the reconstruction."""
        latent = self.encoder(x)
        return self.decoder(latent)

    def encode(self, x):
        """Return only the latent representation of *x*."""
        return self.encoder(x)

# ----------------------------
# EWC helper (approx Fisher diag)
# ----------------------------
class EWC:
    """Elastic Weight Consolidation helper with a diagonal importance estimate.

    A deep copy of *model* is taken at construction time; its trainable
    parameters are stored as the anchor values and squared gradients of the
    MSE reconstruction loss over *dataloader* are averaged per parameter.
    """

    def __init__(self, model: nn.Module, dataloader: DataLoader, device=DEVICE):
        self.model = copy.deepcopy(model).to(device)
        self.device = device
        self.params = {}
        self.fisher = {}
        for name, param in self.model.named_parameters():
            if not param.requires_grad:
                continue
            self.params[name] = param.clone().detach()
            # running sum of squared gradients, averaged in _compute_fisher
            self.fisher[name] = torch.zeros_like(param)
        self._compute_fisher(dataloader)

    def _compute_fisher(self, dataloader):
        """Accumulate squared gradients batch by batch, then average over batches."""
        mse = nn.MSELoss()
        self.model.eval()
        for (inputs,) in dataloader:
            inputs = inputs.to(self.device)
            self.model.zero_grad()
            # MSE reconstruction loss stands in for the negative log-likelihood
            mse(self.model(inputs), inputs).backward()
            for name, param in self.model.named_parameters():
                if not param.requires_grad or param.grad is None:
                    continue
                self.fisher[name] += param.grad.detach() ** 2
        # normalize by the number of batches
        self.fisher = {name: total / len(dataloader) for name, total in self.fisher.items()}

    def penalty(self, model):
        """Return the importance-weighted squared distance of *model* from the anchor."""
        total = 0.0
        for name, param in model.named_parameters():
            if name not in self.fisher:
                continue
            drift = param - self.params[name]
            total += (self.fisher[name] * drift ** 2).sum()
        return total

# ----------------------------
# Pipeline Manager
# ----------------------------
class IncrementalPipeline:
    """Offline-trained detection pipeline with buffered incremental updates.

    Components:
      * ``bae`` - bottleneck autoencoder; its latent codes feed the OCSVM;
      * ``ae``  - auxiliary autoencoder used for reconstruction error;
      * ``ocsvm`` - One-Class SVM over bAE latent codes;
      * ``xgb`` - XGBoost classifier over scaled features.

    Unknown samples found by :meth:`detect_single` are buffered, grouped and
    labelled by :meth:`cluster_and_label`, then consumed by
    :meth:`incremental_update`.
    """

    def __init__(self, input_dim):
        """Create untrained models for feature vectors of width *input_dim*."""
        self.input_dim = input_dim
        # models (to be trained offline)
        self.bae = AE(input_dim).to(DEVICE)  # bottleneck AE
        self.ae = AE(input_dim).to(DEVICE)   # auxiliary AE for recon-error detection
        self.scaler = StandardScaler()
        self.ocsvm = None
        self.xgb = None

        # Buffers (both hold already-scaled feature vectors)
        self.unknown_buffer = deque(maxlen=BUFFER_MAX)
        self.replay_buffer = deque(maxlen=REPLAY_SIZE)  # for replay during fine-tune
        # bookkeeping for EWC
        self.ewc_bae = None
        self.ewc_ae = None

    # ----------------------------
    # Helpers
    # ----------------------------
    def _encode(self, X_scaled):
        """Return bAE latent codes of already-scaled rows as a NumPy array."""
        with torch.no_grad():
            xt = torch.tensor(X_scaled, dtype=torch.float32).to(DEVICE)
            return self.bae.encode(xt).cpu().numpy()

    @staticmethod
    def _fisher_loader(X_scaled):
        """Return a shuffled loader over at most the first 2000 rows of *X_scaled*."""
        subset = torch.tensor(X_scaled[:2000], dtype=torch.float32)
        return DataLoader(TensorDataset(subset), batch_size=BATCH_SIZE, shuffle=True)

    def _finetune_with_ewc(self, model, ewc_obj, loader, n_samples, tag, epochs=5):
        """Fine-tune *model* in place on *loader* with MSE loss plus the EWC penalty.

        *ewc_obj* may be ``None``, in which case no penalty is added.
        *n_samples* is only used to report the mean per-sample loss.
        """
        opt = optim.Adam(model.parameters(), lr=AE_LR/5)
        criterion = nn.MSELoss()
        model.train()
        for ep in range(epochs):
            tot_loss = 0.0
            for (x_batch,) in loader:
                x_batch = x_batch.to(DEVICE)
                loss = criterion(model(x_batch), x_batch)
                if ewc_obj is not None:
                    loss = loss + (EWC_LAMBDA * ewc_obj.penalty(model))
                opt.zero_grad()
                loss.backward()
                opt.step()
                tot_loss += loss.item() * x_batch.size(0)
            print(f"[{tag} finetune] ep {ep} loss {tot_loss/n_samples:.6f}")
        model.eval()

    # ----------------------------
    # Offline training
    # ----------------------------
    def train_autoencoder(self, model, X, epochs=AE_EPOCHS, lr=AE_LR):
        """Train *model* to reconstruct the rows of *X* and return it.

        Uses MSE loss with Adam; prints the mean per-sample loss every 10th
        epoch and on the final epoch.
        """
        model = model.to(DEVICE)
        ds = TensorDataset(torch.tensor(X, dtype=torch.float32))
        dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True)
        opt = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.MSELoss()
        model.train()
        for ep in range(epochs):
            epoch_loss = 0.0
            for (x_batch,) in dl:
                x_batch = x_batch.to(DEVICE)
                xrec = model(x_batch)
                loss = criterion(xrec, x_batch)
                opt.zero_grad()
                loss.backward()
                opt.step()
                epoch_loss += loss.item() * x_batch.size(0)
            # simple print
            if ep % 10 == 0 or ep == epochs - 1:
                print(f"[AE] epoch {ep}/{epochs} loss={epoch_loss/len(X):.6f}")
        return model

    def train_offline(self, X: np.ndarray, y: np.ndarray):
        """Fit the scaler, both autoencoders, the OCSVM and XGBoost.

        Args:
            X: feature matrix of shape (n, d).
            y: labels of shape (n,); ``NaN`` marks an unlabelled row and
               ``0`` is treated as benign.

        Raises:
            ValueError: if every entry of *y* is ``NaN``.
        """
        print("Scaling data...")
        Xs = self.scaler.fit_transform(X)
        # split for XGBoost: use available supervised labels for known attacks
        known_idx = ~np.isnan(y)
        if known_idx.sum() == 0:
            raise ValueError("No known labels provided for supervised training (XGBoost).")
        # Train bAE and AE on all data (or only benign) — here use whole Xs to learn general features
        print("Training bottleneck AE (bAE)...")
        self.bae = self.train_autoencoder(self.bae, Xs)
        print("Training auxiliary AE (AE)...")
        self.ae = self.train_autoencoder(self.ae, Xs)

        # Build latent features (use bae.encode)
        z = self._encode(Xs)
        # Train OCSVM on latent of *benign* samples when enough are labelled;
        # fallback: train on all.  NaN labels compare unequal to 0.
        benign_idx = (y == 0)  # convention: 0 benign, 1 attack (change to your labels)
        if benign_idx.sum() < 10:
            print("Not enough benign labeled examples for OCSVM; training on all latent features.")
            train_latent = z
        else:
            train_latent = z[benign_idx]
        print("Training OneClassSVM on latent features...")
        self.ocsvm = OneClassSVM(gamma='scale', nu=0.05).fit(train_latent)

        # Train XGBoost classifier for known attacks
        print("Training XGBoost on known labels...")
        xgb_train_X = Xs[known_idx]
        xgb_train_y = y[known_idx].astype(int)
        model = xgb.XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric='logloss')
        model.fit(xgb_train_X, xgb_train_y)
        self.xgb = model

        # Initialize EWC objects (estimate Fisher)
        print("Estimating Fisher for EWC (this may take time)...")
        self.ewc_bae = EWC(self.bae, self._fisher_loader(Xs), device=DEVICE)
        self.ewc_ae = EWC(self.ae, self._fisher_loader(Xs), device=DEVICE)
        print("Offline training complete.")

    # ----------------------------
    # Online detection
    # ----------------------------
    def detect_single(self, x_raw: np.ndarray, unknown_thresholds=None):
        """Process one sample (1-D array) and return a dict of detection outputs.

        Args:
            x_raw: unscaled feature vector.
            unknown_thresholds: dict with keys ``'recon'`` (max accepted AE
                reconstruction error) and ``'xgb_prob'`` (XGBoost confidence
                above which the sample counts as known).  Defaults to
                ``{'recon': 0.01, 'xgb_prob': 0.5}``.

        A sample is unknown when XGBoost is not confident AND (the OCSVM
        marks it as an outlier OR the reconstruction error is too high).
        Unknown samples are appended to the buffers.
        """
        if unknown_thresholds is None:
            unknown_thresholds = {'recon': 0.01, 'xgb_prob': 0.5}
        x_s = self.scaler.transform(x_raw.reshape(1, -1))
        x_tensor = torch.tensor(x_s, dtype=torch.float32).to(DEVICE)

        with torch.no_grad():
            # latent / OCSVM input
            z = self.bae.encode(x_tensor).cpu().numpy()
            # AE reconstruction error
            xrec = self.ae(x_tensor)
            recon_err = float(((xrec - x_tensor) ** 2).mean().cpu().numpy())
        ocsvm_label = self.ocsvm.predict(z)[0]  # 1 inlier, -1 outlier

        # XGBoost prediction (prob); best-effort, a failure yields None/None
        xgb_proba = None
        xgb_pred = None
        try:
            proba = self.xgb.predict_proba(x_s)[0]
            best = int(np.argmax(proba))
            # predict_proba columns follow classes_, so map index -> label
            xgb_pred = int(self.xgb.classes_[best])
            xgb_proba = float(proba[best])
        except Exception:
            xgb_proba = None
            xgb_pred = None

        is_unknown = False
        reasons = []
        if xgb_proba is not None and xgb_proba > unknown_thresholds['xgb_prob']:
            # known
            reasons.append('xgb_confident')
        else:
            # not confident from xgb
            if ocsvm_label == -1:
                is_unknown = True
                reasons.append('ocsvm_outlier')
            if recon_err > unknown_thresholds['recon']:
                is_unknown = True
                reasons.append('high_recon')
            if xgb_proba is not None and xgb_proba <= unknown_thresholds['xgb_prob']:
                reasons.append('xgb_not_confident')

        output = {
            'x_raw': x_raw,
            'x_scaled': x_s[0],
            'latent_z': z[0],
            'ocsvm_label': int(ocsvm_label),
            'recon_err': recon_err,
            'xgb_proba': xgb_proba,
            'xgb_pred': xgb_pred,
            'is_unknown': is_unknown,
            'reasons': reasons
        }

        # buffer if unknown
        if is_unknown:
            self.buffer_unknown(output)

        return output

    def buffer_unknown(self, detection_output):
        """Store an unknown detection in the unknown and replay buffers."""
        # store scaled features and latent
        rec = {
            'ts': time.time(),
            'x_scaled': detection_output['x_scaled'],
            'latent': detection_output['latent_z'],
            'meta': detection_output
        }
        self.unknown_buffer.append(rec)
        # also add to replay buffer for later fine-tune (optionally)
        # NOTE(review): the replay buffer therefore only ever contains samples
        # flagged as unknown, never offline training data - confirm that this
        # is the intended replay population.
        self.replay_buffer.append(rec['x_scaled'])
        # debug
        if len(self.unknown_buffer) % 100 == 0:
            print(f"[Buffer] unknown buffer size = {len(self.unknown_buffer)}")

    # ----------------------------
    # Clustering + analyst labeling
    # ----------------------------
    def cluster_and_label(self, min_samples_for_cluster=CLUSTER_MIN_SAMPLES, use_hdbscan=True):
        """Cluster buffered unknowns and return them with simulated labels.

        Each non-noise cluster is labelled with the majority ``xgb_pred`` of
        its members, or with a simulated new label when no member has a
        prediction.  Labelled samples are removed from the unknown buffer;
        HDBSCAN noise points (cluster id -1) stay buffered.

        Returns:
            list of ``{'x_scaled': np.ndarray, 'label': int}``; empty when
            fewer than *min_samples_for_cluster* samples are buffered.
        """
        n_buf = len(self.unknown_buffer)
        if n_buf < min_samples_for_cluster:
            print(f"[Cluster] not enough unknowns ({n_buf} < {min_samples_for_cluster})")
            return []

        # Snapshot the buffer: cluster indices refer to positions in this
        # list, so nothing may be removed while they are still being used.
        records = list(self.unknown_buffer)
        X_latent = np.stack([r['latent'] for r in records])
        # Use HDBSCAN if available and desired
        if HAS_HDBSCAN and use_hdbscan:
            clusterer = hdbscan.HDBSCAN(min_cluster_size=10)
        else:
            # fallback to MiniBatchKMeans
            k = min(NUM_CLUSTERS, max(2, n_buf // 10))
            clusterer = MiniBatchKMeans(n_clusters=k, batch_size=BATCH_KMEANS)
        labels = clusterer.fit_predict(X_latent)

        # group sample positions per cluster
        clusters = defaultdict(list)
        for idx, lab in enumerate(labels):
            clusters[lab].append(idx)

        labeled_samples = []
        labeled_positions = set()
        n_labeled_clusters = 0
        for lab, indices in clusters.items():
            if lab == -1:
                # HDBSCAN noise label
                continue
            # try to infer a label from xgb_pred majority
            preds = [records[i]['meta']['xgb_pred'] for i in indices
                     if records[i]['meta']['xgb_pred'] is not None]
            if len(preds) > 0:
                assigned_label = max(set(preds), key=preds.count)
            else:
                # fallback: analyst says "new attack class" -> label as next int
                assigned_label = self._simulate_new_label()
            n_labeled_clusters += 1
            labeled_positions.update(indices)
            for i in indices:
                labeled_samples.append({'x_scaled': records[i]['x_scaled'], 'label': assigned_label})

        # Rebuild the buffer from the records that were not labelled.
        remaining = [r for i, r in enumerate(records) if i not in labeled_positions]
        self.unknown_buffer.clear()
        self.unknown_buffer.extend(remaining)
        print(f"[Cluster] produced {n_labeled_clusters} clusters, labeled {len(labeled_samples)} samples.")
        return labeled_samples

    def _simulate_new_label(self):
        """Simulate a new label integer (choose > existing classes)."""
        # if xgb exists, see known classes
        if self.xgb is None:
            return 2  # arbitrary
        known_classes = self.xgb.classes_.tolist()
        new_label = max(known_classes) + 1
        return new_label

    # ----------------------------
    # Incremental fine-tuning
    # ----------------------------
    def incremental_update(self, new_samples: T.List[dict]):
        """Update XGBoost, both autoencoders and the OCSVM with new samples.

        Args:
            new_samples: list of ``{'x_scaled': np.array, 'label': int}``;
                an empty list is a no-op.
        """
        if len(new_samples) == 0:
            print("[Incremental] no new samples.")
            return

        X_new = np.stack([s['x_scaled'] for s in new_samples])
        y_new = np.array([s['label'] for s in new_samples])

        # 1) Update XGBoost incrementally: use xgb_model param
        # NOTE(review): XGBClassifier needs the labels in y_new to be exactly
        # 0..k-1 and a warm start cannot change the number of classes, so a
        # batch that lacks a known class or adds a new one ends up in the
        # except branch and the old model is kept.  Learning new classes
        # would require refitting on retained old data plus the new samples.
        print("[Incremental] updating XGBoost...")
        try:
            model = xgb.XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric='logloss')
            # use previous model as warm start if exists
            if self.xgb is not None:
                model.fit(X_new, y_new, xgb_model=self.xgb.get_booster())
            else:
                model.fit(X_new, y_new)
            self.xgb = model
            print("[Incremental] XGBoost updated.")
        except Exception as e:
            print("[Incremental] XGBoost incremental failed:", e)
            # keep old model

        # 2) Fine-tune bAE and AE with replay and EWC
        print("[Incremental] fine-tuning bAE and AE with EWC + replay...")
        replay_items = list(self.replay_buffer)
        if len(replay_items) > 0:
            combined_X = np.vstack([np.stack(replay_items), X_new])
        else:
            combined_X = X_new
        ds = DataLoader(TensorDataset(torch.tensor(combined_X, dtype=torch.float32)),
                        batch_size=BATCH_SIZE, shuffle=True)

        self._finetune_with_ewc(self.bae, self.ewc_bae, ds, len(combined_X), "bAE")
        # re-estimate Fisher on the combined data (small sample) after fine-tune
        self.ewc_bae = EWC(self.bae, self._fisher_loader(combined_X), device=DEVICE)

        self._finetune_with_ewc(self.ae, self.ewc_ae, ds, len(combined_X), "AE")
        self.ewc_ae = EWC(self.ae, self._fisher_loader(combined_X), device=DEVICE)
        print("[Incremental] fine-tune complete.")

        # 3) Retrain OCSVM on a replay sample + new data.  Done after the bAE
        # fine-tune so the latent codes match the encoder used at detection.
        print("[Incremental] retraining OCSVM subset...")
        try:
            n_replay = len(self.replay_buffer)
            sample_old = [self.replay_buffer[random.randrange(n_replay)]
                          for _ in range(min(200, n_replay))]
            # Replay entries are stored already scaled (see buffer_unknown),
            # so they must not go through the scaler a second time.
            if len(sample_old) > 0:
                X_fit = np.vstack([np.stack(sample_old), X_new])
            else:
                X_fit = X_new
            self.ocsvm = OneClassSVM(gamma='scale', nu=0.05).fit(self._encode(X_fit))
            print("[Incremental] OCSVM retrained.")
        except Exception as e:
            print("[Incremental] OCSVM retrain failed:", e)

    # ----------------------------
    # Convenience: run periodic clustering loop (callable from external sched)
    # ----------------------------
    def periodic_clustering_and_update(self):
        """Cluster the buffered unknowns and, if any were labelled, update the models."""
        labeled = self.cluster_and_label()
        if len(labeled) > 0:
            self.incremental_update(labeled)

# ----------------------------
# Demo / Example usage
# ----------------------------
def load_demo_data(n_samples=5000, n_features=30, attack_ratio=0.05):
    """Create a shuffled synthetic data set of benign and attack samples.

    Benign rows are drawn from N(0, 1) and attack rows from N(3, 1.5).

    Args:
        n_samples: total number of rows to generate.
        n_features: number of feature columns.
        attack_ratio: fraction of rows that are attacks, in [0, 1].

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_samples, n_features)`` and
        ``y`` has shape ``(n_samples,)`` with 0.0 for benign, 1.0 for attack.

    Raises:
        ValueError: if *attack_ratio* is outside [0, 1].
    """
    if not 0.0 <= attack_ratio <= 1.0:
        raise ValueError("attack_ratio must be between 0 and 1")
    n_samples = int(n_samples)
    # Derive the benign count from the attack count so both always add up to
    # n_samples; truncating two float products independently can lose rows
    # (e.g. int(100 * 0.29) == 28).
    n_attack = int(round(n_samples * attack_ratio))
    n_benign = n_samples - n_attack
    X_benign = np.random.normal(0, 1, size=(n_benign, n_features))
    X_att = np.random.normal(3, 1.5, size=(n_attack, n_features))
    X = np.vstack([X_benign, X_att])
    y = np.hstack([np.zeros(n_benign), np.ones(n_attack)])
    # shuffle
    perm = np.random.permutation(n_samples)
    return X[perm], y[perm]

def main_demo():
    """Run the pipeline end to end on synthetic data.

    Roughly half of the offline labels are hidden (set to NaN), the pipeline
    is trained, and a fresh 1000-sample stream is then pushed through
    ``detect_single`` with clustering + update after every 300th sample and
    once more at the end.
    """
    X, y = load_demo_data()
    # Hide about 50% of the labels so only a subset is available offline.
    hidden = np.random.rand(len(y)) < 0.5
    y_offline = np.where(hidden, np.nan, y)

    pipe = IncrementalPipeline(input_dim=X.shape[1])
    pipe.train_offline(X, y_offline)

    # simulate online stream
    stream_X, _ = load_demo_data(n_samples=1000, n_features=X.shape[1], attack_ratio=0.08)
    thresholds = {'recon': 0.5, 'xgb_prob': 0.7}
    for position, sample in enumerate(stream_X):
        pipe.detect_single(sample, unknown_thresholds=thresholds)
        if position % 200 == 0:
            print(f"[Stream] processed {position} samples, buffer size {len(pipe.unknown_buffer)}")
        # periodically run clustering every 300 samples
        if (position + 1) % 300 == 0:
            pipe.incremental_update(pipe.cluster_and_label())

    # final forced clustering
    pipe.incremental_update(pipe.cluster_and_label(min_samples_for_cluster=10))
    print("Demo complete.")


if __name__ == "__main__":
    main_demo()


Scaling data...
Training bottleneck AE (bAE)...
[AE] epoch 0/30 loss=0.840614
[AE] epoch 10/30 loss=0.346224
[AE] epoch 20/30 loss=0.331581
[AE] epoch 29/30 loss=0.323878
Training auxiliary AE (AE)...
[AE] epoch 0/30 loss=0.833965
[AE] epoch 10/30 loss=0.347178
[AE] epoch 20/30 loss=0.329390
[AE] epoch 29/30 loss=0.323730
Training OneClassSVM on latent features...
Training XGBoost on known labels...
Estimating Fisher for EWC (this may take time)...


Parameters: { "use_label_encoder" } are not used.



Offline training complete.
[Stream] processed 0 samples, buffer size 0
[Stream] processed 200 samples, buffer size 1
[Cluster] not enough unknowns (1 < 50)
[Incremental] no new samples.
[Stream] processed 400 samples, buffer size 2
[Cluster] not enough unknowns (2 < 50)
[Incremental] no new samples.
[Stream] processed 600 samples, buffer size 2
[Stream] processed 800 samples, buffer size 2
[Cluster] not enough unknowns (2 < 50)
[Incremental] no new samples.
[Cluster] not enough unknowns (2 < 10)
[Incremental] no new samples.
Demo complete.
