In [1]:
# incremental_ids_pipeline.py
# Full incremental IDS pipeline combining bAE -> OCSVM -> AE -> XGBoost
# with online detection -> buffer unknowns -> clustering -> simulated analyst labeling
# -> incremental fine-tune using Replay + EWC to mitigate catastrophic forgetting.
#
# Usage: python incremental_ids_pipeline.py
# Requirements: torch, scikit-learn, xgboost, numpy
# pip install torch scikit-learn xgboost numpy

import warnings
warnings.filterwarnings("ignore")
import random, copy
from collections import deque, defaultdict

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.svm import OneClassSVM
from sklearn.cluster import DBSCAN, MiniBatchKMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
import xgboost as xgb

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


class BottleneckAE(nn.Module):
    def __init__(self, input_dim=64, latent_dim=16, hidden=128):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(input_dim, hidden), nn.ReLU(), nn.Linear(hidden, latent_dim))
        self.decoder = nn.Sequential(nn.Linear(latent_dim, hidden), nn.ReLU(), nn.Linear(hidden, input_dim))
    def forward(self, x):
        z = self.encoder(x)
        recon = self.decoder(z)
        return recon, z


class DetectorAE(nn.Module):
    def __init__(self, input_dim=16, hidden=64):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(input_dim, hidden), nn.ReLU(),
                                 nn.Linear(hidden, int(hidden/2)), nn.ReLU(),
                                 nn.Linear(int(hidden/2), input_dim))
    def forward(self, x):
        return self.net(x)


class ReplayBuffer:
    def __init__(self, capacity=2000):
        self.capacity = capacity
        self.buffer = deque(maxlen=capacity)
    def add(self, x: np.ndarray, y: int):
        self.buffer.append((x.copy(), int(y)))
    def sample(self, k: int):
        k = min(k, len(self.buffer))
        if k == 0:
            return np.zeros((0,)), np.zeros((0,))
        batch = random.sample(self.buffer, k)
        xs = np.stack([b[0] for b in batch], axis=0)
        ys = np.array([b[1] for b in batch], dtype=np.int64)
        return xs, ys
    def __len__(self):
        return len(self.buffer)
    def all(self):
        xs = np.stack([b[0] for b in self.buffer], axis=0) if len(self.buffer)>0 else np.zeros((0,))
        ys = np.array([b[1] for b in self.buffer], dtype=np.int64) if len(self.buffer)>0 else np.zeros((0,))
        return xs, ys


class EWC:
    def __init__(self, model: nn.Module, data: np.ndarray, device='cpu'):
        self.device = device
        self.model = copy.deepcopy(model).to(self.device)
        self.params = {n: p.clone().detach() for n, p in self.model.named_parameters() if p.requires_grad}
        self.fisher = {n: torch.zeros_like(p, device=self.device) for n, p in self.model.named_parameters() if p.requires_grad}
        self._estimate_fisher(data)

    def _estimate_fisher(self, data: np.ndarray, samples=200):
        if data.size == 0:
            return
        idxs = np.random.choice(len(data), size=min(len(data), samples), replace=False)
        self.model.train()
        for i in idxs:
            x = torch.from_numpy(data[i]).float().to(self.device).unsqueeze(0)
            self.model.zero_grad()
            out = self.model(x)
            if isinstance(out, tuple):
                recon = out[0]
            else:
                recon = out
            loss = F.mse_loss(recon, x)
            loss.backward()
            for n, p in self.model.named_parameters():
                if p.grad is not None:
                    self.fisher[n] += (p.grad.detach() ** 2)
            self.model.zero_grad()
        for n in self.fisher:
            self.fisher[n] = self.fisher[n] / float(len(idxs))

    def penalty(self, model: nn.Module):
        loss = 0.0
        for n, p in model.named_parameters():
            if n in self.fisher:
                _loss = self.fisher[n] * (p - self.params[n]).pow(2)
                loss += _loss.sum()
        return loss


class OODDetectorOCSVM:
    def __init__(self, ocsvm_model: OneClassSVM, threshold=None):
        self.ocsvm = ocsvm_model
        self.threshold = threshold
    def fit(self, Z: np.ndarray):
        self.ocsvm.fit(Z)
    def predict(self, z: np.ndarray):
        single = False
        if z.ndim == 1:
            z = z[np.newaxis, :]
            single = True
        pred = self.ocsvm.predict(z)
        if single:
            return int(pred[0])
        return pred


def cluster_unknowns(X: np.ndarray, method='dbscan', **kwargs):
    if len(X) == 0:
        return np.array([])
    Xs = StandardScaler().fit_transform(X)
    if method == 'dbscan':
        eps = kwargs.get('eps', 0.6)
        min_samples = kwargs.get('min_samples', 5)
        cl = DBSCAN(eps=eps, min_samples=min_samples).fit(Xs)
        return cl.labels_
    elif method == 'mbk':
        k = kwargs.get('k', 3)
        mbk = MiniBatchKMeans(n_clusters=k, batch_size=64, random_state=SEED).fit(Xs)
        return mbk.labels_
    else:
        return np.zeros(len(X), dtype=int)


def make_synthetic_classes(input_dim=64, base_classes=3, samples_per_class=150):
    rng = np.random.RandomState(SEED)
    Xs, Ys = [], []
    for c in range(base_classes):
        mu = rng.randn(input_dim) * (1 + c*0.5)
        cov = np.eye(input_dim) * 0.12
        xs = rng.multivariate_normal(mu, cov, size=samples_per_class)
        ys = np.full((samples_per_class,), c, dtype=int)
        Xs.append(xs); Ys.append(ys)
    X = np.concatenate(Xs, axis=0)
    Y = np.concatenate(Ys, axis=0)
    perm = rng.permutation(len(X))
    return X[perm], Y[perm]


def make_zero_day(input_dim=64, shift=5.0, size=200):
    rng = np.random.RandomState(999)
    mu = rng.randn(input_dim) * 1.5 + shift
    xs = rng.multivariate_normal(mu, np.eye(input_dim)*0.15, size=size)
    ys = np.full((size,), 999, dtype=int)
    return xs, ys


def train_bAE(model: BottleneckAE, X: np.ndarray, epochs=10, batch_size=128, lr=1e-3, device='cpu', replay=None):
    model.to(device); model.train()
    opt = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    Xtensor = torch.from_numpy(X).float().to(device)
    dataset = torch.utils.data.TensorDataset(Xtensor)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for epoch in range(epochs):
        for (xb,) in loader:
            opt.zero_grad()
            recon, _ = model(xb)
            loss = criterion(recon, xb)
            loss.backward()
            opt.step()
        if replay is not None and len(replay) > 0:
            rx, ry = replay.sample(min(256, len(replay)))
            if len(rx) > 0:
                rxt = torch.from_numpy(rx).float().to(device)
                for _ in range(2):
                    opt.zero_grad()
                    recon_r, _ = model(rxt)
                    loss_r = criterion(recon_r, rxt)
                    loss_r.backward()
                    opt.step()
    model.to('cpu')
    return model


def train_detector_ae(model: DetectorAE, Z_normal: np.ndarray, epochs=20, batch_size=64, lr=1e-3, device='cpu'):
    model.to(device); model.train()
    opt = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    Xt = torch.from_numpy(Z_normal).float().to(device)
    loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(Xt), batch_size=batch_size, shuffle=True)
    for epoch in range(epochs):
        for (xb,) in loader:
            opt.zero_grad()
            out = model(xb)
            loss = criterion(out, xb)
            loss.backward()
            opt.step()
    model.to('cpu')
    return model


def run_full_demo(verbose=True):
    device = 'cpu'
    input_dim = 64
    latent_dim = 16
    base_classes = 3

    X_base, Y_base = make_synthetic_classes(input_dim=input_dim, base_classes=base_classes, samples_per_class=100)
    X_zd, Y_zd = make_zero_day(input_dim=input_dim, shift=6.0, size=220)
    X_all = np.concatenate([X_base, X_zd], axis=0)
    Y_all = np.concatenate([Y_base, Y_zd], axis=0)

    mask_base = Y_all != 999
    X_base_only = X_all[mask_base]
    Y_base_only = Y_all[mask_base]
    X_train_sup, X_test_sup, y_train_sup, y_test_sup = train_test_split(X_base_only, Y_base_only, test_size=0.2, random_state=SEED)

    X_zd_hold = X_zd[:80]; Y_zd_hold = Y_zd[:80]
    X_test_eval = np.concatenate([X_test_sup, X_zd_hold], axis=0)
    y_test_eval = np.concatenate([y_test_sup, Y_zd_hold], axis=0)

    bAE = BottleneckAE(input_dim=input_dim, latent_dim=latent_dim, hidden=128)
    detector_ae = DetectorAE(input_dim=latent_dim, hidden=64)

    print("[*] Training bottleneck AE on base data...")
    train_bAE(bAE, X_train_sup, epochs=2, batch_size=256, lr=1e-3)
    print("[*] Extracting latent features for base training...")
    bAE.eval()
    with torch.no_grad():
        Z_train = bAE.encoder(torch.from_numpy(X_train_sup).float()).numpy()
        Z_test = bAE.encoder(torch.from_numpy(X_test_sup).float()).numpy()

    ocsvm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.03)
    ood = OODDetectorOCSVM(ocsvm)
    ood.fit(Z_train)

    detector_ae = train_detector_ae(detector_ae, Z_train, epochs=4, batch_size=128, lr=1e-3)

    Z_train_all = bAE.encoder(torch.from_numpy(X_train_sup).float()).detach().numpy()
    xgb_clf = xgb.XGBClassifier(n_estimators=200, learning_rate=0.1, use_label_encoder=False, eval_metric='logloss')
    xgb_clf.fit(Z_train_all, y_train_sup)

    replay = ReplayBuffer(capacity=2000)
    for i in range(0, min(1000, len(X_train_sup))):
        replay.add(X_train_sup[i], int(y_train_sup[i]))

    rng = np.random.RandomState(SEED+1)
    idxs = rng.permutation(len(X_all))
    stream_X = X_all[idxs]
    stream_Y = Y_all[idxs]

    unknown_buffer = []
    buffer_limit = 20 #90
    cluster_method = 'dbscan'
    cluster_eps = 0.7
    cluster_min_samples = 6

    pa_preds = []
    pa_true = []
    detection_flags = []
    stream_count = 0

    periodic_every = 40  #200
    lam_ewc = 200.0

    ewc_bae = None
    ewc_detector = None

    print("[*] Starting stream...")
    for i, (x_raw, y_true) in enumerate(zip(stream_X, stream_Y)):
        stream_count += 1
        with torch.no_grad():
            z = bAE.encoder(torch.from_numpy(x_raw).float().unsqueeze(0)).numpy()[0]
        oc_pred = ood.predict(z)
        recon = detector_ae(torch.from_numpy(z).float().unsqueeze(0)).detach().numpy()[0]
        recon_err = float(np.mean((recon - z)**2))
        is_unknown = (oc_pred == -1) or (recon_err > np.percentile(np.mean(bAE.encoder(torch.from_numpy(X_train_sup).float()).detach().numpy(), axis=1), 95) + 0.0)
        if is_unknown:
            unknown_buffer.append(x_raw.copy())
            detection_flags.append(1)
            final_label = -1
        else:
            pred = xgb_clf.predict(z.reshape(1, -1))[0]
            final_label = int(pred)
            detection_flags.append(0)
        pa_preds.append(final_label)
        pa_true.append(int(y_true) if y_true != 999 else -2)

        if (len(unknown_buffer) >= buffer_limit) or (stream_count % periodic_every == 0 and len(unknown_buffer) > 20):
            print(f"[+] Handling unknown buffer of size {len(unknown_buffer)} at stream idx {i}")
            Xbuf = np.stack(unknown_buffer, axis=0)
            with torch.no_grad():
                Zbuf = bAE.encoder(torch.from_numpy(Xbuf).float()).numpy()
            labels = cluster_unknowns(Zbuf, method=cluster_method, eps=cluster_eps, min_samples=cluster_min_samples)
            cluster_member_idxs = defaultdict(list)
            for idxc, cl in enumerate(labels):
                if cl != -1:
                    cluster_member_idxs[cl].append(idxc)
            reps = []
            for cl, idxs_list in cluster_member_idxs.items():
                centroid = Zbuf[idxs_list].mean(axis=0)
                reps.append(centroid)
            rep_labels = []
            for rep in reps:
                dists = np.linalg.norm(stream_X - rep, axis=1)
                nearest_idx = int(np.argmin(dists))
                rep_labels.append(int(stream_Y[nearest_idx]) if stream_Y[nearest_idx] != 999 else 999)
            print(f"   -> clusters found: {list(cluster_member_idxs.keys())}; rep_labels: {rep_labels}")
            new_labeled = []
            for cl_idx, lab in zip(cluster_member_idxs.keys(), rep_labels):
                member_idxs = cluster_member_idxs[cl_idx]
                member_raw = Xbuf[member_idxs]
                if lab == 999:
                    new_label_id = max([int(v) for v in y_train_sup] + [0]) + 1 + random.randint(0,0)
                    lab_assigned = new_label_id
                else:
                    lab_assigned = lab
                for s in member_raw[:40]:
                    new_labeled.append((s, lab_assigned))
            if len(new_labeled) == 0:
                unknown_buffer = unknown_buffer[-int(buffer_limit/4):]
                continue
            new_X = np.stack([t[0] for t in new_labeled], axis=0)
            new_y = np.array([t[1] for t in new_labeled], dtype=int)
            for xx, yy in zip(new_X, new_y):
                replay.add(xx, int(yy))
            rx, ry = replay.sample(min(600, len(replay)))
            if len(rx) == 0:
                rx = X_train_sup[:200]; ry = y_train_sup[:200]
            with torch.no_grad():
                Z_replay = bAE.encoder(torch.from_numpy(rx).float()).numpy()
            print("   -> Building EWC from replay samples...")
            ewc_bae = EWC(bAE, rx, device=device)
            ewc_detector = EWC(detector_ae, Z_replay, device=device)
            combined_X = np.vstack([rx, new_X])
            print(f"   -> Fine-tuning bAE on {len(combined_X)} samples (replay+new) ...")
            bAE.to(device); bAE.train()
            opt = optim.Adam(bAE.parameters(), lr=5e-4)
            criterion = nn.MSELoss()
            Xct = torch.from_numpy(combined_X).float().to(device)
            loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(Xct), batch_size=128, shuffle=True)
            for epoch in range(6):
                for (xb,) in loader:
                    opt.zero_grad()
                    recon, _ = bAE(xb)
                    loss = criterion(recon, xb)
                    if ewc_bae is not None:
                        loss = loss + (lam_ewc * 1e-3) * ewc_bae.penalty(bAE)
                    loss.backward(); opt.step()
            bAE.to('cpu')
            with torch.no_grad():
                Z_new = bAE.encoder(torch.from_numpy(new_X).float()).numpy()
                Z_replay = bAE.encoder(torch.from_numpy(rx).float()).numpy()
            Z_comb = np.vstack([Z_replay, Z_new])
            detector_ae = train_detector_ae(detector_ae, Z_comb, epochs=8, batch_size=64, lr=1e-3)
            with torch.no_grad():
                Z_for_oc = bAE.encoder(torch.from_numpy(rx).float()).numpy()
            try:
                ood.fit(Z_for_oc)
            except Exception as e:
                print("   -> OCSVM retrain failed", e)
            rx_sup, ry_sup = replay.sample(min(800, len(replay)))
            with torch.no_grad():
                Z_sup = bAE.encoder(torch.from_numpy(rx_sup).float()).numpy()
            try:
                xgb_clf = xgb.XGBClassifier(n_estimators=200, learning_rate=0.05, use_label_encoder=False, eval_metric='logloss')
                xgb_clf.fit(Z_sup, ry_sup)
            except Exception as e:
                print("   -> XGBoost incremental update failed:", e)
            unknown_buffer = []
            print("   -> Incremental update done. Continue streaming...")

    with torch.no_grad():
        Z_eval = bAE.encoder(torch.from_numpy(X_test_eval).float()).numpy()
    preds = []
    for z in Z_eval:
        ocp = ood.predict(z)
        recon = detector_ae(torch.from_numpy(z).float().unsqueeze(0)).detach().numpy()[0]
        recon_err = float(np.mean((recon - z)**2))
        threshold_recon = np.percentile(np.mean(bAE.encoder(torch.from_numpy(X_train_sup).float()).detach().numpy(), axis=1), 95)
        if (ocp == -1) or (recon_err > threshold_recon):
            preds.append(-1)
        else:
            p = xgb_clf.predict(z.reshape(1, -1))[0]
            preds.append(int(p))
    gt = np.array([int(v) if v!=999 else -1 for v in y_test_eval])
    mask_known = gt != -1
    known_acc = accuracy_score(gt[mask_known], np.array(preds)[mask_known]) if mask_known.sum()>0 else 0.0
    mask_zd = gt == -1
    zd_detected = np.array(preds)[mask_zd] == -1 if mask_zd.sum()>0 else np.array([])
    zd_recall = float(np.mean(zd_detected)) if mask_zd.sum()>0 else 0.0
    print("\\n=== Demo Summary ===")
    print(f"Streamed samples processed: {len(stream_X)}")
    print(f"Final supervised-known accuracy on held-out known samples: {known_acc:.4f}")
    print(f"Zero-day detection recall on holdout: {zd_recall:.4f}")
    if mask_known.sum()>0:
        print("Known-classes F1 (macro):", f1_score(gt[mask_known], np.array(preds)[mask_known], average='macro'))
    return {"known_acc": known_acc, "zd_recall": zd_recall, "preds": preds, "gt": gt}


if __name__ == '__main__':
    run_full_demo(verbose=True)


[*] Training bottleneck AE on base data...
[*] Extracting latent features for base training...
[*] Starting stream...
[+] Handling unknown buffer of size 20 at stream idx 19
   -> clusters found: []; rep_labels: []
[+] Handling unknown buffer of size 20 at stream idx 34
   -> clusters found: []; rep_labels: []
[+] Handling unknown buffer of size 20 at stream idx 49
   -> clusters found: []; rep_labels: []
[+] Handling unknown buffer of size 20 at stream idx 64
   -> clusters found: []; rep_labels: []
[+] Handling unknown buffer of size 20 at stream idx 79
   -> clusters found: []; rep_labels: []
[+] Handling unknown buffer of size 20 at stream idx 94
   -> clusters found: []; rep_labels: []
[+] Handling unknown buffer of size 20 at stream idx 109
   -> clusters found: []; rep_labels: []
[+] Handling unknown buffer of size 20 at stream idx 124
   -> clusters found: []; rep_labels: []
[+] Handling unknown buffer of size 20 at stream idx 139
   -> clusters found: []; rep_labels: []
[+] Ha