In [None]:
import os, sys, csv, random
from pathlib import Path

import numpy as np, torch, torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import TensorDataset, DataLoader
from tqdm.auto import tqdm

ROOT = os.getcwd()
# PKG_DIR = "/home/cis6022/StarLiteGAN/packages"
# if PKG_DIR not in sys.path: sys.path.insert(0, PKG_DIR)
DATA_DIR = Path(ROOT) / "data"; DATA_DIR.mkdir(parents=True, exist_ok=True)

seed = 1337
random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu"); device_type = "cuda" if use_cuda else "cpu"
if use_cuda: torch.cuda.manual_seed_all(seed); torch.backends.cudnn.benchmark = True

print("ROOT:", ROOT); print("device:", device); # print("PKG_DIR:", PKG_DIR); print("DATA_DIR:", str(DATA_DIR))

ROOT: /home/cis6022/Adam/varMax
PKG_DIR: /home/cis6022/StarLiteGAN/packages
DATA_DIR: /home/cis6022/Adam/varMax/data
device: cuda


  from .autonotebook import tqdm as notebook_tqdm


In [51]:
# --- DATA LOAD + PREP (mix benign 50/50 main vs live across ALL splits) ---

MAIN_CSV="CICIDS-2017_preprocessed.csv"
LIVE_CSV="CICIDS_live_capture_preprocessed2.csv"

def _read_preprocessed(name, feature_names=None):
    p=(DATA_DIR/name) if (DATA_DIR/name).exists() else (Path(ROOT)/name)
    print("Loading:", p)
    df=pd.read_csv(p)
    if "label" in df.columns and "Label" not in df.columns: df=df.rename(columns={"label":"Label"})
    if "Label" not in df.columns: raise SystemExit(f"{name} missing Label column. Columns: {list(df.columns)[:20]} ...")

    df=df.replace([np.inf,-np.inf],np.nan).dropna()

    # drop CICIDS meta (not features)
    for c in ("Flow ID","Source IP","Source Port","Destination IP","Destination Port","Timestamp"):
        if c in df.columns: df=df.drop(columns=c)

    labels=df["Label"].astype(str).str.strip()
    y=(labels.str.upper()!="BENIGN").astype(np.int64).to_numpy()  # 0=benign, 1=malicious

    X_df=df.drop(columns=["Label"]).select_dtypes(include=[np.number])

    # enforce exact same columns/order as training feature_names
    if feature_names is not None:
        for c in feature_names:
            if c not in X_df.columns: X_df[c]=0.0
        X_df=X_df[feature_names]

    return X_df, y

# ---- load main first (defines feature_names/order) ----
X_main_df, y_main = _read_preprocessed(MAIN_CSV, feature_names=None)
feature_names = list(X_main_df.columns)
num_features  = int(len(feature_names))
X_main = X_main_df.to_numpy(dtype=np.float32)

# ---- load live (forced to same feature_names/order) ----
X_live_df, y_live = _read_preprocessed(LIVE_CSV, feature_names=feature_names)
X_live = X_live_df.to_numpy(dtype=np.float32)

# ---- desired class counts come from MAIN (keeps your original class mix) ----
seed = int(seed) if "seed" in globals() else 1337
rng  = np.random.default_rng(seed)

idx0_main = np.where(y_main==0)[0]
idx1_main = np.where(y_main==1)[0]
idx0_live = np.where(y_live==0)[0]

max_per_class = base_cfg.get("max_per_class",None) if "base_cfg" in globals() else None
B_des = len(idx0_main)
M_des = len(idx1_main)
if max_per_class is not None:
    max_per_class=int(max_per_class)
    B_des=min(B_des, max_per_class)
    M_des=min(M_des, max_per_class)

# split sizes (70/20/10), per-class so ratios stay consistent
B_tr=int(B_des*0.70); B_va=int(B_des*0.20); B_te=B_des-B_tr-B_va
M_tr=int(M_des*0.70); M_va=int(M_des*0.20); M_te=M_des-M_tr-M_va

# "50/50 benign" per split (exact when even; closest possible when odd)
Btr_live=B_tr//2; Bva_live=B_va//2; Bte_live=B_te//2
Btr_main=B_tr-Btr_live; Bva_main=B_va-Bva_live; Bte_main=B_te-Bte_live
B_live_need=Btr_live+Bva_live+Bte_live
B_main_need=Btr_main+Bva_main+Bte_main

# shuffle pools
idx0_main = rng.permutation(idx0_main)[:B_main_need]
idx1_main = rng.permutation(idx1_main)[:M_des]
idx0_live_sh = rng.permutation(idx0_live)

# if live benign is short, oversample WITH replacement to satisfy the 50/50 requirement
if len(idx0_live_sh) >= B_live_need:
    idx0_live_use = idx0_live_sh[:B_live_need]
else:
    if len(idx0_live_sh)==0:
        raise SystemExit("Live CSV has 0 BENIGN rows; cannot make 50% benign-from-live splits.")
    need=B_live_need-len(idx0_live_sh)
    idx0_live_use = np.concatenate([idx0_live_sh, rng.choice(idx0_live_sh, size=need, replace=True)])
    print(f"[warn] live benign short: have {len(idx0_live_sh)} need {B_live_need} -> oversampling {need} rows")

# slice into train/val/test
p=0
btr_live=idx0_live_use[p:p+Btr_live]; p+=Btr_live
bva_live=idx0_live_use[p:p+Bva_live]; p+=Bva_live
bte_live=idx0_live_use[p:p+Bte_live]; p+=Bte_live

q=0
btr_main=idx0_main[q:q+Btr_main]; q+=Btr_main
bva_main=idx0_main[q:q+Bva_main]; q+=Bva_main
bte_main=idx0_main[q:q+Bte_main]; q+=Bte_main

r=0
mtr=idx1_main[r:r+M_tr]; r+=M_tr
mva=idx1_main[r:r+M_va]; r+=M_va
mte=idx1_main[r:r+M_te]; r+=M_te

# build splits (and shuffle within each split)
def _mk_split(b_main_idx, b_live_idx, m_idx):
    Xb_main = X_main[b_main_idx]
    Xb_live = X_live[b_live_idx]
    Xm      = X_main[m_idx]
    Xs = np.concatenate([Xb_main, Xb_live, Xm], axis=0).astype(np.float32, copy=False)
    ys = np.concatenate([np.zeros(len(Xb_main)+len(Xb_live),dtype=np.int64),
                         np.ones(len(Xm),dtype=np.int64)], axis=0)
    perm = rng.permutation(len(ys))
    return Xs[perm], ys[perm]

X_tr, y_tr = _mk_split(btr_main, btr_live, mtr)
X_va, y_va = _mk_split(bva_main, bva_live, mva)
X_te, y_te = _mk_split(bte_main, bte_live, mte)

# ---- normalize (fit on train only) ----
scaler=StandardScaler()
X_tr=scaler.fit_transform(X_tr).astype(np.float32)
X_va=scaler.transform(X_va).astype(np.float32)
X_te=scaler.transform(X_te).astype(np.float32)

scaler_mean=scaler.mean_.astype(np.float32).tolist()
scaler_scale=scaler.scale_.astype(np.float32).tolist()

# ---- torch datasets (Conv1d: (B,1,F)) ----
X_tr=torch.from_numpy(X_tr).unsqueeze(1); y_tr=torch.from_numpy(y_tr).float()
X_va=torch.from_numpy(X_va).unsqueeze(1); y_va=torch.from_numpy(y_va).float()
X_te=torch.from_numpy(X_te).unsqueeze(1); y_te=torch.from_numpy(y_te).float()

train_ds=TensorDataset(X_tr,y_tr); val_ds=TensorDataset(X_va,y_va); test_ds=TensorDataset(X_te,y_te)

BATCH_SIZE=int(base_cfg.get("batch_size",1000)) if "base_cfg" in globals() else 1000
NUM_WORKERS=int(base_cfg.get("num_workers",8)) if "base_cfg" in globals() else 8
mp_ctx=torch.multiprocessing.get_context("spawn")

train_dl=DataLoader(train_ds,batch_size=BATCH_SIZE,shuffle=True ,num_workers=NUM_WORKERS,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx)
val_dl  =DataLoader(val_ds  ,batch_size=BATCH_SIZE,shuffle=False,num_workers=NUM_WORKERS,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx)
test_dl =DataLoader(test_ds ,batch_size=BATCH_SIZE,shuffle=False,num_workers=NUM_WORKERS,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx)

pos_weight_value=float((y_tr.numel()-y_tr.sum()).clamp_min(1)/y_tr.sum().clamp_min(1))  # n_neg/n_pos

print("X shape:",tuple(X_tr.shape),"num_features:",num_features)
print("splits:",len(train_ds),len(val_ds),len(test_ds))
print("benign/mal (MAIN original):",int((y_main==0).sum()),int((y_main==1).sum()))
print("benign live available:",int((y_live==0).sum()),"| live used total:",B_live_need)
print("benign live per split train/val/test:",Btr_live,Bva_live,Bte_live)
print("pos rate train/val/test:",float(y_tr.mean()),float(y_va.mean()),float(y_te.mean()))
print("pos_weight_value (n_neg/n_pos):",pos_weight_value)
print("feature_names:",len(feature_names),"| scaler_mean/scale:",len(scaler_mean),len(scaler_scale))

Loading: /home/cis6022/Adam/varMax/data/CICIDS-2017_preprocessed.csv
Loading: /home/cis6022/Adam/varMax/data/CICIDS_live_capture_preprocessed2.csv
[warn] live benign short: have 13970 need 181053 -> oversampling 167083 rows
X shape: (987177, 1, 1504) num_features: 1504
splits: 987177 282050 141028
benign/mal (MAIN original): 362108 1048147
benign live available: 13970 | live used total: 181053
benign live per split train/val/test: 126737 36210 18106
pos rate train/val/test: 0.7432324886322021 0.7432334423065186 0.7432283163070679
pos_weight_value (n_neg/n_pos): 0.3454740345478058
feature_names: 1504 | scaler_mean/scale: 1504 1504


In [52]:
import os, csv, json, math, time
from pathlib import Path
import numpy as np, pandas as pd
import torch, torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import (confusion_matrix, roc_auc_score, roc_curve, precision_recall_curve, auc, f1_score, accuracy_score, balanced_accuracy_score)

# --------------------------
# dirs / config I/O
# --------------------------
def prepare_run_dirs(results_root):
    root=Path(results_root); root.mkdir(parents=True,exist_ok=True)
    ids=[]
    for p in root.iterdir():
        if p.is_dir() and p.name.startswith("I"):
            try: ids.append(int(p.name.split("_")[0][1:]))
            except: pass
    k=1
    while k in ids: k+=1
    run_dir=root/f"I{k}"
    (run_dir/"models").mkdir(parents=True,exist_ok=True); (run_dir/"final_metrics").mkdir(parents=True,exist_ok=True)
    return run_dir

def make_grid_dir_name(run_dir,grid_idx,param_grid):
    run_dir=Path(run_dir); kv="__".join(f"{k}="+"|".join(str(v) for v in vs) for k,vs in param_grid.items())
    grid_dir=run_dir/f"g{grid_idx}__{kv}"
    grid_dir.mkdir(parents=True,exist_ok=True)
    return grid_dir

def write_config_csv(exp_dir,cfg,epochs,extras=None):
    exp_dir=Path(exp_dir); extras={} if extras is None else dict(extras)
    row=dict(cfg); row.update({"epochs":int(epochs)}); row.update(extras)
    path=exp_dir/"config.csv"
    with path.open("w",newline="") as f:
        w=csv.DictWriter(f,fieldnames=list(row.keys())); w.writeheader(); w.writerow(row)

def _save_csv(path,rows):
    path=Path(path); path.parent.mkdir(parents=True,exist_ok=True)
    if not rows: return
    cols=list(rows[0].keys())
    with path.open("w",newline="") as f:
        w=csv.DictWriter(f,fieldnames=cols); w.writeheader(); [w.writerow(r) for r in rows]

def _df_print(rows,sort_by=None,head=None,title=None,round_=4):
    if not rows: print("(no rows)"); return
    df=pd.DataFrame(rows)
    if sort_by is not None and sort_by in df.columns: df=df.sort_values(sort_by,ascending=False)
    if head is not None: df=df.head(int(head))
    if round_ is not None:
        for c in df.columns:
            if pd.api.types.is_float_dtype(df[c]): df[c]=df[c].round(round_)
    if title: print(f"\n{title}")
    print(df.to_string(index=False))

# --------------------------
# data cache + loader rebuild (supports grid over batch_size/max_per_class)
# also caches feature order + scaler stats for live inference export
# --------------------------
_DATA_CACHE=None

def init_data_cache_from_existing(train_ds=None,val_ds=None,test_ds=None,feature_names=None,scaler=None,scaler_mean=None,scaler_scale=None):
    global _DATA_CACHE
    if _DATA_CACHE is not None: return _DATA_CACHE
    if train_ds is None: train_ds=globals().get("train_ds",None)
    if val_ds   is None: val_ds  =globals().get("val_ds"  ,None)
    if test_ds  is None: test_ds =globals().get("test_ds" ,None)
    if train_ds is None or val_ds is None or test_ds is None: return None

    X=torch.cat([train_ds.tensors[0],val_ds.tensors[0],test_ds.tensors[0]],0).cpu().numpy().astype(np.float32)  # (N,1,F)
    y0=torch.cat([train_ds.tensors[1],val_ds.tensors[1],test_ds.tensors[1]],0).cpu().numpy()
    y_int=((y0>0.5).astype(np.int64) if y0.dtype!=np.int64 else y0.astype(np.int64))
    y_f=y_int.astype(np.float32)

    n_tr,n_va,n_te=len(train_ds),len(val_ds),len(test_ds)

    if feature_names is None: feature_names=globals().get("feature_names",None)
    if scaler is None: scaler=globals().get("scaler",None)
    if scaler_mean is None: scaler_mean=globals().get("scaler_mean",None)
    if scaler_scale is None: scaler_scale=globals().get("scaler_scale",None)
    if scaler is not None:
        if scaler_mean is None and hasattr(scaler,"mean_"):  scaler_mean=scaler.mean_
        if scaler_scale is None and hasattr(scaler,"scale_"): scaler_scale=scaler.scale_

    _DATA_CACHE={
        "X":X, "y_int":y_int, "y_f":y_f,
        "n_tr":n_tr, "n_va":n_va, "n_te":n_te,
        "num_features":int(X.shape[-1]),
        "feature_names":(list(feature_names) if feature_names is not None else None),
        "scaler_mean":(np.asarray(scaler_mean,dtype=np.float32).tolist() if scaler_mean is not None else None),
        "scaler_scale":(np.asarray(scaler_scale,dtype=np.float32).tolist() if scaler_scale is not None else None),
    }
    return _DATA_CACHE

def get_ckpt_preproc_fields():
    c=init_data_cache_from_existing()
    if c is None: return {}
    out={}
    if c.get("feature_names") is not None: out["feature_names"]=c["feature_names"]
    if c.get("scaler_mean")  is not None: out["scaler_mean"]=c["scaler_mean"]
    if c.get("scaler_scale") is not None: out["scaler_scale"]=c["scaler_scale"]
    return out

def make_loaders(cfg,seed=1337,use_cuda=None):
    cache=init_data_cache_from_existing()
    bs=int(cfg.get("batch_size",1000)); nw=int(cfg.get("num_workers",8))
    use_cuda=torch.cuda.is_available() if use_cuda is None else bool(use_cuda)
    mp_ctx=torch.multiprocessing.get_context("spawn")

    # fallback: just rewrap existing datasets with new bs/nw
    if cache is None:
        td,vd,sd=globals().get("train_ds"),globals().get("val_ds"),globals().get("test_ds")
        loss=str(cfg.get("loss","ce")).lower()
        ysum=td.tensors[1].float().sum()
        pos_weight=float((td.tensors[1].numel()-ysum).clamp_min(1)/ysum.clamp_min(1))
        return (DataLoader(td,batch_size=bs,shuffle=True ,num_workers=nw,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx),
                DataLoader(vd,batch_size=bs,shuffle=False,num_workers=nw,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx),
                DataLoader(sd,batch_size=bs,shuffle=False,num_workers=nw,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx),
                int(td.tensors[0].shape[-1]), pos_weight)

    X=cache["X"]; y_int=cache["y_int"]; y_f=cache["y_f"]
    n_tr,n_va,n_te=cache["n_tr"],cache["n_va"],cache["n_te"]
    X_tr,X_va,X_te=X[:n_tr],X[n_tr:n_tr+n_va],X[n_tr+n_va:n_tr+n_va+n_te]
    ytr_i,yva_i,yte_i=y_int[:n_tr],y_int[n_tr:n_tr+n_va],y_int[n_tr+n_va:n_tr+n_va+n_te]
    ytr_f,yva_f,yte_f=y_f[:n_tr],y_f[n_tr:n_tr+n_va],y_f[n_tr+n_va:n_tr+n_va+n_te]

    # optional: cap training set per class (deterministic)
    mpc=cfg.get("max_per_class",None)
    if mpc is not None:
        rng=np.random.default_rng(int(cfg.get("seed",seed)))
        mpc=int(mpc)
        idx0=np.where(ytr_i==0)[0]; idx1=np.where(ytr_i==1)[0]
        if len(idx0)>mpc: idx0=rng.choice(idx0,size=mpc,replace=False)
        if len(idx1)>mpc: idx1=rng.choice(idx1,size=mpc,replace=False)
        idx=np.concatenate([idx0,idx1]); rng.shuffle(idx)
        X_tr,ytr_i,ytr_f=X_tr[idx],ytr_i[idx],ytr_f[idx]

    loss=str(cfg.get("loss","ce")).lower()
    y_tr=torch.from_numpy(ytr_i) if loss=="ce" else torch.from_numpy(ytr_f)
    y_va=torch.from_numpy(yva_i) if loss=="ce" else torch.from_numpy(yva_f)
    y_te=torch.from_numpy(yte_i) if loss=="ce" else torch.from_numpy(yte_f)
    X_tr=torch.from_numpy(X_tr); X_va=torch.from_numpy(X_va); X_te=torch.from_numpy(X_te)

    train_ds=TensorDataset(X_tr,y_tr); val_ds=TensorDataset(X_va,y_va); test_ds=TensorDataset(X_te,y_te)
    train_dl=DataLoader(train_ds,batch_size=bs,shuffle=True ,num_workers=nw,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx)
    val_dl  =DataLoader(val_ds  ,batch_size=bs,shuffle=False,num_workers=nw,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx)
    test_dl =DataLoader(test_ds ,batch_size=bs,shuffle=False,num_workers=nw,pin_memory=use_cuda,persistent_workers=True,multiprocessing_context=mp_ctx)

    pos_weight=float((len(y_tr)-float(y_tr.float().sum()))/max(float(y_tr.float().sum()),1.0))
    return train_dl,val_dl,test_dl,int(cache["num_features"]),pos_weight


In [53]:
base_cfg = {
    # data / loader
    "batch_size": 1000,
    "num_workers": 8,
    "max_per_class": None,

    # model (3 conv blocks + 2 FC)
    "conv_channels": (32, 64, 128),
    "kernel_sizes": (5, 5, 3),
    "pools": (2, 2, 2),
    "fc_hidden": (256, 64),
    "conv_dropout": (0.05, 0.05, 0.10),   # fixed unless you later decide to sweep
    "fc_dropout": (0.50, 0.50),           # fixed unless you later decide to sweep
    "negative_slope": 0.01,
    "use_bn": False,

    # optimization
    "loss": "ce",                         # default
    "optimizer": "Adam",
    "lr": 1e-2,
    "weight_decay": 0.0,

    # scheduler (off by default; cfg enables it by setting gamma>0)
    "sched_gamma": 0.0,
    "sched_step_size": 10,

    # training extras (optional; keep 0 to disable)
    "patience": 0,
    "grad_clip": 1.0,
    "amp": False,
}

In [None]:
RESULTS_ROOT=os.path.join(ROOT,"results"); run_dir=prepare_run_dirs(RESULTS_ROOT); print("run_dir:",run_dir)

EPOCHS=100
base_cfg={
    "batch_size":1000,"num_workers":8,"max_per_class":None,
    "conv_channels":(32,64,128),"kernel_sizes":(5,5,3),"pools":(2,2,2),
    "fc_hidden":(256,64),"conv_dropout":(0.05,0.05,0.10),"fc_dropout":(0.50,0.50),
    "negative_slope":0.01,"use_bn":False,
    "loss":"ce","optimizer":"Adam","lr":1e-2,"weight_decay":0.0,
    "sched_gamma":0.0,"sched_step_size":10,
    "patience":0,"grad_clip":1.0,"amp":False,
}

grid_specs=[
    {"params":{"lr":[1e-1,3e-2,1e-2,3e-3,1e-3],"weight_decay":[0,1e-6,1e-5,1e-4,1e-3]},"epochs":25},
    {"params":{"sched_gamma":[0.8,0.9],"sched_step_size":[5,10,20]},"epochs":25},
    {"params":{"batch_size":[256,512,1000,2048],"lr":[1e-1,3e-2,1e-2,3e-3,1e-3]},"epochs":25},
    {"params":{"conv_channels":[(16,32,64),(32,64,128),(64,128,256)],"fc_hidden":[(128,64),(256,64),(512,128)]},"epochs":25},
    {"params":{"kernel_sizes":[(3,3,3),(5,5,3),(7,5,3)],"pools":[(2,2,2),(4,2,2)]},"epochs":25},
    {"params":{"max_per_class":[None,300_000,100_000],"lr":[1e-1,3e-2,1e-2,3e-3,1e-3]},"epochs":25},
    {"params":{"loss":["ce","bce"],"lr":[1e-2,3e-3,1e-3]},"epochs":25},
]

exp_dir=Path(run_dir)/f"final_{EPOCHS}ep"; (exp_dir/"models").mkdir(parents=True,exist_ok=True); (exp_dir/"final_metrics").mkdir(parents=True,exist_ok=True)
write_config_csv(exp_dir,base_cfg,EPOCHS,extras={"num_features":int(num_features)})

history,best_val_f1,metrics_test=run_experiment(base_cfg,EPOCHS,exp_dir,seed=base_cfg.get("seed",1337),score_key="val_best_f1")
print("best_val_macro_f1:",best_val_f1); print("test_metrics:",metrics_test)

# Uncomment to run grids (this can be a lot of runs):
# all_rows=run_grid_searches(run_dir,base_cfg,grid_specs,epochs_default=EPOCHS,seed=base_cfg.get("seed",1337))


run_dir: /home/cis6022/Adam/varMax/results/I5


  amp=bool(cfg.get("amp",False)) and use_cuda; scaler=torch.cuda.amp.GradScaler(enabled=amp)
                                                  

ep001 tr_loss=0.0487 val_loss=0.0109 val_bestF1=0.9987 val_thr=0.4383 val_roc=0.9999


                                                  

ep002 tr_loss=0.0350 val_loss=0.0094 val_bestF1=0.9985 val_thr=0.5771 val_roc=0.9999


                                                  

ep003 tr_loss=0.0238 val_loss=0.0094 val_bestF1=0.9990 val_thr=0.6159 val_roc=0.9998


                                                  

ep004 tr_loss=0.0449 val_loss=0.0279 val_bestF1=0.9988 val_thr=0.5845 val_roc=0.9998


                                                  

ep005 tr_loss=0.0887 val_loss=0.0091 val_bestF1=0.9989 val_thr=0.8103 val_roc=0.9999


                                                  

ep006 tr_loss=0.0169 val_loss=0.0064 val_bestF1=0.9991 val_thr=0.6551 val_roc=0.9999


                                                  

ep007 tr_loss=0.1101 val_loss=0.0138 val_bestF1=0.9988 val_thr=0.6100 val_roc=0.9999


                                                  

ep008 tr_loss=0.0132 val_loss=0.0065 val_bestF1=0.9992 val_thr=0.5045 val_roc=0.9999


                                                  

ep009 tr_loss=28.7291 val_loss=0.0057 val_bestF1=0.9992 val_thr=0.8604 val_roc=1.0000


                                                  

ep010 tr_loss=0.8543 val_loss=0.0064 val_bestF1=0.9992 val_thr=0.8189 val_roc=0.9999


                                                  

ep011 tr_loss=0.0183 val_loss=0.0129 val_bestF1=0.9991 val_thr=0.5109 val_roc=0.9999


                                                  

ep012 tr_loss=4.7544 val_loss=0.0085 val_bestF1=0.9990 val_thr=0.5254 val_roc=1.0000


                                                  

ep013 tr_loss=1.4999 val_loss=0.0078 val_bestF1=0.9992 val_thr=0.8511 val_roc=1.0000


                                                  

ep014 tr_loss=0.0304 val_loss=0.0074 val_bestF1=0.9992 val_thr=0.7272 val_roc=1.0000


                                                  

ep015 tr_loss=1.5762 val_loss=0.0074 val_bestF1=0.9991 val_thr=0.8742 val_roc=1.0000


                                                  

ep016 tr_loss=2.6909 val_loss=0.0159 val_bestF1=0.9985 val_thr=0.9485 val_roc=0.9999


                                                  

ep017 tr_loss=0.0195 val_loss=0.0063 val_bestF1=0.9992 val_thr=0.5115 val_roc=1.0000


                                                  

ep018 tr_loss=0.4708 val_loss=0.0064 val_bestF1=0.9994 val_thr=0.6452 val_roc=1.0000


                                                  

ep019 tr_loss=0.0738 val_loss=0.0098 val_bestF1=0.9993 val_thr=0.7903 val_roc=1.0000


                                                  

ep020 tr_loss=0.0613 val_loss=15.0829 val_bestF1=0.9975 val_thr=0.7081 val_roc=0.9950


                                                  

ep021 tr_loss=0.1714 val_loss=0.0061 val_bestF1=0.9993 val_thr=0.8821 val_roc=1.0000


                                                  

ep022 tr_loss=0.0098 val_loss=0.0053 val_bestF1=0.9994 val_thr=0.6476 val_roc=1.0000


                                                  

ep023 tr_loss=0.0742 val_loss=0.0062 val_bestF1=0.9993 val_thr=0.8894 val_roc=1.0000


                                                  

ep024 tr_loss=0.0163 val_loss=0.0095 val_bestF1=0.9993 val_thr=0.2576 val_roc=1.0000


                                                  

ep025 tr_loss=0.1652 val_loss=0.0084 val_bestF1=0.9991 val_thr=0.7694 val_roc=1.0000


                                                 

ep026 tr_loss=0.8278 val_loss=0.0068 val_bestF1=0.9993 val_thr=0.9050 val_roc=1.0000


                                                  

ep027 tr_loss=0.0356 val_loss=0.0067 val_bestF1=0.9993 val_thr=0.8755 val_roc=1.0000


                                                  

ep028 tr_loss=0.0465 val_loss=0.0054 val_bestF1=0.9994 val_thr=0.8442 val_roc=1.0000


                                                  

ep029 tr_loss=0.0191 val_loss=0.0125 val_bestF1=0.9993 val_thr=0.7427 val_roc=1.0000


                                                  

ep030 tr_loss=0.0161 val_loss=0.0081 val_bestF1=0.9993 val_thr=0.8268 val_roc=1.0000


                                                  

ep031 tr_loss=0.0630 val_loss=0.0117 val_bestF1=0.9987 val_thr=0.8309 val_roc=0.9999


                                                  

ep032 tr_loss=0.1369 val_loss=0.0060 val_bestF1=0.9993 val_thr=0.3324 val_roc=1.0000


                                                 

ep033 tr_loss=0.1841 val_loss=0.0073 val_bestF1=0.9993 val_thr=0.8930 val_roc=1.0000


                                                  

ep034 tr_loss=0.1597 val_loss=0.0123 val_bestF1=0.9991 val_thr=0.6854 val_roc=1.0000


                                                  

ep035 tr_loss=0.0132 val_loss=0.0078 val_bestF1=0.9992 val_thr=0.8877 val_roc=1.0000


                                                  

ep036 tr_loss=0.2439 val_loss=0.0144 val_bestF1=0.9991 val_thr=0.9562 val_roc=0.9998


                                                  

ep037 tr_loss=0.0820 val_loss=0.0071 val_bestF1=0.9994 val_thr=0.6256 val_roc=1.0000


                                                  

ep038 tr_loss=0.4443 val_loss=0.0090 val_bestF1=0.9993 val_thr=0.9517 val_roc=0.9999


                                                  

ep039 tr_loss=0.1064 val_loss=0.0085 val_bestF1=0.9993 val_thr=0.8412 val_roc=1.0000


                                                  

ep040 tr_loss=0.0419 val_loss=0.0100 val_bestF1=0.9994 val_thr=0.4935 val_roc=1.0000


                                                  

ep041 tr_loss=2.1428 val_loss=0.0072 val_bestF1=0.9993 val_thr=0.4291 val_roc=1.0000


                                                  

ep042 tr_loss=0.1015 val_loss=0.0053 val_bestF1=0.9995 val_thr=0.7564 val_roc=1.0000


                                                  

ep043 tr_loss=0.1051 val_loss=0.0080 val_bestF1=0.9993 val_thr=0.8534 val_roc=1.0000


                                                  

ep044 tr_loss=0.0625 val_loss=0.0103 val_bestF1=0.9994 val_thr=0.8798 val_roc=1.0000


                                                  

ep045 tr_loss=0.0125 val_loss=0.0095 val_bestF1=0.9994 val_thr=0.9728 val_roc=1.0000


                                                 

ep046 tr_loss=0.0097 val_loss=0.0116 val_bestF1=0.9995 val_thr=0.4969 val_roc=1.0000


                                                 

ep047 tr_loss=2.5676 val_loss=0.0064 val_bestF1=0.9994 val_thr=0.8376 val_roc=1.0000


                                                  

ep048 tr_loss=0.0109 val_loss=0.0090 val_bestF1=0.9994 val_thr=0.1497 val_roc=1.0000


                                                  

ep049 tr_loss=0.9176 val_loss=0.0097 val_bestF1=0.9994 val_thr=0.8714 val_roc=1.0000


                                                  

ep050 tr_loss=0.0882 val_loss=0.0074 val_bestF1=0.9994 val_thr=0.5199 val_roc=1.0000


                                                  

ep051 tr_loss=0.0256 val_loss=0.0188 val_bestF1=0.9995 val_thr=0.3745 val_roc=1.0000


                                                 

ep052 tr_loss=0.0744 val_loss=0.0072 val_bestF1=0.9994 val_thr=0.5923 val_roc=1.0000


                                                  

ep053 tr_loss=0.0340 val_loss=0.0135 val_bestF1=0.9996 val_thr=0.1859 val_roc=0.9999


                                                  

ep054 tr_loss=0.0454 val_loss=0.0054 val_bestF1=0.9996 val_thr=0.6267 val_roc=1.0000


                                                  

ep055 tr_loss=0.0196 val_loss=0.0067 val_bestF1=0.9993 val_thr=0.6541 val_roc=1.0000


                                                  

ep056 tr_loss=0.0269 val_loss=0.0068 val_bestF1=0.9994 val_thr=0.7139 val_roc=1.0000


                                                 

ep057 tr_loss=0.0294 val_loss=0.0040 val_bestF1=0.9995 val_thr=0.7829 val_roc=1.0000


                                                  

ep058 tr_loss=0.0415 val_loss=0.0041 val_bestF1=0.9996 val_thr=0.1952 val_roc=1.0000


                                                  

ep059 tr_loss=0.1519 val_loss=0.0064 val_bestF1=0.9994 val_thr=0.6740 val_roc=1.0000


                                                  

ep060 tr_loss=0.3611 val_loss=0.0038 val_bestF1=0.9996 val_thr=0.7085 val_roc=1.0000


                                                  

ep061 tr_loss=2.5752 val_loss=0.0067 val_bestF1=0.9994 val_thr=0.8635 val_roc=1.0000


                                                  

ep062 tr_loss=0.0532 val_loss=0.0042 val_bestF1=0.9996 val_thr=0.7125 val_roc=1.0000


                                                  

ep063 tr_loss=0.0591 val_loss=0.0070 val_bestF1=0.9994 val_thr=0.6633 val_roc=1.0000


                                                  

ep064 tr_loss=0.5140 val_loss=0.0099 val_bestF1=0.9993 val_thr=0.9036 val_roc=1.0000


  6%|â–‹         | 64/988 [00:00<00:08, 111.53it/s]

In [37]:
import torch
from pathlib import Path

SRC=Path("/home/cis6022/Adam/varMax/results/I2/final_100ep/models/best.pt")
DST=Path("cicids_cnn_best_ckpt.pt")  # write a new portable ckpt next to your notebook

device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
ckpt=torch.load(SRC,map_location="cpu")  # save in cpu format for portability

# pull from your notebook globals (these should exist from your dataloader cell)
if "feature_names" in globals(): ckpt["feature_names"]=list(feature_names)
if "scaler" in globals():
    ckpt["scaler_mean"]=scaler.mean_.astype("float32")
    ckpt["scaler_scale"]=scaler.scale_.astype("float32")
if "scaler_mean" in globals(): ckpt["scaler_mean"]=np.asarray(scaler_mean,dtype="float32")
if "scaler_scale" in globals(): ckpt["scaler_scale"]=np.asarray(scaler_scale,dtype="float32")

torch.save(ckpt,DST)
print("re-saved:",DST,"| keys:",list(ckpt.keys()))

re-saved: cicids_cnn_best_ckpt.pt | keys: ['model_state', 'cfg', 'epoch', 'val_best_thr', 'feature_names', 'scaler_mean', 'scaler_scale']


In [43]:
# --- Cell: build an "injectable" malicious packet pool (10k rows) in LIVE format ---

import numpy as np, pandas as pd, torch
from pathlib import Path

CKPT_PATH="cicids_cnn_best_ckpt.pt"
SOURCE_CSV="# --- Cell: build an "injectable" malicious packet pool (10k rows) in LIVE format ---

import numpy as np, pandas as pd, torch
from pathlib import Path

CKPT_PATH="cicids_cnn_best_ckpt.pt"
SOURCE_CSV="CICIDS-2017_preprocessed.csv"          # <-- your "other dataset" (must contain malicious rows)
OUT_CSV="CICIDS_inject_malicious_pool_10k.csv"
N=10_000
SEED=1337

def _torch_load(path,device):
    try: return torch.load(path,map_location=device,weights_only=False)
    except TypeError: return torch.load(path,map_location=device)

device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
ckpt=_torch_load(CKPT_PATH,device)
feature_names=ckpt.get("feature_names",None)
if feature_names is None: raise SystemExit(f"ckpt missing feature_names. Keys: {list(ckpt.keys())}")

# expect LIVE columns: payload_byte_1..1500 + ttl,total_len,proto_bin,t_delta
PAYLOAD_COLS=[f"payload_byte_{i+1}" for i in range(1500)]
ALL_COLS=PAYLOAD_COLS+["ttl","total_len","proto_bin","t_delta"]

src_path=(Path(SOURCE_CSV) if Path(SOURCE_CSV).exists()
          else (DATA_DIR/SOURCE_CSV if "DATA_DIR" in globals() and (DATA_DIR/SOURCE_CSV).exists() else Path(ROOT)/SOURCE_CSV))
if not src_path.exists(): raise SystemExit(f"missing SOURCE_CSV: {src_path}")

# load only what we need (fast) + label column if present
hdr=pd.read_csv(src_path,nrows=0)
cols=set(hdr.columns)
label_col="Label" if "Label" in cols else ("label" if "label" in cols else None)
usecols=[c for c in (set(ALL_COLS)|set(feature_names)) if c in cols] + ([label_col] if label_col else [])
df=pd.read_csv(src_path,usecols=usecols)

if label_col and label_col!="Label": df=df.rename(columns={label_col:"Label"}); label_col="Label"
if not label_col: raise SystemExit(f"{src_path.name} has no Label/label column; cannot filter malicious rows.")

df=df.replace([np.inf,-np.inf],np.nan).dropna()
mal=df[df["Label"].astype(str).str.strip().str.upper()!="BENIGN"].copy()
if len(mal)==0: raise SystemExit("No malicious rows found (Label != BENIGN).")

rng=np.random.default_rng(SEED)
idx=rng.choice(len(mal),size=N,replace=(len(mal)<N))
mal=mal.iloc[idx].copy()

# enforce EXACT LIVE schema + ordering (matches live_detection.py DataFrame)
d=mal.drop(columns=[c for c in ("Flow ID","Source IP","Source Port","Destination IP","Destination Port","Timestamp","Label","label") if c in mal.columns],errors="ignore")
for c in ALL_COLS:
    if c not in d.columns: d[c]=0.0
d=d[ALL_COLS].replace([np.inf,-np.inf],np.nan).fillna(0.0)

out_path=Path(OUT_CSV)
d.to_csv(out_path,index=False)
print("saved:",out_path.resolve(),"rows:",len(d),"cols:",len(d.columns))
print("head cols:",d.columns[:8].tolist()," ... tail cols:",d.columns[-4:].tolist())         # <-- your "other dataset" (must contain malicious rows)
OUT_CSV="CICIDS_inject_malicious_pool_10k.csv"
N=10_000
SEED=1337

def _torch_load(path,device):
    try: return torch.load(path,map_location=device,weights_only=False)
    except TypeError: return torch.load(path,map_location=device)

device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
ckpt=_torch_load(CKPT_PATH,device)
feature_names=ckpt.get("feature_names",None)
if feature_names is None: raise SystemExit(f"ckpt missing feature_names. Keys: {list(ckpt.keys())}")

# expect LIVE columns: payload_byte_1..1500 + ttl,total_len,proto_bin,t_delta
PAYLOAD_COLS=[f"payload_byte_{i+1}" for i in range(1500)]
ALL_COLS=PAYLOAD_COLS+["ttl","total_len","proto_bin","t_delta"]

src_path=(Path(SOURCE_CSV) if Path(SOURCE_CSV).exists()
          else (DATA_DIR/SOURCE_CSV if "DATA_DIR" in globals() and (DATA_DIR/SOURCE_CSV).exists() else Path(ROOT)/SOURCE_CSV))
if not src_path.exists(): raise SystemExit(f"missing SOURCE_CSV: {src_path}")

# load only what we need (fast) + label column if present
hdr=pd.read_csv(src_path,nrows=0)
cols=set(hdr.columns)
label_col="Label" if "Label" in cols else ("label" if "label" in cols else None)
usecols=[c for c in (set(ALL_COLS)|set(feature_names)) if c in cols] + ([label_col] if label_col else [])
df=pd.read_csv(src_path,usecols=usecols)

if label_col and label_col!="Label": df=df.rename(columns={label_col:"Label"}); label_col="Label"
if not label_col: raise SystemExit(f"{src_path.name} has no Label/label column; cannot filter malicious rows.")

df=df.replace([np.inf,-np.inf],np.nan).dropna()
mal=df[df["Label"].astype(str).str.strip().str.upper()!="BENIGN"].copy()
if len(mal)==0: raise SystemExit("No malicious rows found (Label != BENIGN).")

rng=np.random.default_rng(SEED)
idx=rng.choice(len(mal),size=N,replace=(len(mal)<N))
mal=mal.iloc[idx].copy()

# enforce EXACT LIVE schema + ordering (matches live_detection.py DataFrame)
d=mal.drop(columns=[c for c in ("Flow ID","Source IP","Source Port","Destination IP","Destination Port","Timestamp","Label","label") if c in mal.columns],errors="ignore")
for c in ALL_COLS:
    if c not in d.columns: d[c]=0.0
d=d[ALL_COLS].replace([np.inf,-np.inf],np.nan).fillna(0.0)

out_path=Path(OUT_CSV)
d.to_csv(out_path,index=False)
print("saved:",out_path.resolve(),"rows:",len(d),"cols:",len(d.columns))
print("head cols:",d.columns[:8].tolist()," ... tail cols:",d.columns[-4:].tolist())

saved: /home/cis6022/Adam/varMax/CICIDS_inject_malicious_pool_10k.csv rows: 10000 cols: 1504
head cols: ['payload_byte_1', 'payload_byte_2', 'payload_byte_3', 'payload_byte_4', 'payload_byte_5', 'payload_byte_6', 'payload_byte_7', 'payload_byte_8']  ... tail cols: ['ttl', 'total_len', 'proto_bin', 't_delta']


In [50]:
print("len(ALL_COLS):", len(ALL_COLS))
print("len(feature_names):", None if feature_names is None else len(feature_names))
print("feature_names == ALL_COLS ?", feature_names == ALL_COLS)

if feature_names is not None:
    missing = [c for c in feature_names if c not in ALL_COLS]
    extra   = [c for c in ALL_COLS if c not in feature_names]
    print("model expects (not in ALL_COLS):", missing[:20], "count:", len(missing))
    print("live has (not in model):", extra[:20], "count:", len(extra))


len(ALL_COLS): 1504
len(feature_names): 1504
feature_names == ALL_COLS ? False
model expects (not in ALL_COLS): ['protocol'] count: 1
live has (not in model): ['proto_bin'] count: 1
