In [7]:
import os, sys, csv, random
from pathlib import Path

import numpy as np, torch, torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import TensorDataset, DataLoader
from tqdm.auto import tqdm

# --- paths -----------------------------------------------------------------
# ROOT is the notebook's working directory; PKG_DIR is pushed to the front of
# sys.path so that packages stored there can be imported.
ROOT = os.getcwd()
PKG_DIR = "/home/cis6022/StarLiteGAN/packages"
if PKG_DIR not in sys.path:
    sys.path.insert(0, PKG_DIR)
DATA_DIR = Path(ROOT) / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)

# --- reproducibility -------------------------------------------------------
# Seed every RNG this notebook uses (python, numpy, torch CPU and, below, CUDA).
seed = 1337
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# --- device ----------------------------------------------------------------
use_cuda = torch.cuda.is_available()
device_type = "cuda" if use_cuda else "cpu"
device = torch.device(device_type)
if use_cuda:
    torch.cuda.manual_seed_all(seed)
    # cuDNN autotuner enabled for speed.
    torch.backends.cudnn.benchmark = True

print("ROOT:", ROOT)
print("PKG_DIR:", PKG_DIR)
print("DATA_DIR:", str(DATA_DIR))
print("device:", device)

ROOT: /home/cis6022/Adam/varMax
PKG_DIR: /home/cis6022/StarLiteGAN/packages
DATA_DIR: /home/cis6022/Adam/varMax/data
device: cuda


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Locate the preprocessed CSV: prefer DATA_DIR, otherwise fall back to ROOT.
CSV_NAME = "CICIDS-2017_preprocessed.csv"
csv_path = DATA_DIR / CSV_NAME
if not csv_path.exists():
    csv_path = Path(ROOT) / CSV_NAME
print("Loading:", csv_path)

df = pd.read_csv(csv_path)
# Normalise the label column name to "Label".
if "label" in df.columns and "Label" not in df.columns:
    df = df.rename(columns={"label": "Label"})
label_col = "Label"

# Drop rows containing +/-inf or NaN, then drop flow-identifier columns if present.
df = df.replace([np.inf, -np.inf], np.nan).dropna()
for c in ("Flow ID", "Source IP", "Source Port", "Destination IP", "Destination Port", "Timestamp"):
    if c in df.columns:
        df = df.drop(columns=c)

y_raw = df[label_col].astype(str).to_numpy()

# keep only numeric features (preprocessed CICIDS should already be numeric)
X_df = df.drop(columns=[label_col]).select_dtypes(include=[np.number])
feature_names = list(X_df.columns)

# Integer-encode the string labels (one id per distinct label).
le = LabelEncoder()
y = le.fit_transform(y_raw).astype(np.int64)

X = X_df.to_numpy(dtype=np.float32)

# Stratified 70 / 20 / 10 split: hold out 30%, then split that 2:1 into val/test.
X_tr, X_tmp, y_tr, y_tmp = train_test_split(
    X, y, test_size=0.30, random_state=seed, stratify=y
)
X_va, X_te, y_va, y_te = train_test_split(
    X_tmp, y_tmp, test_size=(1/3), random_state=seed, stratify=y_tmp
)  # 20% / 10%

# Standardise using statistics fitted on the training split only.
scaler = StandardScaler()
X_tr = scaler.fit_transform(X_tr).astype(np.float32)
X_va = scaler.transform(X_va).astype(np.float32)
X_te = scaler.transform(X_te).astype(np.float32)

# Conv1d-friendly shape: (B, C=1, F)
X_tr = torch.from_numpy(X_tr).unsqueeze(1)
X_va = torch.from_numpy(X_va).unsqueeze(1)
X_te = torch.from_numpy(X_te).unsqueeze(1)
y_tr = torch.from_numpy(y_tr)
y_va = torch.from_numpy(y_va)
y_te = torch.from_numpy(y_te)

train_ds = TensorDataset(X_tr, y_tr)
val_ds = TensorDataset(X_va, y_va)
test_ds = TensorDataset(X_te, y_te)

BATCH_SIZE = 1024
train_dl = DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=8, pin_memory=use_cuda, persistent_workers=True,
)
val_dl = DataLoader(
    val_ds, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=8, pin_memory=use_cuda, persistent_workers=True,
)
test_dl = DataLoader(
    test_ds, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=8, pin_memory=use_cuda, persistent_workers=True,
)

print("X shape (conv):", tuple(X_tr.shape), "num_features:", X_tr.shape[-1])
print("classes:", len(le.classes_), "| splits:", len(train_ds), len(val_ds), len(test_ds))


Loading: /home/cis6022/Adam/varMax/CICIDS-2017_preprocessed.csv
X shape (conv): (987178, 1, 1504) num_features: 1504
classes: 15 | splits: 987178 282051 141026


In [10]:
# Binary (benign vs. malicious) data cell: load CSV -> clean -> optional
# per-class cap -> stratified 70/20/10 split -> standardise -> DataLoaders.
CSV_NAME="CICIDS-2017_preprocessed.csv"
csv_path=(DATA_DIR/CSV_NAME) if (DATA_DIR/CSV_NAME).exists() else (Path(ROOT)/CSV_NAME)
print("Loading:",csv_path)

df=pd.read_csv(csv_path)
if "label" in df.columns and "Label" not in df.columns: df=df.rename(columns={"label":"Label"})
label_col="Label"

# Drop rows containing +/-inf or NaN, then drop flow-identifier columns if present.
df=df.replace([np.inf,-np.inf],np.nan).dropna()
for c in ("Flow ID","Source IP","Source Port","Destination IP","Destination Port","Timestamp"):
    if c in df.columns: df=df.drop(columns=c)

labels=df[label_col].astype(str).str.strip()
y=(labels.str.upper()!="BENIGN").astype(np.int64).to_numpy()  # 0=benign, 1=malicious

X_df=df.drop(columns=[label_col]).select_dtypes(include=[np.number])
feature_names=list(X_df.columns)
X=X_df.to_numpy(dtype=np.float32)

# Optional cap on samples per class; only active when a `base_cfg` dict already
# exists in the notebook namespace and defines "max_per_class".
max_per_class=base_cfg.get("max_per_class",None) if "base_cfg" in globals() else None
if max_per_class is not None:
    max_per_class=int(max_per_class)
    idx0=np.where(y==0)[0]; idx1=np.where(y==1)[0]
    rng=np.random.default_rng(seed)
    if len(idx0)>max_per_class: idx0=rng.choice(idx0,size=max_per_class,replace=False)
    if len(idx1)>max_per_class: idx1=rng.choice(idx1,size=max_per_class,replace=False)
    idx=np.concatenate([idx0,idx1]); rng.shuffle(idx)
    X,y=X[idx],y[idx]

X_tr,X_tmp,y_tr,y_tmp=train_test_split(X,y,test_size=0.30,random_state=seed,stratify=y)
X_va,X_te ,y_va,y_te =train_test_split(X_tmp,y_tmp,test_size=(1/3),random_state=seed,stratify=y_tmp)  # 20% / 10%

# Standardise using statistics fitted on the training split only.
scaler=StandardScaler()
X_tr=scaler.fit_transform(X_tr).astype(np.float32)
X_va=scaler.transform(X_va).astype(np.float32)
X_te=scaler.transform(X_te).astype(np.float32)

# Conv1d: (B,1,F). y float for BCEWithLogitsLoss.
X_tr=torch.from_numpy(X_tr).unsqueeze(1); y_tr=torch.from_numpy(y_tr).float()
X_va=torch.from_numpy(X_va).unsqueeze(1); y_va=torch.from_numpy(y_va).float()
X_te=torch.from_numpy(X_te).unsqueeze(1); y_te=torch.from_numpy(y_te).float()

train_ds=TensorDataset(X_tr,y_tr); val_ds=TensorDataset(X_va,y_va); test_ds=TensorDataset(X_te,y_te)

BATCH_SIZE=int(base_cfg.get("batch_size",1000)) if "base_cfg" in globals() else 1000
NUM_WORKERS=int(base_cfg.get("num_workers",8)) if "base_cfg" in globals() else 8
mp_ctx=torch.multiprocessing.get_context("spawn")

# DataLoader rejects persistent_workers=True and a multiprocessing_context when
# num_workers == 0, so both options are only switched on for multi-process loading.
train_dl=DataLoader(train_ds,batch_size=BATCH_SIZE,shuffle=True ,num_workers=NUM_WORKERS,pin_memory=use_cuda,
                    persistent_workers=NUM_WORKERS>0,multiprocessing_context=mp_ctx if NUM_WORKERS>0 else None)
val_dl  =DataLoader(val_ds  ,batch_size=BATCH_SIZE,shuffle=False,num_workers=NUM_WORKERS,pin_memory=use_cuda,
                    persistent_workers=NUM_WORKERS>0,multiprocessing_context=mp_ctx if NUM_WORKERS>0 else None)
test_dl =DataLoader(test_ds ,batch_size=BATCH_SIZE,shuffle=False,num_workers=NUM_WORKERS,pin_memory=use_cuda,
                    persistent_workers=NUM_WORKERS>0,multiprocessing_context=mp_ctx if NUM_WORKERS>0 else None)

# Both counts are clamped to >= 1 so a split with a missing class cannot divide by zero.
pos_weight_value=float((y_tr.numel()-y_tr.sum()).clamp_min(1)/y_tr.sum().clamp_min(1))  # n_neg/n_pos
num_features=int(X_tr.shape[-1])

print("X shape:",tuple(X_tr.shape),"num_features:",num_features)
print("splits:",len(train_ds),len(val_ds),len(test_ds))
print("benign/mal:",int((y==0).sum()),int((y==1).sum()))
print("pos rate train/val/test:",float(y_tr.mean()),float(y_va.mean()),float(y_te.mean()))
print("pos_weight_value (n_neg/n_pos):",pos_weight_value)

Loading: /home/cis6022/Adam/varMax/CICIDS-2017_preprocessed.csv
X shape: (987178, 1, 1504) num_features: 1504
splits: 987178 282051 141026
benign/mal: 362108 1048147
pos rate train/val/test: 0.7432327270507812 0.7432308197021484 0.7432317733764648
pos_weight_value (n_neg/n_pos): 0.34547358751296997


In [13]:
import os, csv, json, math, time
from pathlib import Path
import numpy as np, pandas as pd
import torch, torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import (confusion_matrix, roc_auc_score, roc_curve, precision_recall_curve, auc, f1_score, accuracy_score, balanced_accuracy_score)

# --------------------------
# dirs / config I/O
# --------------------------
def prepare_run_dirs(results_root):
    """Create and return the next free run directory ``<results_root>/I<k>``.

    Existing sub-directories whose name starts with ``I`` are scanned; the
    integer between the leading ``I`` and the first ``_`` (if any) counts as a
    used id. The smallest positive id not in use is chosen, and ``models/`` and
    ``final_metrics/`` are created inside the new directory.

    Args:
        results_root: Directory that holds all runs; created if missing.

    Returns:
        pathlib.Path of the newly created run directory.
    """
    root=Path(results_root); root.mkdir(parents=True,exist_ok=True)
    used_ids=set()
    for entry in root.iterdir():
        if not (entry.is_dir() and entry.name.startswith("I")):
            continue
        try:
            used_ids.add(int(entry.name.split("_")[0][1:]))
        except ValueError:
            # Name starts with "I" but is not "I<number>[_...]" (e.g. "Images"): ignore it.
            continue
    k=1
    while k in used_ids: k+=1
    run_dir=root/f"I{k}"
    (run_dir/"models").mkdir(parents=True,exist_ok=True)
    (run_dir/"final_metrics").mkdir(parents=True,exist_ok=True)
    return run_dir

def make_grid_dir_name(run_dir,grid_idx,param_grid):
    """Create and return the directory for one grid search.

    The directory name is ``g<grid_idx>__`` followed by one ``name=v1|v2|...``
    segment per entry of ``param_grid``, with segments joined by ``__``.
    """
    segments=[]
    for key,values in param_grid.items():
        joined_values="|".join(map(str,values))
        segments.append(f"{key}={joined_values}")
    suffix="__".join(segments)
    grid_dir=Path(run_dir)/f"g{grid_idx}__{suffix}"
    grid_dir.mkdir(parents=True,exist_ok=True)
    return grid_dir

def write_config_csv(exp_dir,cfg,epochs,extras=None):
    """Write the experiment configuration as a one-row CSV at ``exp_dir/config.csv``.

    The row is built from ``cfg``, then ``epochs`` (stored as int), then
    ``extras``; later sources overwrite earlier keys of the same name.

    Args:
        exp_dir: Experiment directory; created (with parents) if it does not exist.
        cfg: Mapping of hyper-parameters.
        epochs: Number of epochs, coerced with ``int``.
        extras: Optional mapping of additional columns.
    """
    exp_dir=Path(exp_dir)
    # Create the directory here, as _save_csv does, so the helper also works on a fresh path.
    exp_dir.mkdir(parents=True,exist_ok=True)
    extras={} if extras is None else dict(extras)
    row=dict(cfg); row["epochs"]=int(epochs); row.update(extras)
    path=exp_dir/"config.csv"
    with path.open("w",newline="") as f:
        w=csv.DictWriter(f,fieldnames=list(row.keys())); w.writeheader(); w.writerow(row)

def _save_csv(path,rows):
    path=Path(path); path.parent.mkdir(parents=True,exist_ok=True)
    if not rows: return
    cols=list(rows[0].keys())
    with path.open("w",newline="") as f:
        w=csv.DictWriter(f,fieldnames=cols); w.writeheader(); [w.writerow(r) for r in rows]

def _df_print(rows,sort_by=None,head=None,title=None,round_=4):
    if not rows: print("(no rows)"); return
    df=pd.DataFrame(rows)
    if sort_by is not None and sort_by in df.columns: df=df.sort_values(sort_by,ascending=False)
    if head is not None: df=df.head(int(head))
    if round_ is not None:
        for c in df.columns:
            if pd.api.types.is_float_dtype(df[c]): df[c]=df[c].round(round_)
    if title: print(f"\n{title}")
    print(df.to_string(index=False))

# --------------------------
# data cache + loader rebuild (supports grid over batch_size/max_per_class)
# --------------------------
# Module-level cache filled once by init_data_cache_from_existing():
# {"X": float32 array, "y": int64 array}.
_DATA_CACHE=None

def init_data_cache_from_existing(train_ds=None,val_ds=None,test_ds=None):
    """Build (once) a pooled numpy copy of the train/val/test TensorDatasets.

    A dataset argument left as None is looked up by name in the module globals.
    If any of the three is still missing, None is returned and nothing is
    cached. Otherwise the first tensors are concatenated (train, val, test
    order) into ``X`` (float32) and the second tensors into ``y``: labels that
    are not already int64 are binarised with ``> 0.5``.

    Once populated, the cache is returned as-is on every later call and the
    arguments are ignored.
    """
    global _DATA_CACHE
    if _DATA_CACHE is not None:
        return _DATA_CACHE

    namespace=globals()
    splits=[]
    for given,name in ((train_ds,"train_ds"),(val_ds,"val_ds"),(test_ds,"test_ds")):
        ds=given if given is not None else namespace.get(name,None)
        if ds is None:
            return None
        splits.append(ds)

    features=torch.cat([ds.tensors[0] for ds in splits],0)  # (N,1,F)
    targets=torch.cat([ds.tensors[1] for ds in splits],0)
    X=features.cpu().numpy().astype(np.float32)
    y=targets.cpu().numpy()
    if y.dtype!=np.int64:
        y=(y>0.5).astype(np.int64)
    else:
        y=y.astype(np.int64)

    _DATA_CACHE={"X":X,"y":y}
    return _DATA_CACHE

def make_loaders(cfg,seed=1337,use_cuda=None):
    """Build train/val/test DataLoaders for one configuration.

    The pooled, already-standardised data from init_data_cache_from_existing()
    is optionally sub-sampled to at most ``cfg["max_per_class"]`` rows per
    class, shuffled with ``cfg["seed"]`` (default: ``seed``) and cut
    sequentially into 70% / 20% / remainder. The cut is a plain shuffled split,
    not a stratified one.

    Args:
        cfg: Mapping; reads "max_per_class", "seed", "batch_size" (default 1000)
            and "num_workers" (default 8).
        seed: Fallback RNG seed when cfg has no "seed".
        use_cuda: Whether to pin memory; defaults to torch.cuda.is_available().

    Returns:
        (train_dl, val_dl, test_dl, num_features, pos_weight) where pos_weight
        is n_neg / n_pos of the training split, both counts clamped to >= 1.

    Raises:
        RuntimeError: if train_ds / val_ds / test_ds are not available. The
            previous fallback for this case indexed the missing datasets and
            could never succeed.
    """
    cache=init_data_cache_from_existing()
    if cache is None:
        raise RuntimeError("make_loaders: train_ds/val_ds/test_ds not found; run the data-loading cell first")
    X,y=cache["X"],cache["y"]  # X already standardized in your current pipeline

    mpc=cfg.get("max_per_class",None)
    rng=np.random.default_rng(int(cfg.get("seed",seed)))
    if mpc is not None:
        mpc=int(mpc)
        idx0=np.where(y==0)[0]; idx1=np.where(y==1)[0]
        if len(idx0)>mpc: idx0=rng.choice(idx0,size=mpc,replace=False)
        if len(idx1)>mpc: idx1=rng.choice(idx1,size=mpc,replace=False)
        idx=np.concatenate([idx0,idx1]); rng.shuffle(idx)
    else:
        idx=rng.permutation(len(y))
    Xs,ys=X[idx],y[idx]

    n=len(ys); ntr=int(0.70*n); nva=int(0.20*n)
    # NOTE: X is already standardized from your original split; if you want leak-free rescaling per cfg, cache raw X before scaling in the data cell.
    X_tr=torch.from_numpy(Xs[:ntr]);        y_tr=torch.from_numpy(ys[:ntr])
    X_va=torch.from_numpy(Xs[ntr:ntr+nva]); y_va=torch.from_numpy(ys[ntr:ntr+nva])
    X_te=torch.from_numpy(Xs[ntr+nva:]);    y_te=torch.from_numpy(ys[ntr+nva:])
    train_ds=TensorDataset(X_tr,y_tr.float()); val_ds=TensorDataset(X_va,y_va.float()); test_ds=TensorDataset(X_te,y_te.float())

    bs=int(cfg.get("batch_size",1000)); nw=int(cfg.get("num_workers",8))
    use_cuda=torch.cuda.is_available() if use_cuda is None else bool(use_cuda)
    # persistent_workers and multiprocessing_context are only legal with
    # num_workers > 0; DataLoader raises ValueError otherwise.
    multi_proc=nw>0
    mp_ctx=torch.multiprocessing.get_context("spawn") if multi_proc else None

    def _loader(ds,shuffle):
        # One place for the settings shared by all three loaders.
        return DataLoader(ds,batch_size=bs,shuffle=shuffle,num_workers=nw,pin_memory=use_cuda,
                          persistent_workers=multi_proc,multiprocessing_context=mp_ctx)

    train_dl=_loader(train_ds,True); val_dl=_loader(val_ds,False); test_dl=_loader(test_ds,False)
    pos=float((y_tr.numel()-y_tr.sum()).clamp_min(1)/y_tr.sum().clamp_min(1))  # n_neg/n_pos
    return train_dl,val_dl,test_dl,int(X_tr.shape[-1]),pos

# --------------------------
# model
# --------------------------
class ConvBlock1d(nn.Module):
    """Conv1d -> [BatchNorm1d] -> LeakyReLU -> [MaxPool1d] -> [Dropout].

    The convolution uses padding ``k // 2`` and drops its bias when batch norm
    is enabled. Pooling is added only for ``pool > 1`` (kernel and stride both
    ``pool``); dropout only for ``drop > 0``.
    """
    def __init__(self,in_ch,out_ch,k,pool,drop,neg_slope=0.01,bn=False):
        super().__init__()
        stages=[nn.Conv1d(in_ch,out_ch,k,padding=k//2,bias=not bn)]
        if bn:
            stages.append(nn.BatchNorm1d(out_ch))
        stages.append(nn.LeakyReLU(neg_slope,inplace=True))
        if pool and pool>1:
            stages.append(nn.MaxPool1d(pool,pool))
        if drop and drop>0:
            stages.append(nn.Dropout(drop))
        self.net=nn.Sequential(*stages)

    def forward(self,x):
        """Apply the block to ``x``."""
        return self.net(x)

class FC_CNN(nn.Module):
    """1-D CNN feature extractor followed by a two-hidden-layer MLP head.

    Configuration keys read from ``cfg`` (defaults in parentheses):
        conv_channels ((32,64,128)), kernel_sizes ((5,5,3)), pools ((2,2,2)),
        conv_dropout ((0.05,0.05,0.10)), fc_dropout ((0.5,0.5)),
        negative_slope (0.01), use_bn (False), fc_hidden ((256,64)), loss ("ce").

    ``kernel_sizes``, ``pools`` and ``conv_dropout`` may each be a single number,
    which is then applied to every conv layer; ``fc_dropout`` may be a single
    number applied to both hidden layers. When per-layer sequences have
    different lengths, only as many conv blocks as the shortest one are built.

    The output layer has 2 units for ``loss == "ce"`` and 1 unit otherwise.
    """

    @staticmethod
    def _per_layer(value,n):
        """Repeat a scalar setting ``n`` times; turn a sequence into a tuple unchanged."""
        return (value,)*n if isinstance(value,(int,float)) else tuple(value)

    def __init__(self,num_features,cfg):
        super().__init__(); c=dict(cfg)
        ch=tuple(c.get("conv_channels",(32,64,128))); n_conv=len(ch)
        # Scalars broadcast to the actual number of conv layers, not a fixed 3.
        ks=self._per_layer(c.get("kernel_sizes",(5,5,3)),n_conv)
        pools=self._per_layer(c.get("pools",(2,2,2)),n_conv)
        cd=self._per_layer(c.get("conv_dropout",(0.05,0.05,0.10)),n_conv)
        fd=self._per_layer(c.get("fc_dropout",(0.5,0.5)),2)
        ns=float(c.get("negative_slope",0.01)); bn=bool(c.get("use_bn",False))

        blocks=[]; in_ch=1
        for out_ch,k,pool,drop in zip(ch,ks,pools,cd):
            blocks.append(ConvBlock1d(in_ch,out_ch,int(k),int(pool),float(drop),ns,bn)); in_ch=out_ch
        self.features=nn.Sequential(*blocks)

        # Probe with a dummy input to find the flattened feature size for fc1.
        with torch.no_grad(): flat=self.features(torch.zeros(1,1,int(num_features))).flatten(1).shape[1]

        h=tuple(c.get("fc_hidden",(256,64)))
        self.fc1=nn.Linear(flat,int(h[0])); self.fc2=nn.Linear(int(h[0]),int(h[1]))
        self.act=nn.LeakyReLU(ns,inplace=True); self.do1=nn.Dropout(float(fd[0])); self.do2=nn.Dropout(float(fd[1]))
        loss=str(c.get("loss","ce")).lower()
        self.out=nn.Linear(int(h[1]), 2 if loss=="ce" else 1)

    def forward(self,x):
        """Return raw logits: 2 columns for "ce", 1 column otherwise."""
        x=self.features(x).flatten(1)
        x=self.do1(self.act(self.fc1(x)))
        x=self.do2(self.act(self.fc2(x)))
        return self.out(x)

def build_model(cfg,num_features,device):
    """Instantiate FC_CNN for ``num_features`` inputs and move it to ``device``."""
    model=FC_CNN(num_features, cfg)
    return model.to(device)

# --------------------------
# scoring + metrics (val-calibrated best-F1; no threshold hyperparam)
# --------------------------
@torch.no_grad()
def predict_scores(model,dl,crit,device,loss_type):
    """Run ``model`` over ``dl`` in eval mode and collect labels and positive-class scores.

    Args:
        model: Network returning logits.
        dl: Iterable of ``(xb, yb)`` batches.
        crit: Loss callable applied to ``(logits, targets)``.
        device: Device the batches are moved to.
        loss_type: ``"ce"`` -> targets cast to long, score = softmax column 1;
            anything else -> targets cast to float, last logit dim squeezed,
            score = sigmoid.

    Returns:
        (mean_loss, y_true, scores): the sample-weighted mean loss, int64 labels
        and float64 scores. For an empty ``dl`` this is ``(0.0, empty, empty)``.
    """
    model.eval(); tot=0.0; n=0; ys=[]; ss=[]
    for xb,yb in dl:
        xb=xb.to(device,non_blocking=True); yb=yb.to(device,non_blocking=True)
        logits=model(xb)
        if loss_type=="ce":
            y=yb.long(); loss=crit(logits,y)
            score=torch.softmax(logits,1)[:,1]
        else:
            y=yb.float(); logits=logits.squeeze(-1); loss=crit(logits,y)
            score=torch.sigmoid(logits)
        # Weight each batch loss by its size so the result is a per-sample mean.
        bs=yb.size(0); tot+=loss.item()*bs; n+=bs
        ys.append(y.detach().cpu().numpy()); ss.append(score.detach().cpu().numpy())
    if not ys:
        # np.concatenate rejects an empty list; return empty arrays instead of crashing.
        return 0.0, np.empty(0,dtype=np.int64), np.empty(0,dtype=np.float64)
    y_true=np.concatenate(ys).astype(np.int64); scores=np.concatenate(ss).astype(np.float64)
    return tot/max(n,1), y_true, scores

def best_f1_threshold(y_true,scores):
    """Return ``(best_f1, threshold)`` maximising positive-class F1 over the PR curve.

    F1 is computed at every point of ``precision_recall_curve`` except the final
    one, which has no associated threshold. Falls back to ``(0.0, 0.5)`` when no
    candidate point exists.
    """
    precision,recall,thresholds=precision_recall_curve(y_true,scores)  # precision/recall have one more entry than thresholds
    f1_curve=2*precision*recall/(precision+recall+1e-12)  # epsilon avoids 0/0
    candidates=f1_curve[:-1]
    if candidates.size==0:
        return 0.0, 0.5
    best=int(np.nanargmax(candidates))
    if best < len(thresholds):
        thr=float(thresholds[best])
    else:
        thr=0.5
    return float(candidates[best]), thr

def metrics_at_threshold(y_true,scores,thr):
    """Compute binary classification metrics for ``scores >= thr``.

    Returns a dict with accuracy, balanced accuracy, per-class F1 (label 0 as
    "benign", label 1 as "malicious"), their unweighted mean as macro F1, ROC AUC
    (NaN when sklearn raises ValueError), PR AUC and the confusion-matrix counts.
    """
    y_pred=(scores>=thr).astype(np.int64)
    counts=confusion_matrix(y_true,y_pred,labels=[0,1])
    tn,fp,fn,tp=counts.ravel()

    accuracy=float(accuracy_score(y_true,y_pred))
    balanced=float(balanced_accuracy_score(y_true,y_pred))
    f1_benign=float(f1_score(y_true,y_pred,pos_label=0))
    f1_malicious=float(f1_score(y_true,y_pred,pos_label=1))
    macro=0.5*(f1_benign+f1_malicious)

    try:
        roc=float(roc_auc_score(y_true,scores))
    except ValueError:
        roc=float("nan")

    precision,recall,_=precision_recall_curve(y_true,scores)
    pr_auc=float(auc(recall,precision))

    return {
        "acc":accuracy,
        "bal_acc":balanced,
        "macro_f1":macro,
        "f1_benign":f1_benign,
        "f1_malicious":f1_malicious,
        "roc_auc":roc,
        "pr_auc":pr_auc,
        "tn":int(tn),
        "fp":int(fp),
        "fn":int(fn),
        "tp":int(tp),
    }

# --------------------------
# plots
# --------------------------
def _savefig(path):
    """Save the current matplotlib figure to ``path`` (creating parent dirs) and close it."""
    parent=Path(path).parent
    parent.mkdir(parents=True,exist_ok=True)
    plt.tight_layout()
    plt.savefig(path)
    plt.close()

def plot_learning(history,final_dir):
    """Write the loss, AUC and F1 learning curves as PNG files into ``final_dir``.

    ``history`` is a list of per-epoch dicts. Epochs on which validation was
    skipped only carry "epoch" and "train_loss", so every curve is drawn from
    the rows that actually contain its key instead of indexing every row.
    """
    final_dir=Path(final_dir)

    def _curve(key,label=None):
        # Plot one metric against epoch, using only rows that recorded it.
        points=[(r["epoch"],r[key]) for r in history if key in r]
        plt.plot([e for e,_ in points],[v for _,v in points],label=key if label is None else label)

    plt.figure(); _curve("train_loss"); _curve("val_loss"); plt.legend(); _savefig(final_dir/"learning_curve_loss.png")
    plt.figure(); _curve("val_roc_auc"); _curve("val_pr_auc"); plt.legend(); _savefig(final_dir/"learning_curve_auc.png")
    # "val_best_f1" is the best positive-class F1 over thresholds, not a macro average, so label it as such.
    plt.figure(); _curve("val_best_f1"); plt.legend(); _savefig(final_dir/"learning_curve_f1.png")

def plot_confusion(metrics,final_dir,name="confusion_matrix"):
    """Draw the 2x2 confusion matrix from ``metrics`` and save it as ``<name>.png``.

    ``metrics`` must provide the counts "tn", "fp", "fn" and "tp". Rows are the
    true class, columns the predicted class.
    """
    cm=np.array(
        [[metrics["tn"],metrics["fp"]],
         [metrics["fn"],metrics["tp"]]],
        dtype=np.int64,
    )
    plt.figure()
    plt.imshow(cm,interpolation="nearest")
    plt.colorbar()
    class_names=["BENIGN","MAL"]
    plt.xticks([0,1],class_names)
    plt.yticks([0,1],class_names)
    # Cells above half of the largest count get white text, the others black.
    half_max=cm.max()/2
    for (i,j),count in np.ndenumerate(cm):
        text_color="white" if count>half_max else "black"
        plt.text(j,i,str(count),ha="center",va="center",color=text_color)
    plt.xlabel("Pred")
    plt.ylabel("True")
    _savefig(Path(final_dir)/f"{name}.png")

def plot_roc_pr(y_true,scores,final_dir):
    """Save the ROC curve (``roc_curve.png``) and PR curve (``pr_curve.png``) to ``final_dir``."""
    out_dir=Path(final_dir)

    # ROC curve with the chance diagonal for reference.
    fpr,tpr,_=roc_curve(y_true,scores)
    plt.figure()
    plt.plot(fpr,tpr)
    plt.plot([0,1],[0,1],"--")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    _savefig(out_dir/"roc_curve.png")

    # Precision-recall curve.
    precision,recall,_=precision_recall_curve(y_true,scores)
    plt.figure()
    plt.plot(recall,precision)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    _savefig(out_dir/"pr_curve.png")

def plot_score_hist(y_true,scores,thr,final_dir):
    """Save overlaid score histograms per class, with the threshold marked, as ``score_hist.png``."""
    plt.figure()
    for class_id,class_label in ((0,"benign"),(1,"mal")):
        plt.hist(scores[y_true==class_id],bins=80,alpha=0.6,label=class_label)
    plt.axvline(thr,linestyle="--")
    plt.legend()
    plt.xlabel("score")
    plt.ylabel("count")
    _savefig(Path(final_dir)/"score_hist.png")

# --------------------------
# train + experiment
# --------------------------
def make_opt(cfg,params):
    """Create the optimizer named by ``cfg["optimizer"]`` (case-insensitive).

    "sgd" -> SGD with ``cfg["momentum"]`` (default 0.9); "rmsprop" -> RMSprop;
    any other name -> Adam. Learning rate defaults to 1e-2 and weight decay to 0.0.
    """
    kind=str(cfg.get("optimizer","Adam")).lower()
    shared={"lr":float(cfg.get("lr",1e-2)),"weight_decay":float(cfg.get("weight_decay",0.0))}
    if kind=="sgd":
        momentum=float(cfg.get("momentum",0.9))
        return torch.optim.SGD(params,momentum=momentum,**shared)
    if kind=="rmsprop":
        return torch.optim.RMSprop(params,**shared)
    return torch.optim.Adam(params,**shared)

def make_sched(cfg,opt):
    """Return a StepLR scheduler for ``opt``, or None when ``cfg["sched_gamma"]`` is <= 0 or absent.

    The step size comes from ``cfg["sched_step_size"]`` (default 10) and is only
    read when a scheduler is actually created.
    """
    decay=float(cfg.get("sched_gamma",0.0))
    if decay>0:
        every=int(cfg.get("sched_step_size",10))
        return torch.optim.lr_scheduler.StepLR(opt,step_size=every,gamma=decay)
    return None

def make_crit(cfg,device,pos_weight_value=None):
    """Build the loss for ``cfg["loss"]`` (case-insensitive, default "ce").

    "bce" -> BCEWithLogitsLoss, weighted by ``cfg["pos_weight_value"]`` if that
    key is present, otherwise by the ``pos_weight_value`` argument; unweighted
    when the resulting value is None. Any other name -> CrossEntropyLoss.
    """
    kind=str(cfg.get("loss","ce")).lower()
    if kind!="bce":
        return nn.CrossEntropyLoss()
    weight=cfg.get("pos_weight_value",pos_weight_value)
    if weight is None:
        return nn.BCEWithLogitsLoss()
    return nn.BCEWithLogitsLoss(pos_weight=torch.tensor(float(weight),device=device))

def train_one_epoch(model,dl,opt,crit,device,loss_type,scaler=None,amp=False,grad_clip=1.0):
    """Train ``model`` for one pass over ``dl`` and return the sample-weighted mean loss.

    The mixed-precision path (CUDA autocast + ``scaler``) is taken only when
    ``amp`` is truthy and a scaler is supplied; otherwise a plain step is done.
    Gradients are clipped to norm ``grad_clip`` when it is truthy. For
    ``loss_type == "ce"`` targets are cast to long; otherwise the last logit
    dimension is squeezed and targets are cast to float.
    """
    def _batch_loss(inputs,targets):
        # Forward pass plus loss in the form matching loss_type.
        logits=model(inputs)
        if loss_type=="ce":
            return crit(logits,targets.long())
        return crit(logits.squeeze(-1),targets.float())

    def _clip():
        if grad_clip:
            nn.utils.clip_grad_norm_(model.parameters(),float(grad_clip))

    mixed=amp and scaler is not None
    model.train()
    loss_sum=0.0
    seen=0
    for xb,yb in tqdm(dl,leave=False):
        xb=xb.to(device,non_blocking=True)
        yb=yb.to(device,non_blocking=True)
        opt.zero_grad(set_to_none=True)
        if mixed:
            with torch.autocast("cuda",enabled=True):
                loss=_batch_loss(xb,yb)
            scaler.scale(loss).backward()
            # Unscale before clipping so the norm is measured on true gradients.
            scaler.unscale_(opt)
            _clip()
            scaler.step(opt)
            scaler.update()
        else:
            loss=_batch_loss(xb,yb)
            loss.backward()
            _clip()
            opt.step()
        batch_n=yb.size(0)
        loss_sum+=loss.item()*batch_n
        seen+=batch_n
    return loss_sum/max(seen,1)

def run_experiment(cfg,epochs,exp_dir,device=None,seed=1337,use_cuda=None,eval_every=1,score_key="val_best_f1"):
    """Train, validate and test one configuration, writing all artefacts under ``exp_dir``.

    Args:
        cfg: Mapping of hyper-parameters; read here for "loss", "amp",
            "grad_clip", "patience" and "pos_weight_value", and passed on to
            make_loaders / build_model / make_crit / make_opt / make_sched.
        epochs: Maximum number of training epochs.
        exp_dir: Output directory; ``models/`` and ``final_metrics/`` are created inside it.
        device: Torch device; defaults to CUDA when available, else CPU.
        seed: Forwarded to make_loaders.
        use_cuda: Defaults to torch.cuda.is_available(); also gates AMP.
        eval_every: Validate every this many epochs (the last epoch is always validated).
        score_key: Key of the per-epoch row used for model selection; falls
            back to the best validation F1 when the key is absent.

    Returns:
        (history, best, metrics_test): the list of per-epoch rows, the best
        selection score seen, and the test-metric dict of the best checkpoint.

    Files written: config.csv, epoch_metrics.csv, models/latest.pt,
    models/best.pt, final_metrics/final_metrics.csv,
    final_metrics/test_scores.npz and the plots produced by the plot_* helpers.
    """
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
    use_cuda=torch.cuda.is_available() if use_cuda is None else bool(use_cuda)
    exp_dir=Path(exp_dir); model_dir=exp_dir/"models"; final_dir=exp_dir/"final_metrics"
    model_dir.mkdir(parents=True,exist_ok=True); final_dir.mkdir(parents=True,exist_ok=True)

    # Data, model, loss, optimizer and optional LR scheduler for this configuration.
    train_dl,val_dl,test_dl,num_features,pos_weight=make_loaders(cfg,seed=seed,use_cuda=use_cuda)
    loss_type=str(cfg.get("loss","ce")).lower()
    model=build_model(cfg,num_features,device)
    crit=make_crit(cfg,device,pos_weight_value=pos_weight)
    opt=make_opt(cfg,model.parameters()); sched=make_sched(cfg,opt)
    # AMP is only enabled when requested in cfg AND CUDA is in use.
    amp=bool(cfg.get("amp",False)) and use_cuda; scaler=torch.cuda.amp.GradScaler(enabled=amp)
    # patience == 0 (or missing / falsy) disables early stopping.
    grad_clip=float(cfg.get("grad_clip",1.0)); patience=int(cfg.get("patience",0)) if cfg.get("patience",0) else 0

    # Record the configuration together with derived dataset facts.
    extras={"num_features":int(num_features),"pos_weight_value":float(cfg.get("pos_weight_value",pos_weight)),"train_n":len(train_dl.dataset),"val_n":len(val_dl.dataset),"test_n":len(test_dl.dataset)}
    write_config_csv(exp_dir,cfg,epochs,extras=extras)

    # best starts below any achievable score so the first evaluated epoch always writes best.pt.
    history=[]; best=-1.0; best_ep=0; best_thr=0.5; bad=0

    for ep in range(1,int(epochs)+1):
        tr_loss=train_one_epoch(model,train_dl,opt,crit,device,loss_type,scaler=scaler,amp=amp,grad_clip=grad_clip)
        if sched is not None: sched.step()
        # Non-evaluation epoch: log the training loss only and refresh latest.pt.
        # NOTE(review): these rows carry no val_* keys; confirm that the CSV writer
        # and plot_learning tolerate that when eval_every > 1.
        if (ep % int(eval_every))!=0 and ep!=epochs:
            row={"epoch":ep,"train_loss":tr_loss}; history.append(row)
            torch.save({"model_state":model.state_dict(),"cfg":dict(cfg),"epoch":ep}, model_dir/"latest.pt")
            continue

        # Validation: pick the F1-maximising threshold, then compute all metrics at it.
        va_loss,yv,sv=predict_scores(model,val_dl,crit,device,loss_type)
        va_best_f1,va_thr=best_f1_threshold(yv,sv)
        va_m=metrics_at_threshold(yv,sv,va_thr)

        row={"epoch":ep,"train_loss":tr_loss,"val_loss":va_loss,"val_best_f1":va_best_f1,"val_best_thr":va_thr,**{f"val_{k}":v for k,v in va_m.items()}}
        history.append(row)

        torch.save({"model_state":model.state_dict(),"cfg":dict(cfg),"epoch":ep,"val_best_thr":va_thr}, model_dir/"latest.pt")
        score=float(row[score_key]) if score_key in row else float(va_best_f1)
        if score>best:
            # New best: remember its threshold and checkpoint it.
            best=score; best_ep=ep; best_thr=va_thr; bad=0
            torch.save({"model_state":model.state_dict(),"cfg":dict(cfg),"epoch":ep,"val_best_thr":va_thr}, model_dir/"best.pt")
        else:
            bad+=1
            # Early stop; the break skips the progress print below for this epoch.
            if patience and bad>=patience: break

        print(f"ep{ep:03d} tr_loss={tr_loss:.4f} val_loss={va_loss:.4f} val_bestF1={va_best_f1:.4f} val_thr={va_thr:.4f} val_roc={va_m['roc_auc']:.4f}")

    _save_csv(exp_dir/"epoch_metrics.csv",history)

    # Test evaluation uses the best checkpoint and the threshold chosen on validation.
    ck=torch.load(model_dir/"best.pt",map_location=device); model.load_state_dict(ck["model_state"]); best_thr=float(ck.get("val_best_thr",best_thr))
    te_loss,yt,st=predict_scores(model,test_dl,crit,device,loss_type)
    te_m=metrics_at_threshold(yt,st,best_thr)
    metrics_test={"best_epoch":int(ck.get("epoch",best_ep)),"val_best_thr":best_thr,"test_loss":float(te_loss),**{f"test_{k}":v for k,v in te_m.items()}}
    _save_csv(final_dir/"final_metrics.csv",[metrics_test])
    np.savez_compressed(final_dir/"test_scores.npz",y_true=yt,scores=st,thr=best_thr)

    plot_learning(history,final_dir); plot_confusion(te_m,final_dir,"confusion_matrix_test"); plot_roc_pr(yt,st,final_dir); plot_score_hist(yt,st,best_thr,final_dir)
    return history, float(best), metrics_test

# --------------------------
# grid search (2D heatmaps) + console tables
# --------------------------
def _plot_grid_heatmap(mat,row_vals,col_vals,row_name,col_name,cbar_label,title,out_path):
    """Draw one grid-search matrix as a heatmap and pass the target path to `_savefig`.

    Rows of `mat` are labelled with `row_vals` (y axis) and columns with
    `col_vals` (x axis); tick labels are the `str()` of each swept value.
    """
    plt.figure(); plt.imshow(mat,origin="lower",aspect="auto"); plt.colorbar(label=cbar_label)
    plt.xticks(range(len(col_vals)),[str(x) for x in col_vals],rotation=45)
    plt.yticks(range(len(row_vals)),[str(x) for x in row_vals])
    plt.xlabel(col_name); plt.ylabel(row_name); plt.title(title)
    _savefig(out_path)

def run_grid_searches(run_dir,base_cfg,grid_specs,epochs_default=100,seed=1337):
    """Run a sequence of 2-parameter grid searches and collect one result row per combination.

    For every spec, each (value1, value2) pair of the first two parameters is
    written into a copy of `base_cfg` and trained with `run_experiment`
    (scored on "val_best_f1"). Per grid, a `grid_results.csv` and two heatmaps
    (validation score, test ROC AUC) are written into the grid directory; after
    all grids, `all_grid_results.csv` is written into `run_dir`, which is then
    renamed to `<prefix>_valF1_<x>_testF1_<y>` (prefix = directory name up to
    the first underscore; x/y come from the combination with the highest
    validation score).

    Args:
        run_dir: directory of this run (str or Path).
        base_cfg: baseline config dict; it is copied per combination, never mutated.
        grid_specs: iterable of dicts with a "params" mapping
            (param name -> iterable of values) and an optional "epochs" entry.
            Only the first two params of a spec are swept.
        epochs_default: epoch count used when a spec has no "epochs" entry.
        seed: forwarded unchanged to `run_experiment`.

    Returns:
        List of row dicts (one per trained combination, across all grids).

    Raises:
        ValueError: if any spec lists fewer than two params. All specs are
            checked before the first experiment starts, so a typo in a later
            spec cannot waste the training time of the earlier grids.
    """
    run_dir=Path(run_dir); all_rows=[]; global_best_val=-1.0; global_best_test=-1.0

    # Validate and normalise every spec up-front (fail fast, before any training).
    plans=[]
    for spec in grid_specs:
        param_grid=spec["params"]; epochs=int(spec.get("epochs",epochs_default)); names=list(param_grid.keys())
        if len(names)<2: raise ValueError("grid spec needs >=2 params")
        if len(names)>2:
            # Heatmaps are 2D, so extra params are dropped; say so instead of doing it silently.
            print(f"grid spec lists {len(names)} params; sweeping only {names[:2]} (ignored: {names[2:]})")
        k1,k2=names[0],names[1]
        plans.append((k1,list(param_grid[k1]),k2,list(param_grid[k2]),epochs))

    for grid_idx,(k1,v1,k2,v2,epochs) in enumerate(plans,start=1):
        grid_dir=make_grid_dir_name(run_dir,grid_idx,{k1:v1,k2:v2})
        # NaN-initialised so combinations that never ran show up as gaps in the heatmaps.
        vmat=np.full((len(v1),len(v2)),np.nan,dtype=np.float64); rmat=np.full((len(v1),len(v2)),np.nan,dtype=np.float64)
        grid_rows=[]; idx=0
        for i,a in enumerate(v1):
            for j,b in enumerate(v2):
                cfg=dict(base_cfg); cfg[k1]=a; cfg[k2]=b
                exp_name=f"run_{idx}__{k1}_{a}__{k2}_{b}"; exp_dir=grid_dir/exp_name
                print(f"\n=== Grid {grid_idx} combo {idx}: {k1}={a}, {k2}={b} ===")
                _,best_val,mt=run_experiment(cfg,epochs,exp_dir,seed=seed,score_key="val_best_f1")
                vmat[i,j]=best_val; rmat[i,j]=mt.get("test_roc_auc",np.nan)
                row={"grid_idx":grid_idx,"combo_idx":idx,k1:a,k2:b,"best_val_macro_f1":best_val,"test_macro_f1":mt["test_macro_f1"],"test_roc_auc":mt["test_roc_auc"],"test_pr_auc":mt["test_pr_auc"],"best_epoch":mt["best_epoch"]}
                grid_rows.append(row); all_rows.append(row)
                # Model selection is by validation score only; the test F1 is just carried along.
                if best_val>global_best_val: global_best_val=best_val; global_best_test=float(mt["test_macro_f1"])
                _df_print(grid_rows,sort_by="best_val_macro_f1",head=10,title=f"Grid {grid_idx} (top 10 so far)")
                idx+=1

        _save_csv(grid_dir/"grid_results.csv",grid_rows)
        _plot_grid_heatmap(vmat,v1,v2,k1,k2,"best val macro F1","Validation best macro F1",grid_dir/"heatmap_val_macro_f1.png")
        _plot_grid_heatmap(rmat,v1,v2,k1,k2,"test ROC AUC","Test ROC AUC",grid_dir/"heatmap_test_roc_auc.png")

        _df_print(grid_rows,sort_by="best_val_macro_f1",title=f"Grid {grid_idx} final results")

    if all_rows:
        _save_csv(run_dir/"all_grid_results.csv",all_rows)
        _df_print(all_rows,sort_by="best_val_macro_f1",head=25,title="ALL grids (top 25)")
        base=run_dir.name.split("_")[0]; new=f"{base}_valF1_{global_best_val:.3f}_testF1_{global_best_test:.3f}"
        new_dir=run_dir.parent/new
        # Renaming is best-effort: results stay valid under the old name if it fails.
        # Only filesystem errors are tolerated, so Ctrl-C and programming errors still surface.
        try:
            os.rename(run_dir,new_dir); print("results saved in:",new_dir)
        except OSError as err:
            print(f"could not rename {run_dir} -> {new_dir}: {err}")
            print("results saved in:",run_dir)
    return all_rows

In [None]:
# Baseline experiment configuration. Note that the run cell further down
# assigns `base_cfg` again with the same values.
base_cfg = dict(
    # --- loader / data sampling ---
    batch_size=1000,
    num_workers=8,
    max_per_class=None,

    # --- network: three conv blocks followed by two fully-connected layers ---
    conv_channels=(32, 64, 128),
    kernel_sizes=(5, 5, 3),
    pools=(2, 2, 2),
    fc_hidden=(256, 64),
    conv_dropout=(0.05, 0.05, 0.10),   # held fixed; not part of any sweep yet
    fc_dropout=(0.50, 0.50),           # held fixed; not part of any sweep yet
    negative_slope=0.01,
    use_bn=False,

    # --- optimisation ---
    loss="ce",                         # default loss
    optimizer="Adam",
    lr=1e-2,
    weight_decay=0.0,

    # --- LR scheduler (gamma of 0 leaves it off; a positive gamma turns it on) ---
    sched_gamma=0.0,
    sched_step_size=10,

    # --- optional training extras ---
    patience=0,                        # 0 means no early stopping
    grad_clip=1.0,
    amp=False,
)

In [14]:
# Where this run's artefacts go; the concrete run directory is chosen by prepare_run_dirs.
RESULTS_ROOT = os.path.join(ROOT, "results")
run_dir = prepare_run_dirs(RESULTS_ROOT)
print("run_dir:", run_dir)

EPOCHS = 100

# Baseline configuration used for the final run (and as the starting point of every grid).
base_cfg = {
    # data / loader
    "batch_size": 1000,
    "num_workers": 8,
    "max_per_class": None,
    # model
    "conv_channels": (32, 64, 128),
    "kernel_sizes": (5, 5, 3),
    "pools": (2, 2, 2),
    "fc_hidden": (256, 64),
    "conv_dropout": (0.05, 0.05, 0.10),
    "fc_dropout": (0.50, 0.50),
    "negative_slope": 0.01,
    "use_bn": False,
    # optimization
    "loss": "ce",
    "optimizer": "Adam",
    "lr": 1e-2,
    "weight_decay": 0.0,
    # scheduler
    "sched_gamma": 0.0,
    "sched_step_size": 10,
    # training extras
    "patience": 0,
    "grad_clip": 1.0,
    "amp": False,
}

# Each spec sweeps exactly two parameters (one heatmap per grid), 25 epochs per combination.
grid_specs = [
    {
        "params": {"lr": [1e-1, 3e-2, 1e-2, 3e-3, 1e-3], "weight_decay": [0, 1e-6, 1e-5, 1e-4, 1e-3]},
        "epochs": 25,
    },
    {
        "params": {"sched_gamma": [0.8, 0.9], "sched_step_size": [5, 10, 20]},
        "epochs": 25,
    },
    {
        "params": {"batch_size": [256, 512, 1000, 2048], "lr": [1e-1, 3e-2, 1e-2, 3e-3, 1e-3]},
        "epochs": 25,
    },
    {
        "params": {
            "conv_channels": [(16, 32, 64), (32, 64, 128), (64, 128, 256)],
            "fc_hidden": [(128, 64), (256, 64), (512, 128)],
        },
        "epochs": 25,
    },
    {
        "params": {"kernel_sizes": [(3, 3, 3), (5, 5, 3), (7, 5, 3)], "pools": [(2, 2, 2), (4, 2, 2)]},
        "epochs": 25,
    },
    {
        "params": {"max_per_class": [None, 300_000, 100_000], "lr": [1e-1, 3e-2, 1e-2, 3e-3, 1e-3]},
        "epochs": 25,
    },
    {
        "params": {"loss": ["ce", "bce"], "lr": [1e-2, 3e-3, 1e-3]},
        "epochs": 25,
    },
]

# Directory layout of the single final experiment: checkpoints and final metrics side by side.
exp_dir = Path(run_dir) / f"final_{EPOCHS}ep"
(exp_dir / "models").mkdir(parents=True, exist_ok=True)
(exp_dir / "final_metrics").mkdir(parents=True, exist_ok=True)
write_config_csv(exp_dir, base_cfg, EPOCHS, extras={"num_features": int(num_features)})

# base_cfg carries no "seed" entry here, so the fallback value is what gets used.
history, best_val_f1, metrics_test = run_experiment(
    base_cfg,
    EPOCHS,
    exp_dir,
    seed=base_cfg.get("seed", 1337),
    score_key="val_best_f1",
)
print("best_val_macro_f1:", best_val_f1)
print("test_metrics:", metrics_test)

# Uncomment to run grids (this can be a lot of runs):
# all_rows=run_grid_searches(run_dir,base_cfg,grid_specs,epochs_default=EPOCHS,seed=base_cfg.get("seed",1337))


run_dir: /home/cis6022/Adam/varMax/results/I1


  amp=bool(cfg.get("amp",False)) and use_cuda; scaler=torch.cuda.amp.GradScaler(enabled=amp)
                                                  

ep001 tr_loss=0.1010 val_loss=0.0115 val_bestF1=0.9980 val_thr=0.5510 val_roc=0.9999


                                                  

ep002 tr_loss=0.0870 val_loss=0.0118 val_bestF1=0.9984 val_thr=0.7170 val_roc=0.9999


                                                  

ep003 tr_loss=0.0718 val_loss=0.0126 val_bestF1=0.9986 val_thr=0.3597 val_roc=0.9998


                                                  

ep004 tr_loss=0.0371 val_loss=0.0069 val_bestF1=0.9991 val_thr=0.7652 val_roc=0.9999


                                                  

ep005 tr_loss=2.6454 val_loss=0.0063 val_bestF1=0.9991 val_thr=0.7780 val_roc=0.9999


                                                  

ep006 tr_loss=0.3206 val_loss=0.0066 val_bestF1=0.9992 val_thr=0.5429 val_roc=0.9999


                                                  

ep007 tr_loss=0.6234 val_loss=0.0067 val_bestF1=0.9992 val_thr=0.5661 val_roc=0.9999


                                                  

ep008 tr_loss=0.1015 val_loss=0.0083 val_bestF1=0.9988 val_thr=0.8619 val_roc=0.9999


                                                  

ep009 tr_loss=0.0889 val_loss=0.0065 val_bestF1=0.9992 val_thr=0.3222 val_roc=0.9999


                                                  

ep010 tr_loss=0.3993 val_loss=0.0064 val_bestF1=0.9993 val_thr=0.1598 val_roc=0.9999


                                                  

ep011 tr_loss=0.8578 val_loss=0.0050 val_bestF1=0.9993 val_thr=0.4489 val_roc=0.9999


                                                  

ep012 tr_loss=0.0358 val_loss=0.0083 val_bestF1=0.9991 val_thr=0.8226 val_roc=0.9999


                                                  

ep013 tr_loss=0.0177 val_loss=0.2097 val_bestF1=0.9993 val_thr=0.8029 val_roc=0.9998


                                                  

ep014 tr_loss=0.0388 val_loss=0.0064 val_bestF1=0.9991 val_thr=0.7614 val_roc=0.9999


                                                  

ep015 tr_loss=0.3075 val_loss=0.0069 val_bestF1=0.9992 val_thr=0.1159 val_roc=0.9999


                                                 

ep016 tr_loss=0.0160 val_loss=0.0048 val_bestF1=0.9994 val_thr=0.7750 val_roc=0.9999


                                                 

ep017 tr_loss=0.0276 val_loss=0.0062 val_bestF1=0.9993 val_thr=0.1956 val_roc=0.9999


                                                  

ep018 tr_loss=0.0363 val_loss=0.0051 val_bestF1=0.9994 val_thr=0.0276 val_roc=1.0000


                                                  

ep019 tr_loss=0.0129 val_loss=0.0070 val_bestF1=0.9992 val_thr=0.6977 val_roc=0.9999


                                                  

ep020 tr_loss=0.0819 val_loss=0.0073 val_bestF1=0.9992 val_thr=0.9765 val_roc=0.9999


                                                  

ep021 tr_loss=0.0205 val_loss=0.0048 val_bestF1=0.9994 val_thr=0.1966 val_roc=1.0000


                                                  

ep022 tr_loss=0.0354 val_loss=0.0054 val_bestF1=0.9992 val_thr=0.2572 val_roc=1.0000


                                                  

ep023 tr_loss=0.3439 val_loss=0.0063 val_bestF1=0.9992 val_thr=0.1644 val_roc=0.9999


                                                  

ep024 tr_loss=0.0253 val_loss=0.0069 val_bestF1=0.9994 val_thr=0.2656 val_roc=1.0000


                                                  

ep025 tr_loss=0.0128 val_loss=0.0047 val_bestF1=0.9995 val_thr=0.3694 val_roc=0.9999


                                                 

ep026 tr_loss=0.1639 val_loss=0.0046 val_bestF1=0.9994 val_thr=0.0506 val_roc=1.0000


                                                  

ep027 tr_loss=0.1319 val_loss=0.0365 val_bestF1=0.9994 val_thr=0.0753 val_roc=0.9999


                                                  

ep028 tr_loss=0.0117 val_loss=0.0027 val_bestF1=0.9996 val_thr=0.2634 val_roc=1.0000


                                                 

ep029 tr_loss=1.1682 val_loss=0.0044 val_bestF1=0.9994 val_thr=0.7706 val_roc=0.9999


                                                 

ep030 tr_loss=0.0094 val_loss=0.0037 val_bestF1=0.9996 val_thr=0.0302 val_roc=1.0000


                                                 

ep031 tr_loss=0.0156 val_loss=0.0030 val_bestF1=0.9996 val_thr=0.0247 val_roc=1.0000


                                                 

ep032 tr_loss=0.6974 val_loss=0.0049 val_bestF1=0.9995 val_thr=0.1306 val_roc=1.0000


                                                  

ep033 tr_loss=0.0500 val_loss=0.0044 val_bestF1=0.9994 val_thr=0.7429 val_roc=1.0000


                                                  

ep034 tr_loss=0.3644 val_loss=0.0053 val_bestF1=0.9994 val_thr=0.0073 val_roc=1.0000


                                                  

ep035 tr_loss=0.0208 val_loss=0.0051 val_bestF1=0.9993 val_thr=0.2459 val_roc=1.0000


                                                  

ep036 tr_loss=0.0233 val_loss=0.0039 val_bestF1=0.9996 val_thr=0.0944 val_roc=1.0000


                                                  

ep037 tr_loss=0.0167 val_loss=0.0116 val_bestF1=0.9993 val_thr=0.1537 val_roc=1.0000


                                                  

ep038 tr_loss=0.0154 val_loss=0.0030 val_bestF1=0.9996 val_thr=0.5133 val_roc=1.0000


                                                  

ep039 tr_loss=2.2051 val_loss=0.4445 val_bestF1=0.9995 val_thr=0.9998 val_roc=0.9998


                                                  

ep040 tr_loss=0.0094 val_loss=0.0033 val_bestF1=0.9995 val_thr=0.1983 val_roc=1.0000


                                                  

ep041 tr_loss=0.0307 val_loss=0.0036 val_bestF1=0.9996 val_thr=0.0235 val_roc=1.0000


                                                  

ep042 tr_loss=0.0119 val_loss=0.0054 val_bestF1=0.9996 val_thr=0.9870 val_roc=0.9999


                                                 

ep043 tr_loss=0.0125 val_loss=0.0076 val_bestF1=0.9996 val_thr=0.0526 val_roc=1.0000


                                                  

ep044 tr_loss=0.0157 val_loss=0.0033 val_bestF1=0.9995 val_thr=0.7611 val_roc=1.0000


                                                  

ep045 tr_loss=0.0460 val_loss=0.0090 val_bestF1=0.9996 val_thr=0.4718 val_roc=1.0000


                                                  

ep046 tr_loss=0.1884 val_loss=0.0028 val_bestF1=0.9996 val_thr=0.0788 val_roc=1.0000


                                                 

ep047 tr_loss=0.0119 val_loss=0.0031 val_bestF1=0.9995 val_thr=0.0637 val_roc=1.0000


                                                  

ep048 tr_loss=0.0475 val_loss=0.0028 val_bestF1=0.9996 val_thr=0.3056 val_roc=1.0000


                                                  

ep049 tr_loss=18.3281 val_loss=0.0050 val_bestF1=0.9996 val_thr=0.0055 val_roc=1.0000


                                                  

ep050 tr_loss=0.1724 val_loss=0.0029 val_bestF1=0.9996 val_thr=0.0446 val_roc=1.0000


                                                 

ep051 tr_loss=0.0727 val_loss=0.0038 val_bestF1=0.9996 val_thr=0.0223 val_roc=1.0000


                                                 

ep052 tr_loss=0.0290 val_loss=0.0037 val_bestF1=0.9995 val_thr=0.5257 val_roc=1.0000


                                                  

ep053 tr_loss=0.0197 val_loss=0.0056 val_bestF1=0.9996 val_thr=0.0048 val_roc=1.0000


                                                  

ep054 tr_loss=0.0126 val_loss=0.0039 val_bestF1=0.9996 val_thr=0.0753 val_roc=1.0000


                                                 

ep055 tr_loss=0.0241 val_loss=0.0029 val_bestF1=0.9997 val_thr=0.0957 val_roc=1.0000


                                                  

ep056 tr_loss=0.2357 val_loss=0.0039 val_bestF1=0.9995 val_thr=0.2546 val_roc=1.0000


                                                  

ep057 tr_loss=0.0176 val_loss=0.0022 val_bestF1=0.9997 val_thr=0.2704 val_roc=1.0000


                                                  

ep058 tr_loss=0.0370 val_loss=0.0045 val_bestF1=0.9996 val_thr=0.0209 val_roc=1.0000


                                                 

ep059 tr_loss=0.0968 val_loss=0.0030 val_bestF1=0.9996 val_thr=0.0459 val_roc=1.0000


                                                  

ep060 tr_loss=0.2767 val_loss=0.0064 val_bestF1=0.9994 val_thr=1.0000 val_roc=0.9999


                                                 

ep061 tr_loss=0.0386 val_loss=0.0038 val_bestF1=0.9997 val_thr=0.0348 val_roc=1.0000


                                                  

ep062 tr_loss=0.0163 val_loss=0.0026 val_bestF1=0.9997 val_thr=0.1355 val_roc=1.0000


                                                  

ep063 tr_loss=0.0421 val_loss=0.0027 val_bestF1=0.9996 val_thr=0.4120 val_roc=1.0000


                                                  

ep064 tr_loss=0.0423 val_loss=0.0047 val_bestF1=0.9996 val_thr=0.0106 val_roc=1.0000


                                                  

ep065 tr_loss=0.5776 val_loss=0.0040 val_bestF1=0.9996 val_thr=0.0093 val_roc=1.0000


                                                  

ep066 tr_loss=0.1324 val_loss=0.0096 val_bestF1=0.9994 val_thr=0.9705 val_roc=0.9999


                                                  

ep067 tr_loss=0.0152 val_loss=0.0032 val_bestF1=0.9997 val_thr=0.0217 val_roc=1.0000


                                                  

ep068 tr_loss=0.0213 val_loss=0.0031 val_bestF1=0.9996 val_thr=0.0735 val_roc=1.0000


                                                  

ep069 tr_loss=0.0936 val_loss=0.0038 val_bestF1=0.9997 val_thr=0.0023 val_roc=1.0000


                                                  

ep070 tr_loss=0.0416 val_loss=0.0055 val_bestF1=0.9996 val_thr=0.0101 val_roc=1.0000


                                                  

ep071 tr_loss=0.0084 val_loss=0.2081 val_bestF1=0.9996 val_thr=0.0025 val_roc=1.0000


                                                  

ep072 tr_loss=0.0905 val_loss=0.0058 val_bestF1=0.9996 val_thr=0.0008 val_roc=1.0000


                                                  

ep073 tr_loss=0.0147 val_loss=0.0321 val_bestF1=0.9995 val_thr=0.8990 val_roc=0.9999


                                                  

ep074 tr_loss=0.0929 val_loss=0.0057 val_bestF1=0.9996 val_thr=0.0245 val_roc=1.0000


                                                  

ep075 tr_loss=0.0098 val_loss=0.0033 val_bestF1=0.9997 val_thr=0.0303 val_roc=1.0000


                                                  

ep076 tr_loss=0.0483 val_loss=0.0029 val_bestF1=0.9997 val_thr=0.0836 val_roc=1.0000


                                                  

ep077 tr_loss=0.0687 val_loss=0.0062 val_bestF1=0.9996 val_thr=0.0007 val_roc=1.0000


                                                 

ep078 tr_loss=0.0326 val_loss=0.0124 val_bestF1=0.9996 val_thr=0.0403 val_roc=1.0000


                                                  

ep079 tr_loss=0.0146 val_loss=0.0042 val_bestF1=0.9996 val_thr=0.0486 val_roc=1.0000


                                                  

ep080 tr_loss=0.0189 val_loss=0.0024 val_bestF1=0.9997 val_thr=0.0488 val_roc=1.0000


                                                  

ep081 tr_loss=0.4073 val_loss=0.0061 val_bestF1=0.9997 val_thr=0.0076 val_roc=1.0000


                                                  

ep082 tr_loss=0.0198 val_loss=0.0033 val_bestF1=0.9997 val_thr=0.0091 val_roc=1.0000


                                                 

ep083 tr_loss=0.0214 val_loss=0.0209 val_bestF1=0.9996 val_thr=0.1506 val_roc=0.9999


                                                  

ep084 tr_loss=0.0202 val_loss=0.0035 val_bestF1=0.9996 val_thr=0.0534 val_roc=1.0000


                                                  

ep085 tr_loss=0.0563 val_loss=0.0067 val_bestF1=0.9996 val_thr=0.9984 val_roc=0.9999


                                                  

ep086 tr_loss=0.0161 val_loss=0.0051 val_bestF1=0.9997 val_thr=0.0021 val_roc=1.0000


                                                  

ep087 tr_loss=0.2632 val_loss=0.0043 val_bestF1=0.9994 val_thr=0.5787 val_roc=1.0000


                                                  

ep088 tr_loss=0.5067 val_loss=0.0033 val_bestF1=0.9997 val_thr=0.0234 val_roc=1.0000


                                                 

ep089 tr_loss=0.0085 val_loss=0.0036 val_bestF1=0.9997 val_thr=0.9573 val_roc=1.0000


                                                  

ep090 tr_loss=0.0161 val_loss=0.0068 val_bestF1=0.9996 val_thr=0.0815 val_roc=1.0000


                                                  

ep091 tr_loss=0.1809 val_loss=0.0059 val_bestF1=0.9994 val_thr=0.9944 val_roc=0.9999


                                                  

ep092 tr_loss=0.0509 val_loss=0.0075 val_bestF1=0.9996 val_thr=0.0045 val_roc=1.0000


                                                  

ep093 tr_loss=0.0431 val_loss=0.0042 val_bestF1=0.9996 val_thr=0.1177 val_roc=1.0000


                                                  

ep094 tr_loss=0.0304 val_loss=0.0033 val_bestF1=0.9996 val_thr=0.9905 val_roc=1.0000


                                                  

ep095 tr_loss=0.0793 val_loss=0.0042 val_bestF1=0.9997 val_thr=0.2192 val_roc=0.9999


                                                  

ep096 tr_loss=0.0240 val_loss=0.0044 val_bestF1=0.9997 val_thr=0.1474 val_roc=0.9999


                                                  

ep097 tr_loss=0.0458 val_loss=0.0104 val_bestF1=0.9997 val_thr=0.9275 val_roc=0.9999


                                                  

ep098 tr_loss=0.0321 val_loss=0.0036 val_bestF1=0.9997 val_thr=0.0227 val_roc=1.0000


                                                  

ep099 tr_loss=0.0839 val_loss=0.0031 val_bestF1=0.9997 val_thr=0.8406 val_roc=1.0000


                                                  

ep100 tr_loss=0.0242 val_loss=0.0031 val_bestF1=0.9997 val_thr=0.2561 val_roc=1.0000
best_val_macro_f1: 0.999728370939956
test_metrics: {'best_epoch': 80, 'val_best_thr': 0.048753250390291214, 'test_loss': 0.41878908208788523, 'test_acc': 0.9995887283196007, 'test_bal_acc': 0.9995423269004343, 'test_macro_f1': 0.999460918258009, 'test_f1_benign': 0.9991984300284695, 'test_f1_malicious': 0.9997234064875485, 'test_roc_auc': 0.9999754908288827, 'test_pr_auc': 0.9999924089519447, 'test_tn': 36150, 'test_fp': 20, 'test_fn': 38, 'test_tp': 104818}
