In [2]:
import sys, os
sys.path.append(os.path.abspath("../src"))

In [3]:
import time, psutil, joblib
from sklearn.ensemble import RandomForestClassifier
from feature_analysis import preprocess
from load_data import load_nsl_kdd, load_cicids

# ==== Choose dataset ====
dataset = "nsl"  # or "cicids"
path = "../data/NSL-KDD/KDDTrain+.txt"  # change to CICIDS file if needed

# ==== Load & preprocess ====
if dataset == "nsl":
    df = load_nsl_kdd(path, nrows=20000)
else:
    df = load_cicids(path, nrows=20000)

X, y = preprocess(df)

# ==== Full retrain measurement ====
proc = psutil.Process()
t0 = time.time()
mem_before = proc.memory_info().rss / 1024 / 1024  # in MB

clf = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=1)
clf.fit(X, y)

t1 = time.time()
mem_after = proc.memory_info().rss / 1024 / 1024  # in MB

# ==== Print results ====
print(f"⏱️ Full retrain time: {t1 - t0:.2f} s")
print(f"🧠 Memory used: {mem_after - mem_before:.2f} MB")

# Optional: Save for report
import pandas as pd
pd.DataFrame([{
    "Experiment": "Full Retrain (Baseline)",
    "Time Taken (s)": round(t1 - t0, 2),
    "Memory (MB)": round(mem_after - mem_before, 2),
    "Remarks": "Baseline (no drift adaptation)"
}]).to_csv(f"../results/{dataset}_baseline_runtime.csv", index=False)

✅ Preprocessing done! 4 categorical columns encoded. Shape: (20000, 42)
⏱️ Full retrain time: 1.63 s
🧠 Memory used: 82.52 MB


In [5]:
pip install river

Collecting river
  Downloading river-0.22.0-cp312-cp312-win_amd64.whl.metadata (9.0 kB)
Collecting pandas<3.0.0,>=2.2.3 (from river)
  Downloading pandas-2.3.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Downloading river-0.22.0-cp312-cp312-win_amd64.whl (2.2 MB)
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ------------------- -------------------- 1.0/2.2 MB 8.4 MB/s eta 0:00:01
   ------------------- -------------------- 1.0/2.2 MB 8.4 MB/s eta 0:00:01
   ---------------------------------- ----- 1.8/2.2 MB 2.6 MB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 3.0 MB/s eta 0:00:01
   ---------------------------------------- 2.2/2.2 MB 2.3 MB/s eta 0:00:00
Downloading pandas-2.3.3-cp312-cp312-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ---- ----------------------------------- 1.3/11.0 MB 5.2 MB/s eta 0:00:02
   ---------- ----------------------------- 2.9/11.0 MB 6.5 MB/s eta 0:00:02
   

  You can safely remove it manually.
  You can safely remove it manually.

[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
