In [1]:
import re
from pathlib import Path

import pandas as pd

from src.utils.paths import load_paths
from src.utils.logging import setup_logger

paths = load_paths()
paths.ensure_dirs(create_raw_dirs=False)  # don't auto-create raw folders if you download manually
logger = setup_logger(level="INFO")

logger.info(f"Repo root: {paths.repo_root}")
logger.info(f"VNAT raw dir: {paths.data_raw_vnat}")
logger.info(f"Processed dir: {paths.data_processed}")

2026-02-10 23:23:49 | INFO | ai-vpn-firewall | Repo root: C:\Users\scoti\PycharmProjects\ai-vpn-firewall
2026-02-10 23:23:49 | INFO | ai-vpn-firewall | VNAT raw dir: C:\Users\scoti\PycharmProjects\ai-vpn-firewall\data\raw\vnat
2026-02-10 23:23:49 | INFO | ai-vpn-firewall | Processed dir: C:\Users\scoti\PycharmProjects\ai-vpn-firewall\data\processed


In [2]:
h5_path = paths.data_raw_vnat / "VNAT_Dataframe_release_1.h5"
if not h5_path.exists():
    raise FileNotFoundError(f"VNAT H5 not found at: {h5_path}")

logger.info(f"Found VNAT file: {h5_path}")

store = pd.HDFStore(h5_path)
keys = store.keys()
store.close()

logger.info(f"H5 keys: {keys}")
if "/data" not in keys:
    raise ValueError(f"Expected key '/data' in H5 store. Found: {keys}")

2026-02-10 23:23:49 | INFO | ai-vpn-firewall | Found VNAT file: C:\Users\scoti\PycharmProjects\ai-vpn-firewall\data\raw\vnat\VNAT_Dataframe_release_1.h5
2026-02-10 23:23:50 | INFO | ai-vpn-firewall | H5 keys: ['/data']


In [3]:
df = pd.read_hdf(h5_path, key="/data")
logger.info(f"Loaded VNAT df: shape={df.shape}")

df.head()

2026-02-10 23:24:43 | INFO | ai-vpn-firewall | Loaded VNAT df: shape=(33711, 5)


Unnamed: 0,connection,timestamps,sizes,directions,file_names
0,"(10.123.1.2, 1195, 10.123.1.1, 1195, 17)","[1563289706.330096, 1563289706.330207, 1563289...","[120, 88, 120, 88, 120, 88, 120, 120, 152, 120...","[1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, ...",vpn_youtube_capture2.pcap
0,"(10.113.1.2, 22924, 10.115.1.2, 53, 17)","[1561391908.523659, 1561391908.524042]","[63, 79]","[1, 0]",nonvpn_sftp_newcapture1.pcap
1,"(10.113.1.2, 53065, 10.115.1.2, 53, 17)","[1561391908.523706, 1561391908.524059]","[63, 63]","[1, 0]",nonvpn_sftp_newcapture1.pcap
2,"(10.113.1.150, 39816, 10.115.1.123, 22, 6)","[1561391908.524836, 1561391908.525027, 1561391...","[60, 60, 52, 73, 52, 73, 52, 1378, 222, 52, 13...","[1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, ...",nonvpn_sftp_newcapture1.pcap
3,"(10.115.1.2, 6589, 10.113.1.2, 53, 17)","[1561391908.594887, 1561391908.595301]","[51, 102]","[1, 0]",nonvpn_sftp_newcapture1.pcap


In [4]:
df = df.reset_index(drop=True)

assert df.index.is_unique
assert df.index.min() == 0
assert df.index.max() == len(df) - 1

logger.info("Index reset OK: unique, 0..N-1")

2026-02-10 23:24:51 | INFO | ai-vpn-firewall | Index reset OK: unique, 0..N-1


In [5]:
expected_cols = {"connection", "timestamps", "sizes", "directions", "file_names"}
missing = expected_cols - set(df.columns)
if missing:
    raise ValueError(f"Missing expected columns: {missing}")

logger.info(f"Columns OK: {list(df.columns)}")

df["file_names"] = df["file_names"].astype(str)

2026-02-10 23:24:55 | INFO | ai-vpn-firewall | Columns OK: ['connection', 'timestamps', 'sizes', 'directions', 'file_names']


In [6]:
def derive_label_from_filename(s: str) -> int | None:
    s = str(s).strip().lower()
    if s.startswith("vpn_"):
        return 1
    if s.startswith("nonvpn_"):
        return 0
    return None

derived = df["file_names"].map(derive_label_from_filename)

if derived.isna().any():
    bad = df.loc[derived.isna(), "file_names"].head(20)
    raise ValueError(f"Found file_names without vpn_/nonvpn_ prefix. Examples:\n{bad}")

if "label" in df.columns:
    mismatch = (df["label"].astype(int) != derived.astype(int)).sum()
    logger.info(f"Stored label exists. Mismatches vs derived: {mismatch}")
    if mismatch:
        ex = df.loc[df["label"].astype(int) != derived.astype(int), ["file_names", "label"]].head(20)
        raise ValueError(f"Label mismatch between stored label and derived label. Examples:\n{ex}")
else:
    df["label"] = derived.astype(int)
    logger.info("Created label column from file_names.")

logger.info("Label distribution:\n" + str(df["label"].value_counts()))

2026-02-10 23:24:55 | INFO | ai-vpn-firewall | Created label column from file_names.
2026-02-10 23:24:56 | INFO | ai-vpn-firewall | Label distribution:
label
0    33332
1      379
Name: count, dtype: int64


In [7]:
s = df["file_names"].str.lower().str.strip()

s = s.str.replace("\\", "/", regex=False).str.split("/").str[-1]

s = s.str.replace(".pcap", "", regex=False)

s = s.str.replace(r"^(vpn_|nonvpn_)", "", regex=True)

df["app"] = s.str.split("_", n=1).str[0]

logger.info("Top apps:\n" + str(df["app"].value_counts().head(15)))
df[["file_names", "app", "label"]].head()

2026-02-10 23:25:22 | INFO | ai-vpn-firewall | Top apps:
app
ssh           13563
scp           12845
rsync          1915
sftp           1670
skype-chat     1301
vimeo          1218
voip            617
youtube         341
netflix         205
rdp              36
Name: count, dtype: int64


Unnamed: 0,file_names,app,label
0,vpn_youtube_capture2.pcap,youtube,1
1,nonvpn_sftp_newcapture1.pcap,sftp,0
2,nonvpn_sftp_newcapture1.pcap,sftp,0
3,nonvpn_sftp_newcapture1.pcap,sftp,0
4,nonvpn_sftp_newcapture1.pcap,sftp,0


In [8]:
def is_listlike(x) -> bool:
    return isinstance(x, (list, tuple))

for col in ["timestamps", "sizes", "directions"]:
    bad = df[~df[col].map(is_listlike)]
    if len(bad) > 0:
        raise ValueError(f"Column {col} has non-listlike entries. Examples:\n{bad[[col, 'file_names']].head()}")

lens = pd.DataFrame({
    "t": df["timestamps"].map(len),
    "s": df["sizes"].map(len),
    "d": df["directions"].map(len),
})

mismatch = df[(lens["t"] != lens["s"]) | (lens["t"] != lens["d"])]
logger.info(f"Rows with mismatched packet list lengths: {len(mismatch)}")
if len(mismatch) > 0:
    show = mismatch[["file_names"]].head(10).copy()
    show["len_t"] = lens.loc[mismatch.index, "t"].values
    show["len_s"] = lens.loc[mismatch.index, "s"].values
    show["len_d"] = lens.loc[mismatch.index, "d"].values
    raise ValueError(f"Found mismatched list lengths. Examples:\n{show}")

2026-02-10 23:25:24 | INFO | ai-vpn-firewall | Rows with mismatched packet list lengths: 0


In [9]:
def dirs_valid(dirs) -> bool:
    return set(dirs).issubset({0, 1})

bad_dir = df[~df["directions"].map(dirs_valid)]
logger.info(f"Rows with invalid direction values: {len(bad_dir)}")
if len(bad_dir) > 0:
    ex = bad_dir[["file_names", "directions"]].head(5)
    raise ValueError(f"Invalid direction values found. Examples:\n{ex}")

2026-02-10 23:25:25 | INFO | ai-vpn-firewall | Rows with invalid direction values: 0


In [10]:
import numpy as np

def backward_stats(ts):
    ts = np.asarray(ts, dtype=float)
    diffs = np.diff(ts)
    backward = diffs[diffs < 0]
    if backward.size == 0:
        return 0, 0.0, 0.0
    return int(backward.size), float(backward.min()), float(backward.mean())

stats = df["timestamps"].map(backward_stats)
df["n_backsteps"] = stats.map(lambda x: x[0])
df["min_backstep"] = stats.map(lambda x: x[1])   # most negative delta
df["mean_backstep"] = stats.map(lambda x: x[2])

n_bad = (df["n_backsteps"] > 0).sum()
logger.info(f"Rows with non-monotonic timestamps: {n_bad} / {len(df)} ({100*n_bad/len(df):.4f}%)")

df.loc[df["n_backsteps"] > 0, ["file_names", "n_backsteps", "min_backstep", "mean_backstep"]].head(10)

2026-02-10 23:25:29 | INFO | ai-vpn-firewall | Rows with non-monotonic timestamps: 246 / 33711 (0.7297%)


Unnamed: 0,file_names,n_backsteps,min_backstep,mean_backstep
0,vpn_youtube_capture2.pcap,4,-5.00679e-06,-2.503395e-06
3,nonvpn_sftp_newcapture1.pcap,831,-0.0001358986,-1.711392e-06
651,nonvpn_sftp_newcapture1.pcap,593,-0.0001819134,-1.978914e-06
681,nonvpn_sftp_newcapture1.pcap,701,-3.695488e-05,-1.913811e-06
734,nonvpn_rdp_capture4.pcap,1,-2.861023e-06,-2.861023e-06
739,nonvpn_rdp_capture4.pcap,1,-9.536743e-07,-9.536743e-07
740,vpn_rsync_capture2.pcap,367,-9.298325e-05,-2.12303e-06
787,nonvpn_sftp_capture1.pcap,191,-1.192093e-05,-1.529124e-06
972,vpn_sftp_capture1.pcap,1696,-0.0001020432,-1.606373e-06
1648,nonvpn_scp_long_capture1.pcap,1,-9.536743e-07,-9.536743e-07


In [11]:
def make_non_decreasing(ts, eps=1e-6):
    """
    Fix timestamp jitter without reordering packets.
    Ensures t[i] >= t[i-1] by pushing forward when needed.
    eps ensures strict progress if you want it.
    """
    out = []
    prev = None
    for t in ts:
        t = float(t)
        if prev is None:
            out.append(t)
            prev = t
            continue
        if t < prev:
            t = prev + eps
        out.append(t)
        prev = t
    return out

bad_mask = df["n_backsteps"] > 0
logger.info(f"Repairing timestamps for {bad_mask.sum()} rows...")

df.loc[bad_mask, "timestamps"] = df.loc[bad_mask, "timestamps"].map(make_non_decreasing)

# verify
def is_non_decreasing(ts) -> bool:
    return all(ts[i] <= ts[i+1] for i in range(len(ts)-1))

bad_ts_after = (~df["timestamps"].map(is_non_decreasing)).sum()
logger.info(f"Rows still non-monotonic after repair: {bad_ts_after}")
if bad_ts_after:
    raise ValueError("Timestamp repair failed for some rows. Investigate.")

2026-02-10 23:25:30 | INFO | ai-vpn-firewall | Repairing timestamps for 246 rows...
2026-02-10 23:25:35 | INFO | ai-vpn-firewall | Rows still non-monotonic after repair: 0


In [12]:
repair_summary = {
    "rows_total": int(len(df)),
    "rows_repaired": int((df["n_backsteps"] > 0).sum()),
    "pct_repaired": float(100 * (df["n_backsteps"] > 0).mean()),
    "max_backsteps_in_row": int(df["n_backsteps"].max()),
    "worst_min_backstep": float(df["min_backstep"].min()),  # most negative jump
}

logger.info("Timestamp repair summary:\n" + str(repair_summary))
repair_summary

2026-02-10 23:25:35 | INFO | ai-vpn-firewall | Timestamp repair summary:
{'rows_total': 33711, 'rows_repaired': 246, 'pct_repaired': 0.7297321349114533, 'max_backsteps_in_row': 1696, 'worst_min_backstep': -0.0001819133758544922}


{'rows_total': 33711,
 'rows_repaired': 246,
 'pct_repaired': 0.7297321349114533,
 'max_backsteps_in_row': 1696,
 'worst_min_backstep': -0.0001819133758544922}

In [13]:
df = df.drop(columns=["n_backsteps", "min_backstep", "mean_backstep"], errors="ignore")

In [14]:
bad_ts = df[~df["timestamps"].map(is_non_decreasing)]
logger.info(f"Rows with non-monotonic timestamps (final): {len(bad_ts)}")
assert len(bad_ts) == 0

2026-02-10 23:25:38 | INFO | ai-vpn-firewall | Rows with non-monotonic timestamps (final): 0


In [15]:
df["packet_count"] = df["sizes"].map(len)
logger.info(df["packet_count"].describe().to_string())

2026-02-10 23:25:38 | INFO | ai-vpn-firewall | count    3.371100e+04
mean     1.130292e+03
std      3.741679e+04
min      1.000000e+00
25%      2.000000e+00
50%      2.000000e+00
75%      2.000000e+00
max      3.842411e+06


In [16]:
def sizes_valid(sz) -> bool:
    if any(x is None for x in sz):
        return False
    return all((isinstance(x, (int, float)) and x >= 0 and x < 20000) for x in sz)

bad_sizes = df[~df["sizes"].map(sizes_valid)]
logger.info(f"Rows with invalid sizes: {len(bad_sizes)}")
if len(bad_sizes) > 0:
    ex = bad_sizes[["file_names", "sizes"]].head(3)
    raise ValueError(f"Invalid packet sizes found. Examples:\n{ex}")

2026-02-10 23:25:46 | INFO | ai-vpn-firewall | Rows with invalid sizes: 0


In [17]:
def normalize_capture_name(s: str) -> str:
    s = str(s).strip().lower()
    s = s.replace("\\", "/").split("/")[-1]
    return s

df["capture_name"] = df["file_names"].map(normalize_capture_name)

df["capture_id"] = df["capture_name"]

logger.info(f"Unique captures: {df['capture_id'].nunique()}")
df["capture_id"].value_counts().head(10)

2026-02-10 23:25:47 | INFO | ai-vpn-firewall | Unique captures: 165


capture_id
nonvpn_ssh_capture5.pcap         11368
nonvpn_scp_long_capture1.pcap    10555
nonvpn_ssh_capture3.pcap          1600
nonvpn_vimeo_capture1.pcap        1217
nonvpn_scp_newcapture1.pcap       1214
nonvpn_scp_capture1.pcap          1074
nonvpn_rsync_newcapture1.pcap     1013
nonvpn_rsync_capture1.pcap         898
nonvpn_sftp_newcapture1.pcap       709
nonvpn_sftp_newcapture2.pcap       648
Name: count, dtype: int64

In [18]:
def normalize_connection(conn) -> str:
    return str(conn)

df["flow_id"] = df["connection"].map(normalize_connection)

logger.info(f"Unique flows: {df['flow_id'].nunique()}")

2026-02-10 23:25:49 | INFO | ai-vpn-firewall | Unique flows: 32682


In [19]:
label_per_capture = df.groupby("capture_id")["label"].nunique()
mixed = (label_per_capture > 1).sum()
logger.info(f"Captures with mixed labels (both VPN and nonVPN): {mixed}")

cap_stats = df.groupby(["capture_id", "label"]).size().unstack(fill_value=0)
cap_stats["total"] = cap_stats.sum(axis=1)
cap_stats.sort_values("total", ascending=False).head(10)

2026-02-10 23:25:49 | INFO | ai-vpn-firewall | Captures with mixed labels (both VPN and nonVPN): 0


label,0,1,total
capture_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
nonvpn_ssh_capture5.pcap,11368,0,11368
nonvpn_scp_long_capture1.pcap,10555,0,10555
nonvpn_ssh_capture3.pcap,1600,0,1600
nonvpn_vimeo_capture1.pcap,1217,0,1217
nonvpn_scp_newcapture1.pcap,1214,0,1214
nonvpn_scp_capture1.pcap,1074,0,1074
nonvpn_rsync_newcapture1.pcap,1013,0,1013
nonvpn_rsync_capture1.pcap,898,0,898
nonvpn_sftp_newcapture1.pcap,709,0,709
nonvpn_sftp_newcapture2.pcap,648,0,648


In [20]:
df["packet_count"] = df["sizes"].map(len)
logger.info(df["packet_count"].describe().to_string())

for n in [10, 20, 50, 100]:
    pct = (df["packet_count"] >= n).mean() * 100
    logger.info(f"Flows with >= {n} packets: {pct:.2f}%")

2026-02-10 23:25:49 | INFO | ai-vpn-firewall | count    3.371100e+04
mean     1.130292e+03
std      3.741679e+04
min      1.000000e+00
25%      2.000000e+00
50%      2.000000e+00
75%      2.000000e+00
max      3.842411e+06
2026-02-10 23:25:49 | INFO | ai-vpn-firewall | Flows with >= 10 packets: 10.75%
2026-02-10 23:25:49 | INFO | ai-vpn-firewall | Flows with >= 20 packets: 8.67%
2026-02-10 23:25:49 | INFO | ai-vpn-firewall | Flows with >= 50 packets: 4.71%
2026-02-10 23:25:49 | INFO | ai-vpn-firewall | Flows with >= 100 packets: 2.82%


In [21]:
flows = df[
    [
        "capture_id",
        "capture_name",
        "flow_id",
        "connection",
        "timestamps",
        "sizes",
        "directions",
        "file_names",
        "app",
        "label",
        "packet_count",
    ]
].copy()

logger.info(f"flows table: shape={flows.shape}")
flows.head()

2026-02-10 23:25:49 | INFO | ai-vpn-firewall | flows table: shape=(33711, 11)


Unnamed: 0,capture_id,capture_name,flow_id,connection,timestamps,sizes,directions,file_names,app,label,packet_count
0,vpn_youtube_capture2.pcap,vpn_youtube_capture2.pcap,"('10.123.1.2', 1195, '10.123.1.1', 1195, 17)","(10.123.1.2, 1195, 10.123.1.1, 1195, 17)","[1563289706.330096, 1563289706.330207, 1563289...","[120, 88, 120, 88, 120, 88, 120, 120, 152, 120...","[1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, ...",vpn_youtube_capture2.pcap,youtube,1,62283
1,nonvpn_sftp_newcapture1.pcap,nonvpn_sftp_newcapture1.pcap,"('10.113.1.2', 22924, '10.115.1.2', 53, 17)","(10.113.1.2, 22924, 10.115.1.2, 53, 17)","[1561391908.523659, 1561391908.524042]","[63, 79]","[1, 0]",nonvpn_sftp_newcapture1.pcap,sftp,0,2
2,nonvpn_sftp_newcapture1.pcap,nonvpn_sftp_newcapture1.pcap,"('10.113.1.2', 53065, '10.115.1.2', 53, 17)","(10.113.1.2, 53065, 10.115.1.2, 53, 17)","[1561391908.523706, 1561391908.524059]","[63, 63]","[1, 0]",nonvpn_sftp_newcapture1.pcap,sftp,0,2
3,nonvpn_sftp_newcapture1.pcap,nonvpn_sftp_newcapture1.pcap,"('10.113.1.150', 39816, '10.115.1.123', 22, 6)","(10.113.1.150, 39816, 10.115.1.123, 22, 6)","[1561391908.524836, 1561391908.525027, 1561391...","[60, 60, 52, 73, 52, 73, 52, 1378, 222, 52, 13...","[1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, ...",nonvpn_sftp_newcapture1.pcap,sftp,0,2002346
4,nonvpn_sftp_newcapture1.pcap,nonvpn_sftp_newcapture1.pcap,"('10.115.1.2', 6589, '10.113.1.2', 53, 17)","(10.115.1.2, 6589, 10.113.1.2, 53, 17)","[1561391908.594887, 1561391908.595301]","[51, 102]","[1, 0]",nonvpn_sftp_newcapture1.pcap,sftp,0,2


In [22]:
dup = flows.duplicated(subset=["capture_id", "flow_id"]).sum()
logger.info(f"Duplicate (capture_id, flow_id) pairs: {dup}")

if dup > 0:
    ex = flows[flows.duplicated(subset=["capture_id", "flow_id"], keep=False)].head(10)
    raise ValueError(f"Found duplicate flow_id within same capture. Examples:\n{ex[['capture_id','flow_id','file_names']]}")

logger.info("All integrity checks passed ")

2026-02-10 23:25:50 | INFO | ai-vpn-firewall | Duplicate (capture_id, flow_id) pairs: 0
2026-02-10 23:25:50 | INFO | ai-vpn-firewall | All integrity checks passed 


In [23]:
for c in flows.columns:
    if flows[c].dtype == "object":
        print("object col:", c)

flows["connection"].head()
type(flows.loc[0, "connection"])

object col: connection
object col: timestamps
object col: sizes
object col: directions
object col: app


tuple

In [24]:
def conn_to_str(conn) -> str:
    try:
        src_ip, src_port, dst_ip, dst_port, proto = conn
        return f"{src_ip}:{int(src_port)}-{dst_ip}:{int(dst_port)}-p{int(proto)}"
    except Exception:
        return str(conn)

flows["connection_str"] = flows["connection"].map(conn_to_str)

In [25]:
flows_to_save = flows.drop(columns=["connection"])
flows_to_save["timestamps"] = flows_to_save["timestamps"].map(lambda xs: [float(x) for x in xs])
flows_to_save["sizes"] = flows_to_save["sizes"].map(lambda xs: [int(x) for x in xs])
flows_to_save["directions"] = flows_to_save["directions"].map(lambda xs: [int(x) for x in xs])

In [26]:
out_dir = paths.data_processed / "vnat"
out_dir.mkdir(parents=True, exist_ok=True)

out_path = out_dir / "flows.parquet"
flows_to_save.to_parquet(out_path, index=False)

logger.info(f"Saved flows parquet: {out_path}")

2026-02-10 23:26:52 | INFO | ai-vpn-firewall | Saved flows parquet: C:\Users\scoti\PycharmProjects\ai-vpn-firewall\data\processed\vnat\flows.parquet


In [30]:
reloaded = pd.read_parquet(out_path)
logger.info(f"Reloaded parquet: shape={reloaded.shape}")

assert set(reloaded.columns) == set(flows.columns)
assert reloaded["capture_id"].nunique() == flows["capture_id"].nunique()

reloaded.head()

2026-02-10 23:36:11 | INFO | ai-vpn-firewall | Reloaded parquet: shape=(33711, 11)


AssertionError: 

In [31]:
print("Saved columns:", sorted(reloaded.columns))
print("In-memory columns:", sorted(flows.columns))

print("Missing in reloaded:", sorted(set(flows.columns) - set(reloaded.columns)))
print("Extra in reloaded:", sorted(set(reloaded.columns) - set(flows.columns)))

Saved columns: ['app', 'capture_id', 'capture_name', 'connection_str', 'directions', 'file_names', 'flow_id', 'label', 'packet_count', 'sizes', 'timestamps']
In-memory columns: ['app', 'capture_id', 'capture_name', 'connection', 'connection_str', 'directions', 'file_names', 'flow_id', 'label', 'packet_count', 'sizes', 'timestamps']
Missing in reloaded: ['connection']
Extra in reloaded: []


In [32]:
assert set(reloaded.columns) == set(flows_to_save.columns)
assert reloaded["capture_id"].nunique() == flows_to_save["capture_id"].nunique()
assert len(reloaded) == len(flows_to_save)

In [33]:
EXPECTED_COLS = {
    "capture_id",
    "capture_name",
    "flow_id",
    "connection_str",
    "timestamps",
    "sizes",
    "directions",
    "file_names",
    "app",
    "label",
    "packet_count",
}

assert set(reloaded.columns) == EXPECTED_COLS, (
    f"Schema mismatch. "
    f"Missing: {EXPECTED_COLS - set(reloaded.columns)}, "
    f"Extra: {set(reloaded.columns) - EXPECTED_COLS}"
)

assert len(reloaded) > 0
assert reloaded["capture_id"].nunique() > 0
assert reloaded["flow_id"].nunique() > 0

logger.info("flows.parquet schema and integrity checks passed")

2026-02-10 23:36:32 | INFO | ai-vpn-firewall | flows.parquet schema and integrity checks passed


In [34]:
del flows

In [35]:
reloaded["label"].value_counts()

label
0    33332
1      379
Name: count, dtype: int64