Test framework

In [None]:

from imports import *

import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from networkx.algorithms import community  

import time
import config
from utiles import record_memory_usage
from imports import * 

# -----------------------------------------------------------------------------
# 0) Reset the shared profiling state
# -----------------------------------------------------------------------------
# Empty both memory traces and take a fresh time origin; every elapsed time in
# this notebook is computed as time.time() - config.start_time.
config.memory_times, config.memory_values = [], []
config.start_time = time.time()

# -----------------------------------------------------------------------------
# 1) Load the dataset and build a cleaned sample
# -----------------------------------------------------------------------------
df = pd.read_csv("../IoT Network Intrusion Dataset.csv")
record_memory_usage(label="after_load", record_every=1, iteration=0)

# Columns removed before the similarity analysis (identifier / timestamp /
# label columns, judging by their names).
_excluded_columns = [
    'Flow_ID', 'Cat', 'Sub_Cat', 'Timestamp',
    'Src_IP', 'Dst_IP', 'Label',
]
df = df.drop(columns=_excluded_columns)
record_memory_usage(label="after_drop_cols", record_every=1, iteration=1)

# Fixed-seed sample of 70,000 rows; +/-inf is mapped to NaN so that a single
# dropna() discards every row holding a non-finite or missing value.
df_sample = (
    df.sample(n=70000, random_state=42)
      .replace([np.inf, -np.inf], np.nan)
      .dropna()
)
record_memory_usage(label="after_sample_clean", record_every=1, iteration=2)

# -----------------------------------------------------------------------------
# 2) Transpose and compute feature-to-feature similarity
# -----------------------------------------------------------------------------
# After transposing, each row is one feature (column of df_sample), so the
# cosine similarity is computed between features rather than between samples.
df_transposed = df_sample.T
record_memory_usage(label="after_transpose", record_every=1, iteration=3)

similarity_matrix = cosine_similarity(df_transposed)
record_memory_usage(label="after_cosine", record_every=1, iteration=4)

# -----------------------------------------------------------------------------
# 3) Build the feature graph
# -----------------------------------------------------------------------------
threshold = 0.99
features = df_transposed.index.tolist()

# One node per feature; features with no sufficiently similar partner stay
# isolated in the graph.
G = nx.Graph()
G.add_nodes_from(features)
record_memory_usage(label="after_add_nodes", record_every=1, iteration=5)

# Walk the upper triangle row by row and connect every pair whose similarity
# reaches the threshold, storing the similarity as the edge weight. Memory is
# sampled once per row (subject to record_every).
for i, _source in enumerate(features):
    G.add_weighted_edges_from(
        (_source, features[j], similarity_matrix[i, j])
        for j in range(i + 1, len(features))
        if similarity_matrix[i, j] >= threshold
    )
    record_memory_usage(label="edge_build", record_every=500, iteration=i)

t_fs_end = time.time() - config.start_time
print(f"Feature selection done in {t_fs_end:.1f}s")

# -----------------------------------------------------------------------------
# 4) Communities and one representative feature per community
# -----------------------------------------------------------------------------
communities = community.greedy_modularity_communities(G)
record_memory_usage(label="after_communities", record_every=1, iteration=0)

selected_features = []
for idx, comm in enumerate(communities):
    if len(comm) <= 1:
        # A single-feature community is kept as is.
        selected_features.extend(comm)
    else:
        # Inside a larger community, keep the node with the highest degree
        # centrality measured on the community's induced subgraph (the first
        # one encountered wins on ties).
        subg = G.subgraph(comm)
        cent = nx.degree_centrality(subg)
        rep, _ = max(cent.items(), key=lambda item: item[1])
        selected_features.append(rep)
    record_memory_usage(label="centrality", record_every=5, iteration=idx)

t_fs_end2 = time.time() - config.start_time
print(f"Community & repr. selection done in {t_fs_end2:.1f}s")

# -----------------------------------------------------------------------------
# 5) Plot the feature graph
# -----------------------------------------------------------------------------
plt.figure(figsize=(12, 8))

# NOTE(review): no seed is passed to spring_layout, so node positions are not
# fixed between runs.
pos = nx.spring_layout(G)

# Selected representatives are drawn in red, every other feature in sky blue.
_selected_lookup = set(selected_features)
_palette = {True: "red", False: "skyblue"}
colors = [_palette[node in _selected_lookup] for node in G]

nx.draw_networkx(
    G,
    pos=pos,
    node_color=colors,
    node_size=50,
    with_labels=False,
    edge_color="gray",
)
plt.title("Graph-Based Feature Selection")
plt.axis("off")
record_memory_usage(label="after_plot", record_every=1, iteration=0)

# -----------------------------------------------------------------------------
# 6) Final DataFrame
# -----------------------------------------------------------------------------
# Features that are graph nodes but were not picked as representatives.
all_feats = set(G.nodes())
unselected = all_feats.difference(set(selected_features))

# The CSV is read again so that df_final starts from every original column;
# only the non-representative features are then removed.
df_final = pd.read_csv("../IoT Network Intrusion Dataset.csv").drop(
    columns=list(unselected)
)
record_memory_usage(label="after_final_drop", record_every=1, iteration=0)

t_fs_complete = time.time() - config.start_time
print(f"Total feature selection phase in {t_fs_complete:.1f}s")


In [12]:
import optuna
import torch
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import matthews_corrcoef
from data import *
from dataset import *
from neural import *
from symbolic import *
from trainer import *

# Graph-construction pipeline. Each phase is followed by a t_* checkpoint that
# stores the seconds elapsed since config.start_time; the checkpoints are used
# later as the phase boundaries of the memory-vs-time plot.
df=df_final
# Debug shortcut (disabled) to run on the first 100 rows only:
#df= df[0:100]
# NOTE(review): the purpose of this short pause is not evident from this cell
# -- confirm whether it is still needed.
time.sleep(0.1) 
# Overwrites the value computed at the end of the feature-selection cell, so
# the "feature selection" span extends up to this point.
t_fs_complete = time.time() - config.start_time

# -----------------------------------------------------------------------------
# --- PHASE 1: build the direct graphs ---
graphs_info = create_graphs_from_dataset(df, record_every=100)
t_directos   = time.time() - config.start_time

# -----------------------------------------------------------------------------
# --- PHASE 2: graph statistics (output written to config.grafos_directos2) ---
results = analyze_graphs(graphs_info,
                         output_filename=config.grafos_directos2,
                         record_every=100)
t_analysis   = time.time() - config.start_time

# -----------------------------------------------------------------------------
# --- PHASE 3: reciprocal (line) graphs, from the phase-2 file into
# --- config.grafos_reciprocos3 ---
convert_and_save_line_graphs(config.grafos_directos2,
                             config.grafos_reciprocos3,
                             record_every=100)
t_recip      = time.time() - config.start_time

# -----------------------------------------------------------------------------
# --- PHASE 4: extract MSTs, from the phase-3 file into config.grafos_mst ---
compute_and_save_mst_graphs(config.grafos_reciprocos3,
                            config.grafos_mst,
                            record_every=50)
t_mst        = time.time() - config.start_time


# -----------------------------------------------------------------------------
# --- PHASE 5: Optuna hyperparameter search ---
# Start of the search, in seconds since config.start_time.
t_opt_start    = time.time() - config.start_time

def objective(trial: optuna.Trial) -> float:
    """Optuna objective: train one VGAE configuration and score it.

    Samples architecture, optimiser, loss-weight and rule hyperparameters from
    ``trial``, builds train/validation datasets from the MST graph pickle,
    trains for up to 50 epochs on CPU and returns the value produced by
    ``Trainer.fit`` (presumably the best validation MCC -- confirm against
    ``Trainer``). The study that calls this function maximises that value.

    Args:
        trial: Optuna trial used to sample hyperparameters; it is also handed
            to ``Trainer.fit`` (presumably for pruning reports -- confirm).

    Returns:
        The score returned by ``Trainer.fit``.
    """
    # ---- search space -------------------------------------------------------
    # The suggest_* calls are kept in a fixed order; keyword arguments are
    # evaluated left to right, so the order below is the sampling order.
    architecture = dict(
        hidden_dim=trial.suggest_categorical("h_dim", [32, 64, 128]),
        latent_dim=trial.suggest_categorical("z_dim", [16, 32, 64]),
        num_layers=trial.suggest_int("n_layers", 2, 4),
        dropout=trial.suggest_float("dropout", 0.1, 0.5),
        activation=trial.suggest_categorical("act_fn", ["relu", "tanh", "leakyrelu"]),
    )
    learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    kl_weight = trial.suggest_float("beta_KL", 1e-3, 1.0, log=True)
    ce_weight = trial.suggest_float("gamma_CE", 1e-3, 1.0, log=True)
    rec_weight = trial.suggest_float("lam_rec", 0.0, 1.0)
    # The upper bound depends on lam_rec so that lam_rec + lam_lat <= 1.
    lat_weight = trial.suggest_float("lam_lat", 0.0, 1.0 - rec_weight)
    fuse_weight = trial.suggest_float("eta_fuse", 0.0, 1.0)
    sigma_multiplier = trial.suggest_int("k_rule", 1, 3)
    rule_tau = trial.suggest_float("tau", 0.0, 1.0)

    # ---- data ---------------------------------------------------------------
    # NOTE(review): the pickle is loaded and partitioned again on every trial,
    # and the descriptor statistics in the captured output differ from trial to
    # trial, which suggests each trial is scored on a different split -- confirm
    # whether that is intended. The test split is not used here.
    train_graphs, _unused_test_graphs, val_graphs = partition_graphs(
        pkl_file=r"\grafos\grafos_mst.pkl",
        train=200,
        test=2000,
        validacion=1400,
        min_normals_test=58,
        min_normals_valid=40,
    )

    # Descriptor statistics and the feature scaler are fitted on the training
    # split only and then shared by both datasets.
    descriptor_stats = compute_descriptors_statistics(train_graphs)
    feature_scaler = fit_feature_scaler(train_graphs)

    def _as_dataset(graphs):
        # Wrap a list of raw graphs using the shared scaler and rule settings.
        return GraphDataset(
            graphs,
            scaler=feature_scaler,
            rule_stats=descriptor_stats,
            k_sigma=sigma_multiplier,
            tau=rule_tau,
        )

    train_set = _as_dataset(train_graphs)
    val_set = _as_dataset(val_graphs)

    loader_options = dict(batch_size=16, collate_fn=collate_small_graphs)
    fit_loader = DataLoader(train_set, shuffle=True, **loader_options)
    eval_loader = DataLoader(val_set, shuffle=False, **loader_options)

    # ---- model and optimisation ----------------------------------------------
    # Input width is read from the second element of the first training item
    # (assumes that element is a 2-D feature matrix -- TODO confirm).
    first_item = train_set[0]
    n_inputs = first_item[1].shape[1]

    net = VGAE(in_dim=n_inputs, **architecture)
    optimiser = Adam(net.parameters(), lr=learning_rate)
    # mode="max": the monitored quantity is one that should increase.
    lr_schedule = ReduceLROnPlateau(optimiser, mode="max", factor=0.5, patience=5)
    stopper = EarlyStop(patience=10, delta=1e-4)

    runner = Trainer(
        model=net,
        loaders=(fit_loader, eval_loader),
        optimizer=optimiser,
        scheduler=lr_schedule,
        early_stopper=stopper,
        beta_KL=kl_weight,
        gamma_CE=ce_weight,
        lambdas=(rec_weight, lat_weight),
        eta_fuse=fuse_weight,
        device="cpu",
    )

    return runner.fit(epochs=50, trial=trial)


# --- Run the hyperparameter search --------------------------------------------
# Seeded TPE sampler plus a median pruner that only starts pruning after five
# warm-up steps; the study maximises the value returned by `objective`.
sampler = optuna.samplers.TPESampler(seed=42)
pruner = optuna.pruners.MedianPruner(n_warmup_steps=5)
_study_options = {"direction": "maximize", "sampler": sampler, "pruner": pruner}
study = optuna.create_study(**_study_options)
study.optimize(objective, n_trials=40)

# End of the search, in seconds since config.start_time.
t_opt_end = time.time() - config.start_time

# --- Rebuild the data and the model with the best hyperparameters --------------
best = study.best_params

# NOTE(review): the split sizes and minimum-normal counts used here differ from
# the ones used inside `objective` -- confirm that this is intended.
train_raw, test_raw, val_raw = partition_graphs(
    pkl_file=r"\grafos\grafos_mst.pkl",
    train=200,
    test=1500,
    validacion=1500,
    min_normals_test=68,
    min_normals_valid=30,
)

# Statistics and scaler are fitted on the training split only.
rule_stats = compute_descriptors_statistics(train_raw)
scaler = fit_feature_scaler(train_raw)

# The final model is trained on train + validation graphs (assumes both are
# lists, so `+` concatenates them -- TODO confirm).
trainval_raw = train_raw + val_raw

_rule_options = {"k_sigma": best["k_rule"], "tau": best["tau"]}
trainval_ds = GraphDataset(trainval_raw, scaler, rule_stats, **_rule_options)
test_ds = GraphDataset(test_raw, scaler, rule_stats, **_rule_options)

_loader_options = {"batch_size": 16, "collate_fn": collate_small_graphs}
trainval_loader = DataLoader(trainval_ds, shuffle=True, **_loader_options)
test_loader = DataLoader(test_ds, shuffle=False, **_loader_options)

# Input width is read from the second element of the first dataset item.
in_dim = trainval_ds[0][1].shape[1]
_architecture = {
    "hidden_dim": best["h_dim"],
    "latent_dim": best["z_dim"],
    "num_layers": best["n_layers"],
    "activation": best["act_fn"],
    "dropout": best["dropout"],
}
model = VGAE(in_dim=in_dim, **_architecture)

optimizer = Adam(model.parameters(), lr=best["lr"])
scheduler = ReduceLROnPlateau(optimizer, mode="max", factor=0.5, patience=5)
early_st = EarlyStop(patience=10, delta=1e-4)

# NOTE(review): the test loader is passed in the slot where `objective` passes
# its validation loader, so whatever Trainer monitors or tunes on that loader
# is computed on test data here -- confirm that this is acceptable.
trainer = Trainer(
    model=model,
    loaders=(trainval_loader, test_loader),
    optimizer=optimizer,
    scheduler=scheduler,
    early_stopper=early_st,
    beta_KL=best["beta_KL"],
    gamma_CE=best["gamma_CE"],
    lambdas=(best["lam_rec"], best["lam_lat"]),
    eta_fuse=best["eta_fuse"],
    device="cpu",
)


# --- Final training run (timed on its own clock) --------------------------------
start_final = time.time()
best_val_mcc = trainer.fit(epochs=20, trial=None)
t_final_train = time.time() - start_final

# --- Score the test set ----------------------------------------------------------
scores, latents, p_hats, s_syms, y_true = trainer.evaluate(test_loader)

# Neural score: weighted mix of the three per-sample outputs, where the third
# weight is whatever remains after lam_rec and lam_lat.
_w_rec = best["lam_rec"]
_w_lat = best["lam_lat"]
_w_rest = 1 - best["lam_rec"] - best["lam_lat"]
s_neural = [
    _w_rec * rec + _w_lat * lat + _w_rest * p
    for rec, lat, p in zip(scores, latents, p_hats)
]

# Final score: eta_fuse blends the neural score with the symbolic score.
_w_neural = best["eta_fuse"]
_w_symbolic = 1 - best["eta_fuse"]
s_final = [
    _w_neural * sn + _w_symbolic * ss
    for sn, ss in zip(s_neural, s_syms)
]

# A sample is predicted as class 1 when its fused score reaches the threshold
# stored on the trainer.
y_pred = [1 if sf >= trainer.best_thr else 0 for sf in s_final]

# --- Report ------------------------------------------------------------------------
_test_mcc = matthews_corrcoef(y_true, y_pred)
print(">>> TEST MCC:", _test_mcc)

_report = classification_report(
    y_true,
    y_pred,
    labels=[0, 1],
    target_names=["Normal", "Anomaly"],
    digits=4,
)
print("\nClassification Report:\n", _report)

_confusion = confusion_matrix(y_true, y_pred, labels=[0, 1])
print("\nConfusion Matrix:\n", _confusion)

# Total elapsed time since config.start_time.
t_final = time.time() - config.start_time
print(f"\n(Fase 7 completada en {t_final:.1f}s desde el inicio)")



# --- Memory-vs-time plot with one shaded band per pipeline phase -----------------
times = config.memory_times
mems = config.memory_values

fig, ax = plt.subplots(figsize=(12,5))
ax.plot(times, mems, '-o', label='Memory (GB)')

# (start, end, legend label) for every phase, in seconds since
# config.start_time; band k is drawn with matplotlib cycle colour "Ck".
_phase_bands = [
    (0.0,           t_fs_complete, '0. Feature sel.'),
    (t_fs_complete, t_directos,    '1. Direct graphs'),
    (t_directos,    t_analysis,    '2. Analysis graphs'),
    (t_analysis,    t_recip,       '3. Reciprocal graphs'),
    (t_recip,       t_mst,         '4. MST extraction'),
    (t_opt_start,   t_opt_end,     '5. Optuna search'),
    (t_opt_end,     t_final,       '6. Final test'),
]
for _k, (_band_start, _band_end, _band_label) in enumerate(_phase_bands):
    ax.axvspan(_band_start, _band_end, color=f'C{_k}', alpha=0.3, label=_band_label)

ax.set(
    xlabel="Time (s)",
    ylabel="Memory (GB)",
    title="Memory vs time in the pipeline",
)
# Legend is anchored just outside the right edge of the axes.
ax.legend(loc='upper left', bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()


[I 2025-04-23 23:54:27,890] A new study created in memory with name: no-name-7d7d3b97-0d51-48dc-97c8-d8c96ba3bbb4
2025-04-23 23:54:27,893 - INFO - Loading graphs from \grafos\grafos_mst.pkl
2025-04-23 23:54:37,965 - INFO - Total loaded: 4304
2025-04-23 23:54:37,968 - INFO - Training set: 200 normals
2025-04-23 23:54:37,971 - INFO - Test set: 2000 graphs (58 normals)
2025-04-23 23:54:37,972 - INFO - Validation set: 1400 graphs (40 normals)


Graphs loaded from \grafos\grafos_mst.pkl


2025-04-23 23:54:41,331 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-23 23:54:41,352 - INFO - Descriptor 'num_nodes': mean = 4.0200, std = 1.9182
2025-04-23 23:54:41,353 - INFO - Descriptor 'num_edges': mean = 2.1650, std = 1.2197
2025-04-23 23:54:41,353 - INFO - Descriptor 'density': mean = 0.5315, std = 0.3388
2025-04-23 23:54:41,353 - INFO - Descriptor 'average_degree': mean = 1.0542, std = 0.1095
2025-04-23 23:54:41,354 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-23 23:54:41,354 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-23 23:54:41,355 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-23 23:54:41,355 - INFO - Descriptor 'spectral_gap': mean = 1.7748, std = 0.4271
2025-04-23 23:54:41,355 - INFO - Descriptor 'data_mean_Src_Port': mean = 15094.9909, std = 12951.6082
2025-04-23 23:54:41,356 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42682.3978, std = 12974.1427
2025-04-23

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-23 23:57:05,922 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-23 23:57:05,943 - INFO - Descriptor 'num_nodes': mean = 3.9550, std = 1.9552
2025-04-23 23:57:05,944 - INFO - Descriptor 'num_edges': mean = 2.1100, std = 1.2074
2025-04-23 23:57:05,944 - INFO - Descriptor 'density': mean = 0.5554, std = 0.3507
2025-04-23 23:57:05,944 - INFO - Descriptor 'average_degree': mean = 1.0464, std = 0.1043
2025-04-23 23:57:05,945 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-23 23:57:05,945 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-23 23:57:05,945 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-23 23:57:05,946 - INFO - Descriptor 'spectral_gap': mean = 1.8098, std = 0.4024
2025-04-23 23:57:05,946 - INFO - Descriptor 'data_mean_Src_Port': mean = 15131.5671, std = 12965.3056
2025-04-23 23:57:05,947 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42644.8676, std = 12989.9480
2025-04-23

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-23 23:59:00,109 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-23 23:59:00,129 - INFO - Descriptor 'num_nodes': mean = 4.0600, std = 1.8805
2025-04-23 23:59:00,130 - INFO - Descriptor 'num_edges': mean = 2.1900, std = 1.1636
2025-04-23 23:59:00,130 - INFO - Descriptor 'density': mean = 0.5257, std = 0.3376
2025-04-23 23:59:00,131 - INFO - Descriptor 'average_degree': mean = 1.0589, std = 0.1141
2025-04-23 23:59:00,131 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-23 23:59:00,131 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-23 23:59:00,132 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-23 23:59:00,132 - INFO - Descriptor 'spectral_gap': mean = 1.7628, std = 0.4380
2025-04-23 23:59:00,132 - INFO - Descriptor 'data_mean_Src_Port': mean = 15807.2953, std = 12990.3031
2025-04-23 23:59:00,133 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42116.5938, std = 13127.4543
2025-04-23

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:01:52,776 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:01:52,797 - INFO - Descriptor 'num_nodes': mean = 3.9900, std = 1.8761
2025-04-24 00:01:52,797 - INFO - Descriptor 'num_edges': mean = 2.1400, std = 1.1792
2025-04-24 00:01:52,798 - INFO - Descriptor 'density': mean = 0.5347, std = 0.3402
2025-04-24 00:01:52,798 - INFO - Descriptor 'average_degree': mean = 1.0514, std = 0.1085
2025-04-24 00:01:52,798 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-24 00:01:52,799 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:01:52,799 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:01:52,799 - INFO - Descriptor 'spectral_gap': mean = 1.7878, std = 0.4220
2025-04-24 00:01:52,800 - INFO - Descriptor 'data_mean_Src_Port': mean = 15135.7716, std = 12594.2964
2025-04-24 00:01:52,800 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42764.7604, std = 12674.6216
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:04:12,432 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:04:12,453 - INFO - Descriptor 'num_nodes': mean = 3.9950, std = 1.9300
2025-04-24 00:04:12,454 - INFO - Descriptor 'num_edges': mean = 2.1400, std = 1.2167
2025-04-24 00:04:12,455 - INFO - Descriptor 'density': mean = 0.5377, std = 0.3420
2025-04-24 00:04:12,455 - INFO - Descriptor 'average_degree': mean = 1.0490, std = 0.1078
2025-04-24 00:04:12,456 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-24 00:04:12,456 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:04:12,456 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:04:12,457 - INFO - Descriptor 'spectral_gap': mean = 1.8028, std = 0.4113
2025-04-24 00:04:12,457 - INFO - Descriptor 'data_mean_Src_Port': mean = 15340.2390, std = 12905.8309
2025-04-24 00:04:12,457 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42395.8523, std = 12929.6631
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:05:58,886 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:05:58,908 - INFO - Descriptor 'num_nodes': mean = 3.8700, std = 1.8823
2025-04-24 00:05:58,908 - INFO - Descriptor 'num_edges': mean = 2.0700, std = 1.1854
2025-04-24 00:05:58,909 - INFO - Descriptor 'density': mean = 0.5610, std = 0.3475
2025-04-24 00:05:58,909 - INFO - Descriptor 'average_degree': mean = 1.0463, std = 0.1039
2025-04-24 00:05:58,910 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-24 00:05:58,910 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:05:58,910 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:05:58,911 - INFO - Descriptor 'spectral_gap': mean = 1.8047, std = 0.4160
2025-04-24 00:05:58,911 - INFO - Descriptor 'data_mean_Src_Port': mean = 14277.3579, std = 11963.9520
2025-04-24 00:05:58,911 - INFO - Descriptor 'data_mean_Dst_Port': mean = 43570.7862, std = 11991.2979
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:07:50,430 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:07:50,451 - INFO - Descriptor 'num_nodes': mean = 4.0500, std = 1.9281
2025-04-24 00:07:50,451 - INFO - Descriptor 'num_edges': mean = 2.1900, std = 1.2385
2025-04-24 00:07:50,451 - INFO - Descriptor 'density': mean = 0.5322, std = 0.3421
2025-04-24 00:07:50,452 - INFO - Descriptor 'average_degree': mean = 1.0563, std = 0.1150
2025-04-24 00:07:50,452 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-24 00:07:50,453 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:07:50,453 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:07:50,453 - INFO - Descriptor 'spectral_gap': mean = 1.7767, std = 0.4317
2025-04-24 00:07:50,454 - INFO - Descriptor 'data_mean_Src_Port': mean = 14355.2216, std = 11695.0949
2025-04-24 00:07:50,454 - INFO - Descriptor 'data_mean_Dst_Port': mean = 43413.2541, std = 11816.8911
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:08:53,422 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:08:53,442 - INFO - Descriptor 'num_nodes': mean = 3.9050, std = 1.7538
2025-04-24 00:08:53,443 - INFO - Descriptor 'num_edges': mean = 2.0850, std = 1.0760
2025-04-24 00:08:53,443 - INFO - Descriptor 'density': mean = 0.5417, std = 0.3415
2025-04-24 00:08:53,444 - INFO - Descriptor 'average_degree': mean = 1.0472, std = 0.1033
2025-04-24 00:08:53,444 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-24 00:08:53,445 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:08:53,445 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:08:53,445 - INFO - Descriptor 'spectral_gap': mean = 1.8028, std = 0.4113
2025-04-24 00:08:53,445 - INFO - Descriptor 'data_mean_Src_Port': mean = 13768.2651, std = 11120.1711
2025-04-24 00:08:53,446 - INFO - Descriptor 'data_mean_Dst_Port': mean = 44156.1472, std = 11169.4295
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:10:50,281 - INFO - Descriptor 'num_nodes': mean = 4.0850, std = 1.8968
2025-04-24 00:10:50,282 - INFO - Descriptor 'num_edges': mean = 2.1950, std = 1.1987
2025-04-24 00:10:50,282 - INFO - Descriptor 'density': mean = 0.5171, std = 0.3351
2025-04-24 00:10:50,283 - INFO - Descriptor 'average_degree': mean = 1.0544, std = 0.1151
2025-04-24 00:10:50,283 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:10:50,283 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:10:50,284 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:10:50,284 - INFO - Descriptor 'spectral_gap': mean = 1.7867, std = 0.4251
2025-04-24 00:10:50,284 - INFO - Descriptor 'data_mean_Src_Port': mean = 14092.2485, std = 11347.3875
2025-04-24 00:10:50,285 - INFO - Descriptor 'data_mean_Dst_Port': mean = 43840.8632, std = 11499.9248
2025-04-24 00:10:50,285 - INFO - Descriptor 'data_mean_Protocol': mean = 6.0267, std = 0.1233
2

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:11:59,510 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:11:59,532 - INFO - Descriptor 'num_nodes': mean = 3.9300, std = 1.9963
2025-04-24 00:11:59,533 - INFO - Descriptor 'num_edges': mean = 2.1200, std = 1.2750
2025-04-24 00:11:59,533 - INFO - Descriptor 'density': mean = 0.5667, std = 0.3526
2025-04-24 00:11:59,533 - INFO - Descriptor 'average_degree': mean = 1.0524, std = 0.1119
2025-04-24 00:11:59,534 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:11:59,534 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:11:59,534 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:11:59,535 - INFO - Descriptor 'spectral_gap': mean = 1.7897, std = 0.4265
2025-04-24 00:11:59,535 - INFO - Descriptor 'data_mean_Src_Port': mean = 14560.6594, std = 12244.8932
2025-04-24 00:11:59,536 - INFO - Descriptor 'data_mean_Dst_Port': mean = 43343.5653, std = 12332.1311
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:13:07,718 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:13:07,739 - INFO - Descriptor 'num_nodes': mean = 3.9800, std = 1.8054
2025-04-24 00:13:07,739 - INFO - Descriptor 'num_edges': mean = 2.1350, std = 1.1166
2025-04-24 00:13:07,740 - INFO - Descriptor 'density': mean = 0.5340, std = 0.3389
2025-04-24 00:13:07,740 - INFO - Descriptor 'average_degree': mean = 1.0541, std = 0.1147
2025-04-24 00:13:07,740 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:13:07,741 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:13:07,741 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:13:07,741 - INFO - Descriptor 'spectral_gap': mean = 1.7948, std = 0.4135
2025-04-24 00:13:07,742 - INFO - Descriptor 'data_mean_Src_Port': mean = 14629.5308, std = 12346.6394
2025-04-24 00:13:07,742 - INFO - Descriptor 'data_mean_Dst_Port': mean = 43165.1532, std = 12361.2600
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:15:58,033 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:15:58,053 - INFO - Descriptor 'num_nodes': mean = 4.0400, std = 2.0021
2025-04-24 00:15:58,054 - INFO - Descriptor 'num_edges': mean = 2.1800, std = 1.2797
2025-04-24 00:15:58,055 - INFO - Descriptor 'density': mean = 0.5421, std = 0.3478
2025-04-24 00:15:58,055 - INFO - Descriptor 'average_degree': mean = 1.0523, std = 0.1090
2025-04-24 00:15:58,056 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:15:58,056 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:15:58,056 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:15:58,057 - INFO - Descriptor 'spectral_gap': mean = 1.7797, std = 0.4332
2025-04-24 00:15:58,057 - INFO - Descriptor 'data_mean_Src_Port': mean = 14738.2202, std = 12453.0398
2025-04-24 00:15:58,057 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42933.4008, std = 12497.0864
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:18:54,110 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:18:54,132 - INFO - Descriptor 'num_nodes': mean = 3.9350, std = 1.8790
2025-04-24 00:18:54,132 - INFO - Descriptor 'num_edges': mean = 2.1000, std = 1.1874
2025-04-24 00:18:54,133 - INFO - Descriptor 'density': mean = 0.5451, std = 0.3436
2025-04-24 00:18:54,133 - INFO - Descriptor 'average_degree': mean = 1.0462, std = 0.1100
2025-04-24 00:18:54,133 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:18:54,134 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:18:54,134 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:18:54,134 - INFO - Descriptor 'spectral_gap': mean = 1.8217, std = 0.3993
2025-04-24 00:18:54,135 - INFO - Descriptor 'data_mean_Src_Port': mean = 15045.3842, std = 12630.5344
2025-04-24 00:18:54,135 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42831.6351, std = 12615.3225
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:20:28,018 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:20:28,037 - INFO - Descriptor 'num_nodes': mean = 4.0050, std = 1.8802
2025-04-24 00:20:28,037 - INFO - Descriptor 'num_edges': mean = 2.1700, std = 1.2046
2025-04-24 00:20:28,038 - INFO - Descriptor 'density': mean = 0.5362, std = 0.3399
2025-04-24 00:20:28,038 - INFO - Descriptor 'average_degree': mean = 1.0598, std = 0.1177
2025-04-24 00:20:28,039 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-24 00:20:28,039 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:20:28,039 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:20:28,040 - INFO - Descriptor 'spectral_gap': mean = 1.7647, std = 0.4425
2025-04-24 00:20:28,040 - INFO - Descriptor 'data_mean_Src_Port': mean = 14563.2197, std = 11733.7351
2025-04-24 00:20:28,040 - INFO - Descriptor 'data_mean_Dst_Port': mean = 43412.7103, std = 11906.7164
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:23:20,211 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:23:20,231 - INFO - Descriptor 'num_nodes': mean = 3.9250, std = 1.8866
2025-04-24 00:23:20,232 - INFO - Descriptor 'num_edges': mean = 2.1100, std = 1.2116
2025-04-24 00:23:20,232 - INFO - Descriptor 'density': mean = 0.5470, std = 0.3419
2025-04-24 00:23:20,232 - INFO - Descriptor 'average_degree': mean = 1.0512, std = 0.1127
2025-04-24 00:23:20,233 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:23:20,233 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:23:20,233 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:23:20,234 - INFO - Descriptor 'spectral_gap': mean = 1.8017, std = 0.4146
2025-04-24 00:23:20,234 - INFO - Descriptor 'data_mean_Src_Port': mean = 14935.1781, std = 12552.3965
2025-04-24 00:23:20,235 - INFO - Descriptor 'data_mean_Dst_Port': mean = 43002.7221, std = 12599.2448
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:25:43,001 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:25:43,021 - INFO - Descriptor 'num_nodes': mean = 3.9250, std = 1.9103
2025-04-24 00:25:43,022 - INFO - Descriptor 'num_edges': mean = 2.0900, std = 1.2008
2025-04-24 00:25:43,022 - INFO - Descriptor 'density': mean = 0.5483, std = 0.3455
2025-04-24 00:25:43,023 - INFO - Descriptor 'average_degree': mean = 1.0426, std = 0.1013
2025-04-24 00:25:43,023 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:25:43,023 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:25:43,024 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:25:43,024 - INFO - Descriptor 'spectral_gap': mean = 1.8197, std = 0.4046
2025-04-24 00:25:43,024 - INFO - Descriptor 'data_mean_Src_Port': mean = 14630.8491, std = 12373.7420
2025-04-24 00:25:43,025 - INFO - Descriptor 'data_mean_Dst_Port': mean = 43348.9928, std = 12502.9697
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:28:07,421 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:28:07,440 - INFO - Descriptor 'num_nodes': mean = 4.0100, std = 1.9313
2025-04-24 00:28:07,441 - INFO - Descriptor 'num_edges': mean = 2.1700, std = 1.2414
2025-04-24 00:28:07,441 - INFO - Descriptor 'density': mean = 0.5416, std = 0.3451
2025-04-24 00:28:07,441 - INFO - Descriptor 'average_degree': mean = 1.0565, std = 0.1119
2025-04-24 00:28:07,442 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:28:07,442 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:28:07,442 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:28:07,443 - INFO - Descriptor 'spectral_gap': mean = 1.7647, std = 0.4425
2025-04-24 00:28:07,443 - INFO - Descriptor 'data_mean_Src_Port': mean = 14897.1957, std = 12664.3644
2025-04-24 00:28:07,443 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42923.0438, std = 12718.4806
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:29:04,385 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:29:04,405 - INFO - Descriptor 'num_nodes': mean = 4.0900, std = 1.8926
2025-04-24 00:29:04,406 - INFO - Descriptor 'num_edges': mean = 2.2150, std = 1.2202
2025-04-24 00:29:04,406 - INFO - Descriptor 'density': mean = 0.5171, std = 0.3333
2025-04-24 00:29:04,407 - INFO - Descriptor 'average_degree': mean = 1.0608, std = 0.1198
2025-04-24 00:29:04,407 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:29:04,407 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:29:04,408 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:29:04,408 - INFO - Descriptor 'spectral_gap': mean = 1.7667, std = 0.4379
2025-04-24 00:29:04,409 - INFO - Descriptor 'data_mean_Src_Port': mean = 15097.7416, std = 12816.5936
2025-04-24 00:29:04,409 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42715.8119, std = 12844.3171
2025-04-24

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:30:57,193 - INFO - Descriptor 'num_nodes': mean = 3.9800, std = 1.8438
2025-04-24 00:30:57,194 - INFO - Descriptor 'num_edges': mean = 2.1400, std = 1.1360
2025-04-24 00:30:57,195 - INFO - Descriptor 'density': mean = 0.5379, std = 0.3399
2025-04-24 00:30:57,195 - INFO - Descriptor 'average_degree': mean = 1.0561, std = 0.1139
2025-04-24 00:30:57,195 - INFO - Descriptor 'average_clustering': mean = 0.0015, std = 0.0212
2025-04-24 00:30:57,196 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:30:57,196 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:30:57,196 - INFO - Descriptor 'spectral_gap': mean = 1.7798, std = 0.4238
2025-04-24 00:30:57,197 - INFO - Descriptor 'data_mean_Src_Port': mean = 15383.4275, std = 12829.2887
2025-04-24 00:30:57,197 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42495.8692, std = 12919.6601
2025-04-24 00:30:57,197 - INFO - Descriptor 'data_mean_Protocol': mean = 6.0401, std = 0.1561
2

Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:31:47,426 - INFO - Partitioning done: train=200, test=2000, valid=1400
2025-04-24 00:31:47,448 - INFO - Descriptor 'num_nodes': mean = 3.8750, std = 1.7491
2025-04-24 00:31:47,448 - INFO - Descriptor 'num_edges': mean = 2.0650, std = 1.0587
2025-04-24 00:31:47,449 - INFO - Descriptor 'density': mean = 0.5479, std = 0.3411
2025-04-24 00:31:47,449 - INFO - Descriptor 'average_degree': mean = 1.0481, std = 0.1052
2025-04-24 00:31:47,449 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-24 00:31:47,450 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:31:47,450 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:31:47,451 - INFO - Descriptor 'spectral_gap': mean = 1.8048, std = 0.4062
2025-04-24 00:31:47,451 - INFO - Descriptor 'data_mean_Src_Port': mean = 15393.0513, std = 12859.1185
2025-04-24 00:31:47,451 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42572.1331, std = 12935.9329
2025-04-24

=== Resultados Optuna ===
Best M​CC:     0.961937531746931
Best params:  {'h_dim': 32, 'z_dim': 16, 'n_layers': 3, 'dropout': 0.2770713002465045, 'act_fn': 'relu', 'lr': 0.0037643083521274566, 'beta_KL': 0.04535094634779837, 'gamma_CE': 0.015043838978395768, 'lam_rec': 0.43729139171842235, 'lam_lat': 0.31045483654546724, 'eta_fuse': 0.028478291139971468, 'k_rule': 3, 'tau': 0.9168184602315099}
Mejores params: {'h_dim': 32, 'z_dim': 16, 'n_layers': 3, 'dropout': 0.2770713002465045, 'act_fn': 'relu', 'lr': 0.0037643083521274566, 'beta_KL': 0.04535094634779837, 'gamma_CE': 0.015043838978395768, 'lam_rec': 0.43729139171842235, 'lam_lat': 0.31045483654546724, 'eta_fuse': 0.028478291139971468, 'k_rule': 3, 'tau': 0.9168184602315099}


2025-04-24 00:35:18,756 - INFO - Total loaded: 4304
2025-04-24 00:35:18,759 - INFO - Training set: 200 normals
2025-04-24 00:35:18,762 - INFO - Test set: 1500 graphs (68 normals)
2025-04-24 00:35:18,763 - INFO - Validation set: 1500 graphs (30 normals)
2025-04-24 00:35:22,084 - INFO - Partitioning done: train=200, test=1500, valid=1500


Graphs loaded from \grafos\grafos_mst.pkl


2025-04-24 00:35:22,108 - INFO - Descriptor 'num_nodes': mean = 4.2150, std = 2.0122
2025-04-24 00:35:22,109 - INFO - Descriptor 'num_edges': mean = 2.2700, std = 1.2717
2025-04-24 00:35:22,110 - INFO - Descriptor 'density': mean = 0.5122, std = 0.3434
2025-04-24 00:35:22,110 - INFO - Descriptor 'average_degree': mean = 1.0531, std = 0.1108
2025-04-24 00:35:22,110 - INFO - Descriptor 'average_clustering': mean = 0.0000, std = 0.0000
2025-04-24 00:35:22,111 - INFO - Descriptor 'diameter': mean = nan, std = nan
2025-04-24 00:35:22,111 - INFO - Descriptor 'average_shortest_path_length': mean = nan, std = nan
2025-04-24 00:35:22,111 - INFO - Descriptor 'spectral_gap': mean = 1.7797, std = 0.4332
2025-04-24 00:35:22,112 - INFO - Descriptor 'data_mean_Src_Port': mean = 14830.1290, std = 12364.5429
2025-04-24 00:35:22,112 - INFO - Descriptor 'data_mean_Dst_Port': mean = 42925.0088, std = 12381.6448
2025-04-24 00:35:22,112 - INFO - Descriptor 'data_mean_Protocol': mean = 6.0313, std = 0.1232
2

>>> TEST MCC: 0.9661389485159884

Classification Report:
               precision    recall  f1-score   support

      Normal     1.0000    0.9367    0.9673        79
     Anomaly     0.9965    1.0000    0.9982      1421

    accuracy                         0.9967      1500
   macro avg     0.9982    0.9684    0.9828      1500
weighted avg     0.9967    0.9967    0.9966      1500


Confusion Matrix:
 [[  74    5]
 [   0 1421]]
