In [9]:
# ADIM 0: Kurulum
import os, glob, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from collections import Counter

from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline

from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier

from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

DATA_DIR = "/kaggle/input/cicddos2019"
TARGET   = "__label__"

print("ADIM 0 hazÄ±r âœ”")


ADIM 0 hazÄ±r âœ”


In [10]:
# ADIM 1: CIC-DDoS verisini yÃ¼kleme + sÃ¼tun temizliÄŸi

def infer_label(fname):
    # Syn-training.parquet -> Syn
    return os.path.basename(fname).split("-")[0]

def infer_split(fname):
    b = os.path.basename(fname).lower()
    if "train" in b:
        return "train"
    elif "test" in b:
        return "test"
    else:
        return "unknown"

frames = []
for f in sorted(glob.glob(os.path.join(DATA_DIR, "*.parquet"))):
    df = pd.read_parquet(f)
    df["__split__"] = infer_split(f)
    df[TARGET]      = infer_label(f)
    frames.append(df)

raw = pd.concat(frames, ignore_index=True)
raw = raw.replace([np.inf, -np.inf], np.nan)

META     = [TARGET, "__split__"]
features = [c for c in raw.columns if c not in META]
num_cols = [c for c in features if pd.api.types.is_numeric_dtype(raw[c])]

# Sadece sayÄ±sal Ã¶zellikleri tut
df = raw.drop(columns=[c for c in features if c not in num_cols]).copy()

# Sabit sÃ¼tunlarÄ± at
nunique = df[num_cols].nunique(dropna=False)
const_cols = nunique[nunique <= 1].index.tolist()
if const_cols:
    df.drop(columns=const_cols, inplace=True)

# Train/Test ayÄ±r
train_df = df[df["__split__"] == "train"].copy()
test_df  = df[df["__split__"] == "test"].copy()

X_train = train_df.drop(columns=META)
y_train = train_df[TARGET].astype(str)

X_test  = test_df.drop(columns=META)
y_test  = test_df[TARGET].astype(str)

print("YÃ¼klendi âœ“")
print("Train:", X_train.shape, "| Test:", X_test.shape)
print("Train sÄ±nÄ±flarÄ±:", sorted(y_train.unique()))
print("Test  sÄ±nÄ±flarÄ±:", sorted(y_test.unique()))


YÃ¼klendi âœ“
Train: (125170, 65) | Test: (306201, 65)
Train sÄ±nÄ±flarÄ±: ['LDAP', 'MSSQL', 'NetBIOS', 'Portmap', 'Syn', 'UDP', 'UDPLag']
Test  sÄ±nÄ±flarÄ±: ['DNS', 'LDAP', 'MSSQL', 'NTP', 'NetBIOS', 'SNMP', 'Syn', 'TFTP', 'UDP', 'UDPLag']


In [11]:
# ADIM 2: Closed-set (ortak sÄ±nÄ±flar) + manuel undersampling

common = sorted(set(y_train.unique()).intersection(set(y_test.unique())))
train_mask = y_train.isin(common)
test_mask  = y_test.isin(common)

X_tr = X_train[train_mask].copy()
y_tr = y_train[train_mask].copy()

X_te = X_test[test_mask].copy()
y_te = y_test[test_mask].copy()

print("Ortak sÄ±nÄ±flar:", common)
print("Yeni Train/Test:", X_tr.shape, X_te.shape)

# Manuel undersampling
tmp = X_tr.copy()
tmp["__y__"] = y_tr.values
min_count = tmp["__y__"].value_counts().min()

balanced = tmp.groupby("__y__", group_keys=False).apply(
    lambda x: x.sample(min_count, random_state=RANDOM_STATE)
)

y_tr_bal = balanced["__y__"].astype(str)
X_tr_bal = balanced.drop(columns="__y__")

print("\nDengeleme Ã¶nce:", Counter(y_tr))
print("Dengeleme sonra:", Counter(y_tr_bal))
print("Yeni eÄŸitim boyutu:", X_tr_bal.shape)


Ortak sÄ±nÄ±flar: ['LDAP', 'MSSQL', 'NetBIOS', 'Syn', 'UDP', 'UDPLag']
Yeni Train/Test: (120065, 65) (38973, 65)

Dengeleme Ã¶nce: Counter({'Syn': 70336, 'UDP': 17770, 'UDPLag': 12639, 'MSSQL': 10974, 'LDAP': 6715, 'NetBIOS': 1631})
Dengeleme sonra: Counter({'LDAP': 1631, 'MSSQL': 1631, 'NetBIOS': 1631, 'Syn': 1631, 'UDP': 1631, 'UDPLag': 1631})
Yeni eÄŸitim boyutu: (9786, 65)


In [12]:
# ADIM 3: LabelEncoder (tÃ¼m optimizasyonlarda bunu kullanacaÄŸÄ±z)

label_enc = LabelEncoder()
y_tr_bal_enc = label_enc.fit_transform(y_tr_bal)
y_te_enc     = label_enc.transform(y_te)

print("SÄ±nÄ±f sayÄ±sÄ±:", len(label_enc.classes_))
print("SÄ±nÄ±flar:", list(label_enc.classes_))


SÄ±nÄ±f sayÄ±sÄ±: 6
SÄ±nÄ±flar: ['LDAP', 'MSSQL', 'NetBIOS', 'Syn', 'UDP', 'UDPLag']


In [13]:
# ADIM 4: Baz modellerin karÅŸÄ±laÅŸtÄ±rÄ±lmasÄ± (CV: 3-fold)

from sklearn.compose import ColumnTransformer

numeric_features = X_tr_bal.columns.tolist()
preprocess = ColumnTransformer(
    [("num", StandardScaler(), numeric_features)],
    remainder="drop"
)

models = {
    "NaiveBayes": GaussianNB(),
    "LogReg": LogisticRegression(max_iter=1000, n_jobs=-1),
    "DecisionTree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=RANDOM_STATE, n_jobs=-1),
    "KNN": KNeighborsClassifier(n_neighbors=5, n_jobs=-1),
    "GradBoost": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "AdaBoost": AdaBoostClassifier(random_state=RANDOM_STATE),
    "LightGBM": LGBMClassifier(random_state=RANDOM_STATE, n_estimators=200, n_jobs=-1),
    "XGBoost": XGBClassifier(
        random_state=RANDOM_STATE,
        n_estimators=200,
        n_jobs=-1,
        tree_method="hist",
        objective="multi:softmax",
        num_class=len(np.unique(y_tr_bal_enc)),
        eval_metric="mlogloss",
        verbosity=0,
    ),
}

cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)

print("âœ… Baz modeller (3-fold CV, balanced data):")
baseline_results = []

for name, clf in models.items():
    pipe = Pipeline([
        ("prep", preprocess),
        ("clf", clf)
    ])
    acc = cross_val_score(pipe, X_tr_bal, y_tr_bal_enc, cv=cv, scoring="accuracy").mean()
    f1m = cross_val_score(pipe, X_tr_bal, y_tr_bal_enc, cv=cv, scoring="f1_macro").mean()
    baseline_results.append((name, acc, f1m))
    print(f"{name:10s} | Acc={acc:.4f} | F1-macro={f1m:.4f}")

baseline_results = sorted(baseline_results, key=lambda x: x[1], reverse=True)


âœ… Baz modeller (3-fold CV, balanced data):
NaiveBayes | Acc=0.4123 | F1-macro=0.3188
LogReg     | Acc=0.5855 | F1-macro=0.5722
DecisionTree | Acc=0.6087 | F1-macro=0.6082
RandomForest | Acc=0.6540 | F1-macro=0.6481
KNN        | Acc=0.6148 | F1-macro=0.6094
GradBoost  | Acc=0.6661 | F1-macro=0.6558
AdaBoost   | Acc=0.5029 | F1-macro=0.4603
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002792 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12351
[LightGBM] [Info] Number of data points in the train set: 6524, number of used features: 64
[LightGBM] [Info] Start training from score -1.792066
[LightGBM] [Info] Start training from score -1.792066
[LightGBM] [Info] Start training from score -1.791147
[LightGBM] [Info] Start training from score -1.792066
[LightGBM] [Info] Start training from score -1.792066
[LightGBM] [Info] Start training from score -1.791147
[LightGBM] [Info] Auto-choosing col-wise mul

In [14]:
# ADIM 5: Mutual Information ile Ã¶zellik Ã¶nem sÄ±ralamasÄ±

from sklearn.metrics import mutual_info_score

def mi_scores(X, y_enc, q=10):
    scores = {}
    for col in X.columns:
        ranks = X[col].rank(method="first")
        try:
            bins = pd.qcut(ranks, q=q, duplicates="drop")
            enc = pd.factorize(bins)[0]
            scores[col] = mutual_info_score(enc, y_enc)
        except Exception:
            scores[col] = 0.0
    return pd.Series(scores).sort_values(ascending=False)

mi_series = mi_scores(X_tr_bal, y_tr_bal_enc)

print("En yÃ¼ksek bilgiye sahip 10 Ã¶zellik:")
print(mi_series.head(10))


En yÃ¼ksek bilgiye sahip 10 Ã¶zellik:
SYN Flag Count           1.524883
CWE Flag Count           1.405789
RST Flag Count           1.385206
Fwd PSH Flags            1.385206
Bwd Packet Length Std    1.377581
ACK Flag Count           1.339930
Active Min               1.300542
Protocol                 1.294937
Active Std               1.292313
Idle Std                 1.291538
dtype: float64


In [15]:
# ADIM 6 (GÃœNCEL): Sessiz MBO + LightGBM (log spam yok)

import contextlib, io
from sklearn.utils import check_random_state

rng = check_random_state(RANDOM_STATE)

# --- LightGBM + k (Ã¶zellik sayÄ±sÄ±) arama alanÄ± ---
bounds_lgbm = {
    "learning_rate": (0.02, 0.2),
    "num_leaves": (31, 255),
    "max_depth": (6, 32),
    "subsample": (0.6, 1.0),
    "colsample_bytree": (0.6, 1.0),
    "min_child_samples": (10, 100),
    "reg_alpha": (0.0, 1.0),
    "reg_lambda": (0.0, 2.0),
    "n_estimators": (200, 800),
    "k_feats": (20, 60),
}
int_keys_lgbm = ["num_leaves", "max_depth",
                 "min_child_samples", "n_estimators", "k_feats"]


def clamp_lgbm(p):
    out = {}
    for k, (lo, hi) in bounds_lgbm.items():
        v = max(lo, min(hi, p[k]))
        if k in int_keys_lgbm:
            v = int(round(v))
        out[k] = v
    return out


def sample_lgbm():
    return clamp_lgbm({k: rng.uniform(lo, hi) for k, (lo, hi) in bounds_lgbm.items()})


cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)

def fitness_lgbm(p):
    """0.5*Acc + 0.5*Macro-F1 fitness, LightGBM Ã§Ä±ktÄ±larÄ±nÄ± sessize alÄ±r."""
    p = clamp_lgbm(p)
    k = p["k_feats"]
    cols = mi_series.head(k).index          # ADIM 5'te hesaplanan MI sÄ±rasÄ±
    X = X_tr_bal[cols].values
    y = y_tr_bal_enc

    accs, f1s = [], []

    for tr_idx, va_idx in cv.split(X, y):
        Xtr, Xva = X[tr_idx], X[va_idx]
        ytr, yva = y[tr_idx], y[va_idx]

        model = LGBMClassifier(
            random_state=RANDOM_STATE,
            n_jobs=1,
            # lightgbm loglarÄ±nÄ± kapat
            verbosity=-1
        )

        # TÃ¼m LightGBM Ã§Ä±ktÄ±sÄ±nÄ± yut (stdout + stderr)
        buf_out, buf_err = io.StringIO(), io.StringIO()
        with contextlib.redirect_stdout(buf_out), contextlib.redirect_stderr(buf_err):
            model.set_params(**{kk: vv for kk, vv in p.items() if kk != "k_feats"})
            model.fit(Xtr, ytr)

        pred = model.predict(Xva)
        accs.append(accuracy_score(yva, pred))
        f1s.append(f1_score(yva, pred, average="macro"))

    acc = float(np.mean(accs))
    f1m = float(np.mean(f1s))
    score = 0.5 * acc + 0.5 * f1m
    return score, acc, f1m, cols


# --- MBO ayarlarÄ± ---
pop_size = 10
gens     = 6

population = [sample_lgbm() for _ in range(pop_size)]
best_s, best_p, best_cols = -1, None, None
best_acc = best_f1 = 0

print("MBO + LightGBM (sessiz) baÅŸlatÄ±lÄ±yor...\n")

# BaÅŸlangÄ±Ã§ populasyonu
for p in population:
    s, a, f, c = fitness_lgbm(p)
    if s > best_s:
        best_s, best_p, best_cols = s, p, c
        best_acc, best_f1 = a, f

print(f"BaÅŸlangÄ±Ã§ | Score={best_s:.4f} (Acc={best_acc:.4f}, F1={best_f1:.4f}), k={len(best_cols)}")

# --- Ana MBO dÃ¶ngÃ¼sÃ¼ ---
for g in range(gens):
    new_pop = []
    # Mutasyon
    for p in population:
        q = p.copy()
        for k, (lo, hi) in bounds_lgbm.items():
            if rng.rand() < 0.15:
                q[k] += rng.uniform(-0.1, 0.1) * (hi - lo)
        new_pop.append(clamp_lgbm(q))

    # En iyi Ã§Ã¶zÃ¼me Ã§ekilme
    for i in range(pop_size):
        for k, (lo, hi) in bounds_lgbm.items():
            delta = best_p[k] - new_pop[i][k]
            new_pop[i][k] += 0.3 * delta + rng.uniform(-0.05, 0.05)
        new_pop[i] = clamp_lgbm(new_pop[i])

    # Yeni populasyonu deÄŸerlendir
    for p in new_pop:
        s, a, f, c = fitness_lgbm(p)
        if s > best_s:
            best_s, best_p, best_cols = s, p, c
            best_acc, best_f1 = a, f

    population = new_pop
    print(f"Nesil {g+1}/{gens} | Score={best_s:.4f} (Acc={best_acc:.4f}, F1={best_f1:.4f}), k={len(best_cols)}")

print("\nâœ… MBO + LightGBM (sessiz) bitti.")
print("En iyi parametreler:")
for k, v in best_p.items():
    print(f" - {k}: {v}")


MBO + LightGBM (sessiz) baÅŸlatÄ±lÄ±yor...

BaÅŸlangÄ±Ã§ | Score=0.6499 (Acc=0.6523, F1=0.6475), k=59
Nesil 1/6 | Score=0.6499 (Acc=0.6523, F1=0.6475), k=59
Nesil 2/6 | Score=0.6557 (Acc=0.6595, F1=0.6520), k=39
Nesil 3/6 | Score=0.6577 (Acc=0.6609, F1=0.6545), k=52
Nesil 4/6 | Score=0.6670 (Acc=0.6708, F1=0.6632), k=50
Nesil 5/6 | Score=0.6696 (Acc=0.6734, F1=0.6657), k=46
Nesil 6/6 | Score=0.6701 (Acc=0.6740, F1=0.6662), k=45

âœ… MBO + LightGBM (sessiz) bitti.
En iyi parametreler:
 - learning_rate: 0.02
 - num_leaves: 126
 - max_depth: 26
 - subsample: 0.8137200054769711
 - colsample_bytree: 0.754106945578895
 - min_child_samples: 58
 - reg_alpha: 0.2798538694889267
 - reg_lambda: 1.5044153110420453
 - n_estimators: 241
 - k_feats: 45


In [16]:
# ADIM 7: Test setinde MBO + LightGBM performansÄ±

X_tr_final = X_tr_bal[best_cols].values
X_te_final = X_te[best_cols].values

final_lgbm = LGBMClassifier(
    random_state=RANDOM_STATE,
    n_jobs=-1,
    **{kk:vv for kk,vv in best_p.items() if kk!="k_feats"}
).fit(X_tr_final, y_tr_bal_enc)

y_pred_enc = final_lgbm.predict(X_te_final)
y_pred = label_enc.inverse_transform(y_pred_enc)

acc = accuracy_score(y_te, y_pred)
f1m = f1_score(y_te, y_pred, average="macro")

print("ðŸ“Š TEST (MBO + LightGBM)")
print("Accuracy =", acc)
print("Macro-F1 =", f1m)
print("\nSÄ±nÄ±f bazlÄ± rapor:")
print(classification_report(y_te, y_pred, digits=4))


ðŸ“Š TEST (MBO + LightGBM)
Accuracy = 0.5245426320786185
Macro-F1 = 0.45739061682359705

SÄ±nÄ±f bazlÄ± rapor:
              precision    recall  f1-score   support

        LDAP     0.3865    0.6401    0.4820      2831
       MSSQL     0.8831    0.7515    0.8120      8083
     NetBIOS     0.3199    0.5366    0.4009      2225
         Syn     0.0450    0.4454    0.0817       907
         UDP     0.8000    0.7665    0.7829     12462
      UDPLag     0.5115    0.1129    0.1849     12465

    accuracy                         0.5245     38973
   macro avg     0.4910    0.5422    0.4574     38973
weighted avg     0.6499    0.5245    0.5377     38973



In [17]:
# ADIM 8: FAST MBO + XGBoost (aynÄ± MI, aynÄ± balanced veri)

print("\nâš¡ FAST MBO + XGBoost baÅŸlÄ±yor...\n")

rng = np.random.RandomState(RANDOM_STATE)
cv_fast = StratifiedKFold(n_splits=2, shuffle=True, random_state=RANDOM_STATE)

bounds_xgb = {
    "learning_rate": (0.01, 0.3),
    "max_depth": (3, 12),
    "min_child_weight": (1, 8),
    "subsample": (0.6, 1.0),
    "colsample_bytree": (0.6, 1.0),
    "gamma": (0, 3),
    "reg_lambda": (0.3, 2),
    "n_estimators": (200, 400),
    "k_feats": (15, 40)
}
int_keys_xgb = ["max_depth","min_child_weight","n_estimators","k_feats"]

def clamp_xgb(p):
    out = {}
    for k,(lo,hi) in bounds_xgb.items():
        v = max(lo, min(hi, p[k]))
        if k in int_keys_xgb:
            v = int(round(v))
        out[k] = v
    return out

def sample_xgb():
    return clamp_xgb({k: rng.uniform(lo,hi) for k,(lo,hi) in bounds_xgb.items()})

def fitness_xgb(p):
    p = clamp_xgb(p)
    k = p["k_feats"]
    cols = mi_series.head(k).index
    X = X_tr_bal[cols].values
    y = y_tr_bal_enc

    accs, f1s = [], []
    for tr_idx, va_idx in cv_fast.split(X, y):
        Xtr, Xva = X[tr_idx], X[va_idx]
        ytr, yva = y[tr_idx], y[va_idx]

        model = XGBClassifier(
            random_state=RANDOM_STATE,
            n_jobs=1,
            tree_method="approx",
            eval_metric="mlogloss",
            objective="multi:softmax",
            num_class=len(np.unique(y)),
            **{kk:vv for kk,vv in p.items() if kk!="k_feats"}
        )
        model.fit(Xtr, ytr)
        pred = model.predict(Xva)
        accs.append(accuracy_score(yva, pred))
        f1s.append(f1_score(yva, pred, average="macro"))

    acc = float(np.mean(accs))
    f1m = float(np.mean(f1s))
    score = 0.5*acc + 0.5*f1m
    return score, acc, f1m, cols

pop_size = 8
gens     = 5
population = [sample_xgb() for _ in range(pop_size)]
best_s2, best_p2, best_cols2 = -1, None, None
best_acc2 = best_f12 = 0

# baÅŸlangÄ±Ã§
for p in population:
    s,a,f,c = fitness_xgb(p)
    if s > best_s2:
        best_s2, best_p2, best_cols2 = s,p,c
        best_acc2, best_f12 = a,f

print(f"BaÅŸlangÄ±Ã§ | Score={best_s2:.4f} (Acc={best_acc2:.4f}, F1={best_f12:.4f}), k={len(best_cols2)}")

for g in range(gens):
    new_pop = []
    for p in population:
        q = p.copy()
        for k,(lo,hi) in bounds_xgb.items():
            if rng.rand() < 0.15:
                q[k] += rng.uniform(-0.1,0.1)*(hi-lo)
        new_pop.append(clamp_xgb(q))

    for i in range(pop_size):
        for k,(lo,hi) in bounds_xgb.items():
            delta = best_p2[k] - new_pop[i][k]
            new_pop[i][k] += 0.3*delta + rng.uniform(-0.05,0.05)
        new_pop[i] = clamp_xgb(new_pop[i])

    for p in new_pop:
        s,a,f,c = fitness_xgb(p)
        if s > best_s2:
            best_s2, best_p2, best_cols2 = s,p,c
            best_acc2, best_f12 = a,f

    population = new_pop
    print(f"Nesil {g+1}/{gens} | Score={best_s2:.4f} (Acc={best_acc2:.4f}, F1={best_f12:.4f}), k={len(best_cols2)}")

print("\nâœ… FAST MBO + XGBoost bitti.")
print("En iyi XGBoost parametreleri:")
for k,v in best_p2.items():
    print(f" - {k}: {v}")



âš¡ FAST MBO + XGBoost baÅŸlÄ±yor...

BaÅŸlangÄ±Ã§ | Score=0.6425 (Acc=0.6510, F1=0.6340), k=37
Nesil 1/5 | Score=0.6456 (Acc=0.6508, F1=0.6403), k=37
Nesil 2/5 | Score=0.6465 (Acc=0.6516, F1=0.6413), k=37
Nesil 3/5 | Score=0.6472 (Acc=0.6549, F1=0.6394), k=37
Nesil 4/5 | Score=0.6551 (Acc=0.6609, F1=0.6492), k=38
Nesil 5/5 | Score=0.6551 (Acc=0.6609, F1=0.6492), k=38

âœ… FAST MBO + XGBoost bitti.
En iyi XGBoost parametreleri:
 - learning_rate: 0.2074357360564174
 - max_depth: 9
 - min_child_weight: 3
 - subsample: 0.6595140100584167
 - colsample_bytree: 0.644422993929691
 - gamma: 1.7282582942036426
 - reg_lambda: 1.2995772327613526
 - n_estimators: 298
 - k_feats: 38


In [18]:
# ADIM 9: Test setinde FAST MBO + XGBoost performansÄ±

X_tr_final2 = X_tr_bal[best_cols2].values
X_te_final2 = X_te[best_cols2].values

final_xgb = XGBClassifier(
    random_state=RANDOM_STATE,
    n_jobs=-1,
    tree_method="approx",
    eval_metric="mlogloss",
    objective="multi:softmax",
    num_class=len(np.unique(y_tr_bal_enc)),
    **{kk:vv for kk,vv in best_p2.items() if kk!="k_feats"}
).fit(X_tr_final2, y_tr_bal_enc)

y_pred_enc2 = final_xgb.predict(X_te_final2)
y_pred2 = label_enc.inverse_transform(y_pred_enc2)

acc2 = accuracy_score(y_te, y_pred2)
f1m2 = f1_score(y_te, y_pred2, average="macro")

print("ðŸ“Š TEST (FAST MBO + XGBoost)")
print("Accuracy =", acc2)
print("Macro-F1 =", f1m2)
print("\nSÄ±nÄ±f bazlÄ± rapor:")
print(classification_report(y_te, y_pred2, digits=4))


ðŸ“Š TEST (FAST MBO + XGBoost)
Accuracy = 0.5243886793421086
Macro-F1 = 0.45354286789269826

SÄ±nÄ±f bazlÄ± rapor:
              precision    recall  f1-score   support

        LDAP     0.3875    0.6623    0.4889      2831
       MSSQL     0.8865    0.7537    0.8147      8083
     NetBIOS     0.2939    0.5303    0.3782      2225
         Syn     0.0370    0.3616    0.0672       907
         UDP     0.8138    0.7705    0.7916     12462
      UDPLag     0.5257    0.1091    0.1807     12465

    accuracy                         0.5244     38973
   macro avg     0.4907    0.5313    0.4535     38973
weighted avg     0.6580    0.5244    0.5385     38973

