In [29]:
# Columns that every input CSV must contain; prepare_dataframe raises
# ValueError when one of the names passed as core features is missing.
CORE_FEATURES = [
    # Flow / packet dynamics
    "disp_pakt", "disp_byte", "mean_pkt", "mean_byte",
    # Flow behavior
    "avg_durat", "avg_flow_dst",
    # Rate-based features (flagged as very important by the author)
    "rate_pkt_in", "disp_interval",
    # Network / switch context
    "switch_id",
    # Label (always kept)
    "label",
]

# Columns that may be absent from an input CSV; prepare_dataframe creates
# any missing one and fills it with 0.
OPTIONAL_FEATURES = [
    "flow_count",       # controller workload
    "unique_src_ip",    # spoofing & distribution
    "unique_dst_ip",    # scanning & amplification
    "packet_in_count",  # direct controller stress
]


In [30]:
import pandas as pd


def prepare_dataframe(df, core_features, optional_features):
    """Return a new frame restricted to the core and optional feature columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is left untouched; all work happens on a copy.
    core_features : iterable of str
        Column names that must already exist in ``df``.
    optional_features : iterable of str
        Column names that are created and filled with 0 when absent from ``df``.

    Returns
    -------
    pandas.DataFrame
        A frame whose columns are ``core_features`` followed by
        ``optional_features``. A name listed in both appears only once, at
        its first position.

    Raises
    ------
    ValueError
        If any core feature is missing from ``df``.
    """
    core_features = list(core_features)
    optional_features = list(optional_features)

    # Core columns are mandatory: fail loudly before doing any work.
    missing_core = [name for name in core_features if name not in df.columns]
    if missing_core:
        raise ValueError(f"Missing CORE features: {missing_core}")

    # dict.fromkeys de-duplicates while preserving order, so a feature listed
    # in both groups does not produce a duplicated column in the result.
    final_features = list(dict.fromkeys(core_features + optional_features))

    # Copy the columns that exist so that adding the zero-filled optional
    # columns below never modifies the caller's DataFrame.
    present = [name for name in final_features if name in df.columns]
    prepared = df[present].copy()
    for name in final_features:
        if name not in prepared.columns:
            prepared[name] = 0

    # Re-select to restore the core-then-optional column order.
    return prepared[final_features]


def merge_two_csvs(
    csv1,
    csv2,
    output_csv,
    core_features=CORE_FEATURES,
    optional_features=OPTIONAL_FEATURES
):
    """Stack two feature CSVs into one file with a common column set.

    Both files are read, passed through ``prepare_dataframe`` with the given
    feature lists, concatenated row-wise (``csv1`` rows first, index reset)
    and written to ``output_csv`` without the index column. A short summary,
    including the value counts of the ``"label"`` column, is printed.

    Parameters
    ----------
    csv1, csv2 : path-like
        Input CSV files.
    output_csv : path-like
        Destination of the merged CSV.
    core_features, optional_features : list of str
        Forwarded to ``prepare_dataframe``. The merged frame must contain a
        ``"label"`` column for the final summary.
    """
    # Read both inputs first, then normalise their columns.
    raw_frames = [pd.read_csv(path) for path in (csv1, csv2)]
    aligned_frames = [
        prepare_dataframe(frame, core_features, optional_features)
        for frame in raw_frames
    ]

    merged = pd.concat(aligned_frames, ignore_index=True)
    merged.to_csv(output_csv, index=False)

    label_counts = merged["label"].value_counts()
    print(f"Merged {csv1} + {csv2}")
    print(f"Saved → {output_csv}")
    print("\nLabel distribution:")
    print(label_counts)


# Build the master dataset from the two attack captures.
# "UDP_FLOOD_atk.csv" is the file written by the change_label call in the
# "Label Changer" cell, so that cell has to be run before this one.
merge_two_csvs(
    csv1="SYN_FLOOD_ATK.csv",
    csv2="UDP_FLOOD_atk.csv",
    output_csv="SDN_ATK.csv",
)


Merged SYN_FLOOD_ATK.csv + UDP_FLOOD_atk.csv
Saved → SDN_ATK.csv

Label distribution:
label
0    32955
1    12170
2     8841
Name: count, dtype: int64


In [19]:
#Label Changer
import pandas as pd

def change_label(
    input_csv,
    output_csv,
    old_label,
    new_label,
    label_column="label"
):
    """Rewrite one label value in a CSV and save the result to a new file.

    Parameters
    ----------
    input_csv : path-like
        CSV file to read.
    output_csv : path-like
        Where the relabelled CSV is written (without the index column).
    old_label, new_label
        Every occurrence of ``old_label`` in ``label_column`` is replaced by
        ``new_label``; other values are left as they are.
    label_column : str
        Name of the column holding the labels.

    Raises
    ------
    ValueError
        If ``label_column`` is not a column of the input CSV.
    """
    table = pd.read_csv(input_csv)
    if label_column not in table.columns:
        raise ValueError(f"'{label_column}' column not found")

    # Show the class balance before and after so the change can be checked by eye.
    counts_before = table[label_column].value_counts()
    print(f"Before change:\n{counts_before}\n")

    relabelled = table[label_column].replace(old_label, new_label)
    table[label_column] = relabelled

    counts_after = table[label_column].value_counts()
    print(f"After change:\n{counts_after}\n")

    table.to_csv(output_csv, index=False)
    print(f"Saved updated file → {output_csv}")
    
# Relabel the binary UDP flood dataset: 1 (attack) → 2 (UDP flood).
# To relabel another dataset, change the file names and the old/new labels.
# NOTE(review): the input and output names differ only in letter case; on a
# case-insensitive filesystem this would overwrite the input file — confirm.
change_label("UDP_FLOOD_ATK.csv", "UDP_FLOOD_atk.csv", old_label=1, new_label=2)


Before change:
label
0    18352
1     8841
Name: count, dtype: int64

After change:
label
0    18352
2     8841
Name: count, dtype: int64

Saved updated file → UDP_FLOOD_atk.csv


In [27]:
# Label the rows and create an attack ID column for the final dataset.

# Use only after you have created the final dataset of 5 attacks.
import pandas as pd

# =========================
# EDIT THIS MAPPING ONLY
# Numeric class label → human-readable attack name written to the attack ID column.
LABEL_TO_ATTACK_ID = {
    0: "normal",
    1: "SYN flood",
    2: "UDP flood",
    3: "ICMP flood",  # per the original note: to be added later if wanted
}

# =========================
# SIMPLE FUNCTION
# =========================
def add_attack_id_column(
    input_csv,
    output_csv,
    label_column="label",
    attack_id_column="attack_id",
    label_to_attack_id=None
):
    """Add a column of attack names derived from the numeric label column.

    Parameters
    ----------
    input_csv : path-like
        CSV file to read.
    output_csv : path-like
        Where the CSV with the extra column is written (without the index).
    label_column : str
        Column holding the labels used as mapping keys.
    attack_id_column : str
        Name of the column to create (or overwrite) with the mapped values.
    label_to_attack_id : dict, optional
        Mapping from label value to attack name. Defaults to the module-level
        ``LABEL_TO_ATTACK_ID``.

    Warns
    -----
    UserWarning
        If some rows carry a label that has no entry in the mapping. Those
        rows are still written, with an empty attack ID.
    """
    import warnings

    mapping = LABEL_TO_ATTACK_ID if label_to_attack_id is None else label_to_attack_id

    df = pd.read_csv(input_csv)
    labels = df[label_column]

    # Series.map yields NaN for keys missing from the mapping; report those
    # labels instead of letting them pass unnoticed.
    unmapped = labels[~labels.isin(list(mapping))]
    if not unmapped.empty:
        warnings.warn(
            f"{len(unmapped)} row(s) have labels with no attack ID mapping: "
            f"{unmapped.unique().tolist()}; their '{attack_id_column}' is left empty",
            stacklevel=2,
        )

    # Map labels to attack IDs
    df[attack_id_column] = labels.map(mapping)

    df.to_csv(output_csv, index=False)

    print("Done.")
    print("Saved file:", output_csv)
    print("\nattack_id distribution:")
    # dropna=False keeps unmapped rows visible in the printed summary.
    print(df[attack_id_column].value_counts(dropna=False))


# NOTE(review): the original note asks that this output file name be kept unchanged.
add_attack_id_column("dataset_sdn.csv", "Master_Dataset_SDN.csv")



Done.
Saved file: MASTER_DATASET_WITH_ATTACK_ID.csv

attack_id distribution:
attack_id
normal       63561
SYN flood    40784
Name: count, dtype: int64
