In [None]:
import pandas as pd
import os
import sys

# Resolve the repository root (two directories above the current working
# directory) and make it importable for the `Models.*` import below.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
# Guard the append so re-running this cell does not keep growing sys.path
# with duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

from Models.UNSW_NB15_config import UNSW_NB15_Config

# Column names come from UNSW_NB15_Config rather than being hard-coded here.
# Endpoint columns for the two sides of a flow; their values become graph nodes.
SOURCE_IP_COL_NAME = UNSW_NB15_Config.SOURCE_NODE
DESTINATION_IP_COL_NAME = UNSW_NB15_Config.DESTINATION_NODE
# Port columns; appended to the endpoint values when nodes are drawn as "ip:port".
SOURCE_PORT_COL_NAME = UNSW_NB15_Config.SOURCE_PORT_COL_NAME
DESTINATION_PORT_COL_NAME = UNSW_NB15_Config.DESTINATION_PORT_COL_NAME

# LABEL_COL holds the attack-category value of each flow; IS_ATTACK_COL is
# compared against 1 to select attack flows.
LABEL_COL = UNSW_NB15_Config.ATTACK_CLASS_COL_NAME
IS_ATTACK_COL = UNSW_NB15_Config.IS_ATTACK_COL_NAME

# Column name(s) handed to `sort_values(by=...)` to order flows.
# NOTE(review): presumably timestamp columns - confirm against the config.
TIME_COL_NAMES = UNSW_NB15_Config.TIME_COL_NAMES

# Read the raw UNSW-NB15 CSV, located relative to the project root, into `df`.
raw_csv_path = os.path.join(project_root, 'Datasets/UNSW_NB15/All/all_raw.csv')
df = pd.read_csv(raw_csv_path)

# Print the leading rows as a quick sanity check of the load.
print(df.head())

FileNotFoundError: [Errno 2] No such file or directory: '/vol/bitbucket/shc20/FYP/GNN-Adversarial-Attack/Datasets/UNSW_NB15/All/all_raw_downsampled.csv'

In [None]:
# Cast both port columns to strings so they can later be concatenated with
# the IP strings when building "ip:port" node names.
for port_col in (SOURCE_PORT_COL_NAME, DESTINATION_PORT_COL_NAME):
    df[port_col] = df[port_col].apply(str)

In [None]:
from pyvis.network import Network

def visualize_graph(df, attack_name, batch_size=50, name_prefix="", combine_ports=True):
    """Render the first batch of flows containing `attack_name` as a pyvis graph.

    The frame is scanned in consecutive windows of `batch_size` rows. Windows
    with no `attack_name` value in the LABEL_COL column are skipped. The first
    window that has one is drawn (one node per endpoint, one edge per flow)
    and written to ``{name_prefix}graph_{attack_name}.html``; the function
    then returns, so at most one graph is produced per call.

    Args:
        df: Flow table with the label, is-attack and IP columns named by the
            module-level constants (plus the port columns when
            `combine_ports` is True). It is not modified.
        attack_name: Value searched for in the LABEL_COL column.
        batch_size: Number of consecutive rows per window.
        name_prefix: Text prepended to the output HTML file name.
        combine_ports: If True, nodes are "ip:port" endpoints; otherwise the
            raw values of the IP columns are used as nodes.

    Returns:
        None. The graph is emitted through ``Network.show``.
    """
    for start in range(0, len(df), batch_size):
        batch = df.iloc[start:start + batch_size]

        # Only draw a window that actually contains the requested category.
        if attack_name not in batch[LABEL_COL].values:
            print(
                f"Skipping rows {start}-{start + len(batch) - 1}: "
                f"batch does not contain the attack category '{attack_name}'"
            )
            continue

        net = Network(notebook=True)

        for _, row in batch.iterrows():
            src = row[SOURCE_IP_COL_NAME]
            dst = row[DESTINATION_IP_COL_NAME]
            if combine_ports:
                # f-strings accept non-string ports/IPs, so this no longer
                # depends on an earlier cell having cast the ports to str,
                # and the row object itself is left untouched.
                src = f"{src}:{row[SOURCE_PORT_COL_NAME]}"
                dst = f"{dst}:{row[DESTINATION_PORT_COL_NAME]}"

            net.add_node(src, label=src)
            net.add_node(dst, label=dst)

            # Attack flows are drawn in red, everything else in blue; the
            # flow's label is attached as the edge title.
            color = 'red' if row[IS_ATTACK_COL] == 1 else 'blue'
            net.add_edge(src, dst, title=row[LABEL_COL], color=color)

        net.show(f"{name_prefix}graph_{attack_name}.html")
        # Deliberately stop after the first matching window.
        return

    # Reached only when no window matched (or the frame is empty).
    print(f"No batch contains the attack category '{attack_name}'; nothing was rendered")

# Render the first 'DoS' batch twice: once with "ip:port" endpoints as nodes
# and once with IP-only nodes. Each variant gets its own file prefix so the
# second render does not overwrite the first HTML file.
visualize_graph(df, 'DoS', batch_size=50, combine_ports=True, name_prefix="combined_ports_")
visualize_graph(df, 'DoS', batch_size=50, combine_ports=False, name_prefix="ip_node_")

Nonegraph_DoS.html


In [None]:
# Visualise graph after adversarial attack
import random


def attack_attacker(dataframe, ratio, num_injected_nodes, seed=42):
    """Inject adversarial flows that point from synthetic nodes at the real attackers.

    Attack rows (IS_ATTACK_COL == 1) are sampled, and each sampled copy is
    rewritten so that its destination is the original source (the attacker),
    its source is one of `num_injected_nodes` synthetic "192.168.1.x" hosts
    with a random port appended, and its label is 'Adversarial'. The injected
    rows are appended to the input and the result is sorted by TIME_COL_NAMES.

    Args:
        dataframe: Flow table; it is not modified.
        ratio: Number of injected rows as a fraction of ``len(dataframe)``.
        num_injected_nodes: Number of distinct synthetic source hosts, used
            round-robin across the injected rows.
        seed: Seed for both the row sampling and the random source ports, so
            repeated calls give identical output. Pass None for
            non-deterministic behaviour.

    Returns:
        A new DataFrame with the original and injected rows, time-sorted with
        a fresh 0..n-1 index.

    Raises:
        ValueError: If rows must be injected but there are no attack rows to
            sample from, or `num_injected_nodes` is not positive.
    """
    attack_flows = dataframe[dataframe[IS_ATTACK_COL] == 1]
    num_injected = int(ratio * len(dataframe))

    if num_injected > 0:
        if attack_flows.empty:
            raise ValueError("Cannot inject flows: dataframe contains no attack rows")
        if num_injected_nodes <= 0:
            raise ValueError("num_injected_nodes must be positive when flows are injected")

    # Sample without replacement when possible (same result as before for a
    # given seed); fall back to sampling with replacement when more rows are
    # requested than attack rows exist, instead of raising.
    injected_rows = attack_flows.sample(
        n=num_injected,
        replace=num_injected > len(attack_flows),
        random_state=seed,
    ).reset_index(drop=True)

    print("Sampled attack flows:", len(injected_rows))
    print("Labels of sampled attack flows:", injected_rows[LABEL_COL].value_counts())

    # A dedicated RNG keeps the ports reproducible without reseeding the
    # global `random` state.
    port_rng = random.Random(seed)
    node_ips = [f"192.168.1.{i + 1}" for i in range(num_injected_nodes)]

    # Target the real attacker nodes: the old source becomes the destination.
    injected_rows[DESTINATION_IP_COL_NAME] = injected_rows[SOURCE_IP_COL_NAME]
    # NOTE(review): the source value already embeds a port ("ip:port") while
    # the source-port column keeps the sampled flow's port;
    # visualize_graph(combine_ports=True) will append that port again.
    # Confirm which representation is intended.
    injected_rows[SOURCE_IP_COL_NAME] = [
        f"{node_ips[i % len(node_ips)]}:{port_rng.randint(1024, 65535)}"
        for i in range(num_injected)
    ]
    # Assign a new class for injected samples (IS_ATTACK_COL is left as sampled).
    injected_rows[LABEL_COL] = 'Adversarial'
    print(injected_rows.head(5))

    # Append the injected rows, then restore time order over the combined set.
    combined_df = pd.concat([dataframe, injected_rows], ignore_index=True)
    combined_df = combined_df.sort_values(by=TIME_COL_NAMES).reset_index(drop=True)

    return combined_df

# Build the adversarially augmented flow table (10% injected rows spread over
# 10 synthetic nodes), then render it once per node representation.
attack_attacker_df = attack_attacker(df, ratio=0.1, num_injected_nodes=10)
for prefix, with_ports in (('adversarial_combined_port', True), ('adversarial_ip_node', False)):
    visualize_graph(attack_attacker_df, 'DoS', batch_size=50, name_prefix=prefix, combine_ports=with_ports)

Sampled attack flows: 54315
Labels of sampled attack flows: attack_cat
Generic           36435
Exploits           7483
Fuzzers            4075
DoS                2789
Reconnaissance     2438
Analysis            437
Backdoor            294
Shellcode           243
Backdoors            87
Worms                34
Name: count, dtype: int64
               srcip               dstip proto state        dur  sbytes  \
0  192.168.1.1:39025      175.45.176.0:0  ospf   INT  12.256339    2184   
1  192.168.1.2:41532  175.45.176.0:47439   udp   INT   0.000008     114   
2  192.168.1.3:30599  175.45.176.3:47439   udp   INT   0.000008     114   
3  192.168.1.4:55225  175.45.176.3:47439   udp   INT   0.000009     114   
4  192.168.1.5:36774  175.45.176.2:46858   tcp   FIN   8.708573  546476   

   dbytes  sttl  dttl  sloss  ...  ct_ftp_cmd ct_srv_src  ct_srv_dst  \
0       0   254     0      0  ...         NaN          1           1   
1       0   254     0      0  ...         NaN         33          33