In [2]:
from collections import defaultdict
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from BoT_IoT_config import BoT_IoT_Config

# Short aliases for the dataset-specific column names declared in BoT_IoT_Config,
# so the cells below never hard-code a column name.
SOURCE_COL = BoT_IoT_Config.SOURCE_IP_COL_NAME
DESTINATION_COL = BoT_IoT_Config.DESTINATION_IP_COL_NAME
# LABEL_COL is compared against 1 to select attack rows; CATEGORY_COL holds the class name.
LABEL_COL = BoT_IoT_Config.IS_ATTACK_COL_NAME
CATEGORY_COL = BoT_IoT_Config.ATTACK_CLASS_COL_NAME

# Value of CATEGORY_COL that marks non-attack traffic.
BENIGN_CLASS_NAME = BoT_IoT_Config.BENIGN_CLASS_NAME
# Second entry of the configured time columns; used below for per-file min/max timestamps.
# NOTE(review): confirm which timestamp index 1 refers to — not visible from this notebook.
TIME_COL = BoT_IoT_Config.TIME_COL_NAMES[1]


In [None]:

# Load the raw flow table; each row is one flow from SOURCE_COL to DESTINATION_COL.
df = pd.read_csv('./All/all_raw.csv')

# A multigraph keeps one edge per flow, so repeated flows between the same
# pair of endpoints stay distinct and count towards the degree.
G = nx.MultiDiGraph()

# Nodes are not added explicitly: add_edges_from creates any endpoint that is
# not in the graph yet.
# Iterate the four needed columns in lockstep instead of df.iterrows(), which
# builds a full Series per row and is far slower on large frames.
G.add_edges_from(
    (source, destination, {'attack_category': category, 'label': label})
    for source, destination, category, label in zip(
        df[SOURCE_COL], df[DESTINATION_COL], df[CATEGORY_COL], df[LABEL_COL]
    )
)

# Maps a category name to the in-degrees of the nodes filed under it.
category_in_degrees = defaultdict(list)

for node in G.nodes:
    # First outgoing edge labelled as an attack, or None when the node never attacks.
    first_attack = next(
        (attrs for _, _, attrs in G.out_edges(node, data=True) if attrs.get('label') == 1),
        None,
    )

    # A node is filed under the category of its first attack edge only;
    # nodes without any outgoing attack edge are filed as benign.
    if first_attack is None:
        bucket = BENIGN_CLASS_NAME
    else:
        bucket = first_attack.get('attack_category', 'Unknown')

    # Parallel edges are counted individually because G is a multigraph.
    category_in_degrees[bucket].append(len(G.in_edges(node)))

plt.figure(figsize=(10, 6))

# Quick textual summary before the figure is drawn.
print(len(G.nodes))
print(category_in_degrees)

# Flatten the mapping into two parallel lists: one (category, in-degree) pair per node.
categories = [
    name
    for name, node_degrees in category_in_degrees.items()
    for _ in node_degrees
]
in_degrees = [
    degree
    for node_degrees in category_in_degrees.values()
    for degree in node_degrees
]

# One translucent marker per node so overlapping points remain visible.
plt.scatter(categories, in_degrees, color='skyblue', s=100, alpha=0.6)
plt.title('In-Degree Distribution by Attack Category')
plt.xlabel('Attack Categories')
plt.ylabel('In-Degree')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: './All/all_downsampled.csv'

In [None]:
def print_attack_node_and_their_in_traffic(df):
    """Print which nodes send attack traffic and what traffic those nodes receive.

    A node is treated as an attacker when it is the SOURCE_COL value of at
    least one row whose CATEGORY_COL differs from BENIGN_CLASS_NAME.

    Args:
        df: Flow table containing SOURCE_COL, DESTINATION_COL and CATEGORY_COL.
            Side effect: its column labels are stripped of surrounding
            whitespace in place. Cell values are stripped on a new frame, so
            the caller's values are left untouched.

    Returns:
        None. All results are written with print().
    """
    # Strip whitespace from the column labels (in place on the caller's frame).
    df.columns = df.columns.str.strip()

    # Strip whitespace from every string cell. DataFrame.applymap is deprecated
    # in favour of DataFrame.map; the method is looked up on the class so that a
    # column that happens to be called "map" cannot shadow it on older pandas.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    df = elementwise(df, lambda x: x.strip() if isinstance(x, str) else x)

    # Find all attacker nodes.
    # NOTE(review): rows with a missing category also compare unequal to the
    # benign name and are therefore counted as attacks — confirm this is intended.
    is_attack = df[CATEGORY_COL] != BENIGN_CLASS_NAME
    attacker_nodes = df[is_attack][SOURCE_COL].unique()
    print("Attacker Nodes:", attacker_nodes)

    # Find all traffic where an attacker node is the destination.
    directed_to_attacker_traffic = df[df[DESTINATION_COL].isin(attacker_nodes)]

    # Find all nodes that have directed traffic to attacker nodes.
    source_node_to_attacker = directed_to_attacker_traffic[SOURCE_COL].unique()
    print("Source Node to Attacker:", source_node_to_attacker)

    # Of that traffic, keep only the rows that are themselves attacks.
    attack_directed_to_attacker_traffic = directed_to_attacker_traffic[
        directed_to_attacker_traffic[CATEGORY_COL] != BENIGN_CLASS_NAME
    ]

    # head(1) per group shows one example row for each attacking source.
    print("Example Attacks on Attacker Nodes:", attack_directed_to_attacker_traffic.groupby(SOURCE_COL).head(1))

    print("Source Node to Attacker with Attack Traffic:", attack_directed_to_attacker_traffic[SOURCE_COL].unique())

    print("No of Attacks on Attacker Nodes:", len(attack_directed_to_attacker_traffic))

    # Every category (benign included) seen in traffic sent to attacker nodes.
    directed_to_attacker_traffic_attack_type = directed_to_attacker_traffic[CATEGORY_COL].unique()
    print("Traffic to Attacker Nodes Attack Type:", directed_to_attacker_traffic_attack_type)

# Summarise attacker nodes for the frame currently bound to `df`.
# NOTE(review): the saved output lists port-like values and the literal 'sport'
# as attacker nodes — confirm the CSV header lines up with the data columns.
print_attack_node_and_their_in_traffic(df)

  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


Attacker Nodes: ['sport' '0' '2142' ... 9819 24203 24975]
Source Node to Attacker: ['0' '2142' '13284' ... 24203 24975 60876]
Example Attacks on Attacker Nodes:                 srcip             sport  dstip dsport proto      state    dur  \
10.40.85.1_0        0       224.0.0.5_0      0   ospf   INT  50.004341    384   
59.166.0.6_0     2142   149.171.126.4_0     53    udp   CON   0.001134    132   
175.45.176.0_0  13284  149.171.126.16_0     80    tcp   FIN    2.39039   1362   
59.166.0.1_0    21270   149.171.126.9_0    111    udp   CON   0.005153    568   
59.166.0.1_0    48847   149.171.126.5_0     53    udp   CON   0.001093    132   
...               ...               ...    ...    ...   ...        ...    ...   
59.166.0.9_3     9819   149.171.126.0_3    143    tcp   FIN   1.643158   7810   
59.166.0.9_3    20545   149.171.126.4_3  34428    tcp   FIN   0.023955   2542   
59.166.0.2_3    24203   149.171.126.4_3     25    tcp   FIN   0.042211  37812   
59.166.0.4_3    24975   149.1

In [None]:
# Keep only the rows flagged as attacks.
is_attack_row = df[LABEL_COL].eq(1)
attack_traffic = df.loc[is_attack_row]

# Group attack traffic by destination IP and attack category.
group_keys = [DESTINATION_COL, CATEGORY_COL]
attack_traffic_grouped = attack_traffic.groupby(group_keys)

# Print how many attack rows fall into each (destination, category) group.
print(attack_traffic_grouped.size())

dstip             attack_cat    
149.171.126.10_0  Backdoors           1
                  DoS                67
                  Exploits          437
                  Fuzzers           460
                  Generic            54
                                   ... 
149.171.126.19_3  Fuzzers           751
                  Generic           152
                  Reconnaissance    436
                  Shellcode          63
                  Worms               4
Length: 347, dtype: int64


In [None]:
import os

# Rebinds `df` to the downsampled table; the first line of the file is used as the header.
# Cells run after this one see this frame instead of the one loaded earlier.
df = pd.read_csv('./All/all_downsampled.csv', header=0)

# Get nodes that are both attackers and victims
def get_attacker_and_victim_nodes(df):
    """Report the sources and destinations of attack traffic in ``df``.

    Attack traffic is every row whose CATEGORY_COL differs from
    BENIGN_CLASS_NAME. The nodes that appear as both source and destination of
    such rows are printed but are not part of the return value.

    Args:
        df: Flow table containing SOURCE_COL, DESTINATION_COL and CATEGORY_COL.
            Side effect: its column labels are stripped of surrounding
            whitespace in place. The per-file loop in this cell reads
            ``file_df[TIME_COL]`` right after calling this function and may
            rely on that.

    Returns:
        Tuple ``(attacker_nodes, victim_nodes)`` of the unique SOURCE_COL and
        DESTINATION_COL values found in the attack rows.
    """
    # Strip whitespace from column names (kept in place, see docstring).
    df.columns = df.columns.str.strip()

    # Use the module-level column aliases, as the rest of the notebook does.
    # NOTE(review): rows with a missing category also compare unequal to the
    # benign name and are therefore counted as attacks — confirm this is intended.
    attack_traffic = df[df[CATEGORY_COL] != BENIGN_CLASS_NAME]

    # Unique sources of attack rows.
    attacker_nodes = attack_traffic[SOURCE_COL].unique()

    # Unique destinations of attack rows.
    victim_nodes = attack_traffic[DESTINATION_COL].unique()

    # Nodes that occur on both sides of attack traffic.
    both_attacker_and_victim = set(attacker_nodes) & set(victim_nodes)

    print("Attacker: ", attacker_nodes)
    print("Victim: ", victim_nodes)
    print("Nodes that are both attackers and victims:", both_attacker_and_victim)

    return (attacker_nodes, victim_nodes)

# Folder holding the individual raw CSV captures.
directory = './Raw'

# Run the attacker/victim report on each CSV capture, then print its time span.
for filename in os.listdir(directory):
    # Anything that is not a CSV file is ignored.
    if not filename.endswith('.csv'):
        continue

    print(f"Processing file: {filename}")
    file_path = os.path.join(directory, filename)

    # The first line of each file is used as the header.
    file_df = pd.read_csv(file_path, header=0)

    # The report also strips whitespace from file_df's column labels in place,
    # which happens before TIME_COL is looked up below.
    get_attacker_and_victim_nodes(file_df)

    timestamps = file_df[TIME_COL]
    min_time, max_time = timestamps.min(), timestamps.max()
    print(f"Min/Max Timestamp in {filename}: {min_time}/{max_time}")

# Kept as the final expression so the cell displays the returned tuple for `df`.
get_attacker_and_victim_nodes(df)

  df = pd.read_csv('./All/all_downsampled.csv', header=None, names=COL_NAMES)


Processing file: UNSW-NB15_1.csv


  file_df = pd.read_csv(file_path, header=None, names=COL_NAMES)


Attacker:  ['59.166.0.0' '59.166.0.6' '59.166.0.5' '59.166.0.3' '10.40.182.3'
 '59.166.0.7' '10.40.170.2' '59.166.0.1' '59.166.0.2' '59.166.0.4'
 '175.45.176.3' '175.45.176.2' '175.45.176.0' '59.166.0.8' '59.166.0.9'
 '175.45.176.1' '10.40.182.1' '10.40.85.1' '192.168.241.243' '10.40.85.30'
 '149.171.126.16' '149.171.126.2' '149.171.126.11' '149.171.126.4'
 '149.171.126.5' '149.171.126.17' '149.171.126.19' '149.171.126.9'
 '149.171.126.8' '149.171.126.7' '149.171.126.15' '149.171.126.6'
 '149.171.126.0' '149.171.126.1' '149.171.126.3' '149.171.126.13'
 '149.171.126.12' '149.171.126.10' '149.171.126.18' '127.0.0.1']
Victim:  ['149.171.126.6' '149.171.126.9' '149.171.126.7' '149.171.126.5'
 '149.171.126.0' '149.171.126.4' '10.40.182.3' '10.40.170.2'
 '149.171.126.18' '149.171.126.16' '149.171.126.8' '149.171.126.2'
 '149.171.126.1' '149.171.126.10' '149.171.126.3' '149.171.126.15'
 '149.171.126.14' '149.171.126.12' '149.171.126.13' '149.171.126.11'
 '224.0.0.5' '149.171.126.17' '149.171.

  file_df = pd.read_csv(file_path, header=None, names=COL_NAMES)


Attacker:  ['59.166.0.0' '59.166.0.8' '59.166.0.3' '59.166.0.4' '59.166.0.2'
 '59.166.0.6' '59.166.0.7' '59.166.0.9' '59.166.0.5' '59.166.0.1'
 '10.40.182.3' '10.40.170.2' '10.40.182.1' '10.40.85.1' '10.40.85.30'
 '149.171.126.5' '149.171.126.8' '149.171.126.3' '149.171.126.6'
 '149.171.126.4' '149.171.126.2' '149.171.126.9' '149.171.126.7'
 '149.171.126.0' '149.171.126.1' '149.171.126.14' '175.45.176.3'
 '175.45.176.0' '175.45.176.2' '175.45.176.1' '10.40.85.10' '10.40.182.6'
 '149.171.126.17' '149.171.126.15' '149.171.126.16' '149.171.126.18'
 '149.171.126.12' '149.171.126.13' '149.171.126.19' '149.171.126.11']
Victim:  ['149.171.126.5' '149.171.126.3' '149.171.126.6' '149.171.126.7'
 '149.171.126.4' '149.171.126.8' '149.171.126.9' '149.171.126.1'
 '149.171.126.2' '149.171.126.0' '10.40.182.3' '10.40.170.2' '224.0.0.5'
 '10.40.85.1' '10.40.85.30' '59.166.0.0' '59.166.0.1' '59.166.0.9'
 '59.166.0.7' '59.166.0.3' '59.166.0.6' '59.166.0.8' '59.166.0.4'
 '59.166.0.2' '59.166.0.5' '175.45

(array(['sport', '0', '2142', ..., 9819, 24203, 24975],
       shape=(74562,), dtype=object),
 array(['dsport', '0', '53', ..., 10649, 15810, 28268],
       shape=(45032,), dtype=object))