In [None]:
from collections import defaultdict
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from NF_BoT_IoT_v3_config import NF_BoT_IoT_v3_Config

# Column names and the benign label are taken from the dataset config so the
# rest of the notebook never hard-codes header strings.
SOURCE_COL = NF_BoT_IoT_v3_Config.SOURCE_IP_COL_NAME
DESTINATION_COL = NF_BoT_IoT_v3_Config.DESTINATION_IP_COL_NAME
CATEGORY_COL = NF_BoT_IoT_v3_Config.ATTACK_CLASS_COL_NAME

BENIGN_CLASS_NAME = NF_BoT_IoT_v3_Config.BENIGN_CLASS_NAME

df = pd.read_csv('./All/all_downsampled.csv')

# Directed graph of the flows: endpoints are nodes, each row contributes a
# source -> destination edge carrying the row's attack category.
# NOTE: nx.DiGraph stores a single edge per ordered (source, destination)
# pair, so when several rows share the same endpoints the category of the
# last such row is the one that remains on the edge.
G = nx.DiGraph()

# Iterate the three needed columns in lockstep instead of DataFrame.iterrows():
# iterrows materialises a full Series for every row, which is prohibitively
# slow on large flow tables, while zip over the columns yields the same
# (source, destination, category) triples in the same row order.
G.add_edges_from(
    (source, destination, {CATEGORY_COL: category})
    for source, destination, category in zip(
        df[SOURCE_COL], df[DESTINATION_COL], df[CATEGORY_COL]
    )
)

# Maps a label to the list of in-degrees of the nodes filed under that label.
category_in_degrees = defaultdict(list)

for node in G.nodes:
    incoming_count = len(G.in_edges(node, data=True))

    # Attributes of the first outgoing edge whose category is not the benign
    # class; None when every outgoing edge of the node is benign (or the node
    # has no outgoing edges at all).
    first_attack_attrs = next(
        (
            attrs
            for _, _, attrs in G.out_edges(node, data=True)
            if attrs.get(CATEGORY_COL) != BENIGN_CLASS_NAME
        ),
        None,
    )

    if first_attack_attrs is None:
        # No attack edge leaves this node: file it under 'Normal'.
        label = 'Normal'
    else:
        # The node is treated as an attacker of that first attack category;
        # an edge without a category attribute is filed under 'Unknown'.
        label = first_attack_attrs.get(CATEGORY_COL, 'Unknown')

    category_in_degrees[label].append(incoming_count)

# Flatten the per-category lists into two parallel sequences for the scatter
# plot: one x label per recorded in-degree, in dictionary order.
categories = [
    category
    for category, degrees in category_in_degrees.items()
    for _ in degrees
]
in_degrees = [
    degree
    for degrees in category_in_degrees.values()
    for degree in degrees
]

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(categories, in_degrees, color='skyblue', s=100, alpha=0.6)
ax.set_xlabel('Attack Categories')
ax.set_ylabel('In-Degree')
ax.set_title('In-Degree Distribution by Attack Category')
# Category names are drawn vertically on the x axis.
plt.xticks(rotation=90)
fig.tight_layout()
plt.show()

KeyboardInterrupt: 

In [None]:
import numpy as np
import os
# Print numpy arrays in full instead of summarising them with "...".
np.set_printoptions(threshold=np.inf)

# pandas display settings applied for the rest of the session.
_PANDAS_DISPLAY_OPTIONS = {
    'display.max_rows': None,            # show all rows
    'display.max_columns': None,         # show all columns
    'display.max_colwidth': None,        # don't truncate string values
    'display.expand_frame_repr': False,  # prevent wrapping
}
for _option_name, _option_value in _PANDAS_DISPLAY_OPTIONS.items():
    pd.set_option(_option_name, _option_value)


def print_attack_node_and_their_in_traffic(df):
    """Print a summary of the traffic that is directed at attacker nodes.

    A node counts as an attacker when it is the source of at least one row
    whose category differs from ``BENIGN_CLASS_NAME``. The function prints:
    the attacker nodes, every source that sends traffic to an attacker, one
    example attack row per such source, the sources whose traffic to an
    attacker is itself labelled as an attack, the number of those attack rows,
    and the distinct categories seen in traffic towards attackers.

    Args:
        df: Flow table containing the ``SOURCE_COL``, ``DESTINATION_COL`` and
            ``CATEGORY_COL`` columns (header names may carry surrounding
            whitespace).

    Returns:
        None. Everything is reported through ``print``.

    Side effects:
        The column names of the passed-in frame are stripped in place. The
        cell values of the caller's frame are not modified; value stripping
        happens on a new frame local to this function.
    """

    # Strip whitespace from the header names (in place on the caller's frame).
    df.columns = df.columns.str.strip()

    def _strip_if_str(value):
        # Only string cells are stripped; every other value passes through.
        return value.strip() if isinstance(value, str) else value

    # Element-wise strip via a per-column Series.map. This replaces
    # DataFrame.applymap, which is deprecated and emits a FutureWarning on
    # recent pandas, while still running on older pandas versions.
    df = df.apply(lambda column: column.map(_strip_if_str))

    # Find all attacker nodes: sources of at least one non-benign row.
    is_attack = df[CATEGORY_COL] != BENIGN_CLASS_NAME
    attacker_nodes = df[is_attack][SOURCE_COL].unique()
    print("Attacker Nodes:", attacker_nodes)

    # Find all traffic where an attacker node is the destination.
    directed_to_attacker_traffic = df[df[DESTINATION_COL].isin(attacker_nodes)]

    # Find all nodes that have directed traffic to attacker nodes.
    source_node_to_attacker = directed_to_attacker_traffic[SOURCE_COL].unique()
    print("Source Node to Attacker:", source_node_to_attacker)

    # Subset of that traffic which is itself labelled as an attack.
    attack_directed_to_attacker_traffic = directed_to_attacker_traffic[
        directed_to_attacker_traffic[CATEGORY_COL] != BENIGN_CLASS_NAME
    ]

    # One example row (the first encountered) per attacking source.
    print("Example Attacks on Attacker Nodes:", attack_directed_to_attacker_traffic.groupby(SOURCE_COL).head(1))

    print("Source Node to Attacker with Attack Traffic:", attack_directed_to_attacker_traffic[SOURCE_COL].unique())

    print("No of Attacks on Attacker Nodes:", len(attack_directed_to_attacker_traffic))

    directed_to_attacker_traffic_attack_type = directed_to_attacker_traffic[CATEGORY_COL].unique()
    print("Traffic to Attacker Nodes Attack Type:", directed_to_attacker_traffic_attack_type)


# # Directory containing the CSV files
# directory = './Raw'

# # Iterate through all CSV files in the directory
# for filename in os.listdir(directory):
#     if filename.endswith('.csv'):
#         file_path = os.path.join(directory, filename)
#         print(f"Processing file: {filename}")
        
#         # Read the CSV file
#         df = pd.read_csv(file_path, header=0)
        
#         # Run the function
#         print_attack_node_and_their_in_traffic(df)


Processing file: Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv


  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


Attacker Nodes: ['172.16.0.1' '192.168.10.50']
Source Node to Attacker: ['192.168.10.12' '192.168.10.16' '192.168.10.19' '192.168.10.25'
 '192.168.10.17' '192.168.10.5' '192.168.10.9' '192.168.10.14'
 '192.168.10.15' '192.168.10.8' '192.168.10.3' '172.16.0.1'
 '192.168.10.50']
Example Attacks on Attacker Nodes:                                     Flow ID      Source IP  Source Port Destination IP  Destination Port  Protocol      Timestamp  Flow Duration  Total Fwd Packets  Total Backward Packets  Total Length of Fwd Packets  Total Length of Bwd Packets  Fwd Packet Length Max  Fwd Packet Length Min  Fwd Packet Length Mean  Fwd Packet Length Std  Bwd Packet Length Max  Bwd Packet Length Min  Bwd Packet Length Mean  Bwd Packet Length Std  Flow Bytes/s  Flow Packets/s  Flow IAT Mean  Flow IAT Std  Flow IAT Max  Flow IAT Min  Fwd IAT Total  Fwd IAT Mean  Fwd IAT Std  Fwd IAT Max  Fwd IAT Min  Bwd IAT Total  Bwd IAT Mean   Bwd IAT Std  Bwd IAT Max  Bwd IAT Min  Fwd PSH Flags  Bwd PSH Flags  

KeyboardInterrupt: 

In [None]:
# Load the combined raw dataset (first line is the header row) and print the
# attacker-node traffic summary for it.
all_raw_df = pd.read_csv(filepath_or_buffer='./All/all_raw.csv', header=0)
print_attack_node_and_their_in_traffic(df=all_raw_df)

  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


Attacker Nodes: ['172.16.0.1' '192.168.10.50' '192.168.10.8' '192.168.10.12'
 '192.168.10.9' '205.174.165.73' '192.168.10.15' '192.168.10.14'
 '192.168.10.5' '192.168.10.17']
Source Node to Attacker: ['104.16.207.165' '104.16.28.216' '104.19.196.102' '104.20.10.120'
 '104.28.13.116' '104.97.123.193' '104.97.125.160' '104.97.139.37'
 '138.201.37.241' '144.76.121.178' '145.243.233.163' '151.101.0.166'
 '151.101.0.249' '151.101.1.108' '151.101.1.5' '151.101.130.2'
 '151.101.2.2' '151.101.44.249' '152.163.13.4' '152.163.56.2'
 '169.60.66.35' '172.217.10.238' '172.217.11.1' '172.217.11.10'
 '172.217.12.138' '172.217.12.162' '172.217.3.98' '172.217.6.198'
 '172.217.6.200' '172.217.6.206' '172.217.7.132' '173.241.254.199'
 '185.167.164.39' '185.54.150.17' '185.86.137.17' '192.168.10.12'
 '192.168.10.15' '192.168.10.16' '192.168.10.50' '192.168.10.8'
 '195.216.249.102' '195.216.249.109' '195.216.249.89' '199.16.156.120'
 '199.16.157.105' '204.2.255.232' '209.85.201.157' '212.77.188.194'
 '213.

In [None]:
import os

df = pd.read_csv('./All/all_downsampled.csv', header=0)
# NOTE(review): this cell previously read its column names from
# CIC_IDS_2017_Config, which is never imported in this notebook and therefore
# raises NameError on a fresh kernel. It now reuses the notebook-wide constants
# (SOURCE_COL, DESTINATION_COL, CATEGORY_COL, BENIGN_CLASS_NAME) defined in the
# first cell — confirm that this is the dataset config intended here.
ATTACK_CATEGORY_COL = CATEGORY_COL

# Get nodes that are both attackers and victims
def get_attacker_and_victim_nodes(df):
    """Print and return the attacker and victim nodes found in ``df``.

    Only rows whose category differs from ``BENIGN_CLASS_NAME`` are
    considered. Attackers are the distinct sources of those rows, victims are
    their distinct destinations. The attackers, the victims and the nodes
    appearing in both groups are printed.

    Args:
        df: Flow table containing the source, destination and attack-category
            columns (header names may carry surrounding whitespace).

    Returns:
        Tuple ``(attacker_nodes, victim_nodes)`` with the unique values of the
        source and destination columns of the attack rows.

    Side effects:
        The column names of the passed-in frame are stripped in place, so the
        caller can address columns by their trimmed names afterwards.
    """
    # Strip whitespaces from column names (in place on the caller's frame)
    df.columns = df.columns.str.strip()

    attack_traffic = df[df[ATTACK_CATEGORY_COL] != BENIGN_CLASS_NAME]

    # Get unique attacker nodes (sources of attack rows)
    attacker_nodes = attack_traffic[SOURCE_COL].unique()

    # Get unique victim nodes (destinations of attack rows)
    victim_nodes = attack_traffic[DESTINATION_COL].unique()

    # Find intersection of attacker and victim nodes
    both_attacker_and_victim = set(attacker_nodes) & set(victim_nodes)

    print("Attacker: ", attacker_nodes)
    print("Victim: ", victim_nodes)
    print("Nodes that are both attackers and victims:", both_attacker_and_victim)

    return (attacker_nodes, victim_nodes)

# Directory containing the CSV files
directory = './Raw'

# Iterate through all CSV files in the directory. os.listdir returns entries in
# arbitrary order, so sort them to keep the printed report stable between runs.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.csv'):
        continue

    file_path = os.path.join(directory, filename)
    print(f"Processing file: {filename}")

    # Read the CSV file
    file_df = pd.read_csv(file_path, header=0)

    # Run the function. It also strips file_df's column names in place, which
    # is what lets the 'Timestamp' lookup below match a padded header.
    get_attacker_and_victim_nodes(file_df)

    # Parse before taking min/max: on the raw strings the comparison is
    # lexicographic (e.g. "6/7/2017 10:00" sorts before "6/7/2017 9:59").
    # Unparseable values become NaT and are ignored by min()/max().
    # NOTE(review): the dates look day-first (dd/mm/yyyy) and the times carry
    # no AM/PM marker — confirm both against the dataset documentation.
    timestamps = pd.to_datetime(file_df['Timestamp'], dayfirst=True, errors='coerce')
    min_time = timestamps.min()
    max_time = timestamps.max()
    print(f"Min/Max Timestamp in {filename}: {min_time}/{max_time}")

# Same report for the combined, downsampled frame; as the cell's last
# expression the returned (attackers, victims) tuple is also displayed.
get_attacker_and_victim_nodes(df)

Processing file: Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv
Attacker:  ['172.16.0.1' '192.168.10.50']
Victim:  ['192.168.10.50' '172.16.0.1']
Nodes that are both attackers and victims: {'192.168.10.50', '172.16.0.1'}
Min/Max Timestamp in Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv: 7/7/2017 3:30/7/7/2017 5:02
Processing file: Monday-WorkingHours.pcap_ISCX.csv
Attacker:  []
Victim:  []
Nodes that are both attackers and victims: set()
Min/Max Timestamp in Monday-WorkingHours.pcap_ISCX.csv: 03/07/2017 01:00:01/03/07/2017 12:59:58
Processing file: Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv
Attacker:  ['172.16.0.1']
Victim:  ['192.168.10.50']
Nodes that are both attackers and victims: set()
Min/Max Timestamp in Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv: 6/7/2017 10:00/6/7/2017 9:59
Processing file: Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv
Attacker:  ['192.168.10.8']
Victim:  ['205.174.165.73']
Nodes that are both attackers and victims: 

(array(['172.16.0.1_0', '192.168.10.50_0', '172.16.0.1_2',
        '192.168.10.8_3', '172.16.0.1_4', '192.168.10.12_5',
        '192.168.10.9_5', '205.174.165.73_5', '192.168.10.15_5',
        '192.168.10.14_5', '192.168.10.5_5', '192.168.10.8_5',
        '192.168.10.17_5', '172.16.0.1_6', '172.16.0.1_7'], dtype=object),
 array(['192.168.10.50_0', '172.16.0.1_0', '192.168.10.50_2',
        '205.174.165.73_3', '192.168.10.50_4', '192.168.10.51_4',
        '52.6.13.28_5', '205.174.165.73_5', '192.168.10.9_5',
        '192.168.10.15_5', '192.168.10.14_5', '192.168.10.5_5',
        '192.168.10.8_5', '52.7.235.158_5', '192.168.10.50_6',
        '192.168.10.50_7'], dtype=object))