In [1]:
import scapy
from scapy.utils import rdpcap
import os
from statistics import stdev
from scapy.all import *
# from scapy.all import sniff, rdpcap, wrpcap

In [2]:
def _packet_header_length(packet):
    """Return the summed length, in bytes, of the protocol headers recognised in ``packet``."""
    header_length = 0
    # Ethernet header: 14 bytes, or 18 bytes when an 802.1Q (VLAN) tag is present.
    if Ether in packet:
        header_length += 18 if Dot1Q in packet else 14
    # LLC header (3 bytes), optionally followed by a SNAP header (5 bytes).
    if LLC in packet:
        header_length += 3
        if SNAP in packet:
            header_length += 5
    # IP header length is stored in 32-bit words (ihl); same for the TCP data offset.
    if IP in packet:
        header_length += packet[IP].ihl * 4
        if TCP in packet:
            header_length += packet[TCP].dataofs * 4
        elif UDP in packet:
            header_length += 8  # fixed-size UDP header
    return header_length


def analyze_NUM_packets(pkts):
    """
    Compute flow statistics over one window of packets.

    Every packet in ``pkts`` is counted as a "forward" packet; no direction
    is distinguished.  All lengths are ``len(packet)``, i.e. the length of the
    whole packet object, not only of its payload.

    :param pkts: sequence of packets supporting ``len()``, ``.time``,
        ``layer in packet`` and ``packet[layer]`` (e.g. a scapy PacketList slice)
    :return: ``None`` when the statistics cannot be computed (empty window,
        zero capture duration, fewer than two inter-arrival times, ...);
        otherwise a tuple, in this order:
    - "Average Packet Size", total length divided by ``len(pkts)``, rounded to 3 digits
    - "Flow Bytes/s", total length divided by (last ``time`` - first ``time``), rounded to 3 digits
    - "Max Packet Length", the maximum packet length
    - "Fwd Packet Length Mean", mean of the successfully analysed packet lengths, rounded to 3 digits
    - "Fwd IAT Min", the minimum inter-arrival time multiplied by 1000
      (NOTE(review): presumably milliseconds, while the other IAT values stay in the units of ``packet.time`` - confirm)
    - "Total Length of Fwd Packets", the sum of the packet lengths
    - "Fwd IAT Std", the sample standard deviation of the inter-arrival times, rounded to 5 digits
    - "Flow IAT Mean", the mean inter-arrival time, rounded to 5 digits
    - "Fwd Packet Length Max", the maximum of the recorded packet lengths
    - "Fwd Header Length", the summed Ethernet/VLAN/LLC/SNAP/IP/TCP/UDP header lengths
    """
    last_time = None

    total_length = 0
    max_pkt_len = 0
    fwd_pkts_len = []
    fwd_ia_times = []
    fwd_header_len = 0

    for packet in pkts:
        try:
            pkt_len = len(packet)
            total_length += pkt_len
            max_pkt_len = max(max_pkt_len, pkt_len)
            fwd_pkts_len.append(pkt_len)

            fwd_header_len += _packet_header_length(packet)

            # Inter-arrival time is measured against the previous successfully processed packet.
            if last_time is not None:
                fwd_ia_times.append(packet.time - last_time)
            last_time = packet.time
        except Exception as e:
            # Best effort: report the offending packet and keep going with the rest of the window.
            print(f"can't analyze packet:")
            print(f"Err: {e}")
            packet.show2()

    try:
        avg_pkt_size = float(round(total_length / len(pkts), 3))
        flow_bytes_per_s = float(round(total_length / (pkts[-1].time - pkts[0].time), 3))
        fwd_pkt_mean_len = float(round(sum(fwd_pkts_len) / len(fwd_pkts_len), 3))
        fwd_iat_min = float(round(min(fwd_ia_times) * 1000, 5))
        tot_len_fwd_pkts = sum(fwd_pkts_len)
        fwd_iat_std = float(round(stdev(fwd_ia_times), 5))
        flow_iat_mean = float(round(sum(fwd_ia_times) / len(fwd_ia_times), 5))
        fwd_pkt_max_len = max(fwd_pkts_len)

        return (
            avg_pkt_size,
            flow_bytes_per_s,
            max_pkt_len,
            fwd_pkt_mean_len,
            fwd_iat_min,
            tot_len_fwd_pkts,
            fwd_iat_std,
            flow_iat_mean,
            fwd_pkt_max_len,
            fwd_header_len,
        )
    except Exception as e:
        # The failure concerns the whole window, so no single packet is dumped here.
        # (The loop variable is also unbound when ``pkts`` is empty.)
        print(f"can't count stats:")
        print(f"Err: {e}")
        return None

In [3]:
def analyze_pcap(pkts, NUM, row_data):
    """
    Split ``pkts`` into consecutive windows of ``NUM`` packets and analyze each one.

    Only complete windows are analyzed; a trailing remainder shorter than
    ``NUM`` is ignored.  Windows for which ``analyze_NUM_packets`` returns
    ``None`` (statistics could not be computed) are skipped, so ``row_data``
    only ever receives feature tuples.

    :param pkts: sliceable sequence of packets
    :param NUM: window size, must be a positive integer
    :param row_data: list that the unlabeled feature tuples are appended to (mutated in place)
    :return: None
    :raises ValueError: if ``NUM`` is not positive (the windowing would never terminate)
    """
    if NUM <= 0:
        raise ValueError(f"NUM must be a positive integer, got {NUM}")
    print(f"Analyzing for every {NUM} packets")
    # Last valid window start is len(pkts) - NUM; range() is empty when there are fewer than NUM packets.
    for start in range(0, len(pkts) - NUM + 1, NUM):
        stats = analyze_NUM_packets(pkts[start:start + NUM])
        if stats is not None:
            row_data.append(stats)

In [4]:
# Module-level default capture path; later cells assign it before calling read_pcap().
pcap_to_read = ''


def read_pcap(path=None):
    """
    Read a capture file with scapy's ``rdpcap`` and return its packets.

    :param path: capture file to read; when omitted, the module-level
        ``pcap_to_read`` is used
    :return: whatever ``rdpcap`` returns for the file (the packet list)
    :raises ValueError: if neither ``path`` nor ``pcap_to_read`` names a file
    """
    target = pcap_to_read if path is None else path
    if not target:
        raise ValueError("no pcap file given: pass 'path' or set 'pcap_to_read'")
    return rdpcap(target)

In [5]:
def create_csv(y_correct, NUM, row_data, attack_type, pcap_path=None):
    """
    Write the feature rows of one capture to a labeled .csv dataset.

    The file is created as ``./csvs/<attack_type>/<pcap base name>_<NUM>_<y_correct>_.csv``
    relative to the current working directory.  The folder is always created;
    the file is only written when ``row_data`` is non-empty.

    :param y_correct: label appended as the last column of every row
    :param NUM: window size the rows were computed with (used in the file name)
    :param row_data: iterable of feature tuples in the order returned by
        ``analyze_NUM_packets``; ``None`` entries (failed windows) are skipped
    :param attack_type: name of the sub-folder under ``./csvs/``
    :param pcap_path: capture path the file name is derived from; when omitted,
        the module-level ``pcap_to_read`` is used
    :return: None
    """
    print(f"Creating csv for {NUM} packets")
    source = pcap_to_read if pcap_path is None else pcap_path
    file_name, _ = os.path.splitext(source)
    new_folder = os.path.join(os.getcwd(), "csvs", attack_type)
    os.makedirs(new_folder, exist_ok=True)
    if not row_data:
        return

    # Column order must mirror the tuple returned by analyze_NUM_packets,
    # otherwise the values end up under the wrong header.
    columns = (
        "Average Packet Size",
        "Flow Bytes/s",
        "Max Packet Length",
        "Fwd Packet Length Mean",
        "Fwd IAT Min",
        "Total Length of Fwd Packets",
        "Fwd IAT Std",
        "Flow IAT Mean",
        "Fwd Packet Length Max",
        "Fwd Header Length",
        "Label",
    )
    csv_path = os.path.join(
        new_folder, os.path.split(file_name)[-1] + f"_{NUM}_{y_correct}_" + ".csv"
    )
    label = str(y_correct)
    with open(csv_path, "w") as f:
        f.write(",".join(columns) + "\n")
        for tpl in row_data:
            if tpl is None:
                # Window whose statistics could not be computed.
                continue
            f.write(",".join([str(e) for e in tpl] + [label]) + "\n")

In [6]:
# Point the module-level path (read by read_pcap() and create_csv()) at the capture
# that the next cell labels as "attack", then load its packets.
pcap_to_read = "normal-median-500-timeout-5400-seed-42-tcp-udp-sender7-anomal.pcap"
sniffed_pkts_list = read_pcap()

In [7]:
# Every window of this capture is labeled as attack traffic (Label = 1).
attack_type = "attack"
y_correct = 1
# One dataset per window size; row_data is rebuilt from scratch for each size.
for NUM in (10, 50, 100, 250, 500, 750, 1000, 5000, 10000):
    row_data = []
    analyze_pcap(sniffed_pkts_list, NUM, row_data)
    create_csv(
        y_correct=y_correct,
        NUM=NUM,
        row_data=row_data,
        attack_type=attack_type,
    )

Analyzing for every 10 packets
Creating csv for 10 packets
Analyzing for every 50 packets
Creating csv for 50 packets
Analyzing for every 100 packets
Creating csv for 100 packets
Analyzing for every 250 packets
Creating csv for 250 packets
Analyzing for every 500 packets
Creating csv for 500 packets
Analyzing for every 750 packets
Creating csv for 750 packets
Analyzing for every 1000 packets
Creating csv for 1000 packets
Analyzing for every 5000 packets
Creating csv for 5000 packets
Analyzing for every 10000 packets
Creating csv for 10000 packets


In [8]:
# Drop the reference to the first capture's packets before the next capture is loaded
# (presumably to release memory - the name is re-assigned in the following cell anyway).
del sniffed_pkts_list

In [9]:
# Point the module-level path (read by read_pcap() and create_csv()) at the capture
# that the next cell labels as "normal", then load its packets.
pcap_to_read = "normal-median-500-timeout-5400-seed-42-tcp-udp-sender7.pcap"
sniffed_pkts_list = read_pcap()

In [10]:
# Every window of this capture is labeled as normal traffic (Label = 0).
attack_type = "normal"
y_correct = 0
# One dataset per window size; row_data is rebuilt from scratch for each size.
for NUM in (10, 50, 100, 250, 500, 750, 1000, 5000, 10000):
    row_data = []
    analyze_pcap(sniffed_pkts_list, NUM, row_data)
    create_csv(
        y_correct=y_correct,
        NUM=NUM,
        row_data=row_data,
        attack_type=attack_type,
    )

Analyzing for every 10 packets
Creating csv for 10 packets
Analyzing for every 50 packets
Creating csv for 50 packets
Analyzing for every 100 packets
Creating csv for 100 packets
Analyzing for every 250 packets
Creating csv for 250 packets
Analyzing for every 500 packets
Creating csv for 500 packets
Analyzing for every 750 packets
Creating csv for 750 packets
Analyzing for every 1000 packets
Creating csv for 1000 packets
Analyzing for every 5000 packets
Creating csv for 5000 packets
Analyzing for every 10000 packets
Creating csv for 10000 packets
