In [1]:
import os
from statistics import stdev
from scapy.all import sniff, rdpcap, wrpcap

In [2]:
def analyze_NUM_packets(pkts):
    """
    analyze NUM packets to get target network characteristics

    A packet that cannot be analyzed (any of the fields read below is missing or
    unusable) is reported and left out of EVERY statistic, so all aggregates
    describe the same set of packets.

    :param pkts: NUM target packets, TCP/IP only !!!
    :return: None when the statistics cannot be computed (fewer than 3 analyzable
    packets, zero flow duration, or an arithmetic error); otherwise a tuple, in this order:
    - "Average Packet Size", the mean of len(packet) over the analyzed packets
    - "Flow Bytes/s", the sum of len(packet) divided by the time between the first and the last analyzed packet
    - "Max Packet Length", the maximum len(packet)
    - "Fwd Packet Length Mean", the mean of packet.len
    - "Fwd IAT Min", the minimum inter-arrival time (IAT) between consecutive analyzed packets, multiplied by 1000
    - "Total Length of Fwd Packets", the sum of packet.len
    - "Fwd IAT Std", the sample standard deviation of the inter-arrival times
    - "Flow IAT Mean", the mean of the inter-arrival times
    - "Fwd Packet Length Max", the maximum packet.len
    - "Fwd Header Length", the sum of packet.ihl * 4 + packet.dataofs * 4
    """
    pkt_sizes = []  # len(packet) of every analyzed packet
    fwd_pkts_len = []  # packet.len of every analyzed packet
    timestamps = []  # packet.time of every analyzed packet, in capture order
    fwd_header_len = 0

    for packet in pkts:
        try:
            # Read every field BEFORE touching the accumulators: a packet that
            # fails on a later field must not be half-counted in the earlier ones.
            pkt_size = len(packet)
            ip_len = packet.len
            timestamp = packet.time
            # presumably ihl/dataofs are header sizes in 32-bit words, hence * 4
            header_len = packet.ihl * 4 + packet.dataofs * 4
        except Exception as e:
            print(e)
            print("can't analyze packet:")
            packet.show()
            continue
        pkt_sizes.append(pkt_size)
        fwd_pkts_len.append(ip_len)
        timestamps.append(timestamp)
        fwd_header_len += header_len

    # inter-arrival times between consecutive analyzed packets
    fwd_ia_times = [
        later - earlier for earlier, later in zip(timestamps, timestamps[1:])
    ]
    if len(fwd_ia_times) < 2:
        # stdev() needs at least two inter-arrival times, i.e. three packets
        print(
            f"can't analyze group: only {len(timestamps)} analyzable packet(s), "
            "at least 3 are required"
        )
        return None

    duration = timestamps[-1] - timestamps[0]
    if duration == 0:
        print("can't analyze group: first and last packet have the same timestamp")
        return None

    try:
        total_length = sum(pkt_sizes)
        avg_pkt_size = float(round(total_length / len(pkt_sizes), 3))
        flow_bytes_per_s = float(round(total_length / duration, 3))
        max_pkt_len = max(pkt_sizes)
        fwd_pkt_mean_len = float(round(sum(fwd_pkts_len) / len(fwd_pkts_len), 3))
        # NOTE(review): only this IAT feature is scaled by 1000; "Fwd IAT Std" and
        # "Flow IAT Mean" stay in the unit of packet.time - confirm this is intended.
        fwd_iat_min = float(round(min(fwd_ia_times) * 1000, 5))
        tot_len_fwd_pkts = sum(fwd_pkts_len)
        fwd_iat_std = float(round(stdev(fwd_ia_times), 5))
        flow_iat_mean = float(round(sum(fwd_ia_times) / len(fwd_ia_times), 5))
        fwd_pkt_max_len = max(fwd_pkts_len)
    except (ArithmeticError, TypeError, ValueError) as e:
        # best effort, as before: report the problem and give the caller None
        print(e)
        return None

    return (
        avg_pkt_size,
        flow_bytes_per_s,
        max_pkt_len,
        fwd_pkt_mean_len,
        fwd_iat_min,
        tot_len_fwd_pkts,
        fwd_iat_std,
        flow_iat_mean,
        fwd_pkt_max_len,
        fwd_header_len,
    )

In [3]:
def analyze_pcap(pkts, NUM, row_data, verbose=True):
    """
    group and analyze packets by NUM

    Only complete groups are analyzed: the packets are split into consecutive,
    non-overlapping slices of exactly NUM packets and a trailing slice with
    fewer than NUM packets is ignored.

    :param pkts: target list with packets
    :param NUM: number of packets per group, must be a positive integer
    :param row_data: list that receives one analyze_NUM_packets result per group
    :param verbose: when True (default) print the slice bounds of every group
    :return: None, put unlabeled data samples at 'row_data'
    :raises ValueError: if NUM is not positive (the grouping would never terminate)
    """
    if NUM <= 0:
        raise ValueError(f"NUM must be a positive integer, got {NUM}")

    # the last admissible start index is len(pkts) - NUM, so every slice is full
    for i in range(0, len(pkts) - NUM + 1, NUM):
        j = i + NUM
        if verbose:
            print(f"in analyze fn, num={NUM}, i={i}, j={j}")
        row_data.append(analyze_NUM_packets(pkts[i:j]))

In [4]:
# path of the capture currently being processed; read_pcap() falls back to it
# and create_csv() derives the output file name from it
pcap_to_read = ''


def read_pcap(path=None):
    """
    read a .pcap file with scapy's rdpcap
    :param path: capture file to read; when omitted, the module-level 'pcap_to_read' is used
    :return: the result of rdpcap for that file (the packets of the capture)
    """
    return rdpcap(pcap_to_read if path is None else path)

In [5]:
def create_csv(y_correct, NUM, row_data, attack_type):
    """
    write the analyzed samples to a labeled .csv dataset under the local 'csvs/' folder

    The file is 'csvs/<attack_type>/<pcap name>_<NUM>_<y_correct>_.csv', where
    <pcap name> is the base name (without extension) of the module-level
    'pcap_to_read'. The 'csvs/<attack_type>' folder is always created; the file
    itself is only written when 'row_data' is not empty.

    :param y_correct: label written in the last column of every row (also part of the file name)
    :param NUM: group size the samples were computed with (only used in the file name)
    :param row_data: list of feature tuples in the order returned by analyze_NUM_packets;
        None entries (groups that could not be analyzed) are skipped
    :param attack_type: name of the sub-folder of 'csvs/' to write into
    :return: None, create local 'csvs/' folder
    """
    # Column names in the SAME order as the tuple returned by analyze_NUM_packets,
    # otherwise the values end up under the wrong header.
    columns = (
        "Average Packet Size",
        "Flow Bytes/s",
        "Max Packet Length",
        "Fwd Packet Length Mean",
        "Fwd IAT Min",
        "Total Length of Fwd Packets",
        "Fwd IAT Std",
        "Flow IAT Mean",
        "Fwd Packet Length Max",
        "Fwd Header Length",
        "Label",
    )

    file_name, _ = os.path.splitext(pcap_to_read)
    new_folder = os.path.join(os.getcwd(), "csvs", attack_type)
    os.makedirs(new_folder, exist_ok=True)
    if not row_data:
        return

    csv_path = os.path.join(
        new_folder, os.path.basename(file_name) + f"_{NUM}_{y_correct}_" + ".csv"
    )
    label = str(y_correct)
    with open(csv_path, "w") as f:
        f.write(",".join(columns) + "\n")
        for tpl in row_data:
            if tpl is None:
                # the group could not be analyzed, there is nothing to write
                continue
            f.write(",".join([str(e) for e in tpl] + [label]) + "\n")

In [6]:
# Group sizes to build one dataset for each.
GROUP_SIZES = [10, 50, 100, 250, 500, 750, 1000, 5000, 10000]

# The capture, its attack type and its label are the same for every group size,
# so they are set once and the capture is read and parsed only once.
pcap_to_read = "datasets_pcaps/xss/xss-tcp-ip-vi-at-9374.pcap"
attack_type = "xss"
y_correct = 1

sniffed_pkts_list = read_pcap()

for NUM in GROUP_SIZES:
    # fresh list per group size: create_csv writes exactly one file per NUM
    row_data = []
    analyze_pcap(sniffed_pkts_list, NUM, row_data)
    create_csv(y_correct, NUM, row_data, attack_type)

in analyze fn, num=10, i=0, j=10
in analyze fn, num=10, i=10, j=20
in analyze fn, num=10, i=20, j=30
in analyze fn, num=10, i=30, j=40
in analyze fn, num=10, i=40, j=50
in analyze fn, num=10, i=50, j=60
in analyze fn, num=10, i=60, j=70
in analyze fn, num=10, i=70, j=80
in analyze fn, num=10, i=80, j=90
in analyze fn, num=10, i=90, j=100
in analyze fn, num=10, i=100, j=110
in analyze fn, num=10, i=110, j=120
in analyze fn, num=10, i=120, j=130
in analyze fn, num=10, i=130, j=140
in analyze fn, num=10, i=140, j=150
in analyze fn, num=10, i=150, j=160
in analyze fn, num=10, i=160, j=170
in analyze fn, num=10, i=170, j=180
in analyze fn, num=10, i=180, j=190
in analyze fn, num=10, i=190, j=200
in analyze fn, num=10, i=200, j=210
in analyze fn, num=10, i=210, j=220
in analyze fn, num=10, i=220, j=230
in analyze fn, num=10, i=230, j=240
in analyze fn, num=10, i=240, j=250
in analyze fn, num=10, i=250, j=260
in analyze fn, num=10, i=260, j=270
in analyze fn, num=10, i=270, j=280
in analyze f