In [None]:
from scapy.all import *
import numpy as np
from after_image.feature_extractor import *
from tqdm import tqdm

## Extracting Summary Statistics

In [None]:
# Input/output locations: the dataset root that is read from, and the
# directory that receives the extracted feature files.
dataset_path = "../69897e94e24170c0_UQIOT2022_A7369/data/"
save_path = "./kitsune_features/"

# Attack types that are listed for every device.
common_attack_types = [
    "ACK_Flooding", "ARP_Spoofing", "Port_Scanning",
    "Service_Detection", "SYN_Flooding", "UDP_Flooding",
]

# Map each device name to the attack types recorded for it. All devices share
# the common list object; the Raspberry Pi entry is then replaced in place
# (keeping its position in the dict) with an extended copy.
device_attacks = dict.fromkeys(
    [
        "Cam_1",
        "Google-Nest-Mini_1",
        "Lenovo_Bulb_1",
        "Raspberry_Pi_telnet",
        "Smart_Clock_1",
        "Smartphone_1",
        "Smartphone_2",
        "SmartTV",
    ],
    common_attack_types,
)
device_attacks["Raspberry_Pi_telnet"] = common_attack_types + ["HTTP_Flooding", "Telnet-brute_Force"]

In [None]:
def parse_kitsune(pcap_file, output_file_name, count=float('Inf'), parse_type="scapy"):
    """
    Obtains features from a pcap file using the Kitsune feature extractor.

    Packets are pulled one at a time from the feature extractor. Each packet
    vector is passed to ``nstat.updateGetStats`` and the returned features are
    appended as one CSV row. A packet for which the extractor yields an empty
    vector is written as a row of -1 placeholders and counted as skipped.
    Processing stops after ``count`` packets, when the extractor raises
    EOFError, ValueError or StopIteration (all treated as end of file), or
    when a feature vector contains NaN (the vector is printed first).

    Parameters:
        pcap_file (string): Path to pcap file.
        output_file_name (string): Output path of the feature file.
        count (int): Number of packets to process. Defaults to float('Inf').
        parse_type (string): Either scapy or tshark. Defaults to "scapy".

    File I/O:
        A CSV file (header row, then one row per processed packet) is written
        to output_file_name; its parent directory is created if missing.
        The non-skipped feature vectors are also saved as a NumPy array next
        to it, using the same path with the extension replaced by ".npy".
        Placeholder rows appear only in the CSV, not in the ".npy" array.
    """
    # Function-scope import so the notebook's import cell does not need to change.
    import os

    print("parsing:", pcap_file)

    feature_extractor = FE(pcap_file, parse_type=parse_type)
    headers = feature_extractor.nstat.getNetStatHeaders()

    # Placeholder row for skipped packets. It is sized from the header so it is
    # available even when the very first packet is skipped.
    # NOTE(review): assumes one header name per feature column -- confirm.
    placeholder_row = np.full((1, len(headers)), -1)

    npy_array = []
    skipped = 0
    written = 0

    # Make sure the destination directory exists before opening the file.
    out_dir = os.path.dirname(output_file_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # An unbounded run has no meaningful total for the progress bar.
    progress_total = None if count == float('Inf') else count

    # Context managers close the file and the progress bar even if
    # feature extraction raises part-way through.
    with open(output_file_name, "w") as output_file, tqdm(total=progress_total) as t:
        np.savetxt(output_file, [headers], fmt="%s", delimiter=",")

        pkt_index = 0
        while pkt_index < count:
            try:
                if parse_type == "scapy":
                    # In scapy mode the extractor returns a pair; only the
                    # first element (the packet vector) is used here.
                    traffic_data, _ = feature_extractor.get_next_vector()
                else:
                    traffic_data = feature_extractor.get_next_vector()
            except (EOFError, ValueError) as e:
                print("EOF Reached")
                print(e)
                break
            except StopIteration as e:
                print(e)
                print("EOF Reached")
                break

            pkt_index += 1
            t.update(1)

            # Empty vector: keep the CSV row count aligned with the packet
            # count by writing a placeholder row.
            if len(traffic_data) == 0:
                np.savetxt(output_file, placeholder_row, delimiter=",")
                skipped += 1
                continue

            features = feature_extractor.nstat.updateGetStats(*traffic_data)

            # Stop at the first NaN and show the offending vector.
            if np.isnan(features).any():
                print(features)
                break

            npy_array.append(features)
            np.savetxt(output_file, np.expand_dims(features, axis=0), delimiter=",", fmt="%s")
            written += 1

    # splitext gives the same result as slicing for ".csv" names and stays
    # correct for extensions of any other length.
    np.save(os.path.splitext(output_file_name)[0] + ".npy", np.asarray(npy_array))
    print("skipped:", skipped)
    print("written:", written)

In [None]:
# Run the feature extractor over every (device, attack) capture listed in
# device_attacks; each capture gets its own CSV under save_path/<device>/.
for device, attacks in device_attacks.items():
    for attack in attacks:
        in_path = f"{dataset_path}attack_samples/{device}/{attack}_{device}.pcap"
        out_path = f"{save_path}{device}/{attack}.csv"
        parse_kitsune(in_path, out_path)

In [None]:
# Host discovery traffic: a single capture stored directly under
# attack_samples/, written to save_path as Host_Discovery.csv.
in_path = f"{dataset_path}attack_samples/Host_Discovery.pcap"
out_path = f"{save_path}Host_Discovery.csv"
parse_kitsune(in_path, out_path)

In [None]:
# Benign traffic: the whole_week capture under benign_samples/,
# written to save_path as benign.csv.
in_path = f"{dataset_path}benign_samples/whole_week.pcap"
out_path = f"{save_path}benign.csv"
parse_kitsune(in_path, out_path)