In [39]:
from pcap_splitter.splitter import PcapSplitter
import os
import pandas as pd
import sklearn
import zat
import shutil

from zat.log_to_dataframe import LogToDataFrame
import numpy as np
from scapy.all import rdpcap

def list_files_in_directory(directory):
    """Return the names of the regular files directly inside ``directory``.

    Sub-directories are skipped and the listing is not recursive. Names are
    returned without the directory prefix, in the order ``os.listdir``
    yields them.
    """
    return [
        entry
        for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
    ]



def extract_5_tuple(pcap_file):
    """Collect the unique 5-tuples found in a pcap file.

    The capture is read with ``rdpcap``. Every packet that has an IP layer
    and whose IP payload exposes ``sport``/``dport`` contributes one
    ``(src_ip, src_port, dst_ip, dst_port, proto)`` tuple.

    Packets without an IP layer, and IP packets whose payload carries no
    ports (e.g. ICMP), are skipped instead of raising ``AttributeError``,
    matching the guard used by ``extract_fist_useful_tuple``.

    Args:
        pcap_file: path of the capture to read.

    Returns:
        A set of 5-tuples; duplicates are collapsed.
    """
    tuples = set()  # a set keeps each 5-tuple only once
    packets = rdpcap(pcap_file)

    print(f"found {len(packets)} packets in file")

    for idx, packet in enumerate(packets):
        # Progress marker: one dot every 50 packets.
        if idx > 0 and idx % 50 == 0:
            print(".")
        if not packet.haslayer('IP'):
            continue

        ip_layer = packet['IP']
        transport_layer = packet[ip_layer.payload.name]

        # Port-less payloads cannot form a 5-tuple; skip them.
        if not (hasattr(transport_layer, 'sport') and hasattr(transport_layer, 'dport')):
            continue

        tuples.add((
            ip_layer.src,
            transport_layer.sport,
            ip_layer.dst,
            transport_layer.dport,
            ip_layer.proto,
        ))

    return tuples


def extract_fist_useful_tuple(pcap_file, useful_ip):
    """Return the first 5-tuple in ``pcap_file`` whose source IP is ``useful_ip``.

    Packets are scanned in capture order. Packets without an IP layer, or
    whose IP payload has no ``sport`` attribute, are ignored.

    Args:
        pcap_file: path of the capture to read with ``rdpcap``.
        useful_ip: source address to look for, compared with ``==``.

    Returns:
        ``(src_ip, src_port, dst_ip, dst_port, proto)`` for the first
        matching packet, or ``None`` when no packet matches.
    """
    capture = rdpcap(pcap_file)

    for position, pkt in enumerate(capture):
        # Progress marker: one dot every 50 packets.
        if position > 0 and position % 50 == 0:
            print(".")

        if not pkt.haslayer('IP'):
            continue

        ip = pkt['IP']
        l4 = pkt[ip.payload.name]
        source, destination = ip.src, ip.dst

        # No source port means there is no 5-tuple to build.
        if not hasattr(l4, 'sport'):
            continue

        candidate = (source, l4.sport, destination, l4.dport, ip.proto)
        if source == useful_ip:
            return candidate

    return None


def move_files(file_list, source_folder, destination_folder):
    """Move the named files from ``source_folder`` into ``destination_folder``.

    The move is best-effort: a failure on one file is printed and the loop
    carries on with the next one.

    The destination folder is created on demand. Without this, a missing
    destination made ``shutil.move`` raise ``FileNotFoundError``, which was
    then reported as the *source* file being missing. Creation happens
    inside the loop, so an empty ``file_list`` never creates a directory.

    Args:
        file_list: iterable of bare file names (no directory part).
        source_folder: directory currently holding the files.
        destination_folder: directory the files are moved into.
    """
    for file_name in file_list:
        source_file = os.path.join(source_folder, file_name)
        destination_file = os.path.join(destination_folder, file_name)

        try:
            # Cheap no-op once the directory exists.
            os.makedirs(destination_folder, exist_ok=True)
            shutil.move(source_file, destination_file)
        except FileNotFoundError:
            print(f"Error: {file_name} not found in {source_folder}")
        except Exception as e:
            print(f"Error while moving {file_name}: {e}")

In [None]:
# Record the library versions this notebook was run with.
print(f'zat: {zat.__version__:s}')
print(f'Pandas: {pd.__version__:s}')
print(f'Numpy: {np.__version__:s}')
print('Scikit Learn Version:', sklearn.__version__)

# Splitting by flows:

In [74]:
"""
prefix_malware_1 = "../../../Downloads/iot_23_datasets_full/CTU-IoT-Malware-Capture-1-1/"
ps = PcapSplitter(prefix_malware_1 + "2018-05-09-192.168.100.103.pcap")
MALICIOUS_IP = "192.168.100.103"
!mkdir {prefix_malware_1}/splitted
print(ps.split_by_session(prefix_malware_1 + "splitted", pkts_bpf_filter=f"src host {MALICIOUS_IP}"))
"""
"""
prefix_bening_1 = "../../../Downloads/iot_23_datasets_full/CTU-Honeypot-Capture-4-1/"
ps = PcapSplitter(prefix_bening_1 + "2018-10-25-14-06-32-192.168.1.132.pcap")
VICTIM_IP = "192.168.1.132"
!mkdir {prefix_bening_1}/splitted
print(ps.split_by_session(prefix_bening_1 + "splitted", pkts_bpf_filter=f"src host {VICTIM_IP}"))
"""

"""
prefix_bening_2 = "../../../Downloads/iot_23_datasets_full/CTU-Honeypot-Capture-5-1/"
ps = PcapSplitter(prefix_bening_2 + "2018-09-21-capture.pcap")
VICTIM_IP = "192.168.2.3"
!mkdir -p {prefix_bening_2}/splitted
print(ps.split_by_session(prefix_bening_2 + "splitted", pkts_bpf_filter=f"src host {VICTIM_IP}"))
"""


# Active capture: CTU-Honeypot-Capture-7-1 / Somfy-03.
# Split the pcap into one file per session, keeping only packets sent by
# VICTIM_IP (BPF filter "src host ...").
prefix_bening_3 = "../../../Downloads/iot_23_datasets_full/CTU-Honeypot-Capture-7-1/Somfy-03/"
ps = PcapSplitter(prefix_bening_3 + "2019-07-04-16-41-10-192.168.1.158.pcap")
VICTIM_IP = "192.168.1.158"
# Pure-Python equivalent of `mkdir -p`: no shell interpolation, so paths with
# spaces work, and the path string is the one handed to split_by_session.
os.makedirs(prefix_bening_3 + "splitted", exist_ok=True)
print(ps.split_by_session(prefix_bening_3 + "splitted", pkts_bpf_filter=f"src host {VICTIM_IP}"))


"""
prefix_malware_2 = "../../../Downloads/iot_23_datasets_full/CTU-IoT-Malware-Capture-3-1/"
ps = PcapSplitter(prefix_malware_2 + "2018-05-21_capture.pcap")
MALICIOUS_IP = "192.168.2.5"
!mkdir {prefix_malware_2}/splitted
print(ps.split_by_session(prefix_malware_2 + "splitted", pkts_bpf_filter=f"src host {MALICIOUS_IP}"))
"""

"""
# To big to process...
prefix_malware_3 = "../../../Downloads/iot_23_datasets_full/CTU-IoT-Malware-Capture-7-1/"
ps = PcapSplitter(prefix_malware_3 + "2018-07-20-17-31-20-192.168.100.108.pcap")
MALICIOUS_IP = "192.168.100.108"
!mkdir  {prefix_malware_3}/splitted
# print(ps.split_by_session(prefix_malware_3 + "splitted", pkts_bpf_filter=f"src host {MALICIOUS_IP}"))
"""
"""
prefix_malware_4 = "../../../Downloads/iot_23_datasets_full/CTU-IoT-Malware-Capture-8-1/"
ps = PcapSplitter(prefix_malware_4 + "2018-07-31-15-15-09-192.168.100.113.pcap")
MALICIOUS_IP = "192.168.100.113"
!mkdir  {prefix_malware_4}/splitted
print(ps.split_by_session(prefix_malware_4 + "splitted", pkts_bpf_filter=f"src host {MALICIOUS_IP}"))
"""

"""
# NOT USED
prefix_malware_5 = "../../../Downloads/iot_23_datasets_full/CTU-IoT-Malware-Capture-9-1/"
ps = PcapSplitter(prefix_malware_5 + "2018-07-25-10-53-16-192.168.100.111.pcap")
MALICIOUS_IP = "192.168.100.111"
!mkdir  {prefix_malware_5}/splitted
print(ps.split_by_session(prefix_malware_5 + "splitted", pkts_bpf_filter=f"src host {MALICIOUS_IP}"))
"""

"""
prefix_malware_6 = "../../../Downloads/iot_23_datasets_full/CTU-IoT-Malware-Capture-17-1/"
ps = PcapSplitter(prefix_malware_6 + "2018-09-06-11-43-12-192.168.100.111.only15000000.pcap")
MALICIOUS_IP = "192.168.100.111"
!mkdir  {prefix_malware_6}/splitted
print(ps.split_by_session(prefix_malware_6 + "splitted", pkts_bpf_filter=f"src host {MALICIOUS_IP}"))
"""

# Select which capture the rest of the notebook works on: `prefix` is the
# capture's base folder, `useful_ip` the source address whose flows are kept.
prefix = prefix_bening_3
useful_ip=VICTIM_IP

2102.90s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Started...
Finished. Read and written 30012 packets to 16 files



In [71]:
# Same selection as at the end of the splitting cell, repeated so it can be
# re-run without splitting the capture again.
prefix = prefix_bening_3
useful_ip=VICTIM_IP

In [75]:
# Per-session pcap files currently sitting in <prefix>splitted.
files = list_files_in_directory(f"{prefix}splitted")

In [73]:
# Number of files found in the splitted folder by the listing above.
len(files)

29

# Zeek Analysis:

In [66]:
# Load the labelled Zeek conn.log into a DataFrame, keeping only the
# connection endpoints and the combined label column.
zeek_columns = [
    'id.orig_h',
    'id.orig_p',
    'id.resp_h',
    'id.resp_p',
    'tunnel_parents   label   detailed-label',
]
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(prefix + 'bro/conn.log.labeled', usecols=zeek_columns)

# Preview the first rows
bro_df.head()

OSError: Could not read/access zeek log file: ../../../Downloads/iot_23_datasets_full/CTU-Honeypot-Capture-7-1/Somfy-02/bro/conn.log.labeled

In [54]:
# Distinct values present in the combined label column of the loaded log.
bro_df['tunnel_parents   label   detailed-label'].unique()

['-   benign   -']
Categories (1, object): ['-   benign   -']

In [None]:
# For each label value, count the non-null entries in every other column.
bro_df.groupby('tunnel_parents   label   detailed-label').count()

In [57]:
# Keep only the rows whose combined label is exactly '-   benign   -'.
is_benign = bro_df['tunnel_parents   label   detailed-label'] == '-   benign   -'
filered_df = bro_df[is_benign]

# Flow Pcap Analysis:

The following code cell can take many hours to complete. However, it can be interrupted at any time. If the subsequent cells are then run, the work done so far is saved to disk, because the files labelled so far are moved into their per-label folders.

If we need more data, we can get it by re-running these cells whenever we want.

In [58]:
# One bucket of pcap file names per traffic label. The following cells
# report the bucket sizes and move each bucket into its own folder.
doorlock_start = []
echo = []  # benign victim: Amazon Echo (not filled by the loop below)
generic_cc = []
cc_heartbeat = []
okiru = []
generic_ddos = []
muhstik_botnet_file_names = []
horizontal_scan_file_names = []
bening_flow_file_names = []

label_column = 'tunnel_parents   label   detailed-label'

# Substring -> bucket routing. Order matters: the first matching substring
# wins, so 'C&C-HeartBeat' must be tested before the generic 'Malicious   C&C'.
label_buckets = [
    ('-   benign   -', doorlock_start),
    ('Malicious   Okiru', okiru),
    ('Malicious   DDoS', generic_ddos),
    ('C&C-HeartBeat', cc_heartbeat),
    ('Malicious   Attack', muhstik_botnet_file_names),
    ('Horizontal', horizontal_scan_file_names),
    ('Malicious   C&C', generic_cc),
]

for idx, current_file_name in enumerate(files):
    if idx % 500 == 0:
        # idx / len(files) is a fraction; scale it so the "%" suffix is true.
        print(f'{100 * idx / len(files):.1f}% done')
    # Hard cap on the number of files handled in a single run.
    if idx > 10000:
        break

    pcap_file = prefix + "splitted/" + current_file_name
    useful_tuple = extract_fist_useful_tuple(
        pcap_file=pcap_file,
        useful_ip=useful_ip)
    if useful_tuple is None:
        continue

    # Look the flow up in the labelled log by its four endpoint fields
    # (the protocol number in useful_tuple[4] is not part of the lookup).
    match = filered_df[
        (filered_df['id.orig_h'] == useful_tuple[0])
        & (filered_df['id.orig_p'] == useful_tuple[1])
        & (filered_df['id.resp_h'] == useful_tuple[2])
        & (filered_df['id.resp_p'] == useful_tuple[3])
    ]
    if len(match) < 1:
        continue

    if len(match[label_column].unique()) > 1:
        # Several different labels share these endpoints: really skip the
        # file, as the message says, instead of using the first label.
        print('discarding ambiguous 5-tuple')
        continue

    label = match[label_column].iloc[0]

    for needle, bucket in label_buckets:
        if needle in label:
            bucket.append(current_file_name)
            break
    else:
        # No known substring matched: file the flow as benign traffic.
        bening_flow_file_names.append(current_file_name)

0.0% done
.
.
.
.
.


In [59]:
# Bucket sizes, one per line, in this order: doorlock_start, okiru,
# generic_ddos, cc_heartbeat, horizontal scan, benign flows, generic C&C, muhstik.
print(
    *(
        len(bucket)
        for bucket in (
            doorlock_start,
            okiru,
            generic_ddos,
            cc_heartbeat,
            horizontal_scan_file_names,
            bening_flow_file_names,
            generic_cc,
            muhstik_botnet_file_names,
        )
    ),
    sep="\n",
)

4
0
0
0
0
0
0
0


In [60]:
"""
!mkdir -p {prefix}/okiru
!mkdir -p {prefix}/generic_ddos
!mkdir -p {prefix}/cc_heartbeat

!mkdir -p {prefix}/h_scan
!mkdir -p {prefix}/bening_traffic
!mkdir -p {prefix}/generic_cc
!mkdir -p {prefix}/muhstik
"""

# Create the destination folder for the doorlock flows. os.makedirs is the
# pure-Python equivalent of `mkdir -p`: no shell interpolation, so paths
# with spaces work.
os.makedirs(prefix + "doorlock_start", exist_ok=True)


1761.44s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


In [61]:
# Move every labelled bucket out of splitted/ into its own folder, then
# refresh `files` so it only holds what is still unlabelled.
splitted_dir = prefix + "splitted"

for bucket_files, target_folder in [
    (doorlock_start, "doorlock_start"),
    (okiru, "okiru"),
    (generic_ddos, "generic_ddos"),
    (cc_heartbeat, "cc_heartbeat"),
    (horizontal_scan_file_names, "h_scan"),
    (bening_flow_file_names, "bening_traffic"),
    (generic_cc, "generic_cc"),
    (muhstik_botnet_file_names, "muhstik"),
]:
    move_files(bucket_files, splitted_dir, prefix + target_folder)

files = list_files_in_directory(splitted_dir)

In [63]:
# List the doorlock_start folder from disk to check the result of the move.
doorlock_start_file_names = list_files_in_directory(prefix + "doorlock_start")
# Disabled: the same listing and count for the folders of other labels.
"""
horizontal_scan_file_names = list_files_in_directory(prefix + "horizontal_scan_flows")
bening_flow_file_names = list_files_in_directory(prefix + "bening_flows")
command_and_conquer_file_names = list_files_in_directory(prefix + "cc_flows")
muhstik_botnet_file_names = list_files_in_directory(prefix + "muhstik_botnet_flows")

print(len(horizontal_scan_file_names))
print(len(bening_flow_file_names))
print(len(command_and_conquer_file_names))
print(len(muhstik_botnet_file_names))
"""
# Number of files now present in the doorlock_start folder.
print(len(doorlock_start_file_names))

4


# Where did we get the attacks?

## malicious:

- bening_traffic (for attacker) and h_scan came from capture1-1
- muhstik from capture 3-1
- okiru, cc_heartbeat and generic_ddos from capture 7-1

## benign:

- hue from honeypot-4-1
- echo from honeypot-5-1
- doorlock from honeypot-7-1 (Somfy-01, Somfy-02, and Somfy-03)
