In [None]:
from pcap_splitter.splitter import PcapSplitter
from scapy.all import rdpcap, wrpcap, IP, TCP
import ipaddress
import socket
import os
import pandas as pd
import sklearn
import zat
import shutil

from zat.log_to_dataframe import LogToDataFrame
import numpy as np
from scapy.all import rdpcap

import time
from tqdm.notebook import trange, tqdm
import dpkt


def list_files_in_directory(directory):
    """Return the names of the regular files located directly in ``directory``.

    Entries that are not files (e.g. sub-directories) are left out. The names
    are returned without the directory prefix, in ``os.listdir`` order.
    """
    return [
        entry
        for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
    ]



def extract_5_tuple(pcap_file):
    """Collect the unique 5-tuples found in a pcap file.

    Args:
        pcap_file: Path of the capture, read in full with scapy's ``rdpcap``.

    Returns:
        A set of ``(src_ip, src_port, dst_ip, dst_port, proto)`` tuples, one per
        distinct combination seen on IP packets that expose ports.

    Packets without an IP layer, and IP packets whose payload has no
    ``sport``/``dport`` (ICMP, for instance), are skipped.
    """
    tuples = set()  # Using a set to ensure unique tuples
    packets = rdpcap(pcap_file)

    print(f"found {len(packets)} packets in file")

    for idx, packet in enumerate(packets):
        # Progress marker: one "." line every 50 packets.
        if idx > 0 and idx % 50 == 0:
            print(".")
        if not packet.haslayer('IP'):
            continue

        ip_layer = packet['IP']
        # Use the IP payload directly instead of looking it up again by name.
        transport_layer = ip_layer.payload

        # Port-less payloads used to raise AttributeError on `.sport` and abort
        # the whole extraction; skip them like extract_fist_useful_tuple does.
        if not (hasattr(transport_layer, 'sport') and hasattr(transport_layer, 'dport')):
            continue

        tuples.add((ip_layer.src,
                    transport_layer.sport,
                    ip_layer.dst,
                    transport_layer.dport,
                    ip_layer.proto))

    return tuples


def extract_fist_useful_tuple(pcap_file, useful_ip):
    """Return the first 5-tuple in ``pcap_file`` whose source IP is ``useful_ip``.

    The capture is read in full with ``rdpcap`` and scanned in order. Only IP
    packets whose transport layer exposes ``sport`` are considered. The result
    is ``(src_ip, src_port, dst_ip, dst_port, proto)``, or ``None`` when no
    packet qualifies.
    """
    for position, pkt in enumerate(rdpcap(pcap_file)):
        # Progress marker: one "." line every 50 packets (never for the first).
        if position > 0 and position % 50 == 0:
            print(".")

        if not pkt.haslayer('IP'):
            continue

        ip_hdr = pkt['IP']
        l4 = pkt[ip_hdr.payload.name]
        origin, destination = ip_hdr.src, ip_hdr.dst

        # Layers without ports cannot form a 5-tuple.
        if not hasattr(l4, 'sport'):
            continue

        candidate = (origin, l4.sport, destination, l4.dport, ip_hdr.proto)
        if origin == useful_ip:
            return candidate

    return None


def move_files(file_list, source_folder, destination_folder):
    """Move each file named in ``file_list`` between two folders.

    Every name is resolved against ``source_folder`` and moved to the same name
    under ``destination_folder``. Failures are reported with ``print`` and do
    not stop the remaining moves. Nothing is returned.
    """
    transfers = (
        (name,
         os.path.join(source_folder, name),
         os.path.join(destination_folder, name))
        for name in file_list
    )
    for name, origin, target in transfers:
        try:
            shutil.move(origin, target)
        except FileNotFoundError:
            print(f"Error: {name} not found in {source_folder}")
        except Exception as err:
            print(f"Error while moving {name}: {err}")

In [None]:
# Print the versions of the main libraries so the results can be tied to a
# specific environment.
# The '{:s}' format requires each __version__ attribute to be a string.
print('zat: {:s}'.format(zat.__version__))
print('Pandas: {:s}'.format(pd.__version__))
print('Numpy: {:s}'.format(np.__version__))
print('Scikit Learn Version:', sklearn.__version__)

# OKIRU attack:

## Metadata:

In [None]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-IoT-Malware-Capture-7-1/'
CURR_FILE = f"{CURR_FOLDER}2018-07-20-17-31-20-192.168.100.108.pcap"

## ZAT:

In [None]:
# Load the labelled Zeek connection log of the current capture, keeping only
# the endpoint, packet-count and combined label columns.
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(
    CURR_FOLDER + 'bro/conn.log.labeled',
    usecols=[
        'id.orig_h', 'id.orig_p',
        'id.resp_h', 'id.resp_p',
        'orig_pkts', 'resp_pkts',
        'tunnel_parents   label   detailed-label',
    ],
)

# Preview the first rows.
bro_df.head()

In [None]:
# List the distinct values of the combined label column for this capture.
bro_df['tunnel_parents   label   detailed-label'].unique()

In [None]:
# `filtered_df` is not assigned anywhere before this cell, so displaying it only
# worked with leftover kernel state and raised NameError on a fresh
# "Restart & Run All". Build the view explicitly from `bro_df` instead.
# NOTE(review): assumes the Okiru connections contain "Okiru" in the combined
# label column and originate from 192.168.100.108 (the source address used by
# filter_packets) — confirm against the label values listed by .unique().
label_column = bro_df['tunnel_parents   label   detailed-label'].astype(str)
filtered_df = bro_df[label_column.str.contains('Okiru', regex=False)
                     & (bro_df['id.orig_h'] == '192.168.100.108')]
filtered_df

In [None]:
def filter_packets(input_pcap_file, output_pcap_file,
                   source_ip='192.168.100.108', source_port=5526,
                   dest_port=37215, packets_to_filter=500000):
    """Copy the TCP packets of one flow direction from a pcap into a new pcap.

    A packet is kept when it is an Ethernet/IPv4/TCP packet sent from
    ``source_ip``:``source_port`` to destination port ``dest_port``.

    Args:
        input_pcap_file: Path of the capture to read.
        output_pcap_file: Path of the pcap to create (overwritten if present).
        source_ip: Dotted-quad IPv4 source address to match.
        source_port: TCP source port to match.
        dest_port: TCP destination port to match.
        packets_to_filter: Stop after this many packets have been written.

    The defaults reproduce the criteria that were previously hard-coded in the
    body. Matching packets are written as they are found, so the selection is
    never held in memory as a whole.
    """
    packed_source_ip = ipaddress.IPv4Address(source_ip).packed
    count = 0

    with open(input_pcap_file, 'rb') as source:
        pcap = dpkt.pcap.Reader(source)

        with open(output_pcap_file, 'wb') as sink:
            pcap_writer = dpkt.pcap.Writer(sink)

            for timestamp, buf in pcap:
                eth = dpkt.ethernet.Ethernet(buf)
                ip = eth.data
                # Check the type before touching `.data`: when dpkt cannot
                # decode the Ethernet payload it stays raw bytes, and reading
                # `.data` on bytes raised AttributeError.
                if not isinstance(ip, dpkt.ip.IP) or ip.src != packed_source_ip:
                    continue

                tcp = ip.data
                if not isinstance(tcp, dpkt.tcp.TCP):
                    continue
                if tcp.sport != source_port or tcp.dport != dest_port:
                    continue

                pcap_writer.writepkt(buf, timestamp)
                count += 1
                # Progress report every 50000 kept packets.
                if count % 50000 == 0:
                    print(count)
                if count >= packets_to_filter:
                    break



# Extract the matching packets of the current capture into "filtered.pcap"
# inside the capture folder.
filter_packets(input_pcap_file=CURR_FILE,
               output_pcap_file=CURR_FOLDER + "filtered.pcap")


# HScan:

In [None]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-IoT-Malware-Capture-1-1/'
CURR_FILE = f"{CURR_FOLDER}2018-05-09-192.168.100.103.pcap"

In [None]:
# Load the labelled Zeek connection log of the current capture, keeping only
# the endpoint, packet-count and combined label columns.
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(
    CURR_FOLDER + 'bro/conn.log.labeled',
    usecols=[
        'id.orig_h', 'id.orig_p',
        'id.resp_h', 'id.resp_p',
        'orig_pkts', 'resp_pkts',
        'tunnel_parents   label   detailed-label',
    ],
)

# Preview the first rows.
bro_df.head()

In [None]:
# List the distinct values of the combined label column for this capture.
bro_df['tunnel_parents   label   detailed-label'].unique()

In [None]:
# Keep only the connections labelled as part of a horizontal port scan.
filtered_df = bro_df.loc[
    bro_df['tunnel_parents   label   detailed-label'] == '(empty)   Malicious   PartOfAHorizontalPortScan'
]

In [None]:
# Restrict the selection to connections originated by 192.168.100.103.
filtered_df = filtered_df.loc[filtered_df['id.orig_h'] == '192.168.100.103']

In [None]:
# Extract, from the current capture, the packets sent by the scanning host that
# belong to one of the connections selected in `filtered_df`.
input_pcap_file = CURR_FILE
output_pcap_file = CURR_FOLDER+"filtered.pcap"

filtered_packets = []

# Define filtering criteria
source_ip = '192.168.100.103'
source_ip = ipaddress.IPv4Address(source_ip).packed

packets_to_filter = 50000
count = 0

# Index the selected connections once as (orig_port, resp_port, resp_host)
# keys. Scanning the whole DataFrame for every packet made the loop cost
# proportional to packets x connections; a set lookup is constant time.
flow_keys = filtered_df[['id.orig_p', 'id.resp_p', 'id.resp_h']].dropna()
wanted_flows = set(zip(flow_keys['id.orig_p'].tolist(),
                       flow_keys['id.resp_p'].tolist(),
                       flow_keys['id.resp_h'].astype(str).tolist()))

with open(input_pcap_file, 'rb') as f:
    pcap = dpkt.pcap.Reader(f)

    for idx, (timestamp, buf) in enumerate(pcap):
        eth = dpkt.ethernet.Ethernet(buf)
        ip = eth.data
        # Check the type before reading `.data`: an undecoded Ethernet payload
        # is raw bytes and has no `.data` attribute.
        if not isinstance(ip, dpkt.ip.IP) or ip.src != source_ip:
            continue

        segment = ip.data
        # Only TCP and UDP segments carry ports; other payloads (ICMP, raw
        # bytes) used to raise AttributeError on `.sport`.
        if not isinstance(segment, (dpkt.tcp.TCP, dpkt.udp.UDP)):
            continue

        if (segment.sport, segment.dport, socket.inet_ntoa(ip.dst)) in wanted_flows:
            filtered_packets.append((timestamp, buf))
            count += 1
            if count % 1000 == 0:
                print(count)
            if count >= packets_to_filter:
                break

# Write the filtered packets to another PCAP file
with open(output_pcap_file, 'wb') as f:
    pcap_writer = dpkt.pcap.Writer(f)
    for timestamp, buf in filtered_packets:
        pcap_writer.writepkt(buf, timestamp)





# CC HeartBeat

In [None]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-IoT-Malware-Capture-7-1/'
CURR_FILE = f"{CURR_FOLDER}2018-07-20-17-31-20-192.168.100.108.pcap"

In [None]:
# Load the labelled Zeek connection log of the current capture, keeping only
# the endpoint, packet-count and combined label columns.
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(
    CURR_FOLDER + 'bro/conn.log.labeled',
    usecols=[
        'id.orig_h', 'id.orig_p',
        'id.resp_h', 'id.resp_p',
        'orig_pkts', 'resp_pkts',
        'tunnel_parents   label   detailed-label',
    ],
)

# Preview the first rows.
bro_df.head()

In [None]:
# List the distinct values of the combined label column for this capture.
bro_df['tunnel_parents   label   detailed-label'].unique()

In [None]:
# Keep only the connections labelled as C&C heartbeat traffic.
filtered_df = bro_df.loc[
    bro_df['tunnel_parents   label   detailed-label'] == '(empty)   Malicious   C&C-HeartBeat'
]

In [None]:
# Restrict the selection to connections originated by 192.168.100.108.
filtered_df = filtered_df.loc[filtered_df['id.orig_h'] == '192.168.100.108']

In [None]:
# Display the connections that remain after the two filters above.
filtered_df

In [None]:
# Extract, from the current capture, the TCP packets sent by the infected host
# that belong to one of the connections selected in `filtered_df`.
input_pcap_file = CURR_FILE
output_pcap_file = CURR_FOLDER+"cc_heartbeat.pcap"

filtered_packets = []

# Define filtering criteria
source_ip = '192.168.100.108'
source_ip = ipaddress.IPv4Address(source_ip).packed

packets_to_filter = 1500
count = 0

# Index the selected connections once as (orig_port, resp_port, resp_host)
# keys so each packet costs a set lookup instead of a full DataFrame scan.
flow_keys = filtered_df[['id.orig_p', 'id.resp_p', 'id.resp_h']].dropna()
wanted_flows = set(zip(flow_keys['id.orig_p'].tolist(),
                       flow_keys['id.resp_p'].tolist(),
                       flow_keys['id.resp_h'].astype(str).tolist()))

with open(input_pcap_file, 'rb') as f:
    pcap = dpkt.pcap.Reader(f)

    for idx, (timestamp, buf) in enumerate(pcap):
        eth = dpkt.ethernet.Ethernet(buf)
        ip = eth.data
        # Check the type before reading `.data`: an undecoded Ethernet payload
        # is raw bytes and has no `.data` attribute.
        if not isinstance(ip, dpkt.ip.IP) or ip.src != source_ip:
            continue

        tcp = ip.data
        # Only TCP segments are matched against the selected connections.
        if not isinstance(tcp, dpkt.tcp.TCP):
            continue

        if (tcp.sport, tcp.dport, socket.inet_ntoa(ip.dst)) in wanted_flows:
            filtered_packets.append((timestamp, buf))
            count += 1
            if count % 10 == 0:
                print(count)
            if count >= packets_to_filter:
                break

# Write the filtered packets to another PCAP file
with open(output_pcap_file, 'wb') as f:
    pcap_writer = dpkt.pcap.Writer(f)
    for timestamp, buf in filtered_packets:
        pcap_writer.writepkt(buf, timestamp)


# Generic DDoS

In [None]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-IoT-Malware-Capture-7-1/'
CURR_FILE = f"{CURR_FOLDER}2018-07-20-17-31-20-192.168.100.108.pcap"

In [None]:
# Load the labelled Zeek connection log of the current capture, keeping only
# the endpoint, packet-count and combined label columns.
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(
    CURR_FOLDER + 'bro/conn.log.labeled',
    usecols=[
        'id.orig_h', 'id.orig_p',
        'id.resp_h', 'id.resp_p',
        'orig_pkts', 'resp_pkts',
        'tunnel_parents   label   detailed-label',
    ],
)

# Preview the first rows.
bro_df.head()

In [None]:
# List the distinct values of the combined label column for this capture.
bro_df['tunnel_parents   label   detailed-label'].unique()

In [None]:
# Keep only the connections labelled as DDoS traffic.
filtered_df = bro_df.loc[
    bro_df['tunnel_parents   label   detailed-label'] == '(empty)   Malicious   DDoS'
]

In [None]:
# Restrict the selection to connections originated by 192.168.100.108.
filtered_df = filtered_df.loc[filtered_df['id.orig_h'] == '192.168.100.108']

In [None]:
# Display the connections that remain after the two filters above.
filtered_df

In [None]:
# Extract, from the current capture, the UDP packets sent by the infected host
# to port 80 of the target whose source port appears in `filtered_df`.
input_pcap_file = CURR_FILE
output_pcap_file = CURR_FOLDER+"generic-ddos.pcap"

filtered_packets = []

# Define filtering criteria
source_ip = '192.168.100.108'
source_ip = ipaddress.IPv4Address(source_ip).packed
target_ip = '64.237.233.111'
target_ip = ipaddress.IPv4Address(target_ip).packed

packets_to_filter = 500000
count = 0

# Collect the originator ports of the selected connections once, so each packet
# costs a set lookup instead of a full DataFrame scan.
ddos_source_ports = set(filtered_df['id.orig_p'].dropna().tolist())

with open(input_pcap_file, 'rb') as f:
    pcap = dpkt.pcap.Reader(f)

    for idx, (timestamp, buf) in tqdm(enumerate(pcap)):
        eth = dpkt.ethernet.Ethernet(buf)
        ip = eth.data
        # Check the type before reading `.data`: an undecoded Ethernet payload
        # is raw bytes and has no `.data` attribute.
        if not isinstance(ip, dpkt.ip.IP):
            continue
        if ip.dst != target_ip or ip.src != source_ip:
            continue

        udp = ip.data
        if not isinstance(udp, dpkt.udp.UDP) or udp.dport != 80:
            continue

        if udp.sport in ddos_source_ports:
            filtered_packets.append((timestamp, buf))
            count += 1
            if count % 100 == 0:
                print(count)
            if count >= packets_to_filter:
                break

# Write the filtered packets to another PCAP file
with open(output_pcap_file, 'wb') as f:
    pcap_writer = dpkt.pcap.Writer(f)
    for timestamp, buf in filtered_packets:
        pcap_writer.writepkt(buf, timestamp)


In [None]:
# NOTE: this cell held an orphaned fragment of the DDoS filter condition
# (`and ip.dst == target_ip and udp.dport == 80:`). On its own it is a
# SyntaxError that aborts "Run All"; the complete condition lives in the DDoS
# extraction loop, so the fragment is kept only as this comment.

# Benign traffic from victims:

## Doorlock:

In [None]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-Honeypot-Capture-7-1/Somfy-01/'
CURR_FILE = f"{CURR_FOLDER}2019-07-03-15-15-47-first_start_somfy_gateway.pcap"

In [None]:
# Load the labelled Zeek connection log of the current capture, keeping only
# the endpoint, packet-count and combined label columns.
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(
    CURR_FOLDER + 'bro/conn.log.labeled',
    usecols=[
        'id.orig_h', 'id.orig_p',
        'id.resp_h', 'id.resp_p',
        'orig_pkts', 'resp_pkts',
        'tunnel_parents   label   detailed-label',
    ],
)

# Preview the first rows.
bro_df.head()

In [None]:
# Keep only the connections originated by 192.168.1.158.
filtered_df = bro_df.loc[bro_df['id.orig_h'] == '192.168.1.158']

In [None]:
# Display the connections that remain after the originator filter.
filtered_df

In [None]:
# Copy every IPv4 packet sent by the device from the current capture into
# "doorlock_1.pcap", up to `packets_to_filter` packets.
input_pcap_file = CURR_FILE
output_pcap_file = CURR_FOLDER + "doorlock_1.pcap"

# Filtering criteria: packed IPv4 source address and maximum packet count.
source_ip = ipaddress.IPv4Address('192.168.1.158').packed
packets_to_filter = 500000

filtered_packets = []
count = 0

with open(input_pcap_file, 'rb') as f:
    for timestamp, buf in dpkt.pcap.Reader(f):
        ip = dpkt.ethernet.Ethernet(buf).data
        # Skip anything that is not IPv4 traffic from the selected host.
        if not isinstance(ip, dpkt.ip.IP) or ip.src != source_ip:
            continue

        filtered_packets.append((timestamp, buf))
        count = len(filtered_packets)
        # Progress report every 1000 kept packets.
        if count % 1000 == 0:
            print(count)
        if count >= packets_to_filter:
            break

# Store the selection, keeping the original timestamps.
with open(output_pcap_file, 'wb') as f:
    pcap_writer = dpkt.pcap.Writer(f)
    for timestamp, buf in filtered_packets:
        pcap_writer.writepkt(buf, timestamp)


In [None]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-Honeypot-Capture-7-1/Somfy-02/'
CURR_FILE = f"{CURR_FOLDER}2019-07-03-16-41-09-192.168.1.158.pcap"

In [None]:
# Copy every IPv4 packet sent by the device from the current capture into
# "doorlock_2.pcap", up to `packets_to_filter` packets.
input_pcap_file = CURR_FILE
output_pcap_file = CURR_FOLDER + "doorlock_2.pcap"

# Filtering criteria: packed IPv4 source address and maximum packet count.
source_ip = ipaddress.IPv4Address('192.168.1.158').packed
packets_to_filter = 500000

filtered_packets = []
count = 0

with open(input_pcap_file, 'rb') as f:
    for timestamp, buf in dpkt.pcap.Reader(f):
        ip = dpkt.ethernet.Ethernet(buf).data
        # Skip anything that is not IPv4 traffic from the selected host.
        if not isinstance(ip, dpkt.ip.IP) or ip.src != source_ip:
            continue

        filtered_packets.append((timestamp, buf))
        count = len(filtered_packets)
        # Progress report every 1000 kept packets.
        if count % 1000 == 0:
            print(count)
        if count >= packets_to_filter:
            break

# Store the selection, keeping the original timestamps.
with open(output_pcap_file, 'wb') as f:
    pcap_writer = dpkt.pcap.Writer(f)
    for timestamp, buf in filtered_packets:
        pcap_writer.writepkt(buf, timestamp)

In [None]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-Honeypot-Capture-7-1/Somfy-03/'
CURR_FILE = f"{CURR_FOLDER}2019-07-04-16-41-10-192.168.1.158.pcap"

In [None]:
# Copy every IPv4 packet sent by the device from the current capture into
# "doorlock_3.pcap", up to `packets_to_filter` packets.
input_pcap_file = CURR_FILE
output_pcap_file = CURR_FOLDER + "doorlock_3.pcap"

# Filtering criteria: packed IPv4 source address and maximum packet count.
source_ip = ipaddress.IPv4Address('192.168.1.158').packed
packets_to_filter = 500000

filtered_packets = []
count = 0

with open(input_pcap_file, 'rb') as f:
    for timestamp, buf in dpkt.pcap.Reader(f):
        ip = dpkt.ethernet.Ethernet(buf).data
        # Skip anything that is not IPv4 traffic from the selected host.
        if not isinstance(ip, dpkt.ip.IP) or ip.src != source_ip:
            continue

        filtered_packets.append((timestamp, buf))
        count = len(filtered_packets)
        # Progress report every 1000 kept packets.
        if count % 1000 == 0:
            print(count)
        if count >= packets_to_filter:
            break

# Store the selection, keeping the original timestamps.
with open(output_pcap_file, 'wb') as f:
    pcap_writer = dpkt.pcap.Writer(f)
    for timestamp, buf in filtered_packets:
        pcap_writer.writepkt(buf, timestamp)

# Echo

In [None]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-Honeypot-Capture-5-1/'
CURR_FILE = f"{CURR_FOLDER}2018-09-21-capture.pcap"

In [None]:
# Load the labelled Zeek connection log of the current capture, keeping only
# the endpoint, packet-count and combined label columns.
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(
    CURR_FOLDER + 'bro/conn.log.labeled',
    usecols=[
        'id.orig_h', 'id.orig_p',
        'id.resp_h', 'id.resp_p',
        'orig_pkts', 'resp_pkts',
        'tunnel_parents   label   detailed-label',
    ],
)

# Preview the first rows.
bro_df.head()

In [None]:
# Show the connections ordered by the `orig_pkts` column (ascending, the
# pandas default); `bro_df` itself is not modified.
bro_df.sort_values('orig_pkts')

In [None]:
# List the distinct values of the combined label column for this capture.
bro_df['tunnel_parents   label   detailed-label'].unique()

In [None]:
# Copy every IPv4 packet sent by the device from the current capture into
# "echo.pcap", up to `packets_to_filter` packets.
input_pcap_file = CURR_FILE
output_pcap_file = CURR_FOLDER + "echo.pcap"

# Filtering criteria: packed IPv4 source address and maximum packet count.
source_ip = ipaddress.IPv4Address('192.168.2.3').packed
packets_to_filter = 1000000

filtered_packets = []
count = 0

with open(input_pcap_file, 'rb') as f:
    for timestamp, buf in dpkt.pcap.Reader(f):
        ip = dpkt.ethernet.Ethernet(buf).data
        # Skip anything that is not IPv4 traffic from the selected host.
        if not isinstance(ip, dpkt.ip.IP) or ip.src != source_ip:
            continue

        filtered_packets.append((timestamp, buf))
        count = len(filtered_packets)
        # Progress report every 1000 kept packets.
        if count % 1000 == 0:
            print(count)
        if count >= packets_to_filter:
            break

# Store the selection, keeping the original timestamps.
with open(output_pcap_file, 'wb') as f:
    pcap_writer = dpkt.pcap.Writer(f)
    for timestamp, buf in filtered_packets:
        pcap_writer.writepkt(buf, timestamp)

# Hue:

In [230]:
# Paths for this section: dataset root, capture folder and the pcap to read.
DATASET_ROOT = '/home/jovyan/nfs/jcevallos/datasets/iot23_full/'
CURR_FOLDER = f'{DATASET_ROOT}CTU-Honeypot-Capture-4-1/'
CURR_FILE = f"{CURR_FOLDER}2018-10-25-14-06-32-192.168.1.132.pcap"

In [231]:
# Load the labelled Zeek connection log of the current capture, keeping only
# the endpoint, packet-count and combined label columns.
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(
    CURR_FOLDER + 'bro/conn.log.labeled',
    usecols=[
        'id.orig_h', 'id.orig_p',
        'id.resp_h', 'id.resp_p',
        'orig_pkts', 'resp_pkts',
        'tunnel_parents   label   detailed-label',
    ],
)

# Preview the first rows.
bro_df.head()

Unnamed: 0_level_0,id.orig_h,id.orig_p,id.resp_h,id.resp_p,orig_pkts,resp_pkts,tunnel_parents label detailed-label
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-10-25 12:08:22.538640128,192.168.1.132,58687,216.239.35.4,123,1,1,- benign -
2018-10-25 12:06:37.400158976,192.168.1.132,1900,239.255.255.250,1900,24,0,- benign -
2018-10-25 12:09:45.734088960,192.168.1.132,32893,216.239.35.8,123,1,1,- benign -
2018-10-25 12:17:11.302625024,192.168.1.132,53395,2.16.60.82,443,5,3,- benign -
2018-10-25 12:17:11.265404928,192.168.1.132,52801,192.168.1.1,53,1,1,- benign -


In [232]:
# List the distinct values of the combined label column for this capture.
bro_df['tunnel_parents   label   detailed-label'].unique()

['-   benign   -']
Categories (1, object): ['-   benign   -']

In [233]:
# Copy every IPv4 packet sent by the device from the current capture into
# "hue_2.pcap", up to `packets_to_filter` packets.
input_pcap_file = CURR_FILE
output_pcap_file = CURR_FOLDER + "hue_2.pcap"

# Filtering criteria: packed IPv4 source address and maximum packet count.
source_ip = ipaddress.IPv4Address('192.168.1.132').packed
packets_to_filter = 1000000

filtered_packets = []
count = 0

with open(input_pcap_file, 'rb') as f:
    for timestamp, buf in dpkt.pcap.Reader(f):
        ip = dpkt.ethernet.Ethernet(buf).data
        # Skip anything that is not IPv4 traffic from the selected host.
        if not isinstance(ip, dpkt.ip.IP) or ip.src != source_ip:
            continue

        filtered_packets.append((timestamp, buf))
        count = len(filtered_packets)
        # Progress report every 1000 kept packets.
        if count % 1000 == 0:
            print(count)
        if count >= packets_to_filter:
            break

# Store the selection, keeping the original timestamps.
with open(output_pcap_file, 'wb') as f:
    pcap_writer = dpkt.pcap.Writer(f)
    for timestamp, buf in filtered_packets:
        pcap_writer.writepkt(buf, timestamp)

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
