# In [None]:
import os
from scapy.all import PcapReader, wrpcap

# Directional malicious IP addresses (Source and Destination must match)
# Maps a botnet label to a list of (source IP, destination IP) pairs. A packet
# receives the label only when its IP source equals the first element AND its
# IP destination equals the second; traffic in the opposite direction does not
# match a pair unless the reversed pair is listed as well.
directional_botnet_ips = {
    'IRC': [
        ('192.168.2.112', '131.202.243.84'),
        ('192.168.5.122', '198.164.30.2'),
        ('192.168.2.110', '192.168.5.122'),
        ('192.168.4.118', '192.168.5.122'),
        ('192.168.2.113', '192.168.5.122'),
        ('192.168.1.103', '192.168.5.122'),
        ('192.168.4.120', '192.168.5.122'),
        ('192.168.2.112', '192.168.2.110'),
        ('192.168.2.112', '192.168.4.120'),
        ('192.168.2.112', '192.168.1.103'),
        ('192.168.2.112', '192.168.2.113'),
        ('192.168.2.112', '192.168.4.118'),
        ('192.168.2.112', '192.168.2.109'),
        ('192.168.2.112', '192.168.2.105'),
        ('192.168.1.105', '192.168.5.122')
    ]
}

# Non-directional malicious IP addresses (Either Source or Destination matches)
# Maps a botnet label to a list of host IPs. A packet receives the label when
# its IP source or its IP destination appears in the list. These entries are
# consulted only after no directional pair matched, and labels are tried in
# the order written here, so the first label containing one of the packet's
# addresses wins. Each label also becomes the output file name (<label>.pcap).
non_directional_botnet_ips = {
    'Neris': ['147.32.84.180'],
    'RBot': ['147.32.84.170'],
    'Menti': ['147.32.84.150'],
    'Sogou': ['147.32.84.140'],
    'Murlo': ['147.32.84.130'],
    'Virut': ['147.32.84.160'],
    'IRCbot_and_black_hole1': ['10.0.2.15'],
    'Black_hole_2': ['192.168.106.141'],
    'Black_hole_3': ['192.168.106.131'],
    'TBot': ['172.16.253.130', '172.16.253.131', '172.16.253.129', '172.16.253.240'],
    'Weasel': ['74.78.117.238', '158.65.110.24'],
    'Zeus': ['192.168.3.35', '192.168.3.25', '192.168.3.65', '172.29.0.116'],
    'Osx_trojan': ['172.29.0.109'],
    'Zero_access': ['172.16.253.132', '192.168.248.165'],
    'Smoke_bot': ['10.37.130.4']
}

# Function to check if packet is malicious and identify its botnet type
def get_botnet_type(packet):
    """Return the botnet label for *packet*, or ``None`` if it matches none.

    Packets without an IP layer are never labelled. For IP packets the
    directional table is consulted first (exact source/destination pair),
    then the non-directional table (source or destination listed). Within
    each table the first matching label, in table order, is returned.

    Args:
        packet: Object exposing ``haslayer('IP')`` and ``packet['IP']`` with
            ``src`` and ``dst`` attributes.

    Returns:
        The matching botnet label, or ``None`` when nothing matches.
    """
    if not packet.haslayer('IP'):
        return None

    ip_layer = packet['IP']
    endpoints = (ip_layer.src, ip_layer.dst)

    # Exact (source, destination) flows take precedence over single hosts.
    for label, flows in directional_botnet_ips.items():
        if endpoints in flows:
            return label

    # Otherwise either end of the conversation being a known host is enough.
    for label, hosts in non_directional_botnet_ips.items():
        if any(address in hosts for address in endpoints):
            return label

    return None

# Function to clean directories
def clean_directories(dirs):
    """Delete every regular file directly inside each directory in *dirs*.

    Subdirectories and their contents are left untouched. Cleanup is
    best-effort: a directory that cannot be listed, or a file that cannot be
    removed, is reported on stdout and skipped, so a single failure does not
    stop the remaining directories from being cleaned.

    Args:
        dirs: Iterable of directory paths to empty.
    """
    for directory in dirs:
        try:
            entries = os.listdir(directory)
        except OSError as e:
            # Missing/unreadable directory: report it and move on to the next.
            print(f"Error: {e}")
            continue

        for entry in entries:
            entry_path = os.path.join(directory, entry)
            try:
                if os.path.isfile(entry_path):
                    os.unlink(entry_path)
            except OSError as e:
                print(f"Error: {e}")

# Root folder that holds input.pcap; the split output goes into subfolders.
pcap_path = "../pcap/train"

# Make sure both output folders exist, then remove files left over from a
# previous run (packets are appended to the output files, so stale files
# would otherwise be mixed into the new output).
output_dirs = [f'{pcap_path}/malicious', f'{pcap_path}/benign']
for output_dir in output_dirs:
    os.makedirs(output_dir, exist_ok=True)
clean_directories(output_dirs)

# In-memory buffers: one list per botnet label, plus one for all other packets.
malicious_packets = {
    label: []
    for label in (*directional_botnet_ips, *non_directional_botnet_ips)
}
benign_packets = []

# Function to dump packets to files
def dump_packets():
    """Append all buffered packets to their pcap files and empty the buffers.

    Every non-empty per-botnet buffer in ``malicious_packets`` is appended to
    ``{pcap_path}/malicious/<botnet>.pcap`` and then replaced by a fresh empty
    list. A non-empty ``benign_packets`` buffer is appended to
    ``{pcap_path}/benign/benign.pcap`` and cleared in place. Empty buffers
    cause no file writes.
    """
    for label in malicious_packets:
        buffered = malicious_packets[label]
        if not buffered:
            continue
        wrpcap(f'{pcap_path}/malicious/{label}.pcap', buffered, append=True)
        # Only an existing key is rebound, so the dict size is unchanged
        # while it is being iterated.
        malicious_packets[label] = []

    if not benign_packets:
        return
    wrpcap(f'{pcap_path}/benign/benign.pcap', benign_packets, append=True)
    benign_packets.clear()

# Stream the capture in fixed-size chunks so that only one chunk of packets
# is held in memory between two dumps.
chunk_size = 10000
packet_count = 0

with PcapReader(f'{pcap_path}/input.pcap') as pcap_reader:
    packets = pcap_reader.read_all(count=chunk_size)
    # An empty chunk means the reader has reached the end of the capture.
    while packets:
        for packet in packets:
            packet_count += 1
            botnet_type = get_botnet_type(packet)
            # Labelled packets go to their botnet buffer, the rest to benign.
            target = malicious_packets[botnet_type] if botnet_type else benign_packets
            target.append(packet)

        # Flush the buffers to disk before reading the next chunk.
        dump_packets()
        print(f"Processed {packet_count} packets so far...")
        packets = pcap_reader.read_all(count=chunk_size)

# Safety net: the loop already dumps after every chunk, so the buffers are
# normally empty here and this call writes nothing.
dump_packets()

print(f"Processed {packet_count} packets in total.")