In [2]:
from scapy.all import sniff
from scapy.layers.inet import IP, TCP, UDP
import pandas as pd
import numpy as np
from datetime import datetime
import logging
from collections import defaultdict
from typing import Dict, List, Tuple

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class FlowStats:
    def __init__(self):
        self.start_time = None
        self.last_time = None
        self.fwd_packets: List[Dict] = []
        self.bwd_packets: List[Dict] = []
        self.flow_iats: List[float] = []
        self.fwd_iats: List[float] = []
        self.bwd_iats: List[float] = []
        
        # TCP Flag counters
        self.fin_count = 0
        self.syn_count = 0
        self.rst_count = 0
        self.psh_count = 0
        self.ack_count = 0
        self.urg_count = 0
        self.cwe_count = 0
        self.ece_count = 0
        
        # Window sizes
        self.init_win_bytes_forward = None
        self.init_win_bytes_backward = None
        
        # Active/Idle time tracking
        self.active_times: List[float] = []
        self.idle_times: List[float] = []
        self.last_active_time = None
        self.active_threshold = 2.0  # seconds

    def update_tcp_flags(self, packet):
        if TCP in packet:
            flags = packet[TCP].flags
            if flags & 0x01: self.fin_count += 1  # FIN
            if flags & 0x02: self.syn_count += 1  # SYN
            if flags & 0x04: self.rst_count += 1  # RST
            if flags & 0x08: self.psh_count += 1  # PSH
            if flags & 0x10: self.ack_count += 1  # ACK
            if flags & 0x20: self.urg_count += 1  # URG
            if flags & 0x40: self.cwe_count += 1  # CWE
            if flags & 0x80: self.ece_count += 1  # ECE

    def add_packet(self, packet, is_forward: bool):
        current_time = datetime.now().timestamp()
        
        if self.start_time is None:
            self.start_time = current_time
            if TCP in packet:
                if is_forward:
                    self.init_win_bytes_forward = packet[TCP].window
                else:
                    self.init_win_bytes_backward = packet[TCP].window
        
        # Calculate IAT
        if self.last_time is not None:
            iat = current_time - self.last_time
            self.flow_iats.append(iat)
            if is_forward:
                self.fwd_iats.append(iat)
            else:
                self.bwd_iats.append(iat)
        
        packet_data = {
            'time': current_time,
            'length': len(packet),
            'header_length': len(packet[IP]),
        }
        
        if is_forward:
            self.fwd_packets.append(packet_data)
        else:
            self.bwd_packets.append(packet_data)
        
        self.last_time = current_time
        self.update_tcp_flags(packet)
        self.update_active_idle_times(current_time)

    def update_active_idle_times(self, current_time):
        if self.last_active_time is not None:
            idle_time = current_time - self.last_active_time
            if idle_time > self.active_threshold:
                self.idle_times.append(idle_time)
                self.active_times.append(self.active_threshold)
        self.last_active_time = current_time

    def get_stats(self) -> Dict:
        duration = self.last_time - self.start_time if self.last_time else 0
        
        # Calculate packet length statistics
        fwd_lengths = [p['length'] for p in self.fwd_packets]
        bwd_lengths = [p['length'] for p in self.bwd_packets]
        all_lengths = fwd_lengths + bwd_lengths
        
        stats = {
            'Flow Duration': duration,
            'Total Fwd Packets': len(self.fwd_packets),
            'Total Backward Packets': len(self.bwd_packets),
            'Total Length of Fwd Packets': sum(fwd_lengths),
            'Total Length of Bwd Packets': sum(bwd_lengths),
            
            # Packet length statistics
            'Fwd Packet Length Max': max(fwd_lengths) if fwd_lengths else 0,
            'Fwd Packet Length Min': min(fwd_lengths) if fwd_lengths else 0,
            'Fwd Packet Length Mean': np.mean(fwd_lengths) if fwd_lengths else 0,
            'Fwd Packet Length Std': np.std(fwd_lengths) if fwd_lengths else 0,
            'Bwd Packet Length Max': max(bwd_lengths) if bwd_lengths else 0,
            'Bwd Packet Length Min': min(bwd_lengths) if bwd_lengths else 0,
            'Bwd Packet Length Mean': np.mean(bwd_lengths) if bwd_lengths else 0,
            'Bwd Packet Length Std': np.std(bwd_lengths) if bwd_lengths else 0,
            
            # Flow rates
            'Flow Bytes/s': sum(all_lengths) / duration if duration else 0,
            'Flow Packets/s': len(all_lengths) / duration if duration else 0,
            
            # IAT statistics
            'Flow IAT Mean': np.mean(self.flow_iats) if self.flow_iats else 0,
            'Flow IAT Std': np.std(self.flow_iats) if self.flow_iats else 0,
            'Flow IAT Max': max(self.flow_iats) if self.flow_iats else 0,
            'Flow IAT Min': min(self.flow_iats) if self.flow_iats else 0,
            
            # Forward IAT statistics
            'Fwd IAT Total': sum(self.fwd_iats),
            'Fwd IAT Mean': np.mean(self.fwd_iats) if self.fwd_iats else 0,
            'Fwd IAT Std': np.std(self.fwd_iats) if self.fwd_iats else 0,
            'Fwd IAT Max': max(self.fwd_iats) if self.fwd_iats else 0,
            'Fwd IAT Min': min(self.fwd_iats) if self.fwd_iats else 0,
            
            # Backward IAT statistics
            'Bwd IAT Total': sum(self.bwd_iats),
            'Bwd IAT Mean': np.mean(self.bwd_iats) if self.bwd_iats else 0,
            'Bwd IAT Std': np.std(self.bwd_iats) if self.bwd_iats else 0,
            'Bwd IAT Max': max(self.bwd_iats) if self.bwd_iats else 0,
            'Bwd IAT Min': min(self.bwd_iats) if self.bwd_iats else 0,
            
            # Flag counts
            'FIN Flag Count': self.fin_count,
            'SYN Flag Count': self.syn_count,
            'RST Flag Count': self.rst_count,
            'PSH Flag Count': self.psh_count,
            'ACK Flag Count': self.ack_count,
            'URG Flag Count': self.urg_count,
            'CWE Flag Count': self.cwe_count,
            'ECE Flag Count': self.ece_count,
            
            # Header lengths
            'Fwd Header Length': sum(p['header_length'] for p in self.fwd_packets),
            'Bwd Header Length': sum(p['header_length'] for p in self.bwd_packets),
            
            # Window sizes
            'Init_Win_bytes_forward': self.init_win_bytes_forward or 0,
            'Init_Win_bytes_backward': self.init_win_bytes_backward or 0,
            
            # Active/Idle times
            'Active Mean': np.mean(self.active_times) if self.active_times else 0,
            'Active Std': np.std(self.active_times) if self.active_times else 0,
            'Active Max': max(self.active_times) if self.active_times else 0,
            'Active Min': min(self.active_times) if self.active_times else 0,
            'Idle Mean': np.mean(self.idle_times) if self.idle_times else 0,
            'Idle Std': np.std(self.idle_times) if self.idle_times else 0,
            'Idle Max': max(self.idle_times) if self.idle_times else 0,
            'Idle Min': min(self.idle_times) if self.idle_times else 0,
        }
        
        return stats

# Dictionary to store flow statistics
flows: Dict[Tuple, FlowStats] = defaultdict(FlowStats)

def get_flow_key(packet) -> Tuple:
    ip = packet[IP]
    if TCP in packet:
        proto = TCP
    elif UDP in packet:
        proto = UDP
    else:
        return None
    
    src_port = packet[proto].sport
    dst_port = packet[proto].dport
    
    # Create bidirectional flow key
    if f"{ip.src}:{src_port}" < f"{ip.dst}:{dst_port}":
        return (ip.src, ip.dst, src_port, dst_port, proto.name)
    return (ip.dst, ip.src, dst_port, src_port, proto.name)

def is_forward_direction(packet, flow_key) -> bool:
    ip = packet[IP]
    proto = TCP if TCP in packet else UDP
    return (ip.src == flow_key[0] and 
            ip.dst == flow_key[1] and 
            packet[proto].sport == flow_key[2])

def packet_handler(packet):
    try:
        if IP in packet and (TCP in packet or UDP in packet):
            flow_key = get_flow_key(packet)
            if flow_key:
                is_forward = is_forward_direction(packet, flow_key)
                flows[flow_key].add_packet(packet, is_forward)
                logger.info(f"Processed packet for flow: {flow_key}")
    except Exception as e:
        logger.error(f"Error processing packet: {e}")

def process_captured_data() -> pd.DataFrame:
    try:
        if not flows:
            raise ValueError("No flows captured.")
        
        flow_data = []
        for flow_key, stats in flows.items():
            flow_dict = {
                'Source IP': flow_key[0],
                'Destination IP': flow_key[1],
                'Source Port': flow_key[2],
                'Destination Port': flow_key[3],
                'Protocol': flow_key[4],
                **stats.get_stats()
            }
            flow_data.append(flow_dict)
        
        df = pd.DataFrame(flow_data)
        logger.info(f"Processed {len(df)} flows.")
        return df
    except Exception as e:
        logger.error(f"Error processing flow data: {e}")
        return pd.DataFrame()

def main():
    interface = 'Wi-Fi'  # Replace with your network interface
    packet_count = 1000  # Increased packet count for better flow statistics
    
    try:
        logger.info(f"Starting capture on interface {interface}")
        sniff(iface=interface, prn=packet_handler, count=packet_count)
        
        flow_data = process_captured_data()
        if not flow_data.empty:
            output_path = "flowdataset.csv"
            flow_data.to_csv(output_path, index=False)
            logger.info(f"Flow statistics saved to {output_path}")
        else:
            logger.warning("No flow data to analyze.")
    except Exception as e:
        logger.error(f"Error in main execution: {e}")

if __name__ == "__main__":
    main()


2024-12-22 14:00:37,022 - INFO - Starting capture on interface Wi-Fi
2024-12-22 14:00:37,122 - INFO - Processed packet for flow: ('10.20.16.57', '255.255.255.255', 68, 67, <member 'name' of 'Packet' objects>)
2024-12-22 14:00:37,257 - INFO - Processed packet for flow: ('10.20.32.198', '54.250.81.247', 50097, 443, <member 'name' of 'Packet' objects>)
2024-12-22 14:00:37,290 - INFO - Processed packet for flow: ('10.20.32.198', '13.107.226.254', 50221, 443, <member 'name' of 'Packet' objects>)
2024-12-22 14:00:37,297 - INFO - Processed packet for flow: ('10.20.32.198', '13.107.226.254', 50221, 443, <member 'name' of 'Packet' objects>)
2024-12-22 14:00:37,369 - INFO - Processed packet for flow: ('10.20.32.198', '54.250.81.247', 50097, 443, <member 'name' of 'Packet' objects>)
2024-12-22 14:00:37,746 - INFO - Processed packet for flow: ('0.0.0.0', '255.255.255.255', 68, 67, <member 'name' of 'Packet' objects>)
2024-12-22 14:00:38,053 - INFO - Processed packet for flow: ('10.20.29.180', '255