In [11]:
from scapy.all import sniff, IP, TCP, UDP
import pandas as pd
import numpy as np
from collections import defaultdict

flows = defaultdict(lambda: {
    'packets': [],
    'timestamps': [],
    'fwd_packets': 0,
    'bwd_packets': 0,
    'fwd_bytes': 0,
    'bwd_bytes': 0,
    'fwd_packet_lengths': [],
    'bwd_packet_lengths': [],
    'fwd_iat': [],
    'bwd_iat': [],
    'fwd_flags': defaultdict(int),
    'bwd_flags': defaultdict(int),
    'fwd_header_length': 0,
    'bwd_header_length': 0,
    'start_time': None,
    'end_time': None,
    'protocol': None,
    'src_ip': None,
    'dst_ip': None,
    'src_port': None,
    'dst_port': None,
})

def process_packet(packet):
    if IP in packet:
        src_ip = packet[IP].src
        dst_ip = packet[IP].dst
        protocol = packet[IP].proto
        src_port = None
        dst_port = None

        if TCP in packet:
            src_port = packet[TCP].sport
            dst_port = packet[TCP].dport
            flags = packet[TCP].flags
        elif UDP in packet:
            src_port = packet[UDP].sport
            dst_port = packet[UDP].dport
            flags = None

        flow_key = (src_ip, dst_ip, src_port, dst_port, protocol)

        flows[flow_key]['packets'].append(packet)
        flows[flow_key]['timestamps'].append(packet.time)
        flows[flow_key]['protocol'] = protocol
        flows[flow_key]['src_ip'] = src_ip
        flows[flow_key]['dst_ip'] = dst_ip
        flows[flow_key]['src_port'] = src_port
        flows[flow_key]['dst_port'] = dst_port

        if flows[flow_key]['start_time'] is None:
            flows[flow_key]['start_time'] = packet.time
        flows[flow_key]['end_time'] = packet.time

        packet_length = len(packet)
        if src_ip == flow_key[0]: 
            flows[flow_key]['fwd_packets'] += 1
            flows[flow_key]['fwd_bytes'] += packet_length
            flows[flow_key]['fwd_packet_lengths'].append(packet_length)
            if len(flows[flow_key]['fwd_packet_lengths']) > 1:
                flows[flow_key]['fwd_iat'].append(packet.time - flows[flow_key]['timestamps'][-2])
            if TCP in packet:
                for flag in ['F', 'S', 'R', 'P', 'A', 'U']:
                    if flags & getattr(TCP, flag + '_FLAG'):
                        flows[flow_key]['fwd_flags'][flag] += 1
            flows[flow_key]['fwd_header_length'] += len(packet[IP]) - len(packet[IP].payload)
        else: 
            flows[flow_key]['bwd_packets'] += 1
            flows[flow_key]['bwd_bytes'] += packet_length
            flows[flow_key]['bwd_packet_lengths'].append(packet_length)
            if len(flows[flow_key]['bwd_packet_lengths']) > 1:
                flows[flow_key]['bwd_iat'].append(packet.time - flows[flow_key]['timestamps'][-2])
            if TCP in packet:
                for flag in ['F', 'S', 'R', 'P', 'A', 'U']:
                    if flags & getattr(TCP, flag + '_FLAG'):
                        flows[flow_key]['bwd_flags'][flag] += 1
            flows[flow_key]['bwd_header_length'] += len(packet[IP]) - len(packet[IP].payload)

print("Capturing packets for 10 seconds...")
sniff(prn=process_packet, timeout=10)

flow_data = []
for flow_key, flow_info in flows.items():
    if len(flow_info['packets']) == 0:
        continue

    flow_duration = flow_info['end_time'] - flow_info['start_time']
    total_fwd_packets = flow_info['fwd_packets']
    total_bwd_packets = flow_info['bwd_packets']
    total_fwd_bytes = flow_info['fwd_bytes']
    total_bwd_bytes = flow_info['bwd_bytes']

    fwd_packet_lengths = flow_info['fwd_packet_lengths']
    bwd_packet_lengths = flow_info['bwd_packet_lengths']
    fwd_packet_length_max = max(fwd_packet_lengths) if fwd_packet_lengths else 0
    fwd_packet_length_min = min(fwd_packet_lengths) if fwd_packet_lengths else 0
    fwd_packet_length_mean = np.mean(fwd_packet_lengths) if fwd_packet_lengths else 0
    fwd_packet_length_std = np.std(fwd_packet_lengths) if fwd_packet_lengths else 0
    fwd_packet_length_var = np.var(fwd_packet_lengths) if fwd_packet_lengths else 0

    bwd_packet_length_max = max(bwd_packet_lengths) if bwd_packet_lengths else 0
    bwd_packet_length_min = min(bwd_packet_lengths) if bwd_packet_lengths else 0
    bwd_packet_length_mean = np.mean(bwd_packet_lengths) if bwd_packet_lengths else 0
    bwd_packet_length_std = np.std(bwd_packet_lengths) if bwd_packet_lengths else 0
    bwd_packet_length_var = np.var(bwd_packet_lengths) if bwd_packet_lengths else 0

    fwd_iat = flow_info['fwd_iat']
    bwd_iat = flow_info['bwd_iat']
    fwd_iat_mean = np.mean(fwd_iat) if fwd_iat else 0
    fwd_iat_std = np.std(fwd_iat) if fwd_iat else 0
    fwd_iat_max = max(fwd_iat) if fwd_iat else 0
    fwd_iat_min = min(fwd_iat) if fwd_iat else 0

    bwd_iat_mean = np.mean(bwd_iat) if bwd_iat else 0
    bwd_iat_std = np.std(bwd_iat) if bwd_iat else 0
    bwd_iat_max = max(bwd_iat) if bwd_iat else 0
    bwd_iat_min = min(bwd_iat) if bwd_iat else 0

    flow_bytes_per_sec = (total_fwd_bytes + total_bwd_bytes) / flow_duration if flow_duration > 0 else 0
    flow_packets_per_sec = (total_fwd_packets + total_bwd_packets) / flow_duration if flow_duration > 0 else 0

    fin_flag_count = flow_info['fwd_flags']['F'] + flow_info['bwd_flags']['F']
    syn_flag_count = flow_info['fwd_flags']['S'] + flow_info['bwd_flags']['S']
    rst_flag_count = flow_info['fwd_flags']['R'] + flow_info['bwd_flags']['R']
    psh_flag_count = flow_info['fwd_flags']['P'] + flow_info['bwd_flags']['P']
    ack_flag_count = flow_info['fwd_flags']['A'] + flow_info['bwd_flags']['A']
    urg_flag_count = flow_info['fwd_flags']['U'] + flow_info['bwd_flags']['U']

    fwd_header_length = flow_info['fwd_header_length']
    bwd_header_length = flow_info['bwd_header_length']

    flow_data.append({
        'src_ip': flow_info['src_ip'],
        'dst_ip': flow_info['dst_ip'],
        'protocol': flow_info['protocol'],
        'flow_duration': flow_duration,
        'total_fwd_packets': total_fwd_packets,
        'total_bwd_packets': total_bwd_packets,
        'fwd_packets_length_total': total_fwd_bytes,
        'bwd_packets_length_total': total_bwd_bytes,
        'fwd_packet_length_max': fwd_packet_length_max,
        'fwd_packet_length_min': fwd_packet_length_min,
        'fwd_packet_length_mean': fwd_packet_length_mean,
        'fwd_packet_length_std': fwd_packet_length_std,
        'fwd_packet_length_var': fwd_packet_length_var,
        'bwd_packet_length_max': bwd_packet_length_max,
        'bwd_packet_length_min': bwd_packet_length_min,
        'bwd_packet_length_mean': bwd_packet_length_mean,
        'bwd_packet_length_std': bwd_packet_length_std,
        'bwd_packet_length_var': bwd_packet_length_var,
        'flow_bytes_per_sec': flow_bytes_per_sec,
        'flow_packets_per_sec': flow_packets_per_sec,
        'fwd_iat_mean': fwd_iat_mean,
        'fwd_iat_std': fwd_iat_std,
        'fwd_iat_max': fwd_iat_max,
        'fwd_iat_min': fwd_iat_min,
        'bwd_iat_mean': bwd_iat_mean,
        'bwd_iat_std': bwd_iat_std,
        'bwd_iat_max': bwd_iat_max,
        'bwd_iat_min': bwd_iat_min,
        'fin_flag_count': fin_flag_count,
        'syn_flag_count': syn_flag_count,
        'rst_flag_count': rst_flag_count,
        'psh_flag_count': psh_flag_count,
        'ack_flag_count': ack_flag_count,
        'urg_flag_count': urg_flag_count,
        'fwd_header_length': fwd_header_length,
        'bwd_header_length': bwd_header_length,
    })

flow_df = pd.DataFrame(flow_data)
flow_df.head()

Capturing packets for 10 seconds...




Unnamed: 0,src_ip,dst_ip,protocol,flow_duration,total_fwd_packets,total_bwd_packets,fwd_packets_length_total,bwd_packets_length_total,fwd_packet_length_max,fwd_packet_length_min,...,bwd_iat_max,bwd_iat_min,fin_flag_count,syn_flag_count,rst_flag_count,psh_flag_count,ack_flag_count,urg_flag_count,fwd_header_length,bwd_header_length
0,140.233.190.126,192.168.1.73,17,0.0,1,0,325,0,325,325,...,0,0,0,0,0,0,0,0,20,0
1,192.168.1.73,13.67.9.5,6,0.0,1,0,55,0,55,55,...,0,0,0,0,0,0,0,0,0,0
