# Statistics packets flow

In [101]:
import dpkt
import socket
from collections import defaultdict

def load_pcap(file_name):
    ip_data = defaultdict(list)
    start_time = None

    with open(file_name, 'rb') as f:
        pcap = dpkt.pcap.Reader(f)
        for timestamp, buf in pcap:
            eth = dpkt.ethernet.Ethernet(buf)
            if isinstance(eth.data, dpkt.ip.IP):
                ip = eth.data
                src_ip = socket.inet_ntoa(ip.src)
                dst_ip = socket.inet_ntoa(ip.dst)

                # Initialize start_time for the first packet
                if start_time is None:
                    start_time = timestamp

                time = timestamp - start_time

                ip_data[(src_ip, dst_ip)].append(time)

    return ip_data

import plotly.graph_objs as go

def filter_ip_pair_data(ip_data, ip_pair):
    # Check if the IP pair exists in either direction
    if ip_pair in ip_data:
        return ip_data[ip_pair]
    elif (ip_pair[1], ip_pair[0]) in ip_data:
        return ip_data[(ip_pair[1], ip_pair[0])]
    else:
        return defaultdict(int)  # Return empty if no data found

In [102]:
# list files in current directory names "packets-*.pcap"
import os
pcap_files = [f for f in os.listdir('.') if os.path.isfile(f) and f.startswith('packets-') and f.endswith('.pcap')]

ip_datas = {}

# load_pcap for each file
for pcap_file in pcap_files:
    print("Loading", pcap_file)
    ip_data = load_pcap(pcap_file)
    ip_datas[pcap_file] = ip_data

Loading packets-dnstt-20240910-154444.pcap
Loading packets-iodine-20240910-155333.pcap
Loading packets-dns2tcp-20240910-154930.pcap
Loading packets-dnscat2-20240910-155449.pcap


In [103]:
from ipywidgets import Output
import plotly.graph_objs as go

# Function to compute the global bin size based on visible data
def compute_bin_size_dynamic(protocol_packets, visible_traces, desired_bins=100):
    # Find the min and max time across only visible traces
    max_time = float('-inf')

    for protocol, packet_time in protocol_packets.items():
        if visible_traces[protocol]:  # Only consider visible traces
            max_time = max(max_time, max(packet_time))

    # Compute the bin size based on the total range and the number of desired bins
    bin_size = max_time / desired_bins
    return bin_size, 0, max_time

def plot_packets(protocol_packets):
    # Dictionary to track visible state of each protocol
    visible_traces = defaultdict(lambda: True)
    
    # Create the Plotly figure
    fig = go.Figure()

    # Add traces for each protocol
    for protocol, packet_time in protocol_packets.items():
        fig.add_trace(go.Histogram(
            x=packet_time,
            name=protocol,
            xbins=dict(start=min(packet_time), size=1, end=max(packet_time)),
            visible=True
        ))

    # Update layout with titles and legend
    fig.update_layout(
        title=f"Packets per Second Histogram",
        xaxis_title="Time (seconds)",
        yaxis_title="Packet Count",
        showlegend=True,
        barmode='overlay',
    )

    # Display the figure
    out = Output()
    display(out)
    
    with out:
        fig.show()

    # Define the callback for dynamic legend click event
    def update_bins(trace, points, selector):
        # Toggle visibility of the clicked trace
        visible_traces[trace['name']] = not trace.visible
        
        # Recompute the bin size based on visible traces
        bin_size, min_time, max_time = compute_bin_size_dynamic(protocol_packets, visible_traces)
        print(bin_size, min_time, max_time)

        # Update all traces with the new bin size and global start/end times
        for i, protocol in enumerate(protocol_packets.keys()):
            fig.data[i].xbins.start = min_time
            fig.data[i].xbins.end = max_time
            fig.data[i].xbins.size = bin_size
        
        # Refresh the figure
        fig.update_traces()
    
    # Attach the callback to legend click events
    fig.data[0].on_click(update_bins)

In [104]:
# Now filter and plot for a specific IP pair
ip_pair = ('172.22.0.3', '172.22.0.2')
filtered_ip_data = {}
for pcap_file, ip_data in ip_datas.items():
    filtered_ip_data[pcap_file] = filter_ip_pair_data(ip_data, ip_pair)


In [105]:
plot_packets(filtered_ip_data)

Output()