In [None]:
import scapy.all as scapy
from scapy.layers.inet import IP, TCP, UDP
import pandas as pd
from sklearn.ensemble import IsolationForest
import numpy as np

# Module-level buffer of per-packet feature dicts. packet_callback appends one
# entry per IP packet and clears the list after each batch has been analysed.
features_list = []

# Packet capture callback function
def packet_callback(packet, batch_size=100):
    """Record one sniffed packet and run anomaly detection on full batches.

    Packets without an IP layer are ignored.  For IP packets the source and
    destination addresses, the IP protocol number and ``len(packet)`` are
    appended to the module-level ``features_list``.  Once the buffer holds
    at least ``batch_size`` entries it is preprocessed, scored and emptied.

    Args:
        packet: A scapy packet, as handed to the ``prn`` callback of
            ``scapy.sniff``.
        batch_size: Number of buffered packets that triggers a detection
            run.  Defaults to 100, the value that used to be hard-coded.
    """
    if not packet.haslayer(IP):
        return

    ip_layer = packet[IP]
    ip_src = ip_layer.src
    ip_dst = ip_layer.dst
    packet_size = len(packet)

    # Append this packet's features to the shared batch buffer.
    features_list.append(
        {
            "src_ip": ip_src,
            "dst_ip": ip_dst,
            "protocol": ip_layer.proto,
            "packet_size": packet_size,
        }
    )

    print(f"Captured packet from {ip_src} to {ip_dst} with size {packet_size} bytes.")

    # Not enough data yet for a detection run.
    if len(features_list) < batch_size:
        return

    try:
        df = preprocess_packet_data(features_list)
        detect_anomalies(df)
    finally:
        # Always drop the batch, even when processing raises.  Otherwise the
        # buffer would stay above the threshold and every later packet would
        # re-run detection on the same, ever-growing data.
        features_list.clear()

# Function to preprocess the packet data into a structured format
def preprocess_packet_data(features_list):
    """Turn a batch of packet feature dicts into a model-ready DataFrame.

    Args:
        features_list: Sequence of dicts with the keys ``src_ip``,
            ``dst_ip``, ``protocol`` (IP protocol number) and
            ``packet_size``.

    Returns:
        A new DataFrame with one row per packet where

        * ``packet_size`` is divided by the largest size in this batch, so
          the scale is relative to the batch, not global;
        * ``protocol`` is replaced by an ordinal code: 1 for TCP (6),
          2 for UDP (17) and 0 for anything else.  This is a single coded
          column, not a one-hot encoding.

        An empty input yields an empty frame that still has the four
        expected columns.
    """
    if not features_list:
        # pd.DataFrame([]) has no columns at all, which would make the column
        # accesses below raise KeyError; return a well-formed empty frame.
        return pd.DataFrame(columns=["src_ip", "dst_ip", "protocol", "packet_size"])

    df = pd.DataFrame(features_list)

    # Scale packet sizes into [0, 1] relative to the largest packet in the
    # batch.  Guard the degenerate all-zero case: 0 / 0 would produce NaN.
    largest = df["packet_size"].max()
    if largest > 0:
        df["packet_size"] = df["packet_size"] / largest
    else:
        df["packet_size"] = 0.0

    # Ordinal protocol code: TCP (6) -> 1, UDP (17) -> 2, everything else -> 0.
    protocol_codes = {6: 1, 17: 2}
    df["protocol"] = df["protocol"].map(protocol_codes).fillna(0).astype("int64")

    return df

# Anomaly detection using Isolation Forest
def detect_anomalies(df):
    """Flag outlier packets in *df* with an Isolation Forest and report them.

    A fresh ``IsolationForest`` (contamination 0.05) is fitted on the
    ``packet_size`` and ``protocol`` columns.  The prediction for every row
    is written back into *df* as an ``anomaly`` column, rows labelled ``-1``
    are treated as anomalous, and a summary is printed to stdout.

    Args:
        df: DataFrame with at least ``packet_size`` and ``protocol``
            columns.  It is modified in place.
    """
    # NOTE(review): the model is trained and evaluated on the same batch with
    # a fixed contamination, so some rows are presumably flagged on almost
    # every run -- confirm this is the intended behaviour.
    forest = IsolationForest(contamination=0.05)
    model_input = df[['packet_size', 'protocol']]

    # Train on the batch and label each row in one step.
    df['anomaly'] = forest.fit_predict(model_input)

    flagged = df.loc[df['anomaly'] == -1]

    if flagged.empty:
        print("\nNo anomalies detected in the network traffic.")
        return

    print("\n*** Anomaly Detected! ***")
    print(flagged)
    print("Anomalies detected in the network traffic.")

# Function to start capturing network traffic
def start_packet_capture():
    """Announce the capture and hand every sniffed packet to packet_callback.

    Packets are passed to the callback only; ``store=0`` asks scapy not to
    keep them.
    """
    print("Starting network packet capture...")
    scapy.sniff(
        store=0,
        prn=packet_callback,
    )

# Start the packet capture process. This runs at module top level, so capture
# begins as soon as the file is executed or imported.
# NOTE(review): sniff() is invoked without a count or timeout, so this call
# presumably does not return until it is interrupted -- confirm.
start_packet_capture()