# In [13]:
import warnings
from scapy.all import *
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from collections import deque
from datetime import datetime, timedelta

# Suppress warnings
# NOTE(review): with no category or module filter this silences every warning
# raised after this point, not only the noisy ones — confirm that is intended.
warnings.filterwarnings("ignore")

# Dictionary to store previous connections with their timestamps.
# Keys are the capture timestamps and values are the destination hosts:
# extract_features() inserts and prunes entries, calculate_count() reads them.
previous_connections = {}

# Mapping of numeric IP protocol numbers to encoded labels
# (1 = ICMP, 6 = TCP, 17 = UDP).
protocol_mapping = {1: 0, 6: 1, 17: 2}

# Encoded label used for any protocol number missing from protocol_mapping.
# It is an integer on purpose: the label ends up in a numeric feature column,
# and a string placeholder would turn that column into object dtype.
UNKNOWN_PROTOCOL_LABEL = -1

# Function to map protocol types to encoded labels
def map_protocol_type(protocol_type):
    """Translate an IP protocol number into its encoded integer label.

    Args:
        protocol_type: Protocol number taken from the IP header.

    Returns:
        The label stored in ``protocol_mapping`` for known protocols, or
        ``UNKNOWN_PROTOCOL_LABEL`` (-1) for every other protocol number, so
        the result is always an ``int``.
    """
    return protocol_mapping.get(protocol_type, UNKNOWN_PROTOCOL_LABEL)

# Count the recorded connections to one host inside the two-second window
def calculate_count(timestamp, dst_host):
    """Return how many recorded connections hit *dst_host* in the last 2 s.

    Args:
        timestamp: Reference time; the window is the two seconds before it.
        dst_host: Destination host to match against the recorded entries.

    Returns:
        The number of entries in the module-level ``previous_connections``
        whose timestamp is at or after ``timestamp - 2 s`` and whose
        destination host equals *dst_host*.
    """
    window_start = timestamp - timedelta(seconds=2)
    return sum(
        1
        for seen_at, seen_host in previous_connections.items()
        if seen_at >= window_start and seen_host == dst_host
    )

# Function to extract features from a packet
def extract_features(packet):
    """Build the single-row feature frame the classifier expects for one packet.

    Args:
        packet: A Scapy packet, as delivered to a ``sniff`` callback.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``protocol_type``,
        ``src_bytes``, ``dst_bytes``, ``hot``, ``logged_in``, ``count``,
        ``dst_host_srv_count``, ``dst_host_same_src_port_rate`` and
        ``dst_host_srv_rerror_rate`` (in that order), or ``None`` when the
        packet has no IP layer.

    Side effects:
        Records the packet in the module-level ``previous_connections`` dict
        and rebinds that name to a copy holding only the last two seconds.
    """
    global previous_connections

    # Guard clause: without an IP layer none of the features can be derived.
    if IP not in packet:
        return None

    ip_layer = packet[IP]
    protocol_label = map_protocol_type(ip_layer.proto)

    # Size of the whole packet and of everything below its outermost layer.
    src_bytes = len(packet)
    dst_bytes = len(packet.payload)

    has_tcp = bool(packet.haslayer(TCP))
    tcp_layer = packet[TCP] if has_tcp else None

    # Fetch the raw payload once. The keyword checks below search the bytes
    # themselves rather than str(bytes): the repr spells a TAB byte as the two
    # characters "\t", so TAB + "elnet" would falsely read as "telnet".
    payload = b""
    if has_tcp and packet.haslayer(Raw):
        payload = packet[Raw].load
        if isinstance(payload, str):  # defensive: normalise text loads to bytes
            payload = payload.encode("latin-1", "replace")

    # hot: an HTTP request verb in a payload sent to TCP port 80.
    hot = 0
    if has_tcp and tcp_layer.dport == 80 and (b"GET" in payload or b"POST" in payload):
        hot = 1

    # logged_in: the payload mentions SSH or Telnet (case-insensitive).
    lowered_payload = payload.lower()
    logged_in = 1 if (b"ssh" in lowered_payload or b"telnet" in lowered_payload) else 0

    # Per-packet indicators; each one is 0 for non-TCP packets.
    dst_host_srv_count = 1 if has_tcp else 0
    dst_host_same_src_port_count = 1 if has_tcp and tcp_layer.sport == tcp_layer.dport else 0
    # 0x01 is the TCP FIN bit.
    # NOTE(review): an "rerror" feature may be meant to track rejected
    # connections, i.e. RST (0x04) — confirm against the training data.
    dst_host_srv_rerror_count = 1 if has_tcp and tcp_layer.flags & 0x01 else 0

    dst_host = ip_layer.dst

    timestamp = datetime.now()
    # previous_connections is keyed by timestamp, so two packets stamped with
    # the same clock reading would overwrite each other and be undercounted.
    # Nudge the key forward until it is unused.
    while timestamp in previous_connections:
        timestamp += timedelta(microseconds=1)

    # Counted before the current packet is recorded, so it is not included.
    count = calculate_count(timestamp, dst_host)

    # Record the current connection, then drop everything older than 2 s.
    previous_connections[timestamp] = dst_host
    two_seconds_ago = timestamp - timedelta(seconds=2)
    previous_connections = {
        conn_time: conn_dst_host
        for conn_time, conn_dst_host in previous_connections.items()
        if conn_time >= two_seconds_ago
    }

    # Rates fall back to 0 when their denominator is 0.
    dst_host_same_src_port_rate = dst_host_same_src_port_count / count if count > 0 else 0
    dst_host_srv_rerror_rate = (
        dst_host_srv_rerror_count / dst_host_srv_count if dst_host_srv_count > 0 else 0
    )

    # Column names and order mirror selected_features used for training.
    return pd.DataFrame({
        'protocol_type': [protocol_label],
        'src_bytes': [src_bytes],
        'dst_bytes': [dst_bytes],
        'hot': [hot],
        'logged_in': [logged_in],
        'count': [count],
        'dst_host_srv_count': [dst_host_srv_count],
        'dst_host_same_src_port_rate': [dst_host_same_src_port_rate],
        'dst_host_srv_rerror_rate': [dst_host_srv_rerror_rate]
    })

# Load the dataset
# NOTE(review): absolute, machine-specific path — pd.read_csv raises
# FileNotFoundError wherever this file does not exist.
df = pd.read_csv("C:/Users/PRANAVYA DEEPTHI/Desktop/internship project/ds.csv")

# Select only the specified features.
# These names, in this order, are also the columns of the DataFrame returned
# by extract_features(), so live packets line up with the training data.
selected_features = ['protocol_type', 'src_bytes', 'dst_bytes', 'hot', 'logged_in', 'count', 'dst_host_srv_count',
                     'dst_host_same_src_port_rate', 'dst_host_srv_rerror_rate']
df = df[selected_features + ['class']]

# Apply label encoding to categorical columns.
# LabelEncoder numbers the distinct values in sorted order.
# NOTE(review): live packets are encoded through protocol_mapping instead, so
# both encodings must agree — presumably the column holds 'icmp'/'tcp'/'udp'
# (giving 0/1/2); confirm against the dataset.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['protocol_type'] = le.fit_transform(df['protocol_type'])

# Separate features (X) and target variable (y)
y = df['class']
X = df.drop('class', axis=1)

# Split the data into training and testing sets (80% train / 20% test,
# fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Create the XGBoost classifier (it is trained below, not loaded from disk).
# NOTE(review): 'binary:logistic' assumes 'class' has exactly two values —
# TODO confirm.
xgBoosterr = xgb.XGBClassifier(objective='binary:logistic', random_state=123)

# Fit the XGBoost model
xgBoosterr.fit(X_train, y_train)

# Compute accuracy on the test dataset
y_pred_test = xgBoosterr.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred_test)

# Print the test accuracy
print("Test Accuracy:", test_accuracy)

# Run the trained XGBoost classifier on a feature frame
def predict_with_xgboost(features):
    """Return the predictions of the module-level ``xgBoosterr`` for *features*.

    Args:
        features: Feature rows in the column layout the model was fitted on.

    Returns:
        Whatever ``xgBoosterr.predict`` returns for those rows.
    """
    return xgBoosterr.predict(features)

# Callback for each sniffed packet: extract features, classify, report
def process_packet(packet):
    """Classify one sniffed packet and print the outcome.

    Prints the predicted label when features could be extracted, otherwise
    prints a failure message. Returns nothing.

    Args:
        packet: The packet handed over by the sniffer.
    """
    extracted = extract_features(packet)

    # No features means the packet could not be analysed; report and stop.
    if extracted is None:
        print("Failed to extract features from packet.")
        return

    print("Predicted Label:", predict_with_xgboost(extracted))

# Sniff packets and process them
def start_sniffing(iface="Wi-Fi", count=10):
    """Capture packets on *iface* and pass each one to ``process_packet``.

    Args:
        iface: Name of the network interface to capture on. Defaults to
            "Wi-Fi", the interface that used to be hard-coded.
        count: Number of packets to capture before returning. Defaults to 10,
            the previously hard-coded value.
    """
    # store=False: every packet is handled in the callback and the captured
    # list was never used, so there is no reason to keep packets in memory.
    sniff(prn=process_packet, iface=iface, count=count, store=False)

# Start the capture only when this file is run directly, not when imported.
if __name__ == "__main__":
    start_sniffing()


# Output of the cell above:
# Test Accuracy: 0.9966263147449891
# Predicted Label: [0]
# Predicted Label: [0]
# Predicted Label: [0]
# Predicted Label: [0]
# Predicted Label: [0]
# Predicted Label: [0]
# Predicted Label: [0]
# Predicted Label: [0]
# Predicted Label: [0]
# Predicted Label: [0]
