In [5]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import time
from collections import Counter
import math
try:
    from scapy.all import sniff, IP, TCP, UDP, get_if_list
except ImportError as e:
    print(f'Scapy import error: {e}. Ensure Scapy is installed (`pip install scapy`) and no file is named \'scapy.py\'.')
    exit(1)

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    print('Class Distribution (is_attack):')
    print(df['is_attack'].value_counts(normalize=True))
    features = ['time_delta', 'pps', 'length', 'src_ip_entropy', 'syn_flag', 'ack_flag']
    X = df[features]
    y = df['is_attack']
    X = X.fillna(X.mean())
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, features, scaler

def train_base_models(X_train, y_train, X_test):
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    xgb_model = xgb.XGBClassifier(eval_metric='logloss', random_state=42)
    rf_model.fit(X_train, y_train)
    xgb_model.fit(X_train, y_train)
    rf_pred_train = rf_model.predict_proba(X_train)[:, 1]
    xgb_pred_train = xgb_model.predict_proba(X_train)[:, 1]
    rf_pred_test = rf_model.predict_proba(X_test)[:, 1]
    xgb_pred_test = xgb_model.predict_proba(X_test)[:, 1]
    meta_features_train = np.column_stack((rf_pred_train, xgb_pred_train))
    meta_features_test = np.column_stack((rf_pred_test, xgb_pred_test))
    return meta_features_train, meta_features_test, rf_model, xgb_model

def train_stacking_ensemble(meta_features_train, y_train, meta_features_test):
    meta_learner = LogisticRegression(random_state=42)
    meta_learner.fit(meta_features_train, y_train)
    final_predictions = meta_learner.predict(meta_features_test)
    return final_predictions, meta_learner

def evaluate_model(y_test, predictions):
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions, zero_division=0)
    recall = recall_score(y_test, predictions, zero_division=0)
    f1 = f1_score(y_test, predictions, zero_division=0)
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1-Score: {f1:.4f}')
    cm = confusion_matrix(y_test, predictions)
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig('confusion_matrix.png')
    plt.close()
    return cm

def plot_feature_importance(rf_model, features):
    importances = rf_model.feature_importances_
    indices = np.argsort(importances)[::-1]
    plt.figure(figsize=(8, 6))
    plt.title('Feature Importance (Random Forest)')
    plt.bar(range(len(features)), importances[indices], align='center')
    plt.xticks(range(len(features)), [features[i] for i in indices], rotation=45)
    plt.xlabel('Features')
    plt.ylabel('Importance')
    plt.tight_layout()
    plt.savefig('feature_importance.png')
    plt.close()

def calculate_entropy(ip_list):
    counts = Counter(ip_list)
    total = sum(counts.values())
    entropy = -sum((count/total) * math.log2(count/total) for count in counts.values()) if total > 0 else 0
    return entropy

def predict_packet(features, scaler, rf_model, xgb_model, meta_learner):
    features_scaled = scaler.transform([features])
    rf_pred = rf_model.predict_proba(features_scaled)[:, 1]
    xgb_pred = xgb_model.predict_proba(features_scaled)[:, 1]
    meta_features = np.column_stack((rf_pred, xgb_pred))
    prediction = meta_learner.predict(meta_features)[0]
    return prediction

def realtime_detection(interface='Wi-Fi', target_ip='192.168.0.110', window_size=10, output_log='detection_log.txt'):
    try:
        meta_learner = joblib.load('stacking_model.pkl')
        rf_model = joblib.load('rf_model.pkl')
        xgb_model = joblib.load('xgb_model.pkl')
        scaler = joblib.load('scaler.pkl')
    except FileNotFoundError as e:
        print(f'Model file not found: {e}. Run training first to generate .pkl files.')
        exit(1)
    
    available_interfaces = get_if_list()
    if interface not in available_interfaces:
        print(f'Interface \'{interface}\' not found. Available interfaces: {available_interfaces}')
        exit(1)
    
    packets = []
    log_file = open(output_log, 'w')
    
    def process_packet(packet):
        if packet.haslayer(IP) and (packet[IP].src == target_ip or packet[IP].dst == target_ip):
            pkt_time = time.time()
            packets.append((pkt_time, packet))
            packets[:] = [p for p in packets if pkt_time - window_size <= p[0]]
            
            if len(packets) >= 2:
                pkt = packet
                time_delta = pkt_time - packets[-2][0] if len(packets) > 1 else 0
                pps = len(packets) / window_size
                src_ips = [p[IP].src for t, p in packets]
                src_ip_entropy = calculate_entropy(src_ips)
                length = len(pkt)
                syn_flag = 1 if pkt.haslayer(TCP) and pkt[TCP].flags & 0x02 else 0
                ack_flag = 1 if pkt.haslayer(TCP) and pkt[TCP].flags & 0x10 else 0
                
                features = [time_delta, pps, length, src_ip_entropy, syn_flag, ack_flag]
                
                is_attack = predict_packet(features, scaler, rf_model, xgb_model, meta_learner)
                
                result = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {'Attack Detected!' if is_attack else 'No Attack'} " \
                         f"(pps={pps:.2f}, time_delta={time_delta:.4f}, syn_flag={syn_flag})"
                print(result)
                log_file.write(result + '\n')
                log_file.flush()
    
    print(f'Starting real-time DDoS detection on {interface} targeting {target_ip}...')
    try:
        sniff(iface=interface, prn=process_packet, store=False)
    except PermissionError:
        print('Permission denied. Run the script as administrator or install npcap (https://nmap.org/npcap/).')
    except KeyboardInterrupt:
        print('Stopping detection...')
    finally:
        log_file.close()

def main():
    file_path = r'C:\Users\DELL\Desktop\Final_year_Project\Datas\DDoS\Correct\One-attack, labeled.csv'
    X, y, features, scaler = load_and_preprocess_data(file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
    meta_features_train, meta_features_test, rf_model, xgb_model = train_base_models(X_train, y_train, X_test)
    final_predictions, meta_learner = train_stacking_ensemble(meta_features_train, y_train, meta_features_test)
    print('Stacking Ensemble Performance:')
    cm = evaluate_model(y_test, final_predictions)
    print('\nConfusion Matrix Details:')
    print(f'True Negatives (Non-Attack, Predicted Non-Attack): {cm[0,0]}')
    print(f'False Positives (Non-Attack, Predicted Attack): {cm[0,1]}')
    print(f'False Negatives (Attack, Predicted Non-Attack): {cm[1,0]}')
    print(f'True Positives (Attack, Predicted Attack): {cm[1,1]}')
    plot_feature_importance(rf_model, features)
    
    joblib.dump(meta_learner, 'stacking_model.pkl')
    joblib.dump(rf_model, 'rf_model.pkl')
    joblib.dump(xgb_model, 'xgb_model.pkl')
    joblib.dump(scaler, 'scaler.pkl')
    print('Model components saved as .pkl files.')

if __name__ == '__main__':
    import sys
    if len(sys.argv) > 1 and sys.argv[1] == 'realtime':
        realtime_detection()
    else:
        main()

Class Distribution (is_attack):
is_attack
1    0.795396
0    0.204604
Name: proportion, dtype: float64
Stacking Ensemble Performance:
Accuracy: 0.9983
Precision: 0.9988
Recall: 0.9991
F1-Score: 0.9990

Confusion Matrix Details:
True Negatives (Non-Attack, Predicted Non-Attack): 855
False Positives (Non-Attack, Predicted Attack): 4
False Negatives (Attack, Predicted Non-Attack): 3
True Positives (Attack, Predicted Attack): 3335
Model components saved as .pkl files.


In [None]:
import numpy as np
import pandas as pd
import time
from collections import Counter
import math
import joblib
from scapy.all import sniff, IP, TCP, UDP, get_if_list

# Load trained model components
try:
    meta_learner = joblib.load('stacking_model.pkl')
    rf_model = joblib.load('rf_model.pkl')
    xgb_model = joblib.load('xgb_model.pkl')
    scaler = joblib.load('scaler.pkl')
except FileNotFoundError as e:
    print(f'Error: {e}. Ensure .pkl files are in the working directory.')
    raise

def calculate_entropy(ip_list):
    counts = Counter(ip_list)
    total = sum(counts.values())
    entropy = -sum((count/total) * math.log2(count/total) for count in counts.values()) if total > 0 else 0
    return entropy

def predict_packet(features_df):
    features_scaled = scaler.transform(features_df)
    rf_pred = rf_model.predict_proba(features_scaled)[:, 1]
    xgb_pred = xgb_model.predict_proba(features_scaled)[:, 1]
    meta_features = np.column_stack((rf_pred, xgb_pred))
    return meta_learner.predict(meta_features)[0]

def realtime_detection(interface='your-interface', target_ip='ESP32-ip address', window_size=10):
    packets = []
    feature_names = ['time_delta', 'pps', 'length', 'src_ip_entropy', 'syn_flag', 'ack_flag']
    
    def process_packet(packet):
        if packet.haslayer(IP) and (packet[IP].src == target_ip or packet[IP].dst == target_ip):
            pkt_time = time.time()
            packets.append((pkt_time, packet))
            packets[:] = [p for p in packets if pkt_time - window_size <= p[0]]
            
            if len(packets) >= 2:
                pkt = packet
                time_delta = pkt_time - packets[-2][0] if len(packets) > 1 else 0
                pps = len(packets) / window_size
                src_ips = [p[IP].src for t, p in packets]
                src_ip_entropy = calculate_entropy(src_ips)
                length = len(pkt)
                syn_flag = 1 if pkt.haslayer(TCP) and pkt[TCP].flags & 0x02 else 0
                ack_flag = 1 if pkt.haslayer(TCP) and pkt[TCP].flags & 0x10 else 0
                
                features = [[time_delta, pps, length, src_ip_entropy, syn_flag, ack_flag]]
                features_df = pd.DataFrame(features, columns=feature_names)
                
                is_attack = predict_packet(features_df)
                
                result = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {'Attack Detected!' if is_attack else 'No Attack'} " \
                         f"(pps={pps:.2f}, time_delta={time_delta:.4f}, syn_flag={syn_flag})"
                print(result)
                
                with open('detection_log.txt', 'a') as f:
                    f.write(result + '\n')
    
    print(f'Starting real-time detection on {interface} for {target_ip}...')
    try:
        sniff(iface=interface, prn=process_packet, store=False)
    except PermissionError:
        print('Permission denied. Run PyCharm as administrator and ensure npcap is installed (https://nmap.org/npcap/).')
    except Exception as e:
        print(f'Error during packet capture: {e}')
    finally:
        print('Detection stopped.')

if __name__ == '__main__':
    realtime_detection(interface='your-interface', target_ip='ESP32-ip address', window_size=10)