# **DoS Detection**

In [None]:
import zipfile
import os

# Location of the dataset archive and the directory its members are unpacked into
zip_file = "/content/sample_data/iot_intrusion_dataset.zip"
extract_files = "/content/sample_data/extracted_pcap_files"

# Open the archive read-only (the default mode) and unpack every member;
# the context manager closes the archive handle when the block exits.
with zipfile.ZipFile(zip_file) as archive:
    archive.extractall(path=extract_files)

print(f"✅ Extracted .pcap files to: {extract_files}")

✅ Extracted .pcap files to: /content/sample_data/extracted_pcap_files




---



---



**Install necessary components**

In [None]:
pip install scapy pandas numpy scikit-learn joblib

Collecting scapy
  Downloading scapy-2.6.1-py3-none-any.whl.metadata (5.6 kB)
Downloading scapy-2.6.1-py3-none-any.whl (2.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.4/2.4 MB 32.6 MB/s eta 0:00:00
Installing collected packages: scapy
Successfully installed scapy-2.6.1




---



---



**Extract features from the .pcap files**

1️⃣ Destination IP (dst_ip)
Why?

Helps identify whether an IP is being specifically targeted in an attack.
SYN flood attacks usually target a single victim IP (e.g., 192.168.1.200).
Normal traffic is usually more distributed across different destination IPs.
✔ Example:
✅ Normal Traffic → Destinations vary (192.168.1.10, 192.168.1.50, etc.)
❌ SYN Flood Attack → Targeted at one IP (192.168.1.200)

2️⃣ Destination Port (dst_port)
Why?

Helps detect if the attack is targeting a specific service.
Many attacks target specific ports known for vulnerabilities (e.g., 554 for RTSP cameras).
Normal traffic uses a variety of ports (e.g., 80 for HTTP, 443 for HTTPS).
✔ Example:
✅ Normal Traffic → Uses mixed ports: 80 (Web), 443 (HTTPS), 22 (SSH), 53 (DNS)
❌ SYN Flood Attack → Floods one port: 554 (RTSP)

3️⃣ Packet Size (packet_size)
Why?

Attack traffic often has fixed-size packets, whereas normal traffic varies in size.
SYN flood packets usually have a small and constant size (~60 bytes).
Normal traffic packets vary in size depending on the protocol (HTTP, DNS, etc.).
✔ Example:
✅ Normal Traffic → Varies (50-150 bytes, depends on data)
❌ SYN Flood Attack → Fixed size (60 bytes per packet)

4️⃣ SYN Flag (syn_flag)
Why?

SYN flood attacks send a high number of SYN packets without completing the handshake.
Normal traffic has both SYN and ACK packets (full handshake).
✔ Example:
✅ Normal Traffic → SYN sometimes, but also has ACK packets.
❌ SYN Flood Attack → SYN always (syn_flag = 1), no ACKs.

5️⃣ ACK Flag (ack_flag)
Why?

Helps detect if the handshake is completed or not.
In normal traffic, most packets after the initial SYN carry the ACK flag (ack_flag = 1), whereas SYN flood packets are bare SYNs that never carry it (ack_flag = 0).
✔ Example:
✅ Normal Traffic → SYN + ACK (ack_flag = 1)
❌ SYN Flood Attack → Only SYN (ack_flag = 0)



---



---



**Train the model using DoS .pcap files 1 and 4**

In [None]:
from scapy.all import rdpcap, IP, TCP
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Training captures and the rule that marks a packet as DoS in each of them.
# Every entry is (pcap path, attacker source-IP prefix, attacked IP, attacked TCP port),
# in the same order as the parameters of extract_features_from_pcap.
pcap_files = [
    (
        "/content/sample_data/extracted_pcap_files/dos-synflooding-1-dec.pcap",
        "222.",
        "192.168.0.13",
        554,
    ),  # First rule
    (
        "/content/sample_data/extracted_pcap_files/dos-synflooding-4-dec.pcap",
        "111.",
        "192.168.0.24",
        19604,
    ),  # Second rule
]

# Turn one capture file into a labelled, per-packet feature table
def extract_features_from_pcap(pcap_path, attack_src_prefix, attack_dst, attack_port):
    """Build a labelled feature frame from the TCP/IP packets of one capture.

    Args:
        pcap_path: Path of the capture file handed to ``rdpcap``.
        attack_src_prefix: String prefix that an attacking source IP starts with.
        attack_dst: Destination IP that attack packets are addressed to.
        attack_port: Destination TCP port that attack packets are addressed to.

    Returns:
        A DataFrame with one row per packet that has both an IP and a TCP
        layer, and the columns ``dst_ip``, ``dst_port``, ``packet_size``,
        ``syn_flag``, ``ack_flag`` and ``label`` (1 = DoS, 0 = normal).
    """
    column_names = ["dst_ip", "dst_port", "packet_size", "syn_flag", "ack_flag", "label"]
    rows = []

    for pkt in rdpcap(pcap_path):
        # Packets without both an IP and a TCP layer contribute no row
        if not (IP in pkt and TCP in pkt):
            continue

        ip_layer = pkt[IP]
        tcp_layer = pkt[TCP]
        tcp_flags = tcp_layer.flags

        # 0x02 is the SYN bit and 0x10 the ACK bit of the TCP flags field
        syn_flag = int(bool(tcp_flags & 0x02))
        ack_flag = int(bool(tcp_flags & 0x10))

        # DoS only when the packet is a SYN sent to the attacked host and
        # port from a source address inside the attacker prefix
        is_attack = (
            syn_flag == 1
            and ip_layer.dst == attack_dst
            and tcp_layer.dport == attack_port
            and ip_layer.src.startswith(attack_src_prefix)
        )

        rows.append([
            ip_layer.dst,
            tcp_layer.dport,
            len(pkt),
            syn_flag,
            ack_flag,
            1 if is_attack else 0,
        ])

    return pd.DataFrame(rows, columns=column_names)

# Parse every training capture into its own labelled frame
df_list = [
    extract_features_from_pcap(capture_path, rule_src_prefix, rule_dst_ip, rule_dst_port)
    for capture_path, rule_src_prefix, rule_dst_ip, rule_dst_port in pcap_files
]

# Stack the per-capture frames into one dataset with a fresh 0..n-1 index
df_combined = pd.concat(df_list, ignore_index=True)

# Replace each destination IP string by its integer category code.
# NOTE(review): the codes are derived from the set of IPs present in this
# frame, so they are only comparable with data encoded against the same set.
dst_ip_codes = df_combined["dst_ip"].astype("category").cat.codes
df_combined["dst_ip"] = dst_ip_codes

# Target vector (y) and feature matrix (X = every column except the label)
label_column = "label"
y = df_combined[label_column]
X = df_combined.drop(columns=[label_column])

# Hold out 20% of the packets for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest on the training portion (fit returns the estimator itself)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Persist the fitted model so later cells can reload it from disk
joblib.dump(model, "/content/sample_data/ids_model_v2.pkl")

# Score the model on the held-out packets
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Report the four metrics to four decimal places
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"🎯 Precision: {precision:.4f}")
print(f"📢 Recall: {recall:.4f}")
print(f"📊 F1-score: {f1:.4f}")


✅ Accuracy: 0.9997
🎯 Precision: 1.0000
📢 Recall: 0.9994
📊 F1-score: 0.9997


**Now test the saved model on all six SYN-flood captures**

Model under test: `ids_model_v2.pkl`

In [None]:
from scapy.all import rdpcap, IP, TCP
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Restore the classifier that the training cell wrote to disk.
# joblib.load unpickles the file, so only load model files you created or trust.
model = joblib.load(filename="/content/sample_data/ids_model_v2.pkl")

# Captures to evaluate and the rule that marks a packet as DoS in each of them.
# Every entry is (pcap path, attacker source-IP prefix, attacked IP, attacked TCP port),
# in the same order as the parameters of extract_features_from_pcap.
pcap_files = [
    # Captures whose attacked endpoint is 192.168.0.13:554
    (
        "/content/sample_data/extracted_pcap_files/dos-synflooding-1-dec.pcap",
        "222.", "192.168.0.13", 554,
    ),
    (
        "/content/sample_data/extracted_pcap_files/dos-synflooding-2-dec.pcap",
        "222.", "192.168.0.13", 554,
    ),
    (
        "/content/sample_data/extracted_pcap_files/dos-synflooding-3-dec.pcap",
        "111.", "192.168.0.13", 554,
    ),
    # Captures whose attacked endpoint is 192.168.0.24:19604
    (
        "/content/sample_data/extracted_pcap_files/dos-synflooding-4-dec.pcap",
        "111.", "192.168.0.24", 19604,
    ),
    (
        "/content/sample_data/extracted_pcap_files/dos-synflooding-5-dec.pcap",
        "111.", "192.168.0.24", 19604,
    ),
    (
        "/content/sample_data/extracted_pcap_files/dos-synflooding-6-dec.pcap",
        "111.", "192.168.0.24", 19604,
    ),
]

# Per-packet feature extraction and labelling for a single capture file
def extract_features_from_pcap(pcap_path, attack_src_prefix, attack_dst, attack_port):
    """Read a capture and return one labelled feature row per TCP/IP packet.

    Args:
        pcap_path: Path of the capture file passed to ``rdpcap``.
        attack_src_prefix: String prefix that an attacking source IP starts with.
        attack_dst: Destination IP that attack packets are addressed to.
        attack_port: Destination TCP port that attack packets are addressed to.

    Returns:
        A DataFrame with the columns ``dst_ip``, ``dst_port``, ``packet_size``,
        ``syn_flag``, ``ack_flag`` and ``label``; ``label`` is 1 for packets
        matching the attack rule and 0 otherwise. Packets lacking an IP or a
        TCP layer are left out.
    """
    syn_bit, ack_bit = 0x02, 0x10  # bit masks within the TCP flags field
    feature_rows = []

    for pkt in rdpcap(pcap_path):
        has_ip_and_tcp = IP in pkt and TCP in pkt
        if not has_ip_and_tcp:
            continue

        flags = pkt[TCP].flags
        syn_flag = int(bool(flags & syn_bit))
        ack_flag = int(bool(flags & ack_bit))

        # Start from "normal" and upgrade to DoS only when every part of the
        # rule holds: a SYN, from the attacker prefix, to the attacked IP and port
        label = 0
        if syn_flag == 1 and pkt[IP].src.startswith(attack_src_prefix):
            if pkt[IP].dst == attack_dst and pkt[TCP].dport == attack_port:
                label = 1

        feature_rows.append(
            (pkt[IP].dst, pkt[TCP].dport, len(pkt), syn_flag, ack_flag, label)
        )

    return pd.DataFrame(
        feature_rows,
        columns=["dst_ip", "dst_port", "packet_size", "syn_flag", "ack_flag", "label"],
    )

# Score the loaded model on each capture in turn and print one report per file.
# enumerate(..., start=1) supplies the 1-based file number shown in the report,
# replacing a hand-maintained counter.
#
# NOTE(review): captures 1 and 4 are the files the training cell fits on, so
# their scores are in-sample; only the other captures are unseen files.
# NOTE(review): dst_ip is re-encoded with category codes computed per file, so
# a given code may stand for a different IP than it did at training time —
# confirm whether a shared, persisted encoding is needed.
for file_number, (pcap_file, src_prefix, dst_ip, dst_port) in enumerate(pcap_files, start=1):
    # Extract labelled features from this capture
    df_test = extract_features_from_pcap(pcap_file, src_prefix, dst_ip, dst_port)

    # Convert categorical IP addresses to numerical values
    df_test["dst_ip"] = df_test["dst_ip"].astype("category").cat.codes

    # Split into features (X) and labels (y)
    X_test = df_test.drop(columns=["label"])
    y_test = df_test["label"]

    # Predict using the trained model
    y_pred = model.predict(X_test)

    # Evaluate model performance against the rule-derived labels
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Print results
    print("\n")
    print(f"Test with .pcap file number {file_number}")
    print(f"✅ Accuracy: {accuracy:.4f}")
    print(f"🎯 Precision: {precision:.4f}")
    print(f"📢 Recall: {recall:.4f}")
    print(f"📊 F1-score: {f1:.4f}")
    print(pd.Series(y_pred).value_counts())  # Show count of 0s (normal) and 1s (DoS)




Test with .pcap file number 1
✅ Accuracy: 0.9998
🎯 Precision: 0.9997
📢 Recall: 0.9998
📊 F1-score: 0.9998
0    21926
1    18706
Name: count, dtype: int64


Test with .pcap file number 2
✅ Accuracy: 0.9997
🎯 Precision: 0.9995
📢 Recall: 0.9999
📊 F1-score: 0.9997
0    18374
1    16872
Name: count, dtype: int64


Test with .pcap file number 3
✅ Accuracy: 0.9988
🎯 Precision: 1.0000
📢 Recall: 0.9975
📊 F1-score: 0.9988
0    12817
1    12509
Name: count, dtype: int64


Test with .pcap file number 4
✅ Accuracy: 0.9996
🎯 Precision: 1.0000
📢 Recall: 0.9991
📊 F1-score: 0.9996
0    5610
1    4688
Name: count, dtype: int64


Test with .pcap file number 5
✅ Accuracy: 0.9994
🎯 Precision: 1.0000
📢 Recall: 0.9987
📊 F1-score: 0.9994
0    5575
1    5541
Name: count, dtype: int64


Test with .pcap file number 6
✅ Accuracy: 0.9993
🎯 Precision: 1.0000
📢 Recall: 0.9986
📊 F1-score: 0.9993
0    6509
1    6294
Name: count, dtype: int64
