<a href="https://colab.research.google.com/github/AbdulsemedShalo/DDOS-Detection-and-Mitigation/blob/main/DDOSMitigation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB

# Pull the labelled flow records straight from the project repository
data = pd.read_csv(
    "https://raw.githubusercontent.com/AbdulsemedShalo/DDOS-Detection-and-Mitigation/main/Dataset.csv"
)

# Learn the set of class names first, then swap the textual 'Label' column
# for the integer codes the encoder assigned to them
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(data['Label'])
data['Label'] = label_encoder.transform(data['Label'])

# Columns the model is trained on. The order matters: any sample passed to the
# classifier later must supply its values in this same order.
features = [
    'Max_Packet_Length', 'Fwd_Packet_Length_Max', 'Flow_Packets_Sec',
    'Flow_Bytes_Sec', 'Packet_Length_Std', 'Packet_Length_Variance',
    'Flow_IAT_Max', 'Fwd_IAT_Max', 'Subflow_Fwd_Bytes',
    'Fwd_Packet_Length_Std', 'Bwd_Packets_Sec', 'min_seg_size_forward',
    'Init_Win_bytes_backward', 'Average_Packet_Size', 'Packet_Length_Mean',
    'Fwd_IAT_Total', 'Flow_IAT_Std', 'Fwd_IAT_Std',
    'Avg_Fwd_Segment_Size', 'Fwd_Packet_Length_Mean', 'Fwd_Header_Length',
    'Fwd_IAT_Mean', 'Flow_IAT_Mean', 'Idle_Max',
    'Idle_Mean', 'Fwd_Packets_Sec',
]

# Replace missing values in every model feature with the median of its column.
# The result is assigned back to `data` instead of calling
# `data[col].fillna(..., inplace=True)` per column: that form modifies a column
# selection, which pandas 2.x flags as chained assignment and which does not
# update `data` at all once Copy-on-Write is enabled.
data[features] = data[features].fillna(data[features].median())



# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    data[features],
    data["Label"],
    test_size=0.2,
    random_state=1,
)

# Build the Gaussian Naive Bayes classifier and fit it on the training rows
# (fit() returns the estimator itself, so the two steps can be chained)
clf = GaussianNB().fit(X_train, y_train)

# Score the held-out rows and report the fraction classified correctly
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Feature values of the single flow to classify, listed in the same order as
# `features`.
# The Packet_Length_Variance and Flow_IAT_Max values were previously swapped:
# 13033.56926 is Packet_Length_Std squared (114.1646586 ** 2), i.e. the
# variance, while 79000000 is the inter-arrival maximum (it matches
# Fwd_IAT_Max and Idle_Max below).
new_traffic = [
    353,            # Max_Packet_Length
    353,            # Fwd_Packet_Length_Max
    0.207573889,    # Flow_Packets_Sec
    9.578052299,    # Flow_Bytes_Sec
    114.1646586,    # Packet_Length_Std
    13033.56926,    # Packet_Length_Variance
    79000000,       # Flow_IAT_Max
    79000000,       # Fwd_IAT_Max
    969,            # Subflow_Fwd_Bytes
    119.0835509,    # Fwd_Packet_Length_Std
    0.009884471,    # Bwd_Packets_Sec
    0,              # min_seg_size_forward
    29200,          # Init_Win_bytes_backward
    46.14285714,    # Average_Packet_Size
    44.04545455,    # Packet_Length_Mean
    101000000,      # Fwd_IAT_Total
    18000000,       # Flow_IAT_Std
    18400000,       # Fwd_IAT_Std
    48.45,          # Avg_Fwd_Segment_Size
    48.45,          # Fwd_Packet_Length_Mean
    728,            # Fwd_Header_Length
    5323696.105,    # Fwd_IAT_Mean
    5058439.7,      # Flow_IAT_Mean
    79000000,       # Idle_Max
    20500000,       # Idle_Mean
    0.197689418,    # Fwd_Packets_Sec
]

# Wrap the sample in a one-row frame carrying the training column names, so the
# classifier receives the same named features it was fitted on.
prediction = clf.predict(pd.DataFrame([new_traffic], columns=features))

# NOTE(review): class 0 is treated as "benign". LabelEncoder numbers classes in
# sorted order, so this holds only if the benign label sorts first - confirm
# against label_encoder.classes_.
if prediction[0] == 0:
    print("The new traffic is not a DDoS attack.")
else:
    # Report the verdict once, whether or not any address ends up blocked
    print("The new traffic is a DDoS attack.")

    # 'Label' was integer-encoded earlier in this script, so decode it back to
    # class names before comparing with "DoS Hulk"; comparing the encoded
    # column with the string directly never matches.
    decoded_labels = label_encoder.inverse_transform(data["Label"])
    attacker_ips = data.loc[decoded_labels == "DoS Hulk", "Source.IP"]

    # Unique source addresses in order of first appearance; rows without an
    # address are skipped so every entry can be written as text.
    blocked_ips = attacker_ips.dropna().astype(str).drop_duplicates().tolist()

    for ip_address in blocked_ips:
        print("Blocked IP address:", ip_address)

    # Save the blocked IP addresses to a file, one per line
    with open("blocked_ips.txt", "w") as f:
        f.writelines(f"{ip_address}\n" for ip_address in blocked_ips)

Accuracy: 0.9243333333333333
The new traffic is not a DDoS attack.


