In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [14]:
# Location of the flow-level CSV export, relative to the notebook's working directory.
DATASET_CSV = "./DDos-dataset/DDos.csv"

# Load the whole file into memory; later cells refer to this frame as `df`.
df = pd.read_csv(DATASET_CSV)

# Preview the first rows (trailing expression -> rich display).
df.head()

Unnamed: 0,Flow ID,Source IP,Source Port,Destination IP,Destination Port,Protocol,Timestamp,Flow Duration,Total Fwd Packets,Total Backward Packets,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,192.168.10.5-104.16.207.165-54865-443-6,104.16.207.165,443,192.168.10.5,54865,6,7/7/2017 3:30,3,2,0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,192.168.10.5-104.16.28.216-55054-80-6,104.16.28.216,80,192.168.10.5,55054,6,7/7/2017 3:30,109,1,1,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,192.168.10.5-104.16.28.216-55055-80-6,104.16.28.216,80,192.168.10.5,55055,6,7/7/2017 3:30,52,1,1,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,192.168.10.16-104.17.241.25-46236-443-6,104.17.241.25,443,192.168.10.16,46236,6,7/7/2017 3:30,34,1,1,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,192.168.10.5-104.19.196.102-54863-443-6,104.19.196.102,443,192.168.10.5,54863,6,7/7/2017 3:30,3,2,0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


In [322]:
# Feature columns used to train the classifier, followed by the target column.
columns_to_keep = [
    'Flow Duration',
    'Total Fwd Packets',
    'Total Backward Packets',
    'Flow Bytes/s',
    'Flow Packets/s',
    'Flow IAT Mean',
    'Fwd IAT Mean',
    'Bwd IAT Mean',
    'FIN Flag Count',
    'SYN Flag Count',
    'RST Flag Count',
    'Down/Up Ratio',
    'Average Packet Size',
    'Subflow Fwd Packets',
    'Subflow Bwd Packets',
    'Init_Win_bytes_forward',
    'Init_Win_bytes_backward',
    'Idle Mean',
    'Idle Std',
    'Label'
]

# Take an explicit copy so the assignment below writes to an independent frame
# instead of a selection derived from `df` (avoids SettingWithCopyWarning and
# keeps `df` untouched).
df_filtered = df[columns_to_keep].copy()

# Encode the target: 1 = DDoS, 0 = BENIGN.
label_codes = df_filtered['Label'].map({'DDoS': 1, 'BENIGN': 0})

# Any label missing from the mapping becomes NaN; fail here with a clear message
# rather than later when the column is cast to an integer dtype.
if label_codes.isna().any():
    unexpected = df_filtered.loc[label_codes.isna(), 'Label'].unique().tolist()
    raise ValueError(f"Unexpected values in 'Label' column: {unexpected}")

df_filtered['Label'] = label_codes.astype('int64')

In [324]:
# Target vector: the encoded label, forced to a 64-bit integer dtype.
y = df_filtered["Label"].astype(np.int64)

# Feature matrix: every remaining column of the filtered frame.
X = df_filtered.drop("Label", axis=1)

In [326]:
# Treat +/-infinity as missing values, then fill every missing entry with the
# mean of its column. Rebinding `X` (instead of `inplace=True`) keeps the cell
# free of in-place mutation and gives the same result.
# NOTE(review): the column means are computed on the full feature matrix before
# the train/test split, so statistics from rows that later land in the test set
# feed the imputation — consider computing them on the training split only.
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(X.mean())

In [328]:
# One seed shared by both splits so the partitions are reproducible.
SPLIT_SEED = 42

# Stage 1: hold out 30% of all rows as the final test set.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SPLIT_SEED
)

# Stage 2: reserve 20% of the remaining rows for validation; the rest is used for training.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=SPLIT_SEED
)

In [330]:
# Random forest with 100 trees; class_weight='balanced' re-weights the classes
# inversely to their frequency in the training labels.
# random_state is fixed so that bootstrap sampling and feature selection — and
# therefore the reported metrics — are reproducible across kernel restarts.
model = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
)
model.fit(X_train, y_train)

In [331]:
# Accuracy on the validation split (the same quantity a classifier's .score() reports).
val_predictions = model.predict(X_val)
val_score = accuracy_score(y_val, val_predictions)
print(f"Accuracy sur l'ensemble de validation: {val_score}")

Accuracy sur l'ensemble de validation: 0.9998101566207879


In [166]:

# Print the dtype and a few distinct values for each retained column.
# Read from `df_filtered` (the frame being iterated) rather than the raw `df`,
# so the report reflects the filtered data, including the encoded 'Label'.
for col in df_filtered.columns:
    column = df_filtered[col]
    print(f"Colonne:{col}")
    print(f"Type: {column.dtype}")
    print(f"Exemples: {column.unique()[:5]}")
    print("-" * 40)


Colonne:Flow Duration
Type: int64
Exemples: [   3  109   52   34 1022]
----------------------------------------
Colonne:Total Fwd Packets
Type: int64
Exemples: [ 2  1  3 20 15]
----------------------------------------
Colonne:Total Backward Packets
Type: int64
Exemples: [ 0  1  2 25  6]
----------------------------------------
Colonne:Flow Bytes/s
Type: float64
Exemples: [4000000.       110091.7431   230769.2308   352941.1765    11741.68297]
----------------------------------------
Colonne:Flow Packets/s
Type: float64
Exemples: [666666.6667    18348.62385   38461.53846   58823.52941    1956.947162]
----------------------------------------
Colonne:Flow IAT Mean
Type: float64
Exemples: [   3.  109.   52.   34. 1022.]
----------------------------------------
Colonne:Fwd IAT Mean
Type: float64
Exemples: [3.000e+00 0.000e+00 1.022e+03 4.000e+00 1.000e+00]
----------------------------------------
Colonne:Bwd IAT Mean
Type: float64
Exemples: [0.0000e+00 3.0000e+00 4.0000e+00 5.2520e+03 1.3326

In [334]:
# Final evaluation on the held-out test split: overall accuracy plus the
# per-class precision / recall / F1 report.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print(test_report)

Accuracy: 0.9997785127871951
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     29262
           1       1.00      1.00      1.00     38462

    accuracy                           1.00     67724
   macro avg       1.00      1.00      1.00     67724
weighted avg       1.00      1.00      1.00     67724



In [351]:
# Hand-written flow records used to spot-check the trained model.
# Each value is keyed by its feature name, so the value/column pairing is
# explicit rather than relying on position.

# Synthetic flow intended to represent a DDoS attack.
attack_features = {
    'Flow Duration': 200000,
    'Total Fwd Packets': 1500,
    'Total Backward Packets': 1500,
    'Flow Bytes/s': 1000000,
    'Flow Packets/s': 500,
    'Flow IAT Mean': 1.2,
    'Fwd IAT Mean': 1.0,
    'Bwd IAT Mean': 1.4,
    'FIN Flag Count': 50,
    'SYN Flag Count': 200,
    'RST Flag Count': 0,
    'Down/Up Ratio': 1.0,
    'Average Packet Size': 600,
    'Subflow Fwd Packets': 800,
    'Subflow Bwd Packets': 700,
    'Init_Win_bytes_forward': 1024,
    'Init_Win_bytes_backward': 1024,
    'Idle Mean': 0.5,
    'Idle Std': 0.1,
}

# Synthetic flow intended to represent benign traffic.
benign_features = {
    'Flow Duration': 10000,
    'Total Fwd Packets': 50,
    'Total Backward Packets': 60,
    'Flow Bytes/s': 1000,
    'Flow Packets/s': 5,
    'Flow IAT Mean': 0.5,
    'Fwd IAT Mean': 20,
    'Bwd IAT Mean': 25,
    'FIN Flag Count': 0,
    'SYN Flag Count': 0,
    'RST Flag Count': 0,
    'Down/Up Ratio': 0.5,
    'Average Packet Size': 100,
    'Subflow Fwd Packets': 50,
    'Subflow Bwd Packets': 40,
    'Init_Win_bytes_forward': 512,
    'Init_Win_bytes_backward': 512,
    'Idle Mean': 1.2,
    'Idle Std': 0.4,
}

# Feature names in the order the values are laid out (no 'Label' entry).
columns_to_keepp = list(attack_features)

# Single-row 2-D arrays, one value per feature, in `columns_to_keepp` order.
attack_sample = np.array([[attack_features[name] for name in columns_to_keepp]])
benign_sample = np.array([[benign_features[name] for name in columns_to_keepp]])

In [353]:
# Wrap each hand-written sample in a one-row DataFrame with named columns
# before passing it to the model.
attack_sample_df, benign_sample_df = (
    pd.DataFrame(sample, columns=columns_to_keepp)
    for sample in (attack_sample, benign_sample)
)

# Predicted class for each sample (label encoding: 1 = DDoS, 0 = BENIGN).
attack_prediction = model.predict(attack_sample_df)
print(f"Prédiction pour le paquet d'attaque (DDoS) : {attack_prediction[0]}")

benign_prediction = model.predict(benign_sample_df)
print(f"Prédiction pour le paquet de non-attaque (BENIGN) : {benign_prediction[0]}")

# Compare each prediction with the class the sample was written to represent.
attack_verdict = (
    "Le paquet d'attaque a été correctement classé comme DDoS."
    if attack_prediction[0] == 1
    else "Le paquet d'attaque n'a pas été correctement classé."
)
print(attack_verdict)

benign_verdict = (
    "Le paquet de non-attaque a été correctement classé comme BENIGN."
    if benign_prediction[0] == 0
    else "Le paquet de non-attaque n'a pas été correctement classé."
)
print(benign_verdict)

Prédiction pour le paquet d'attaque (DDoS) : 0
Prédiction pour le paquet de non-attaque (BENIGN) : 0
Le paquet d'attaque n'a pas été correctement classé.
Le paquet de non-attaque a été correctement classé comme BENIGN.
