In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# 1. Chargement et préparation des données
columns_to_keep = [
    'Flow Duration', 'Total Fwd Packets', 'Total Backward Packets', 
    'Flow Bytes/s', 'Flow Packets/s', 'Flow IAT Mean', 'Fwd IAT Mean', 
    'Bwd IAT Mean', 'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 
    'Down/Up Ratio', 'Average Packet Size', 'Subflow Fwd Packets', 
    'Subflow Bwd Packets', 'Init_Win_bytes_forward', 'Init_Win_bytes_backward', 
    'Idle Mean', 'Idle Std', 'Label'
]

# Charger le dataset
data = pd.read_csv("./DDos-dataset/DDos.csv")

# Garder uniquement les colonnes nécessaires
data = data[columns_to_keep]

# Convertir les labels en valeurs binaires
data['Label'] = data['Label'].apply(lambda x: 1 if x == 'DDoS' else 0)

data.replace([np.inf, -np.inf], np.nan, inplace=True)

data.fillna(data.mean(), inplace=True)

# Séparer les caractéristiques et la cible
X = data.drop('Label', axis=1).values
y = data['Label'].values

# Normalisation des données
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Conversion des labels en encodage one-hot
y = to_categorical(y)

# Division en ensembles d'entraînement, de validation et de test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Reshape pour LSTM (ajout de la dimension temporelle)
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_val = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# 2. Création du modèle LSTM
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dense(2, activation='softmax'))

# Compilation du modèle
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])



In [12]:
# 3. Entraînement du modèle
history = model.fit(X_train, y_train, epochs=10, batch_size=64, 
                    validation_data=(X_val, y_val), verbose=1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
# Évaluation du modèle
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Exemple de test avec une attaque DDoS et une donnée non-attaque
sample_ddos = np.array([[3000, 15, 15, 1500000, 5000, 1000, 1500, 1500, 1, 1, 0, 1, 120, 10, 10, 4000, 4000, 30000, 5000]])
sample_benign = np.array([[100, 1, 1, 10000, 100, 10, 10, 10, 0, 0, 0, 1, 80, 1, 1, 2000, 2000, 500, 100]])

# Normaliser les exemples de test
sample_ddos = scaler.transform(sample_ddos).reshape((1, 1, sample_ddos.shape[1]))
sample_benign = scaler.transform(sample_benign).reshape((1, 1, sample_benign.shape[1]))

# Prédictions
pred_ddos = model.predict(sample_ddos)
pred_benign = model.predict(sample_benign)

print("Prédiction pour attaque DDoS:", "DDoS" if np.argmax(pred_ddos) == 1 else "BENIGN")
print("Prédiction pour non-attaque:", "DDoS" if np.argmax(pred_benign) == 1 else "BENIGN")

Test Accuracy: 99.76%
Prédiction pour attaque DDoS: BENIGN
Prédiction pour non-attaque: BENIGN
