In [1]:
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report, adjusted_rand_score
from tf_keras.models import Sequential
from tf_keras.layers import Dense
from tf_keras.optimizers import Adam
from tf_keras.losses import SparseCategoricalCrossentropy




In [2]:
# Charge les datasets NSL-KDD
columns = [
    "duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land",
    "wrong_fragment", "urgent", "hot", "num_failed_logins", "logged_in", "num_compromised",
    "root_shell", "su_attempted", "num_root", "num_file_creations", "num_shells", "num_access_files",
    "num_outbound_cmds", "is_host_login", "is_guest_login", "count", "srv_count", "serror_rate",
    "srv_serror_rate", "rerror_rate", "srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
    "srv_diff_host_rate", "dst_host_count", "dst_host_srv_count", "dst_host_same_srv_rate",
    "dst_host_diff_srv_rate", "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate", "dst_host_srv_serror_rate", "dst_host_rerror_rate", "dst_host_srv_rerror_rate",
    "class", "difficulty"
]

train_data = pd.read_csv("Datasets/KDDTrain+.txt", header=None, names=columns)
test_data = pd.read_csv("Datasets/KDDTest+.txt", header=None, names=columns)

In [3]:
# Prétraitement des données
def preprocess_data(data):
    data = data.copy()

    # Encodage des colonnes catégoriques
    categorical_cols = ["protocol_type", "service", "flag"]
    for col in categorical_cols:
        encoder = LabelEncoder()
        data[col] = encoder.fit_transform(data[col])

    # Encodage de la classe cible
    data["class"] = data["class"].apply(lambda x: "benign" if x == "normal" else x)
    target_encoder = LabelEncoder()
    data["class"] = target_encoder.fit_transform(data["class"])

    return data

train_data = preprocess_data(train_data)
test_data = preprocess_data(test_data)

# Séparation des caractéristiques et des cibles
X_train = train_data.drop(columns=["class"])
y_train = train_data["class"]
X_test = test_data.drop(columns=["class"])
y_test = test_data["class"]

# Standardisation des données
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Réseau de neurones supervisé
total_classes = max(np.max(y_train), np.max(y_test)) + 1

model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(total_classes, activation='softmax')
])

model.compile(optimizer = Adam(learning_rate=0.001), loss=SparseCategoricalCrossentropy(), metrics=["accuracy"])

# Entraînement
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Évaluation
accuracy = model.evaluate(X_test, y_test, verbose=0)[1]
print("Accuracy sur les données de test :", accuracy)


Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy sur les données de test : 0.011533002369105816


In [5]:
# Méthode K-Means
kmeans = KMeans(n_clusters=len(np.unique(y_train)), random_state=42)
kmeans.fit(X_train)
clusters = kmeans.predict(X_test)

# Comparaison des clusters avec les classes réelles
print("Score ARI :", adjusted_rand_score(y_test, clusters))

Score ARI : 0.2920902957582812


In [6]:
# Rapport de classification
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print("Rapport de classification :\n", classification_report(y_test, y_pred_classes, zero_division=0))

Rapport de classification :
               precision    recall  f1-score   support

           0       0.24      0.27      0.25       737
           1       0.00      0.00      0.00       359
           2       0.00      0.00      0.00      9711
           3       0.00      0.00      0.00        20
           4       0.00      0.00      0.00         3
           5       0.00      0.00      0.00      1231
           6       0.00      0.00      0.00       133
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00       141
           9       0.00      0.00      0.00         7
          10       0.00      0.00      0.00         2
          11       0.00      0.00      0.00       293
          12       0.00      0.00      0.00       996
          13       0.00      0.00      0.00        18
          14       0.00      0.00      0.00        17
          15       0.04      0.01      0.01      4657
          16       0.00      0.00      0.00        7