In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,MinMaxScaler

In [86]:
data = pd.read_csv('./Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv')

In [87]:
print(data.columns)

Index([' Destination Port', ' Flow Duration', ' Total Fwd Packets',
       ' Total Backward Packets', 'Total Length of Fwd Packets',
       ' Total Length of Bwd Packets', ' Fwd Packet Length Max',
       ' Fwd Packet Length Min', ' Fwd Packet Length Mean',
       ' Fwd Packet Length Std', 'Bwd Packet Length Max',
       ' Bwd Packet Length Min', ' Bwd Packet Length Mean',
       ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s',
       ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min',
       'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max',
       ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std',
       ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags',
       ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length',
       ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s',
       ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean',
       ' Packet Length Std', ' Packet Length Variance', '

In [88]:
X = data.drop(columns=[' Label'])  
y = data[' Label']
X_cleaned = X[~X.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
y_cleaned = y[X_cleaned.index]


In [89]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [90]:
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(X_cleaned)

In [91]:
print("Shape of x_scaled:", x_scaled.shape)
print("Shape of y_encoded:", y_encoded.shape)


Shape of x_scaled: (225711, 78)
Shape of y_encoded: (225745,)


In [92]:
X_train, X_test, y_train, y_test = train_test_split(x_scaled, y_cleaned, test_size=0.3, random_state=42)


In [93]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,classification_report

In [94]:
knn = KNeighborsClassifier(n_neighbors=5)

In [95]:
knn.fit(X_train, y_train)


In [96]:
y_pred_knn = knn.predict(X_test)


In [97]:
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print(classification_report(y_test, y_pred_knn))

KNN Accuracy: 0.999778480077975
              precision    recall  f1-score   support

      BENIGN       1.00      1.00      1.00     29321
        DDoS       1.00      1.00      1.00     38393

    accuracy                           1.00     67714
   macro avg       1.00      1.00      1.00     67714
weighted avg       1.00      1.00      1.00     67714



In [98]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


In [99]:
num_features = 78
num_samples = X_train_lstm.shape[0]  # This should give you how many samples you have
total_elements = num_samples * num_features
print(f"Total elements: {total_elements}, Original shape: {X_train_lstm.shape}")


Total elements: 12323766, Original shape: (157997, 1, 78)


In [100]:

print("Original shape of X_train_lstm:", X_train_lstm.shape)


num_features = 78
total_elements = X_train_lstm.size  

samples = total_elements // num_features  
print(f"Calculated number of samples: {samples}")

X_train_lstm = np.reshape(X_train_lstm, (samples, 1, num_features))
print("Reshaped X_train_lstm:", X_train_lstm.shape)




Original shape of X_train_lstm: (157997, 1, 78)
Calculated number of samples: 157997
Reshaped X_train_lstm: (157997, 1, 78)


In [101]:
lstm_model = Sequential()
lstm_model.add(LSTM(units=64, input_shape=(1, X_train.shape[1])))
lstm_model.add(Dropout(0.2))
lstm_model.add(Dense(1, activation='sigmoid'))

  super().__init__(**kwargs)


In [102]:
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [103]:
le = LabelEncoder()

# Fit and transform the training labels
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

print("Encoded y_train:", y_train_encoded)
print("Encoded y_test:", y_test_encoded)

Encoded y_train: [1 1 1 ... 1 0 1]
Encoded y_test: [0 0 0 ... 1 1 1]


In [104]:
# Convert to float if necessary
y_train_encoded = y_train_encoded.astype(np.float32)
y_test_encoded = y_test_encoded.astype(np.float32)

# Fit the model with encoded labels
lstm_model.fit(X_train_lstm, y_train_encoded, epochs=10, batch_size=64, validation_data=(X_test_lstm, y_test_encoded))

# Predictions
y_pred_lstm = (lstm_model.predict(X_test_lstm) > 0.5).astype("int32")


Epoch 1/10
[1m2469/2469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - accuracy: 0.9379 - loss: 0.1663 - val_accuracy: 0.9891 - val_loss: 0.0304
Epoch 2/10
[1m2469/2469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 5ms/step - accuracy: 0.9896 - loss: 0.0263 - val_accuracy: 0.9896 - val_loss: 0.0236
Epoch 3/10
[1m2469/2469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.9914 - loss: 0.0202 - val_accuracy: 0.9921 - val_loss: 0.0178
Epoch 4/10
[1m2469/2469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.9947 - loss: 0.0174 - val_accuracy: 0.9940 - val_loss: 0.0158
Epoch 5/10
[1m2469/2469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.9966 - loss: 0.0151 - val_accuracy: 0.9942 - val_loss: 0.0147
Epoch 6/10
[1m2469/2469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9973 - loss: 0.0122 - val_accuracy: 0.9974 - val_loss: 0.0125
Epoch 7/10

In [105]:
y_pred_labels = le.inverse_transform(y_pred_lstm.flatten())  # Flatten if needed
print("Predicted labels:", y_pred_labels)


Predicted labels: ['BENIGN' 'BENIGN' 'BENIGN' ... 'DDoS' 'DDoS' 'DDoS']


In [107]:
y_pred_lstm = (lstm_model.predict(X_test_lstm) > 0.5).astype("int32")
print(y_pred_lstm)

[1m2117/2117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step
[[0]
 [0]
 [0]
 ...
 [1]
 [1]
 [1]]


In [109]:

y_pred_lstm = (lstm_model.predict(X_test_lstm) > 0.5).astype("int32")


y_pred_labels = le.inverse_transform(y_pred_lstm.flatten())


print("LSTM Accuracy:", accuracy_score(y_test, y_pred_labels))
print(classification_report(y_test, y_pred_labels))


[1m2117/2117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step
LSTM Accuracy: 0.9974599048941135
              precision    recall  f1-score   support

      BENIGN       1.00      1.00      1.00     29321
        DDoS       1.00      1.00      1.00     38393

    accuracy                           1.00     67714
   macro avg       1.00      1.00      1.00     67714
weighted avg       1.00      1.00      1.00     67714

