In [None]:

import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# ---------------------------------------------------------------------------
# Data loading and preprocessing
#
# Produces X_train / X_test (scaled feature DataFrames), Y_train / Y_test
# (integer-encoded 'Attack Type' labels), plus the fitted `scaler` and
# `label_encoder` used later to transform inputs / decode predictions.
# ---------------------------------------------------------------------------

# Load the dataset
data = pd.read_csv('/content/sample_data/5gnidd.csv')

# Drop columns that are not used as model features
drop_columns = ['Unnamed: 0', 'RunTime', 'Min', 'Max', 'sTos', 'dTos', 'sDSb', 'dDSb',
                'sHops', 'dHops', 'SrcWin', 'DstWin', 'sVid', 'dVid',
                'SrcTCPBase', 'DstTCPBase', 'TcpRtt', 'SynAck', 'AckDat']
data = data.drop(drop_columns, axis=1)

# Drop rows with missing values, then exact duplicate rows
data = data.dropna()
data = data.drop_duplicates()

# Separate features (X) and labels (Y); every label-like column is removed
# from the features so the target cannot leak into the inputs.
X = data.drop(['Label', 'Attack Type', 'Attack Tool'], axis=1)
Y = data['Attack Type']

# Convert categorical columns to numerical using one-hot encoding
X = pd.get_dummies(X, columns=['Proto', 'Cause', 'State'])

# Convert labels to numerical values
label_encoder = LabelEncoder()
Y = label_encoder.fit_transform(Y)

# Split the dataset BEFORE scaling so the held-out rows stay unseen.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Scale numerical features. The scaler is fitted on the training rows only;
# fitting it on the full dataset would leak test-set mean/variance into the
# training features and make the test metrics optimistic.
# The results are wrapped back into DataFrames (same columns and index)
# because later code indexes X_test with `.iloc`.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train),
                       columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test),
                      columns=X_test.columns, index=X_test.index)

# Build the CNN model
#
# The flat feature vector is reshaped to (n_features, 1) so the Conv1D layers
# can slide along the feature axis; two conv/pool stages feed a dense head
# whose softmax width equals the number of encoded attack classes.
n_features = X_train.shape[1]
n_classes = len(label_encoder.classes_)
keras_layers = tf.keras.layers

model = tf.keras.models.Sequential()
model.add(keras_layers.Reshape((n_features, 1), input_shape=(n_features,)))

# First convolutional stage
model.add(keras_layers.Conv1D(64, kernel_size=5, activation='relu'))
model.add(keras_layers.MaxPooling1D(pool_size=2))
model.add(keras_layers.Dropout(0.2))

# Second convolutional stage
model.add(keras_layers.Conv1D(128, kernel_size=3, activation='relu'))
model.add(keras_layers.MaxPooling1D(pool_size=2))

# Classification head
model.add(keras_layers.Flatten())
model.add(keras_layers.Dense(128, activation='relu'))
model.add(keras_layers.Dropout(0.5))
model.add(keras_layers.Dense(n_classes, activation='softmax'))

# Labels are integer-encoded, hence the sparse variant of the loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model
# NOTE(review): the test split is also passed as validation data. That is
# harmless while nothing is tuned on it, but if early stopping or model
# selection is added later, carve a separate validation split out of the
# training data so the test metrics stay unbiased.
epochs = 50
batch_size = 64
history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size,
                    validation_data=(X_test, Y_test), verbose=1)

# Evaluate the model on the held-out split
loss, accuracy = model.evaluate(X_test, Y_test, batch_size=batch_size)
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

# Make predictions: one probability row per sample, argmax gives the class id
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Convert numerical predictions back to attack types
y_pred_labels = label_encoder.inverse_transform(y_pred_classes)
y_true_labels = label_encoder.inverse_transform(Y_test)

# Generate classification report.
# zero_division=0 reports 0.0 for classes the model never predicts (the same
# number the default produces) without emitting an undefined-metric warning
# for each affected average.
report = classification_report(y_true_labels, y_pred_labels, zero_division=0)
print('Classification Report:')
print(report)

# Generate confusion matrix.
# Passing the encoder's full class list pins the row/column order and keeps
# the matrix shape stable even if a rare class is missing from the test split.
confusion = confusion_matrix(y_true_labels, y_pred_labels,
                             labels=label_encoder.classes_)
print('Confusion Matrix:')
print(confusion)

import time
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# ... (Your previous code)

# Model Evaluation:
# Time the evaluation pass over the test split. perf_counter() is a monotonic,
# high-resolution clock intended for measuring intervals; time.time() can jump
# if the system clock is adjusted.
start_time = time.perf_counter()
loss, accuracy = model.evaluate(X_test, Y_test, batch_size=batch_size)
end_time = time.perf_counter()
evaluation_time = end_time - start_time
# NOTE: this interval covers model.evaluate(), not model.fit(). The old name
# is kept only as an alias for any later code that still reads it; to report
# a real training time, wrap the model.fit() call with the same timer.
training_time = evaluation_time

print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')
print(f'Evaluation Time: {evaluation_time:.2f} seconds')

# Making Predictions:
start_time = time.perf_counter()
y_pred = model.predict(X_test)
end_time = time.perf_counter()
prediction_time = end_time - start_time

# argmax over the softmax output gives the encoded class id; decode both the
# predictions and the ground truth back to attack-type names.
y_pred_classes = np.argmax(y_pred, axis=1)
y_pred_labels = label_encoder.inverse_transform(y_pred_classes)
y_true_labels = label_encoder.inverse_transform(Y_test)

# Calculate precision, recall, and F1-score (support-weighted across classes).
# zero_division=0 scores never-predicted classes as 0.0 without raising an
# undefined-metric warning for every call.
precision = precision_score(y_true_labels, y_pred_labels, average='weighted', zero_division=0)
recall = recall_score(y_true_labels, y_pred_labels, average='weighted', zero_division=0)
f1 = f1_score(y_true_labels, y_pred_labels, average='weighted', zero_division=0)

# Calculate accuracy
accuracy = accuracy_score(y_true_labels, y_pred_labels)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')
print(f'Prediction Time: {prediction_time:.2f} seconds')

# Per-attack-type evaluation.
#
# Precision, recall and F1 for a class must be computed against the FULL test
# set. Scoring only the rows whose true label is that class hides every false
# positive (rows of other classes predicted as this one), so precision
# degenerates to 1.0 whenever at least one row is classified correctly.
y_pred_all = np.argmax(model.predict(X_test), axis=1)

# Get unique attack types (encoded ids) present in the test split
unique_attack_types = np.unique(Y_test)

# One score per entry of unique_attack_types, in the same order.
per_class_precision = precision_score(Y_test, y_pred_all, labels=unique_attack_types,
                                      average=None, zero_division=0)
per_class_recall = recall_score(Y_test, y_pred_all, labels=unique_attack_types,
                                average=None, zero_division=0)
per_class_f1 = f1_score(Y_test, y_pred_all, labels=unique_attack_types,
                        average=None, zero_division=0)

for class_pos, attack_type in enumerate(unique_attack_types):
    # Filter test data for the specific attack type
    attack_indices = np.where(Y_test == attack_type)[0]
    x_test_attack_type = X_test.iloc[attack_indices]
    y_test_attack_type = Y_test[attack_indices]

    # Time inference on this class's rows only
    start_time = time.perf_counter()
    y_pred_attack_type = model.predict(x_test_attack_type)
    end_time = time.perf_counter()
    prediction_time = end_time - start_time

    y_pred_classes_attack_type = np.argmax(y_pred_attack_type, axis=1)

    precision_attack_type = per_class_precision[class_pos]
    recall_attack_type = per_class_recall[class_pos]
    f1_attack_type = per_class_f1[class_pos]
    # Fraction of this class's rows classified correctly; by construction this
    # is the same quantity as the class recall.
    accuracy_attack_type = accuracy_score(y_test_attack_type, y_pred_classes_attack_type)

    # Report the human-readable attack name rather than the encoded integer.
    attack_name = label_encoder.inverse_transform([attack_type])[0]

    print(f'Attack Type: {attack_name}')
    print(f'Precision: {precision_attack_type:.4f}')
    print(f'Recall: {recall_attack_type:.4f}')
    print(f'F1-Score: {f1_attack_type:.4f}')
    print(f'Accuracy: {accuracy_attack_type:.4f}')
    print(f'Prediction Time: {prediction_time:.2f} seconds')
    print('---')

# ... (Rest of your code)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 0.0849, Test Accuracy: 0.9602
Classification Report:
                precision    recall  f1-score   support

        Benign       1.00      0.99      0.99       160
       SYNScan       0.50      0.80      0.62         5
TCPConnectScan       0.70      0.88      0.78         8
       UDPScan       0.00      0.00      0.00         3

      accuracy                           0.96       176
     ma

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9602
Precision: 0.9551
Recall: 0.9602
F1-Score: 0.9562
Training Time: 0.14 seconds
Prediction Time: 0.12 seconds
Attack Type: 0
Precision: 1.0000
Recall: 0.9875
F1-Score: 0.9937
Accuracy: 0.9875
Prediction Time: 0.15 seconds
---


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Attack Type: 1
Precision: 1.0000
Recall: 0.8000
F1-Score: 0.8889
Accuracy: 0.8000
Prediction Time: 0.14 seconds
---


  _warn_prf(average, modifier, msg_start, len(result))


Attack Type: 2
Precision: 1.0000
Recall: 0.8750
F1-Score: 0.9333
Accuracy: 0.8750
Prediction Time: 0.52 seconds
---
Attack Type: 3
Precision: 0.0000
Recall: 0.0000
F1-Score: 0.0000
Accuracy: 0.0000
Prediction Time: 0.13 seconds
---


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
