In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix
import warnings

# Install a process-wide filter that ignores every warning of category
# UserWarning (from any module), presumably to keep the training log readable.
# NOTE(review): this also hides UserWarnings that may point at real problems.
warnings.filterwarnings("ignore", category=UserWarning)

def run_deep_learning_classifier():
    """
    Train and evaluate an MLP attack-category classifier on UNSW-NB15.

    Pipeline:
      1. Read 'UNSW_NB15_training-set.csv' and 'UNSW_NB15_testing-set.csv'
         from the current working directory.
      2. Drop the 'id' and 'label' columns (if present) and use 'attack_cat'
         as the multi-class target.
      3. Standard-scale numeric columns and one-hot encode object columns.
         The preprocessor is fitted on the training rows only, so no
         statistics from the test rows leak into the features.
      4. Build and train a 128-64-softmax dense network for 20 epochs.
      5. Evaluate on a random sample of the test rows (at most 50000) and
         print accuracy, loss, a classification report and a confusion matrix.

    Returns:
        None. All results are printed to stdout.

    Raises:
        Nothing. A missing CSV and any other exception are caught and
        reported on stdout instead of propagating.
    """
    try:
        # --- 1. Load Data ---
        print("Loading UNSW-NB15 training and testing data... 📂")
        train_df = pd.read_csv('UNSW_NB15_training-set.csv')
        test_df = pd.read_csv('UNSW_NB15_testing-set.csv')

        # Concatenate only so that column cleanup and dtype detection are
        # applied identically to both splits; they are separated again below.
        full_df = pd.concat([train_df, test_df], ignore_index=True)
        full_df.columns = full_df.columns.str.strip()
        full_df = full_df.drop(['id', 'label'], axis=1, errors='ignore')

        # --- 2. Data Preparation ---
        print("Preparing data for the model... 📊")
        X = full_df.drop('attack_cat', axis=1)
        y = full_df['attack_cat']

        numeric_features = X.select_dtypes(include=np.number).columns.tolist()
        categorical_features = X.select_dtypes(include=['object']).columns.tolist()

        # Rows [0, train_len) come from the training CSV, the rest from the
        # testing CSV (concat preserved the order).
        train_len = len(train_df)
        X_train_raw = X.iloc[:train_len]
        X_test_raw = X.iloc[train_len:]

        # --- 3. Preprocessing ---
        print("Preprocessing features and labels... ⚙️")
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', StandardScaler(), numeric_features),
                ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
            ], remainder='passthrough')

        # Fit on training rows only; the test rows are merely transformed.
        # Categories seen only in the test set encode as all-zero vectors
        # thanks to handle_unknown='ignore'.
        X_train = preprocessor.fit_transform(X_train_raw)
        X_test = preprocessor.transform(X_test_raw)

        # The label encoder is fitted on all labels so that both splits share
        # one class-index mapping; labels are targets, not features.
        label_encoder = LabelEncoder()
        y_encoded = label_encoder.fit_transform(y)
        num_classes = len(label_encoder.classes_)
        y_categorical = to_categorical(y_encoded, num_classes=num_classes)
        y_train, y_test = y_categorical[:train_len], y_categorical[train_len:]

        # --- 4. Build the Neural Network Model ---
        print("Building the neural network model... 🧠")
        model = Sequential([
            # Explicit Input layer instead of the deprecated `input_shape=`
            # argument on the first Dense layer.
            tf.keras.Input(shape=(X_train.shape[1],)),
            Dense(128, activation='relu'),
            Dropout(0.3),
            Dense(64, activation='relu'),
            Dropout(0.3),
            Dense(num_classes, activation='softmax')
        ])

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        model.summary()

        # --- 5. Train the Model ---
        print("\nTraining the model... (This will utilize the GPU if available) 🚀")
        # NOTE: the per-epoch validation metrics are computed on the full test
        # set, so they are not independent of the final evaluation below.
        model.fit(
            X_train, y_train,
            epochs=20,
            batch_size=64,
            validation_data=(X_test, y_test),
            verbose=1
        )

        # --- 6. Create Test Set Sample ---
        EVALUATION_SAMPLE_SIZE = 50000
        num_test_samples = X_test.shape[0]
        # Sampling without replacement cannot draw more rows than exist, so
        # cap the request at the test-set size.
        sample_size = min(EVALUATION_SAMPLE_SIZE, num_test_samples)
        print(f"\nCreating a random sample of {sample_size} cases from the test set for final evaluation...")

        sample_indices = np.random.choice(num_test_samples, sample_size, replace=False)

        X_test_sample = X_test[sample_indices]
        y_test_sample = y_test[sample_indices]

        # --- 7. Evaluation on the Sample ---
        print(f"Evaluating the final model on the {sample_size}-case sample... 📈")
        loss, accuracy = model.evaluate(X_test_sample, y_test_sample, verbose=0)
        print(f"Sample Test Accuracy: {accuracy:.4f}")
        print(f"Sample Test Loss: {loss:.4f}")

        # Turn softmax probabilities and one-hot targets into class indices.
        y_pred_probs = model.predict(X_test_sample)
        y_pred_encoded = np.argmax(y_pred_probs, axis=1)
        y_test_encoded = np.argmax(y_test_sample, axis=1)

        # Decode the indices back to the original string labels for the report.
        y_pred_labels = label_encoder.inverse_transform(y_pred_encoded)
        y_test_labels = label_encoder.inverse_transform(y_test_encoded)

        print("\n" + "="*50)
        print("            Deep Learning (MLP) Results")
        print("="*50)

        print("\n--- Classification Report ---")
        report = classification_report(y_test_labels, y_pred_labels, zero_division=0)
        print(report)

        print("\n--- Confusion Matrix ---")
        # Pass `labels=` so rows/columns follow the encoder's class order even
        # when a class is absent from the sample.
        cm = confusion_matrix(y_test_labels, y_pred_labels, labels=label_encoder.classes_)
        cm_df = pd.DataFrame(cm, index=label_encoder.classes_, columns=label_encoder.classes_)
        print(cm_df)

    except FileNotFoundError:
        print("\nERROR: Make sure 'UNSW_NB15_training-set.csv' and 'UNSW_NB15_testing-set.csv' are present.")
    except Exception as e:
        # Top-level boundary of the script: report the failure (with its type,
        # so data errors are not mistaken for installation problems).
        print(f"\nAn unexpected error occurred: {type(e).__name__}: {e}")
        print("Please ensure TensorFlow is installed correctly.")

# =================================
# Main Execution Block
# =================================
if __name__ == "__main__":
    # Ask TensorFlow which GPU devices it can see and report the result
    # before starting the pipeline.
    gpus = tf.config.list_physical_devices('GPU')
    device_message = (
        f"GPU(s) detected: {len(gpus)}. TensorFlow will use the GPU for training."
        if gpus
        else "No GPU detected. TensorFlow will use the CPU."
    )
    print(device_message)

    # Run the full load / train / evaluate pipeline.
    run_deep_learning_classifier()

GPU(s) detected: 1. TensorFlow will use the GPU for training.
Loading UNSW-NB15 training and testing data... 📂
Preparing data for the model... 📊
Preprocessing features and labels... ⚙️
Building the neural network model... 🧠



Training the model... (This will utilize the GPU if available) 🚀
Epoch 1/20
2740/2740 ━━━━━━━━━━━━━━━━━━━━ 17s 5ms/step - accuracy: 0.7140 - loss: 0.7946 - val_accuracy: 0.6950 - val_loss: 0.6454
Epoch 2/20
2740/2740 ━━━━━━━━━━━━━━━━━━━━ 13s 5ms/step - accuracy: 0.7783 - loss: 0.5721 - val_accuracy: 0.7138 - val_loss: 0.5812
Epoch 3/20
2740/2740 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - accuracy: 0.7878 - loss: 0.5445 - val_accuracy: 0.7334 - val_loss: 0.5674
Epoch 4/20
2740/2740 ━━━━━━━━━━━━━━━━━━━━ 13s 5ms/step - accuracy: 0.7910 - loss: 0.5307 - val_accuracy: 0.7389 - val_loss: 0.5798
Epoch 5/20
2740/2740 ━━━━━━━━━━━━━━━━━━━━ 12s 5ms/step - accuracy: 0.7924 - loss: 0.5224 - val_accuracy: 0.7235 - val_loss: 0.5693
Epoch 6/20
[1m2740/2740[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 5ms/step - accuracy: 0.7932 -