In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from google.colab import drive

# ✅ Mount Google Drive
drive.mount('/content/drive')

# ✅ File paths
file_paths = [
    "/content/drive/MyDrive/Datasets/NSLKDD.csv",
    "/content/drive/MyDrive/Datasets/UNSW_NB15_merged.csv",
    "/content/drive/MyDrive/Datasets/kddcup.csv",
    "/content/drive/MyDrive/Datasets/CICIDS2017.csv"
]
dataset_names = ["NSLKDD", "UNSW_NB15", "KDDCup", "CICIDS2017"]

# ✅ Correct target column names
target_columns = {
    "NSLKDD": "anomaly",
    "UNSW_NB15": "label",
    "KDDCup": "Label",  # Verify correct column name
    "CICIDS2017": " Label"
}

print("\n==== Training CNN Models for Each Dataset ====\n")

for file, name in zip(file_paths, dataset_names):
    print(f"\n🔹 Training on {name} dataset...\n")

    try:
        df = pd.read_csv(file, low_memory=False).dropna(axis=1, how='all')
    except FileNotFoundError:
        print(f"❌ Error: {file} not found. Skipping {name}.")
        continue

    # ✅ Limit dataset to 100,000 rows
    row_limit = 100000
    if len(df) > row_limit:
        df = df.sample(n=row_limit, random_state=42)
        print(f"✅ {name} dataset limited to {row_limit} rows.")

    target_column = target_columns.get(name)
    if target_column not in df.columns:
        print(f"⚠ Skipping {name}, target column '{target_column}' not found!")
        continue

    X = df.drop(columns=[target_column])
    y = df[target_column]

    # ✅ Encode categorical features
    for col in X.select_dtypes(include=['object']).columns:
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))

    # ✅ Convert X to float32 & handle NaN/Inf
    X = X.astype(np.float32)
    X.replace([np.inf, -np.inf], np.nan, inplace=True)
    X.fillna(X.mean(), inplace=True)

    # ✅ Apply Standard Scaling
    X = StandardScaler().fit_transform(X)
    y = LabelEncoder().fit_transform(y)

    split = int(0.7 * len(X))
    X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]

    # ✅ Reshape for CNN (Adding 1 channel)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # ✅ Define CNN Model
    model = Sequential([
        Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
        MaxPooling1D(pool_size=2),
        Conv1D(128, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # ✅ Train CNN Model
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

    # ✅ Evaluate Model
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"🎯 CNN Accuracy for {name}: {accuracy:.4f}")
    print(classification_report(y_test, y_pred))


Mounted at /content/drive

==== Training CNN Models for Each Dataset ====


🔹 Training on NSLKDD dataset...

✅ NSLKDD dataset limited to 100000 rows.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 10ms/step - accuracy: 0.9298 - loss: 0.1756
Epoch 2/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9695 - loss: 0.0807
Epoch 3/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 10ms/step - accuracy: 0.9762 - loss: 0.0660
Epoch 4/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 9ms/step - accuracy: 0.9787 - loss: 0.0566
Epoch 5/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 10ms/step - accuracy: 0.9792 - loss: 0.0549
Epoch 6/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 9ms/step - accuracy: 0.9815 - loss: 0.0508
Epoch 7/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.9828 - loss: 0.0497
Epoch 8/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 9ms/step - accuracy: 0.9826 - loss: 0.0459
Epoch 9/10
[

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 10ms/step - accuracy: 0.9013 - loss: 0.2237
Epoch 2/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9399 - loss: 0.1346
Epoch 3/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 9ms/step - accuracy: 0.9504 - loss: 0.1151
Epoch 4/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 9ms/step - accuracy: 0.9601 - loss: 0.0977
Epoch 5/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 10ms/step - accuracy: 0.9603 - loss: 0.0931
Epoch 6/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9635 - loss: 0.0895
Epoch 7/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 10ms/step - accuracy: 0.9641 - loss: 0.0844
Epoch 8/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9652 - loss: 0.0849
Epoch 9/10
[1m2188/2188

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 9ms/step - accuracy: 6.1964e-07 - loss: -2996931584.0000
Epoch 2/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 2.9520e-06 - loss: -153115262976.0000
Epoch 3/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 9ms/step - accuracy: 1.4755e-06 - loss: -949813116928.0000
Epoch 4/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 9ms/step - accuracy: 3.7009e-06 - loss: -2968876482560.0000
Epoch 5/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 9ms/step - accuracy: 1.2386e-05 - loss: -6823767703552.0000
Epoch 6/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 9ms/step - accuracy: 1.6980e-05 - loss: -13302665052160.0000
Epoch 7/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 1.3449e-05 - loss: -23135480971264.0000
Epoch 8/10
[1m2188/2188[0m [32m

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


✅ CICIDS2017 dataset limited to 100000 rows.
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 14ms/step - accuracy: 0.9974 - loss: 0.0070
Epoch 2/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 14ms/step - accuracy: 1.0000 - loss: 2.4812e-08
Epoch 3/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 14ms/step - accuracy: 1.0000 - loss: 6.6633e-09
Epoch 4/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 16ms/step - accuracy: 1.0000 - loss: 5.2692e-09
Epoch 5/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 14ms/step - accuracy: 1.0000 - loss: 2.0006e-09
Epoch 6/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 15ms/step - accuracy: 1.0000 - loss: 4.6004e-10
Epoch 7/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 14ms/step - accuracy: 1.0000 - loss: 3.3216e-10
Epoch 8/10
[1m2188/2188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 14ms/step - accuracy: 1.0000 - loss: 9.