In [10]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel, VarianceThreshold
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, LSTM, Dense, Input, Dropout
from keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.neural_network import MLPClassifier
from imblearn.pipeline import Pipeline as ImbPipeline
from tensorflow.keras.models import save_model
import joblib
import json
import os

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:

# Load datasets
df = pd.read_csv('/content/drive/My Drive/UNSW_NB15_training-set.csv')



In [5]:
# Preprocess the data
def preprocess_data(df):
    # Separate features and targets
    X = df.drop(['attack_cat', 'label'], axis=1)
    y_binary = df['label']
    y_multiclass = df['attack_cat']

    # Handle categorical variables
    categorical_cols = ['proto', 'service', 'state']
    X = pd.get_dummies(X, columns=categorical_cols)

    # Label encode the targets
    le_binary = LabelEncoder()
    le_multiclass = LabelEncoder()
    y_binary = le_binary.fit_transform(y_binary)
    y_multiclass = le_multiclass.fit_transform(y_multiclass)

    # Remove constant features
    variance_selector = VarianceThreshold()
    X_var = variance_selector.fit_transform(X)
    X_var = pd.DataFrame(X_var, columns=X.columns[variance_selector.get_support()])

    # Standardize the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_var)
    X_scaled = pd.DataFrame(X_scaled, columns=X_var.columns)

    return X_scaled, y_binary, y_multiclass, le_binary, le_multiclass, scaler, variance_selector

X_scaled, y_binary, y_multiclass, le_binary, le_multiclass, scaler, variance_selector = preprocess_data(df)

In [6]:
# Create base models
def create_base_models():
    return [
        RandomForestClassifier(n_estimators=100, random_state=42),
        XGBClassifier(n_estimators=100, random_state=42),
        ExtraTreesClassifier(n_estimators=100, random_state=42)
    ]



In [7]:
# Create meta-learner
def create_meta_learner(n_classes, n_features):
    model = Sequential([
        Input(shape=(n_features,)),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dense(n_classes, activation='softmax')
    ])
    return model

In [8]:
# Run experiment
def run_experiment(X, y, n_classes, seed):
    np.random.seed(seed)
    tf.random.set_seed(seed)

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    results = []

    for fold, (train_index, val_index) in enumerate(skf.split(X, y), 1):
        print(f"Fold {fold}")
        X_train, X_val = X.iloc[train_index], X.iloc[val_index]
        y_train, y_val = y[train_index], y[val_index]

        base_models = create_base_models()
        base_predictions = []
        for model in base_models:
            model.fit(X_train, y_train)
            base_predictions.append(model.predict_proba(X_val))

        meta_features = np.hstack(base_predictions)

        meta_learner = create_meta_learner(n_classes, meta_features.shape[1])
        meta_learner.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        meta_learner.fit(meta_features, y_val, epochs=50, batch_size=32, verbose=0)

        final_predictions = meta_learner.predict(meta_features)
        final_predictions = np.argmax(final_predictions, axis=1)

        accuracy = accuracy_score(y_val, final_predictions)
        precision = precision_score(y_val, final_predictions, average='weighted')
        recall = recall_score(y_val, final_predictions, average='weighted')
        f1 = f1_score(y_val, final_predictions, average='weighted')

        results.append((accuracy, precision, recall, f1))

    return np.mean(results, axis=0)

In [9]:
# Run experiments for binary and multiclass classification
print("Binary Classification")
binary_results = run_experiment(X_scaled, y_binary, 2, 42)
print("Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-score: {:.4f}".format(*binary_results))

print("\nMulticlass Classification")
multiclass_results = run_experiment(X_scaled, y_multiclass, 10, 42)  # 10 classes including 'Normal'
print("Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-score: {:.4f}".format(*multiclass_results))

Binary Classification
Fold 1
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Fold 2
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Fold 3
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Fold 4
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Fold 5
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Accuracy: 0.9948, Precision: 0.9948, Recall: 0.9948, F1-score: 0.9948

Multiclass Classification
Fold 1
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Fold 2
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Fold 3
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Fold 4
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Fold 5
[1m1096/1096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Accuracy: 0.8974, Precision: 0.8976, Reca

In [11]:

def save_final_model(X, y, n_classes, seed, save_path):
    os.makedirs(save_path, exist_ok=True)

    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Train final base models
    base_models = create_base_models()
    base_predictions = []
    for i, model in enumerate(base_models):
        model.fit(X, y)
        base_predictions.append(model.predict_proba(X))
        joblib.dump(model, f"{save_path}/base_model_{i}.joblib")

    # Train final meta-learner
    meta_features = np.hstack(base_predictions)
    meta_learner = create_meta_learner(n_classes, meta_features.shape[1])
    meta_learner.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    meta_learner.fit(meta_features, y, epochs=50, batch_size=32, verbose=0)
    save_model(meta_learner, f"{save_path}/meta_learner.h5")

    print(f"Final model components saved to {save_path}")

# After running the experiments, save the final models
print("Saving final models...")

# Save binary classification model
save_final_model(X_scaled, y_binary, 2, 42, "/content/saved_model_binary")

# Save multiclass classification model
save_final_model(X_scaled, y_multiclass, 10, 42, "/content/saved_model_multiclass")

# Save preprocessors and other components
preprocessors_path = "/content/saved_preprocessors"
os.makedirs(preprocessors_path, exist_ok=True)

joblib.dump(le_binary, f"{preprocessors_path}/le_binary.joblib")
joblib.dump(le_multiclass, f"{preprocessors_path}/le_multiclass.joblib")
joblib.dump(scaler, f"{preprocessors_path}/scaler.joblib")

# Assuming you have a feature_selector and variance_selector
if 'feature_selector' in globals():
    joblib.dump(feature_selector, f"{preprocessors_path}/feature_selector.joblib")
if 'variance_selector' in globals():
    joblib.dump(variance_selector, f"{preprocessors_path}/variance_selector.joblib")

# Save selected features
with open(f"{preprocessors_path}/selected_features.json", 'w') as f:
    json.dump(X_scaled.columns.tolist(), f)

print("All models and preprocessors have been saved.")

# If you're using Google Colab and want to download the saved models:
from google.colab import files
!zip -r saved_models.zip /content/saved_model_binary /content/saved_model_multiclass /content/saved_preprocessors
files.download('saved_models.zip')

Saving final models...




Final model components saved to /content/saved_model_binary




Final model components saved to /content/saved_model_multiclass
All models and preprocessors have been saved.
  adding: content/saved_model_binary/ (stored 0%)
  adding: content/saved_model_binary/meta_learner.h5 (deflated 19%)
  adding: content/saved_model_binary/base_model_1.joblib (deflated 68%)
  adding: content/saved_model_binary/base_model_0.joblib (deflated 82%)
  adding: content/saved_model_binary/base_model_2.joblib (deflated 83%)
  adding: content/saved_model_multiclass/ (stored 0%)
  adding: content/saved_model_multiclass/meta_learner.h5 (deflated 11%)
  adding: content/saved_model_multiclass/base_model_1.joblib (deflated 67%)
  adding: content/saved_model_multiclass/base_model_0.joblib (deflated 89%)
  adding: content/saved_model_multiclass/base_model_2.joblib (deflated 90%)
  adding: content/saved_preprocessors/ (stored 0%)
  adding: content/saved_preprocessors/le_multiclass.joblib (deflated 29%)
  adding: content/saved_preprocessors/selected_features.json (deflated 72%)
 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>