In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time

# Load your dataset
filename = "NF-BOT-IOT"
train_data = pd.read_csv('/content/drive/MyDrive/dataset/NF-BOT-IOT_train_preprocessed.csv', sep=',', encoding='utf-8')
test_data = pd.read_csv('/content/drive/MyDrive/dataset/NF-BOT-IOT_test_preprocessed.csv', sep=',', encoding='utf-8')

# Prepare training data
X_train = train_data.drop(columns=['label'], axis=1)
y_train = train_data['label']

# Prepare test data
X_test = test_data.drop(columns=['label'], axis=1)
y_test = test_data['label']

# Split a small portion of training data for optimization (MultiVerse Optimizer)
X_t, _, y_t, _ = train_test_split(X_train, y_train, train_size=0.01, random_state=7)
X_test_t, _, y_test_t, _ = train_test_split(X_test, y_test, train_size=0.01, random_state=7)

# Define custom MultiVerse Optimizer (MVO) algorithm for feature selection
class MultiVerseOptimizer:
    def __init__(self, num_universes=5, max_iter=10):
        self.num_universes = num_universes
        self.max_iter = max_iter

    def fit(self, X, y):
        start_time = time.time()  # Start the timer
        num_features = X.shape[1]
        universes = np.random.choice([True, False], size=(self.num_universes, num_features))  # Initialize universes with random selections
        best_universe = None
        best_accuracy = 0.0

        for iteration in range(self.max_iter):
            for universe in range(self.num_universes):
                # Evaluate each universe using a simple model (e.g., Random Forest)
                selected_features = universes[universe]
                clf = RandomForestClassifier()
                X_subset = X.iloc[:, selected_features]
                clf.fit(X_subset, y)
                accuracy = accuracy_score(y, clf.predict(X_subset))

                # Update best universe found
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_universe = selected_features

            # Apply evolution to universes
            self._evolve_universes(universes, best_universe, num_features)

        end_time = time.time()  # End the timer
        execution_time = end_time - start_time  # Calculate execution time

        self.selected_features = best_universe
        self.execution_time = execution_time
        self.num_selected_features = np.sum(best_universe)  # Number of selected features
        return self

    def _evolve_universes(self, universes, best_universe, num_features):
        # Apply some evolutionary strategies to the universes
        for universe in universes:
            # Crossover with the best universe
            crossover_point = np.random.randint(num_features)
            universe[:crossover_point] = best_universe[:crossover_point]

            # Mutation
            mutation_point = np.random.randint(num_features)
            universe[mutation_point] = not universe[mutation_point]

# Instantiate and run the MultiVerse Optimizer algorithm for feature selection on training data
mvo = MultiVerseOptimizer(num_universes=5, max_iter=10)
mvo.fit(X_t, y_t)

# Apply selected features to training data
selected_features_train = X_train.columns[mvo.selected_features]

# Print number of selected features and list them for the training data
num_selected_features_train = mvo.num_selected_features
print("Number of selected features (training data):", num_selected_features_train)
print("Selected Features (training data):", selected_features_train.tolist())

# Use selected features to filter columns in X_train
X_train_selected = X_train[selected_features_train]

# Output the dataframe with selected features for the training data
print("DataFrame with selected features for training data:")
print(X_train_selected.head())

# Apply selected features to test data
selected_features_test = X_test.columns[mvo.selected_features]

# Print number of selected features and list them for the test data
num_selected_features_test = num_selected_features_train  # Same as training data for test data
print("Number of selected features (test data):", num_selected_features_test)
print("Selected Features (test data):", selected_features_test.tolist())

# Use selected features to filter columns in X_test
X_test_selected = X_test[selected_features_test]

# Output the dataframe with selected features for test data
print("DataFrame with selected features for test data:")
print(X_test_selected.head())

# Create a DataFrame for optimization results
optimization_results = pd.DataFrame({
    "Optimization": ["MultiVerseOptimizer"],
    "Execution Time of Optimizer": [mvo.execution_time],
    "No of Feature Selected": [num_selected_features_train],
    "Selected Feature": [', '.join(selected_features_train)]
})

# Save optimization results to a CSV file
optimization_results.to_csv('NF-BOT-IOT_MultiVerseOptimizer_feature.csv', index=False)


Number of selected features (training data): 6
Selected Features (training data): ['L7_PROTO', 'IN_BYTES', 'OUT_BYTES', 'IN_PKTS', 'OUT_PKTS', 'FLOW_DURATION_MILLISECONDS']
DataFrame with selected features for training data:
   L7_PROTO  IN_BYTES  OUT_BYTES   IN_PKTS  OUT_PKTS  \
0 -0.252892 -0.018640  -0.007950 -0.022356  0.001374   
1  7.100761 -0.019469  -0.008504 -0.046650 -0.024643   
2 -0.252892 -0.018640  -0.007950 -0.022356  0.001374   
3 -0.252892 -0.019494  -0.008553 -0.046650 -0.029847   
4 -0.252892 -0.018640  -0.007950 -0.022356  0.001374   

   FLOW_DURATION_MILLISECONDS  
0                    0.495722  
1                    0.495841  
2                    0.495720  
3                   -2.083921  
4                    0.495721  
Number of selected features (test data): 6
Selected Features (test data): ['L7_PROTO', 'IN_BYTES', 'OUT_BYTES', 'IN_PKTS', 'OUT_PKTS', 'FLOW_DURATION_MILLISECONDS']
DataFrame with selected features for test data:
   L7_PROTO  IN_BYTES  OUT_BYTES 