In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time

# Load your dataset
filename = "NF-BOT-IOT"
train_data = pd.read_csv('/content/drive/MyDrive/dataset_wireless/NF-BOT-IOT_train_preprocessed.csv', sep=',', encoding='utf-8')
test_data = pd.read_csv('/content/drive/MyDrive/dataset_wireless/NF-BOT-IOT_test_preprocessed.csv', sep=',', encoding='utf-8')

X_train = train_data.drop(columns=['label'], axis=1)
y_train = train_data['label']
X_test = test_data.drop(columns=['label'], axis=1)
y_test = test_data['label']

# Define custom Moth Flame Optimization Algorithm (MFO) for feature selection
class MFOFeatureSelection:
    def __init__(self, num_moths=5, max_iter=5):
        self.num_moths = num_moths
        self.max_iter = max_iter

    def fit(self, X, y):
        start_time = time.time()  # Start timing the algorithm
        num_features = X.shape[1]
        best_feature_set = None
        best_accuracy = 0.0

        # Initialize random positions of moths
        positions = np.random.randint(0, 2, size=(self.num_moths, num_features), dtype=bool)

        for iteration in range(self.max_iter):
            # Evaluate the fitness of each moth's solution
            fitness_values = np.zeros(self.num_moths)
            for i in range(self.num_moths):
                selected_features = np.where(positions[i])[0]
                if len(selected_features) == 0:
                    continue  # Skip if no features are selected
                clf = RandomForestClassifier()
                X_subset = X.iloc[:, selected_features]
                clf.fit(X_subset, y)
                accuracy = accuracy_score(y, clf.predict(X_subset))
                fitness_values[i] = accuracy

                # Update best solution found
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_feature_set = selected_features

            # Moths move towards the flame (exploitation)
            best_moth_index = np.argmax(fitness_values)
            for i in range(self.num_moths):
                if i != best_moth_index:
                    positions[i] ^= positions[best_moth_index] & np.random.randint(0, 2, size=num_features, dtype=bool)

            # Moths randomly move in search space (exploration)
            for i in range(self.num_moths):
                positions[i] ^= np.random.randint(0, 2, size=num_features, dtype=bool)

        end_time = time.time()  # End timing the algorithm
        self.exec_time = end_time - start_time  # Calculate execution time
        self.selected_features = best_feature_set
        return self

# Instantiate and run the MFO algorithm for feature selection
mfo = MFOFeatureSelection()
mfo.fit(X_train, y_train)

# Apply selected features to training data
selected_feature_indices = mfo.selected_features
selected_features = X_train.columns[selected_feature_indices]
num_selected_features = len(selected_features)

# Save selected features to a CSV file
feature_name = filename + "_MFO_features.csv"
optimizer_name = "MFO"
execution_time = mfo.exec_time

# Write CSV file with optimizer name, execution time, number of selected features, and their names
with open(feature_name, 'w') as file:
    file.write(f"optimizer,execution time,num of selected features,selected features\n")
    file.write(f"{optimizer_name},{execution_time},{num_selected_features},\"")
    file.write(",".join(selected_features))
    file.write("\"\n")

# Print number of selected features and their names
print("Number of selected features:", num_selected_features)
print("Selected features:", selected_features.tolist())

# Filter both training and testing data with selected features
X_train_selected = X_train[selected_features]
X_train_selected
X_test_selected = X_test[selected_features]
X_test_selected


Number of selected features: 7
Selected features: ['L4_SRC_PORT', 'L4_DST_PORT', 'PROTOCOL', 'L7_PROTO', 'IN_BYTES', 'OUT_BYTES', 'FLOW_DURATION_MILLISECONDS']


Unnamed: 0,L4_SRC_PORT,L4_DST_PORT,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,FLOW_DURATION_MILLISECONDS
0,-1.114900,-0.559883,-0.23334,-0.036608,-0.019444,-0.008553,-2.083921
1,1.108007,0.110072,-0.23334,-0.252892,-0.019461,-0.008504,0.495838
2,-0.772350,-0.476342,-0.23334,-0.252892,-0.019461,-0.008504,0.495840
3,1.185416,-0.559883,-0.23334,-0.036608,-0.018068,-0.006928,0.475220
4,0.006402,1.059842,-0.23334,-0.252892,-0.019461,-0.008504,0.495841
...,...,...,...,...,...,...,...
178608,1.493495,-0.421145,-0.23334,-0.252892,-0.019461,-0.008553,-2.083921
178609,-0.512046,0.029649,-0.23334,-0.252892,-0.019461,-0.008504,0.495841
178610,1.023946,-0.352909,-0.23334,-0.252892,-0.019461,-0.008553,-2.083921
178611,0.083552,-0.147423,-0.23334,2.497003,-0.019461,-0.008504,0.495839
