In [1]:
from google.colab import drive
drive.mount('/content/drive')



Mounted at /content/drive


In [17]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

class MRFOFeatureSelection:
    def __init__(self, num_mantas=5, max_iter=2, num_carry_outs=2, num_exploration=3):
        self.num_mantas = num_mantas
        self.max_iter = max_iter
        self.num_carry_outs = num_carry_outs
        self.num_exploration = num_exploration

    def _explore_features(self, selected_features, num_features):
        num_selected_features = len(selected_features)
        change_indices = np.random.choice(num_selected_features, min(self.num_exploration, num_selected_features), replace=False)
        new_features = selected_features.copy()
        new_features[change_indices] = ~new_features[change_indices]
        return new_features

    def fit(self, X, y):
        num_features = X.shape[1]
        best_feature_set = None
        best_accuracy = 0.0

        for _ in range(self.max_iter):
            manta_solutions = []

            for _ in range(self.num_mantas):
                # Generate a random feature subset
                feature_mask = np.random.randint(2, size=num_features, dtype=bool)
                selected_features = np.where(feature_mask)[0]

                # Evaluate subset using a simple model (e.g., Random Forest)
                clf = RandomForestClassifier()
                X_subset = X.iloc[:, selected_features]
                clf.fit(X_subset, y)
                accuracy = accuracy_score(y, clf.predict(X_subset))

                manta_solutions.append((selected_features, accuracy))

                # Update best solution found
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_feature_set = selected_features

            # Carry-out and exploration behavior
            for _ in range(self.num_carry_outs):
                selected_manta = np.random.choice(self.num_mantas)
                selected_features = manta_solutions[selected_manta][0]
                new_features = self._explore_features(selected_features, num_features)
                X_subset = X.iloc[:, new_features]
                clf.fit(X_subset, y)
                accuracy = accuracy_score(y, clf.predict(X_subset))

                if accuracy > manta_solutions[selected_manta][1]:
                    manta_solutions[selected_manta] = (new_features, accuracy)

        self.selected_features = best_feature_set
        return self

# Load your dataset
filename = "NF-BOT-IOT"
train_data = pd.read_csv('/content/drive/MyDrive/Wireless dataset/NF-BOT-IOT_train_preprocessed.csv', sep=',', encoding='utf-8')

X_train = train_data.drop(columns=['label'], axis=1)
y_train = train_data['label']

# Instantiate and run the MRFO algorithm for feature selection
mrfo = MRFOFeatureSelection(num_mantas=5, max_iter=2, num_carry_outs=2, num_exploration=3)
mrfo.fit(X_train, y_train)

selected_feature_indices = mrfo.selected_features
selected_features = X_train.columns[selected_feature_indices]

# Print number of selected features and list them
num_selected_features = len(selected_features)
print("Number of selected features:", num_selected_features)
print("Selected Features:", selected_features.tolist())  # Convert Index to list for easier viewing


Number of selected features: 3
Selected Features: ['L4_SRC_PORT', 'L4_DST_PORT', 'IN_BYTES']
