In [1]:
# Mount Google Drive at /content/drive so files under /content/drive/MyDrive
# can be read with ordinary file paths. `google.colab` is only importable
# inside a Colab runtime, and mounting may prompt for authorization.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time

# Load the preprocessed train/test splits of the dataset from Google Drive
filename = "NF-BOT-IOT"
data_dir = '/content/drive/MyDrive/CS548 Wireless Project'
train_data = pd.read_csv(f'{data_dir}/{filename}_train_preprocessed.csv', sep=',', encoding='utf-8')
# The test split used to be read from the *train* CSV, which made X_test/y_test
# an exact copy of the training data (any evaluation on it would leak).
# NOTE(review): assumes the test split is stored next to the train split under
# the same naming scheme ("..._test_preprocessed.csv") — confirm the file name.
test_data = pd.read_csv(f'{data_dir}/{filename}_test_preprocessed.csv', sep=',', encoding='utf-8')

# Split each frame into the feature matrix and the 'label' target column
X_train = train_data.drop(columns=['label'])
y_train = train_data['label']
X_test = test_data.drop(columns=['label'])
y_test = test_data['label']

class SlimeMouldFeatureSelection:
    """Skeleton of a slime-mould-style wrapper feature selector.

    Each iteration draws ``num_slime`` random boolean feature masks, scores
    every non-empty mask, and remembers the best-scoring mask seen so far.

    NOTE: the fitness is still a random placeholder (no model is trained on
    the candidate subset) and the diffusion step is not implemented, so
    ``diffusion_rate`` is stored but never used and the returned subset is
    effectively a random one.

    Parameters
    ----------
    num_slime : int
        Number of candidate feature masks drawn per iteration.
    max_iter : int
        Number of iterations.
    diffusion_rate : float
        Reserved for the (not yet implemented) diffusion step.
    random_state : int or None
        Seed for a private ``np.random.RandomState``. When ``None`` (default)
        the global NumPy random state is used, as before.

    Attributes (set by ``fit``)
    ---------------------------
    selected_features : np.ndarray
        Integer column positions of the best mask; empty if no candidate was
        scored (never ``None``).
    best_accuracy : float
        Fitness value of the best mask (``0.0`` if none was scored).
    exec_time : float
        Duration of ``fit`` in seconds.
    """

    def __init__(self, num_slime=5, max_iter=10, diffusion_rate=0.1, random_state=None):
        self.num_slime = num_slime
        self.max_iter = max_iter
        self.diffusion_rate = diffusion_rate
        self.random_state = random_state

    def fit(self, X, y):
        """Run the search over the columns of ``X`` and return ``self``.

        Only ``X.shape[1]`` is read at the moment; ``y`` is accepted for the
        future model-based fitness but is unused by the placeholder.
        """
        start_time = time.perf_counter()  # monotonic clock for the duration
        num_features = X.shape[1]

        # The np.random module and RandomState expose the same randint/rand
        # API, so the unseeded default keeps drawing from the global stream.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Start from an empty selection so callers can always index with the
        # result, even when no candidate is ever scored.
        best_feature_set = np.empty(0, dtype=np.intp)
        best_accuracy = 0.0

        for _ in range(self.max_iter):
            # Initialize slime positions: one random boolean mask per slime
            slime_positions = rng.randint(0, 2, size=(self.num_slime, num_features), dtype=bool)

            for mask in slime_positions:
                selected_features = np.flatnonzero(mask)
                if selected_features.size == 0:
                    continue  # an empty subset cannot be evaluated

                # Placeholder fitness. Replace with a real evaluation, e.g.
                #   clf = RandomForestClassifier()
                #   X_subset = X.iloc[:, selected_features]
                #   clf.fit(X_subset, y)
                #   accuracy = accuracy_score(y, clf.predict(X_subset))
                accuracy = rng.rand()

                # Update best solution found
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_feature_set = selected_features

            # Slime mould diffusion would go here (not implemented).

        self.exec_time = time.perf_counter() - start_time
        self.best_accuracy = best_accuracy
        self.selected_features = best_feature_set
        return self

# Instantiate and run the Slime Mould Algorithm for feature selection.
# The selector draws from NumPy's global random state, so seed it first to make
# the selected features (and the CSV written below) reproducible across runs.
np.random.seed(42)
slime_mould = SlimeMouldFeatureSelection(num_slime=5, max_iter=10, diffusion_rate=0.1)
slime_mould.fit(X_train, y_train)

# Map the selected column positions back to column names
selected_feature_indices = slime_mould.selected_features
if selected_feature_indices is None or len(selected_feature_indices) == 0:
    # Fail with a clear message instead of a confusing indexing error below
    raise ValueError("Slime mould search did not select any features")
selected_features = X_train.columns[selected_feature_indices]
num_selected_features = len(selected_features)

# Metadata recorded alongside the selected features
feature_name = filename + "_SlimeMould_features.csv"
optimizer_name = "SlimeMould"
execution_time = slime_mould.exec_time

# Write CSV file with optimizer name, execution time, number of selected
# features, and their names (the names share one quoted, comma-joined field)
joined_feature_names = ",".join(selected_features)
with open(feature_name, 'w', encoding='utf-8') as file:
    file.write("optimizer,execution time,num of selected features,selected features\n")
    file.write(f"{optimizer_name},{execution_time},{num_selected_features},\"{joined_feature_names}\"\n")

# Print number of selected features and their names
print("Number of selected features:", num_selected_features)
print("Selected features:", selected_features.tolist())

# Filter both training and testing data with selected features
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

Number of selected features: 5
Selected features: ['L4_SRC_PORT', 'L4_DST_PORT', 'L7_PROTO', 'IN_PKTS', 'FLOW_DURATION_MILLISECONDS']
