In [1]:
# Colab-only setup: mount Google Drive at /content/drive so that the dataset
# CSVs read further down (under /content/drive/MyDrive/...) are accessible.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time

# Load the pre-split, preprocessed dataset. Both CSV paths are derived from
# `filename` and `data_dir`, so switching datasets means editing one line.
filename = "NF-BOT-IOT"
data_dir = '/content/drive/MyDrive/dataset_wireless'
train_data = pd.read_csv(f'{data_dir}/{filename}_train_preprocessed.csv', sep=',', encoding='utf-8')
test_data = pd.read_csv(f'{data_dir}/{filename}_test_preprocessed.csv', sep=',', encoding='utf-8')

# Separate the target column ('label') from the feature columns.
# `columns=` already identifies the axis, so no separate `axis` argument is needed.
X_train = train_data.drop(columns=['label'])
y_train = train_data['label']

X_test = test_data.drop(columns=['label'])
y_test = test_data['label']

# Keep a 1% subsample of each split so the optimizer (GNDOA) runs quickly.
# The fixed random_state makes the subsample reproducible.
# NOTE(review): the split is not stratified; if the labels are heavily
# imbalanced, a 1% draw may under-represent the minority class - consider
# passing stratify=y_train / stratify=y_test after confirming class counts.
X_t, _, y_t, _ = train_test_split(X_train, y_train, train_size=0.01, random_state=7)
X_test_t, _, y_test_t, _ = train_test_split(X_test, y_test, train_size=0.01, random_state=7)

# Define custom Generalized Normal Distribution Optimization Algorithm (GNDOA) for feature selection
class GNDOAFeatureSelection:
    """Wrapper-style feature selection driven by a GNDO-like population search.

    Every population member is a real-valued position vector with one entry
    per feature. A position is decoded into a feature subset by keeping the
    entries strictly above the vector's median, so roughly half of the
    features are selected. A subset's fitness is the hold-out accuracy of a
    random forest trained on those columns only.

    The position updates are modeled on the local-exploitation and
    global-exploration rules of Generalized Normal Distribution Optimization
    (Zhang, Jin & Mirjalili, 2020), adapted to maximize accuracy, followed by
    greedy replacement (a trial position is kept only if it scores higher).

    Parameters
    ----------
    num_iterations : int
        Number of generations. The first generation scores the initial
        population; each later generation scores one trial per member, so the
        total number of forests trained is ``num_iterations * num_samples``.
    num_samples : int
        Population size. Global-exploration moves need at least four members;
        smaller populations fall back to local moves only.
    validation_fraction : float
        Share of the rows passed to ``fit`` that is held out for scoring
        subsets (must lie strictly between 0 and 1).
    random_state : int or None
        Seed for the search, the hold-out split and the forests. ``None``
        draws fresh entropy, so results differ between runs.

    Attributes set by ``fit``
    -------------------------
    selected_features : numpy.ndarray of int
        Column positions of the best subset found.
    num_selected_features : int
        Length of ``selected_features``.
    best_accuracy : float
        Hold-out accuracy of the best subset.
    execution_time : float
        Wall-clock seconds spent inside ``fit``.
    """

    def __init__(self, num_iterations=10, num_samples=5,
                 validation_fraction=0.3, random_state=None):
        self.num_iterations = num_iterations
        self.num_samples = num_samples
        self.validation_fraction = validation_fraction
        self.random_state = random_state

    @staticmethod
    def _decode(position):
        """Return the column indices selected by a position vector.

        Entries strictly above the median are selected. When nothing exceeds
        the median (a single feature, or all entries tied) the highest-valued
        entry is returned instead, so the subset is never empty.
        """
        position = np.asarray(position, dtype=float)
        selected = np.flatnonzero(position > np.percentile(position, 50))
        if selected.size == 0:
            selected = np.array([np.argmax(position)], dtype=np.intp)
        return selected

    @staticmethod
    def _draw_seed(rng):
        """Draw an integer seed for scikit-learn from the search generator."""
        return int(rng.integers(0, 2**31 - 1))

    @staticmethod
    def _trial_position(rng, population, fitness, i, best_position):
        """Propose a new position for member ``i`` using a GNDO-style move.

        With probability 0.5 (and only if three other members exist) a
        global-exploration step is taken; otherwise a local-exploitation step
        around the member, the current best and the population mean is taken.
        Higher ``fitness`` is treated as better.
        """
        current = population[i]
        others = [j for j in range(len(population)) if j != i]

        if len(others) >= 3 and rng.random() > 0.5:
            # Global exploration: step along directions that point from the
            # worse of two members towards the better one.
            p1, p2, p3 = rng.choice(others, size=3, replace=False)
            if fitness[i] > fitness[p1]:
                v1 = current - population[p1]
            else:
                v1 = population[p1] - current
            if fitness[p2] > fitness[p3]:
                v2 = population[p2] - population[p3]
            else:
                v2 = population[p3] - population[p2]
            beta = rng.random()
            return (current
                    + beta * abs(rng.normal()) * v1
                    + (1.0 - beta) * abs(rng.normal()) * v2)

        # Local exploitation: sample around a generalized mean/std built from
        # the member, the best member and the population mean.
        mean_position = population.mean(axis=0)
        mu = (current + best_position + mean_position) / 3.0
        delta = np.sqrt(((current - mu) ** 2
                         + (best_position - mu) ** 2
                         + (mean_position - mu) ** 2) / 3.0)
        lam1 = 1.0 - rng.random()  # in (0, 1], keeps log() finite
        lam2 = rng.random()
        phase = 0.0 if rng.random() <= rng.random() else np.pi
        eta = np.sqrt(-np.log(lam1)) * np.cos(2.0 * np.pi * lam2 + phase)
        return mu + delta * eta

    def fit(self, X, y):
        """Search for a feature subset and store the best one found.

        Parameters
        ----------
        X : pandas.DataFrame or 2-D array-like
            Feature matrix with at least one column.
        y : array-like
            Class labels, one per row of ``X``.

        Returns
        -------
        GNDOAFeatureSelection
            ``self``, with the result attributes populated.

        Raises
        ------
        ValueError
            If ``num_iterations`` or ``num_samples`` is below 1, or ``X`` is
            not two-dimensional with at least one column.
        """
        start_time = time.time()  # Timer covers the whole search

        if self.num_iterations < 1 or self.num_samples < 1:
            raise ValueError("num_iterations and num_samples must both be at least 1")

        features = X.to_numpy() if isinstance(X, pd.DataFrame) else np.asarray(X)
        if features.ndim != 2 or features.shape[1] == 0:
            raise ValueError("X must be two-dimensional with at least one feature column")
        targets = np.asarray(y)
        num_features = features.shape[1]

        rng = np.random.default_rng(self.random_state)

        # Score subsets on rows the forest has not seen. Accuracy on the
        # training rows is close to 1.0 for almost any subset and therefore
        # cannot rank candidates.
        X_fit, X_val, y_fit, y_val = train_test_split(
            features, targets,
            test_size=self.validation_fraction,
            random_state=self._draw_seed(rng),
        )

        def evaluate(position):
            selected = self._decode(position)
            clf = RandomForestClassifier(random_state=self._draw_seed(rng))
            clf.fit(X_fit[:, selected], y_fit)
            accuracy = accuracy_score(y_val, clf.predict(X_val[:, selected]))
            return selected, accuracy

        # Generation 1: score the randomly initialized population.
        population = rng.normal(size=(self.num_samples, num_features))
        subsets = []
        fitness = np.empty(self.num_samples, dtype=float)
        for i, position in enumerate(population):
            subset, accuracy = evaluate(position)
            subsets.append(subset)
            fitness[i] = accuracy

        # Later generations: propose one trial per member and keep it only
        # when it improves on that member's current score.
        for _ in range(self.num_iterations - 1):
            best_position = population[int(np.argmax(fitness))].copy()
            for i in range(self.num_samples):
                trial = self._trial_position(rng, population, fitness, i, best_position)
                trial_subset, trial_accuracy = evaluate(trial)
                if trial_accuracy > fitness[i]:
                    population[i] = trial
                    subsets[i] = trial_subset
                    fitness[i] = trial_accuracy

        best_index = int(np.argmax(fitness))

        self.selected_features = subsets[best_index]
        self.best_accuracy = float(fitness[best_index])
        self.num_selected_features = len(self.selected_features)
        self.execution_time = time.time() - start_time
        return self

# Run the GNDOA feature search on the small training subsample (X_t, y_t)
gndoa = GNDOAFeatureSelection(num_iterations=10, num_samples=5)
gndoa.fit(X_t, y_t)

# Translate the winning column positions into column names of the training frame
selected_features_train = X_train.columns[gndoa.selected_features]

# One-row summary of the optimizer run, built as a single record
summary_record = {
    "Optimization": "GNDOA",
    "Execution Time of Optimizer": gndoa.execution_time,
    "No of Feature Selected": gndoa.num_selected_features,
    "Selected Feature": ', '.join(list(selected_features_train)),
}
optimization_results = pd.DataFrame([summary_record])

# Persist the summary next to the notebook (no index column)
optimization_results.to_csv('Generalised_optimization_results.csv', index=False)
