In [6]:
pip install pygmo

Collecting pygmo
  Downloading pygmo-2.19.5-cp310-cp310-manylinux_2_28_x86_64.whl (14.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.7/14.7 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pygmo
Successfully installed pygmo-2.19.5


In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load your dataset
filename = "NF-BOT-IOT"
train_data = pd.read_csv('/content/sample_data/NF-BOT-IOT_train_preprocessed.csv', sep=',', encoding='utf-8')

X_train = train_data.drop(columns=['label'], axis=1)
y_train = train_data['label']

# Split a small portion of training data for optimization (ACO)
X_t, _, y_t, _ = train_test_split(X_train, y_train, train_size=0.01, random_state=7)

# Define custom Ant Colony Optimization (ACO) algorithm for feature selection
class ACOFeatureSelection:
    def __init__(self, num_ants=5, max_iter=10):
        self.num_ants = num_ants
        self.max_iter = max_iter

    def fit(self, X, y):
        num_features = X.shape[1]
        pheromone = np.ones(num_features)  # Initialize pheromone levels
        best_feature_set = None
        best_accuracy = 0.0

        for iteration in range(self.max_iter):
            ant_solutions = []

            for ant in range(self.num_ants):
                # Generate a random feature subset based on pheromone levels
                feature_mask = (np.random.rand(num_features) < pheromone)
                selected_features = np.where(feature_mask)[0]

                # Evaluate subset using a simple model (e.g., Random Forest)
                clf = RandomForestClassifier()
                X_subset = X.iloc[:, selected_features]
                clf.fit(X_subset, y)
                accuracy = accuracy_score(y, clf.predict(X_subset))

                ant_solutions.append((selected_features, accuracy))

                # Update best solution found
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_feature_set = selected_features

            # Update pheromone levels based on ant solutions
            pheromone *= 0.5  # Decay pheromone levels
            for selected_features, accuracy in ant_solutions:
                pheromone[selected_features] += accuracy / num_features

        self.selected_features = best_feature_set
        return self

# Instantiate and run the ACO algorithm for feature selection
aco = ACOFeatureSelection(num_ants=5, max_iter=10)
aco.fit(X_t, y_t)

# Apply selected features to training data
selected_features = X_train.columns[aco.selected_features]

# Save selected features to a CSV file
feature_name = filename + "_ACO_feature.csv"
pd.Series(selected_features).to_csv(feature_name, index=False, header=True)

# Print number of selected features and list them
num_selected_features = len(selected_features)
print("Number of selected features:", num_selected_features)
print("Selected Features:", selected_features.tolist())  # Convert Index to list for easier viewing

# Use selected features to filter columns in X_train
X_train_selected = X_train[selected_features]

# Output the dataframe with selected features
X_train_selected.head()


Number of selected features: 10
Selected Features: ['L4_SRC_PORT', 'L4_DST_PORT', 'PROTOCOL', 'L7_PROTO', 'IN_BYTES', 'OUT_BYTES', 'IN_PKTS', 'OUT_PKTS', 'TCP_FLAGS', 'FLOW_DURATION_MILLISECONDS']


Unnamed: 0,L4_SRC_PORT,L4_DST_PORT,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,IN_PKTS,OUT_PKTS,TCP_FLAGS,FLOW_DURATION_MILLISECONDS
0,0.165107,-0.331298,-0.23334,-0.252892,-0.01864,-0.00795,-0.022356,0.001374,0.629846,0.495722
1,-3.856688,-0.565481,-0.23334,7.100761,-0.019469,-0.008504,-0.04665,-0.024643,-0.235788,0.495841
2,0.194481,-0.331298,-0.23334,-0.252892,-0.01864,-0.00795,-0.022356,0.001374,0.629846,0.49572
3,0.306015,2.671561,4.057829,-0.252892,-0.019494,-0.008553,-0.04665,-0.029847,-2.709027,-2.083921
4,0.151803,-0.331298,-0.23334,-0.252892,-0.01864,-0.00795,-0.022356,0.001374,0.629846,0.495721
