## **IMPORT LIBRARIES**

In [None]:
# importing required libraries
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import time
import os
from itertools import combinations

import pickle
from os import path

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import confusion_matrix

from sklearn import metrics
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.tree import DecisionTreeClassifier


## **Importing Datasets**

In [None]:
# Dataset tag; a later cell uses it as the prefix of the feature-selection CSV name.
filename = "NF-BOT-IOT"

# Load the preprocessed train and test splits (comma-separated, UTF-8).
train_data = pd.read_csv(
    'NF-BOT-IOT_train_preprocessed.csv',
    sep=',',
    encoding='utf-8',
)
test_data = pd.read_csv(
    'NF-BOT-IOT_test_preprocessed.csv',
    sep=',',
    encoding='utf-8',
)

In [None]:
# Separate the 'label' target column from the predictor columns in each split.
y_train = train_data['label']
X_train = train_data.drop(columns=['label'])

y_test = test_data['label']
X_test = test_data.drop(columns=['label'])

## **Feature selection Wrapper Methods**

* Options: (The exact term to be imported is mentioned inside parentheses)
* Binary Bat Algorithm (BBA)
* Cuckoo Search Algorithm (CS)
* Equilibrium Optimizer (EO)
* Genetic Algorithm (GA)
* Gravitational Search Algorithm (GSA)
* Grey Wolf Optimizer (GWO)
* Harmony Search (HS)
* Mayfly Algorithm (MA)
* Particle Swarm Optimization (PSO)
* Red Deer Algorithm (RDA)
* Sine Cosine Algorithm (SCA)
* Whale Optimization Algorithm (WOA)

Solution contains the following properties:
*   **best_agent** : best feature vector over all the iterations
*   **best_fitness** : fitness value of the best_agent
*   **best_accuracy** : classification accuracy of the best_agent
*   **final_population** : final population of agents
*   **final_fitness** : fitness value of the final_population
*   **final_accuracy** : classification accuracy of the final_population
*   **convergence_curve** : record of the fitness and the number of features over the course of the iterations
*   **execution_time** : time required to execute the piece of code

In [None]:
# 1% of the training data is used as input to the optimizer; the remaining
# 99% is only needed to satisfy train_test_split's return signature and is
# deleted right away so it does not stay in memory.
X_t, temp1, y_t, temp2 = train_test_split(
    X_train, y_train, train_size=0.01, random_state=7
)
del temp1, temp2

# Start the results CSV: a header line plus the first field of the single data
# row. The remaining fields of that row are appended by a later cell once the
# optimizer has finished.
feature_name = filename+"_BBA_feature.csv"
# A context manager closes the handle even if a write fails, and the explicit
# encoding matches the utf-8 used when the file is read back with pandas.
with open(feature_name, 'w', encoding='utf-8') as file:
    # NOTE: the header text is kept verbatim because it defines the CSV
    # column names.
    file.write("optimization,execution time of optimzier,no of feature selected,selected feature \n")
    # Row label for the Binary Bat Algorithm (was misspelled "BAA").
    file.write("BBA,")

In [None]:
# Binary Bat Algorithm wrapper from Py_FS, imported under the alias FS.
from Py_FS.wrapper.population_based.BBA import BBA as FS

# Configure the optimizer on the sampled subset and run it.
algo = FS(
    num_agents=30,
    max_iter=50,
    train_data=X_t,
    train_label=y_t,
    save_conv_graph=True,
)
results = algo.run()

# Drop the notebook-level references to the sampled subset.
del X_t, y_t

In [None]:
# Complete the data row that was started when the CSV was created:
# execution time, number of selected features, and the quoted feature list.
best_agent = results.solution.best_agent
column_headers = list(X_train.columns.values)

# A feature is selected when its position in the best agent holds 1.
chosen = [name for name, flag in zip(column_headers, best_agent) if flag == 1]
cnt = len(chosen)

# A context manager closes the handle even if a write fails, and the explicit
# encoding matches the utf-8 used when the file is read back with pandas.
with open(feature_name, 'a', encoding='utf-8') as file:
    file.write(str(results.exec_time) + ",")
    file.write(str(cnt) + ",")
    # Every name, including the last, is followed by a comma; the whole list
    # is wrapped in double quotes so it stays a single CSV field. The cell
    # that reads the file back removes the trailing comma.
    file.write("\"" + "".join(name + "," for name in chosen) + "\"\n")

## **Selection of features**

In [None]:
# Read the results CSV back; the fourth column of the first data row holds
# the comma-separated names of the selected features.
feature_df = pd.read_csv(feature_name, sep=',', encoding='utf-8')
selected_feature = feature_df.iat[0, 3]

# pandas yields NaN (a float) for an empty field, which is what the file
# contains when the optimizer selected no feature at all; fail with a clear
# message instead of an obscure slicing error.
if not isinstance(selected_feature, str):
    raise ValueError(
        "No selected features found in " + str(feature_name)
        + "; the optimizer may have selected none."
    )

# Remove the trailing comma left by the writer. rstrip leaves the value
# untouched when there is no trailing comma, so a feature name is never
# truncated.
selected_feature = selected_feature.rstrip(",")
selected_feature

In [None]:
# Turn the comma-separated string into a list of column names.
selected_feature = selected_feature.split(",")
selected_feature

In [None]:
# Keep only the selected feature columns of the training set (all rows).
X_train = X_train.loc[:, selected_feature]
X_train

In [None]:
# Keep only the selected feature columns of the test set (all rows).
X_test = X_test.loc[:, selected_feature]
X_test