In [45]:
# -------------------------------------------------%
# Created by Safwan Mahmood Al-Selwi - 22/08/2024  %
# Email:     saf1.alselwi@gmail.com                %
# Github:    https://github.com/SafwanAlselwi      %
# -------------------------------------------------%

# Please consider citing our work if you find it useful:
# Al-Selwi, S. M., Hassan, M. F., Abdulkadir, S. J., & et al. (2024). 
# Smart Grid Stability Prediction Using Adaptive Aquila Optimizer and Ensemble Stacked BiLSTM. Results in Engineering, 24, 103261. 
# doi: https://doi.org/10.1016/j.rineng.2024.103261.

[MAFESE (Metaheuristic Algorithms for FEature SElection)](https://github.com/thieu1995/mafese)

[Documentation](https://mafese.readthedocs.io/en/latest/pages/mafese.html)

In [46]:
# !pip install mafese==0.1.9
# !pip install mealpy==2.5.4

# **Importing Libraries**

In [47]:
import numpy as np
import pandas as pd

import mafese
from mafese.wrapper.mha import MhaSelector, MultiMhaSelector
from mafese import Data

from imblearn.over_sampling import SMOTE

import time
import os
import datetime
import statistics

In [48]:
mafese.__version__

'0.1.9'

In [55]:
epoch = 10
pop_size = 10

optimizer="AdaptiveAO"
optimizer_paras = {"name": optimizer, "epoch": epoch, "pop_size": pop_size, "sharpness":10.0, "sigmoid_midpoint":0.5}

dataset = "smart_grid_fs"
problem = "classification"

ESTIMATOR = "knn"
ESTIMATOR_PARAS = {"n_neighbors": 5,  "weights": "uniform",  "algorithm": "auto",  "leaf_size": 30,  "p": 2,  "metric": "minkowski",  "metric_params": None,  "n_jobs": None}

base_path = ""

def get_dataset_csv(dataset_name):
  data_type = "CLASSIFICATION"
  df = pd.read_csv(f"{base_path}{dataset_name}.csv", header=None)
  X = np.array(df.iloc[:, 0:-1])
  y = np.array(df.iloc[:, -1])
      
  if dataset_name in ("smart_grid_fs_a","smart_grid_fs"):
    X, y = SMOTE(random_state=42).fit_resample(X, y)
  elif dataset_name in ("PenglungEW", "Lymphography", "Arrhythmia"):
    X, y = SMOTE(k_neighbors=1).fit_resample(X, y)
  # else:
  #   X, y = SMOTE().fit_resample(X, y)

  data = Data(X, y)

  print(f"Requested {data_type} dataset: {dataset_name} found and loaded from CSV")
  return data

# **Run Single Algorithm**

In [None]:
# This function runs a single instance of a feature selection algorithm
def run_single_algorithm():
    # define mafese feature selection method
    feat_selector = MhaSelector(problem = problem,
                            estimator = ESTIMATOR,
                            estimator_paras = ESTIMATOR_PARAS,
                            optimizer = optimizer,
                            optimizer_paras = optimizer_paras,
                            transfer_func="vstf_01",
                            obj_name="AS")

   
    output = {}
    start_time = time.time()
    
    data = get_dataset_csv(dataset)
    data.split_train_test(test_size=0.2, random_state=10)
    
    # Feature selection (find all relevant features)
    feat_selector.fit(data.X_train, data.y_train, fit_weights=(0.9, 0.1), verbose=False)

    # check selected features - True (or 1) is selected, False (or 0) is not selected
    print(feat_selector.selected_feature_masks)
    print(feat_selector.selected_feature_solution)
    print(feat_selector.selected_feature_indexes)

    # Evaluate final dataset with different estimator with multiple performance metrics
    results = feat_selector.evaluate(estimator="svm", data=data, metrics=["AS", "PS", "RS","F1S", "SS", "CEL"])

    execution_time = time.time() - start_time

    # Initialize and populate the output dictionary
    output = {
        'dataset': dataset,
        'optimizer': optimizer,
        'features': len(feat_selector.selected_feature_solution),
        'AS_train': results.get("AS_test"),
        'PS_train': results.get("PS_train"),
        'RS_train': results.get("RS_train"),
        'F1S_train': results.get("F1S_train"),
        'AS_test': results.get("AS_test"),
        'PS_test': results.get("PS_test"),
        'RS_test': results.get("RS_test"),
        'F1S_test': results.get("F1S_test"),
        'time': execution_time
    }
            
    print(output)

run_single_algorithm()


Requested CLASSIFICATION dataset: smart_grid_fs found and loaded from CSV
[ True  True  True  True False False False  True  True  True  True  True]
[1 1 1 1 0 0 0 1 1 1 1 1]
[ 0  1  2  3  7  8  9 10 11]
{'dataset': 'smart_grid_fs', 'optimizer': 'AdaptiveAO', 'features': 12, 'AS_train': 0.90909, 'PS_train': 0.91575, 'RS_train': 0.91487, 'F1S_train': 0.9148, 'AS_test': 0.90909, 'PS_test': 0.91091, 'RS_test': 0.90909, 'F1S_test': 0.90915, 'time': 17.568374156951904}
