In [28]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import sys, os

sys.path.insert(0, os.path.abspath(os.path.join('..')))
from MLPackage.FS.hho import jfs 


# load data
features_excel = "pfeatures"
working_path = os.path.dirname(os.getcwd())

feature_path = os.path.join(working_path, 'Datasets', features_excel + ".xlsx")
DF_features = pd.read_excel(feature_path, index_col = 0)





print( "[INFO] feature shape: ", DF_features.shape)


f_names = ['MDIST_RD', 'MDIST_AP', 'MDIST_ML', 'RDIST_RD', 'RDIST_AP', 'RDIST_ML', 'TOTEX_RD', 'TOTEX_AP', 'TOTEX_ML', 'MVELO_RD', 'MVELO_AP', 'MVELO_ML', 'RANGE_RD', 'RANGE_AP', 'RANGE_ML','AREA_CC', 'AREA_CE', 'AREA_SW', 'MFREQ_RD', 'MFREQ_AP', 'MFREQ_ML', 'FDPD_RD', 'FDPD_AP', 'FDPD_ML', 'FDCC', 'FDCE']
columnsName = f_names + [ "subject_ID", "left(0)/right(1)"]
DF_features.columns = columnsName


data  = DF_features.values
feat  = np.asarray(data[:, 0:-2])
label = np.asarray(data[:, -2])

# split data into train & validation (70 -- 30)
xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=0.3, stratify=label)
fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}

# parameter
k    = 5     # k-value in KNN
N    = 10    # number of chromosomes
T    = 100   # maximum number of generations
CR   = 0.8
MR   = 0.01
opts = {'k':k, 'fold':fold, 'N':N, 'T':T, 'CR':CR, 'MR':MR}

# perform feature selection
fmdl = jfs(feat, label, opts)

[INFO] feature shape:  (2851, 28)
Iteration: 1
Best (HHO): 0.8943862329259525
Iteration: 2
Best (HHO): 0.8913012221423436
Iteration: 3
Best (HHO): 0.8913012221423436
Iteration: 4
Best (HHO): 0.8913012221423436
Iteration: 5
Best (HHO): 0.8893727534148095
Iteration: 6
Best (HHO): 0.8824308051761323
Iteration: 7
Best (HHO): 0.8824308051761323
Iteration: 8
Best (HHO): 0.8824308051761323
Iteration: 9
Best (HHO): 0.8824308051761323
Iteration: 10
Best (HHO): 0.8824308051761323
Iteration: 11
Best (HHO): 0.8824308051761323
Iteration: 12
Best (HHO): 0.8824308051761323
Iteration: 13
Best (HHO): 0.8824308051761323
Iteration: 14
Best (HHO): 0.8824308051761323
Iteration: 15
Best (HHO): 0.8824308051761323
Iteration: 16
Best (HHO): 0.8824308051761323
Iteration: 17
Best (HHO): 0.8824308051761323
Iteration: 18
Best (HHO): 0.8824308051761323
Iteration: 19
Best (HHO): 0.8824308051761323
Iteration: 20
Best (HHO): 0.8781919482386772
Iteration: 21
Best (HHO): 0.8781919482386772
Iteration: 22
Best (HHO): 0.87

In [24]:
list(map(f_names.__getitem__, fmdl['sf'].tolist()))

['MDIST_RD',
 'MDIST_ML',
 'RDIST_AP',
 'RDIST_ML',
 'TOTEX_RD',
 'TOTEX_AP',
 'MVELO_RD',
 'MVELO_AP',
 'RANGE_AP',
 'RANGE_ML',
 'MFREQ_AP',
 'FDPD_RD',
 'FDPD_AP',
 'FDPD_ML',
 'FDCC']

In [29]:
list(map(f_names.__getitem__, fmdl['sf'].tolist()))

['MDIST_AP',
 'MDIST_ML',
 'RDIST_AP',
 'RDIST_ML',
 'TOTEX_AP',
 'TOTEX_ML',
 'MVELO_AP',
 'MVELO_ML',
 'RANGE_AP',
 'RANGE_ML',
 'AREA_SW',
 'MFREQ_ML',
 'FDPD_ML']