In [None]:
import copy
import math
import os
import random
import re
import sys
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np, numpy
import pandas as pd
import xlsxwriter
from scipy import stats
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold, \
    ShuffleSplit
from sklearn.neighbors import KNeighborsClassifier
# Silence library warnings for the whole session.
warnings.filterwarnings('ignore')

# Module-level NumPy random generator, created without an explicit seed.
rng = np.random.default_rng()

# Directory holding the CSV files, and the dataset names (without extension)
# that the driver loop at the bottom of the notebook processes.
data_loc_path = "../datasets/"
datasets = ['BreastEW']

# Experiment settings, passed to FSData and FSData.run by the driver loop.
nbr_exec = 30       # number of independent executions per dataset
dragonfly_num = 20  # population size
max_iter = 151      # maximum number of iterations
alpha = 0.99        # alpha weight of the fitness function
beta = 0.01         # beta weight of the fitness function


In [None]:
class FsProblem :
    """Feature-selection problem scored with a 5-nearest-neighbour classifier.

    The last column of ``data`` is used as the class label; every other column
    is a candidate feature. A solution is a sequence with one entry per
    candidate feature, where an entry equal to 1 means "feature selected".
    """

    def __init__(self,data):
        """Store the data frame and derive feature count, labels and classifier.

        Args:
            data: pandas DataFrame whose last column holds the class labels.
        """
        self.data=data
        self.nb_attribs= len(self.data.columns)-1
        self.outPuts=self.data.iloc[:,self.nb_attribs]
        self.classifier = KNeighborsClassifier(n_neighbors=5)

    def _selected_values(self, solution):
        """Return the values of the columns flagged 1, or None if none is flagged."""
        chosen = [i for i, n in enumerate(solution) if n == 1]
        if not chosen:
            return None
        return self.data.iloc[:, chosen].values

    def _cv_scores(self, X, scoring):
        """Score the classifier on X with 10 fixed 70/30 shuffle splits."""
        # random_state=0 keeps the splits identical across calls, so two
        # solutions are always compared on the same partitions.
        cv = ShuffleSplit(n_splits=10, test_size=0.3, random_state=0)
        return cross_val_score(self.classifier, X, self.outPuts, cv=cv, scoring=scoring)

    def evaluate(self,solution):
        """Return the mean cross-validated accuracy of the selected features.

        Args:
            solution: sequence of flags, one per candidate feature.

        Returns:
            Mean accuracy over the splits, or 0 when no feature is selected.
        """
        X = self._selected_values(solution)
        if X is None:
            return 0
        return self._cv_scores(X, 'accuracy').mean()

    def evaluate_weight(self,solution):
        """Return weighted-F1 and t-test summary statistics for a solution.

        Args:
            solution: sequence of flags, one per candidate feature.

        Returns:
            Tuple ``(mean weighted F1, mean t statistic, mean p-value)``.
            When no feature is selected the tuple is ``(0.0, 0.0, 0.0)``;
            previously a bare ``0`` was returned, which broke callers that
            index the result.
        """
        X = self._selected_values(solution)
        if X is None:
            return 0.0, 0.0, 0.0
        Y = self.outPuts
        results_f1_weighted = self._cv_scores(X, 'f1_weighted')
        # NOTE(review): this t-test compares the selected feature values
        # against the label column itself - confirm that is the intended
        # statistic.
        t_static, p_t = stats.ttest_ind(X, Y,equal_var=True)
        # Replace NaN/inf (e.g. from constant columns) so the means stay finite.
        t_static = np.nan_to_num(t_static,nan=0.0, posinf=1, neginf=-1.0)
        p_t = np.nan_to_num(p_t,nan=0.0, posinf=1, neginf=-1.0)
        return results_f1_weighted.mean(),t_static.mean(), p_t.mean()

In [None]:
class Test:
    """Binary Manta Ray Foraging Optimization (MRFO) driver for feature selection.

    Holds the problem to optimise and the two weights of the fitness function,
    together with the helper routines used by ``B_MRFO``.
    """

    def __init__(self,problem,alpha,beta):
        """Bind the problem and the fitness weights.

        Args:
            problem: object exposing ``nb_attribs``, ``evaluate(solution)`` and
                ``evaluate_weight(solution)``.
            alpha: weight of the classification-error term of the fitness.
            beta: weight of the selected-feature-ratio term of the fitness.
        """
        # NOTE(review): there is no ``id`` parameter, so this binds the builtin
        # ``id`` function. Left unchanged because nothing in view reads it.
        self.id = id
        self.data = problem
        self.a = alpha
        self.b = beta

    def nbrUn(self,solution):
        """Return how many entries of ``solution`` are equal to 1."""
        return sum(1 for n in solution if n == 1)

    def fitness_eq(self,X):
        """Return the fitness of one solution (lower is better).

        The value is ``a * (1 - evaluate(X)) + b * (selected / nb_attribs)``.
        """
        error_term = (1-self.data.evaluate(X))*self.a
        size_term = (self.nbrUn(X)/self.data.nb_attribs)*self.b
        return error_term+size_term

    def CaculateFitness(self, X):
        """Return the list of fitness values of every row of ``X``."""
        return [self.fitness_eq(X[i, :]) for i in range(X.shape[0])]

    def SortFitness(self, Fit):
        """Return ``(sorted fitness, sorting index)`` in ascending order."""
        fitness = np.sort(Fit, axis=0)
        index = np.argsort(Fit, axis=0)
        return fitness, index

    def SortPosition(self, X, index):
        """Return a float copy of ``X`` with its rows reordered by ``index``."""
        Xnew = np.zeros(X.shape)
        for i in range(X.shape[0]):
            Xnew[i, :] = X[index[i], :]
        return Xnew

    def BorderCheck(self, X, ub, lb, dim):
        """Clamp the first ``dim`` entries of ``X`` in place and return ``X``.

        Args:
            X: 1-D position vector, modified in place.
            ub: upper bounds, indexed as ``ub[0, j]``.
            lb: lower bounds, indexed as ``lb[0, j]``.
            dim: number of leading entries to clamp.
        """
        # Slice assignment (rather than ``out=``) keeps this usable for
        # integer vectors as well as float ones.
        X[:dim] = np.clip(X[:dim], lb[0, :dim], ub[0, :dim])
        return X

    def transform(self, Position, dim):
        """Map a continuous 1-D position to a 0/1 vector via a sigmoid.

        Entry ``j`` becomes 1 when ``sigmoid(Position[j])`` exceeds a fresh
        ``random.random()`` draw, else 0. ``dim`` is accepted for interface
        compatibility and is not used. The input is not modified.
        """
        Position_B = Position * 1
        # One draw per entry, in index order, from the ``random`` module.
        draws = np.array([random.random() for _ in range(Position.shape[0])])
        s = 1 / (1 + np.exp(-Position))
        Position_B[...] = np.where(s > draws, 1, 0)
        return Position_B

    def sigma(self, beta):
        """Return ``p ** (1 / beta)`` for the gamma/sine expression ``p`` below.

        Not called by ``B_MRFO``.
        """
        p = math.gamma(1 + beta) * math.sin(math.pi * beta / 2) / (
                    math.gamma((1 + beta) / 2) * beta * (pow(2, (beta - 1) / 2)))
        return pow(p, 1 / beta)

    def sigmoid(self, x):
        """Return the logistic function of scalar ``x``.

        When ``math.exp(-x)`` overflows (very negative ``x``) the small
        constant 0.000001 is returned instead of raising.
        """
        try:
            return 1.0 / (1.0 + math.exp(-x))
        except OverflowError:
            return 0.000001

    def _init_population(self, N, dim, lb, ub):
        """Build the initial 0/1 population from a chaotic-map sequence."""
        z = np.random.default_rng().random((N, dim))
        threshold = 0.3
        z = np.where(z < 0.5, z / threshold, (1 - z) / threshold)
        # Scale every row exactly once into the search range. The previous
        # ``z[i:]`` slice rescaled row i and all later rows, so later rows
        # were rescaled repeatedly.
        z = lb + z * (ub - lb)
        # An entry is selected when its value is not below the global mean.
        return (z >= np.mean(z)).astype(float)

    def B_MRFO(self,N,max_iter):
        """Run the binary MRFO search.

        Args:
            N: population size.
            max_iter: number of iterations.

        Returns:
            Tuple ``(evaluate(best), number of selected features in best,
            best fitness, per-iteration best-fitness array of length max_iter,
            evaluate_weight(best))``.
        """
        print("The Improved Binary of MRFO is optimizing your problem...")
        dim = self.data.nb_attribs

        # Continuous search range used before the sigmoid binarisation.
        low = -6
        Up = 6
        lb = low * np.ones([1,dim],dtype='float')
        ub = Up  * np.ones([1,dim],dtype='float')

        Food_fitness = float("inf")
        Food_pos = np.zeros(dim, int)

        MR = self._init_population(N, dim, lb, ub)
        food_x = np.zeros(max_iter)

        fitness = self.CaculateFitness(MR)
        fitness, sortIndex = self.SortFitness(fitness)
        MR = self.SortPosition(MR, sortIndex)

        for i in range(N):
            if Food_fitness > fitness[i]:
                Food_fitness = fitness[i]
                Food_pos = MR[i, :].copy()
        MR_best = Food_pos
        if max_iter > 0:
            food_x[0] = Food_fitness

        S = 2.0  # somersault factor

        # NOTE(review): MR is never replaced by MR_new, so every iteration
        # starts again from the sorted initial population and only MR_best
        # carries information forward. Confirm whether a population update
        # was intended.
        for t in range(max_iter):
            # Float storage: an integer array here silently truncated every
            # continuous update before clipping and binarisation.
            MR_new = np.zeros((N, dim), dtype=float)
            new_fitness = np.zeros(N)

            for i in range(N):
                rand = random.random()
                if rand < 0.5:
                    # Cyclone foraging
                    r = random.random()
                    r1 = random.random()
                    beta_r = 2*math.exp(r1*((max_iter-t+1)/max_iter))*math.sin(2*math.pi*r1)

                    if t/max_iter < rand:
                        # Cyclone foraging around a random reference position
                        reference = np.zeros(dim)
                        num = random.randint(1,max(int(0.1*dim),2))
                        # Never ask for more indices than exist, and let the
                        # last feature be drawn as well.
                        num = min(num, dim)
                        for index in random.sample(range(dim),num):
                            reference[index] = 1
                    else:
                        # Cyclone foraging around the best position so far
                        reference = MR_best

                    leader = MR[i-1,:] if i > 0 else reference
                    MR_new[i] = reference + (r * (leader - MR[i,:]) + beta_r*(reference - MR[i,:]))
                else:
                    # Chain foraging
                    r = random.random()
                    # r * sqrt(|log r|) tends to 0 as r -> 0; avoids log(0).
                    alpha_r = 2*r*(abs(math.log(r))**0.5) if r > 0 else 0.0

                    leader = MR[i-1,:] if i > 0 else MR_best
                    MR_new[i] = MR[i,:] + r * (leader - MR[i,:]) + alpha_r * (MR_best - MR[i,:])

                MR_new[i] = self.BorderCheck(MR_new[i], ub, lb, dim)

                # Binary mapping
                MR_new[i] = self.transform(MR_new[i], dim)
                fitness_temp = self.fitness_eq(MR_new[i])
                # Remembered so the somersault step does not evaluate the
                # same solution a second time.
                new_fitness[i] = fitness_temp

                if Food_fitness > fitness_temp:
                    Food_fitness = fitness_temp
                    Food_pos = MR_new[i, :].copy()
                MR_best = Food_pos
            food_x[t] = Food_fitness

            # Somersault foraging; the step is scaled by the arctan of the gap
            # between each individual's fitness and the best after stage one.
            stage_best = food_x[t]
            for i in range(N):
                r2 = random.random()
                r3 = random.random()
                step = S*np.arctan(abs(new_fitness[i] - stage_best))
                MR_new[i] = MR_new[i] + step*(r2 * MR_best - r3 * MR_new[i])
                MR_new[i] = self.BorderCheck(MR_new[i], ub, lb, dim)
                MR_new[i] = self.transform(MR_new[i], dim)
                fitness_temp = self.fitness_eq(MR_new[i])
                if Food_fitness > fitness_temp:
                    Food_fitness = fitness_temp
                    Food_pos = MR_new[i, :].copy()
                MR_best = Food_pos
            food_x[t] = Food_fitness
        return self.data.evaluate(MR_best), self.nbrUn(MR_best),Food_fitness,food_x,self.data.evaluate_weight(MR_best)


class FSData:
    """Experiment runner: loads one dataset, runs B_MRFO repeatedly, logs results.

    Construction redirects ``sys.stdout`` to a text log and opens an Excel
    workbook; ``run`` performs the executions, then closes the workbook and
    restores ``sys.stdout``.
    """

    def __init__(self,location,nbr_exec,dragonfly_num,max_iter):
        """Load the dataset and open the log file and result workbook.

        Args:
            location: path of the CSV file (read without a header row).
            nbr_exec: number of independent executions performed by ``run``.
            dragonfly_num: population size handed to ``B_MRFO``.
            max_iter: iteration count handed to ``B_MRFO``.
        """
        self.location = location
        self.nb_exec = nbr_exec

        # File name without directory and extension. The previous regex
        # produced an empty name for files containing digits or underscores.
        self.dataset_name = os.path.splitext(os.path.basename(self.location))[0]
        self.df = pd.read_csv(self.location,header=None)
        self.fsd = FsProblem(self.df)
        self.dragonfly_num=dragonfly_num
        self.max_iter=max_iter
        # Class name taken directly from the type rather than parsed from its
        # repr, so it no longer depends on how deep the module path is.
        self.classifier_name = type(self.fsd.classifier).__name__
        path = '../sheets/'
        os.makedirs(path, exist_ok=True)
        # The instance name uses the module-level alpha and beta settings.
        self.instance_name = str("IB_MRFO_"+'-'+str(alpha)+"-"+str(beta)+"-"+self.dataset_name+'_'+time.strftime("%m-%d-%H-%M_", time.localtime()))
        sheet_filename = os.path.join(path, self.instance_name)
        # ``log_filename`` was never defined, so construction raised
        # NameError. The log is written next to the workbook under the same
        # base name - NOTE(review): move it if a separate log folder is wanted.
        log_filename = sheet_filename
        # Remember the current stream so it can be restored later.
        self._saved_stdout = sys.stdout
        self._log_file = open(log_filename + '.txt','w+')
        sys.stdout = self._log_file
        try:
            print("[START] Dataset" + self.dataset_name + "description \n")
            print("Shape : " + str(self.df.shape) + "\n")
            print(self.df.describe())
            print("\n[END] Dataset" + self.dataset_name + "description\n")
            print("[START] Ressources specifications\n")
            print("[END] Ressources specifications\n")
            self.workbook = xlsxwriter.Workbook(sheet_filename + '.xlsx')
            # Excel limits worksheet names to 31 characters.
            self.worksheet = self.workbook.add_worksheet(self.classifier_name[:31])
            headers = ('Iteration', 'accuracy', 'N_Features', 'Fitness',
                       'f1_weighted', 't-value', 'p-value', 'Time')
            for col, title in enumerate(headers):
                self.worksheet.write(0, col, title)
            self.worksheet2 = self.workbook.add_worksheet('fitness')
        except Exception:
            # Do not leave stdout pointing at the log if setup fails.
            self._restore_stdout()
            raise

    def _restore_stdout(self):
        """Point ``sys.stdout`` back at the saved stream and close the log."""
        log_file = getattr(self, '_log_file', None)
        if log_file is not None and sys.stdout is log_file:
            sys.stdout = getattr(self, '_saved_stdout', sys.__stdout__)
        if log_file is not None and not log_file.closed:
            log_file.close()

    def attributs_to_flip(self,nb_att):
        """Return the list ``[0, 1, ..., nb_att - 1]``."""
        return list(range(nb_att))

    def run(self,alpha,beta):
        """Execute B_MRFO ``nb_exec`` times and write one result row per run.

        Args:
            alpha: weight of the error term of the fitness function.
            beta: weight of the feature-ratio term of the fitness function.

        The workbook is closed and ``sys.stdout`` restored even if a run
        raises.
        """
        t_init = time.time()
        try:
            for itr in range(1,self.nb_exec+1):
                print ("Execution N:{0}".format(str(itr)))
                self.fsd = FsProblem(self.df)
                t1 = time.time()
                test=Test(self.fsd,alpha,beta)
                best=test.B_MRFO(self.dragonfly_num,self.max_iter)
                t2 = time.time()

                # evaluate_weight may hand back a bare scalar for an empty
                # selection; spread it so the three columns can be written.
                weighted = best[4]
                if not isinstance(weighted, (tuple, list)):
                    weighted = (weighted, weighted, weighted)

                row_values = (itr, best[0], best[1], best[2],
                              weighted[0], weighted[1], weighted[2], t2-t1)
                for col, value in enumerate(row_values):
                    self.worksheet.write(itr, col, value)

                # Every 5th point of the convergence curve, one sheet column
                # per execution.
                for row, item in enumerate(best[3][::5]):
                    self.worksheet2.write(row, (itr-1), item)

            t_end = time.time()
            print ("Total execution time for dataset {0} is {1:.2f} s".format(self.dataset_name,t_end-t_init))
        finally:
            try:
                self.workbook.close()
            finally:
                self._restore_stdout()

# Run the complete experiment once for every configured dataset name.
for dataset in datasets:
    print(dataset)
    # CSV path = dataset directory + dataset name + ".csv"
    location = "{0}{1}.csv".format(data_loc_path, dataset)
    instance = FSData(location, nbr_exec, dragonfly_num, max_iter)
    instance.run(alpha, beta)