In [3]:
import numpy as np
import pandas as pd
from scipy.io.arff import loadarff 
import time
import scipy.spatial.distance as dt

In [19]:
## CARGAR LOS DATOS
def load_df(data_name, ind):
    """Load the five ARFF partitions of a dataset and split them into train/test.

    Partition ``ind`` is used as the test set. The other four partitions are
    concatenated, in cyclic order starting right after ``ind``, to build the
    training set. Every column except the last one is then min-max scaled
    using the minimum and maximum taken over train and test together.

    Parameters
    ----------
    data_name : str
        Dataset name; files are read from
        ``Instancias_APC/<data_name>_<k>.arff`` for k = 1..5.
    ind : int
        1-based number (1..5) of the partition held out as test set.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train_set, test_set)``. When ``data_name == 'diabetes'`` the two
        values found in the training ``class`` column are replaced by 0 and 1
        (in order of first appearance in the training set) in both frames.
    """
    n_partitions = 5
    txt = 'Instancias_APC/{d_name}_{n_data}.arff'

    def read_partition(number):
        # loadarff returns (records, metadata); only the records are needed.
        raw_data = loadarff(txt.format(d_name=data_name, n_data=number))
        return pd.DataFrame(raw_data[0])

    # ind = 2 -> [3, 4, 5, 1]: every partition except `ind`, wrapping around.
    train_numbers = [(i % n_partitions) + 1
                     for i in range(ind, ind + n_partitions - 1)]
    train_set = pd.concat([read_partition(number) for number in train_numbers],
                          ignore_index=True)
    test_set = read_partition(ind)

    # The last column is treated as the label and is left untouched.
    for column in train_set.columns[:-1]:
        min_value = min(test_set[column].min(), train_set[column].min())
        max_value = max(test_set[column].max(), train_set[column].max())
        value_range = max_value - min_value
        if value_range == 0:
            # Constant feature: dividing by zero would turn the whole column
            # into NaN and break every distance computed from it.
            train_set[column] = 0.0
            test_set[column] = 0.0
        else:
            train_set[column] = (train_set[column] - min_value) / value_range
            test_set[column] = (test_set[column] - min_value) / value_range

    if data_name == 'diabetes':
        # Same mapping is applied to train and test so labels stay comparable.
        values = train_set['class'].unique()
        train_set.loc[train_set['class'] == values[0], 'class'] = 0
        train_set.loc[train_set['class'] == values[1], 'class'] = 1
        test_set.loc[test_set['class'] == values[0], 'class'] = 0
        test_set.loc[test_set['class'] == values[1], 'class'] = 1

    return train_set, test_set

## BUSQUEDA LOCAL
def busqueda_local(X_train, y_train, w, f_value, max_iter, k = 1):
    """First-improvement local search over the feature-weight vector ``w``.

    On each step ``k`` features are picked from a shuffled pool of pending
    feature indices, each picked weight is perturbed with Gaussian noise
    (mean 0, std 0.3), weights above 1 are capped at 1 and weights below 0.1
    are set to 0. The candidate replaces ``w`` only if its fitness is
    strictly greater than the current one.

    The search stops after ``max_iter`` steps or after
    ``20 * number_of_features`` consecutive steps without improvement.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training features (only ``.columns`` and ``.copy()`` are used here).
    y_train : array-like with ``.copy()``
        Training labels, forwarded to ``validar_knn_train`` and ``func``.
    w : numpy.ndarray
        Starting weights; must support boolean-mask assignment.
    f_value : float
        Fitness of ``w``.
    max_iter : int
        Maximum number of candidates generated.
    k : int, optional
        Number of weights perturbed per candidate.

    Returns
    -------
    tuple
        ``(best_weights, best_fitness, steps_performed)``.
    """
    n_features = len(X_train.columns)
    stall_limit = 20 * n_features

    pending = np.arange(n_features)
    steps_done = 0
    stalled_steps = 0

    while steps_done < max_iter and stalled_steps < stall_limit:
        # Refill the pool once it can no longer supply k features.
        if len(pending) < k:
            pending = np.arange(n_features)
        np.random.shuffle(pending)

        candidate = w.copy()
        for slot in range(k):
            candidate[pending[slot]] += np.random.normal(loc = 0, scale = 0.3)

        # Truncation: cap at 1, and drop anything under 0.1 to exactly 0.
        candidate[candidate > 1] = 1
        candidate[candidate < 0.1] = 0

        predicted = validar_knn_train(X_train.copy(), y_train.copy(), candidate.copy())
        _, _, candidate_fitness = func(y_train.copy(), predicted.copy(), candidate.copy())

        if not candidate_fitness > f_value:
            stalled_steps += 1
        else:
            w = candidate.copy()
            f_value = candidate_fitness
            stalled_steps = 0

        # The k features just tried leave the pool until the next refill.
        pending = np.delete(pending, slice(0, k))
        steps_done += 1

    return w, f_value, steps_done

def funcion_eval(X_train, y_train, w_new):
    """Return the aggregated fitness of weight vector ``w_new`` on the training data.

    Labels are predicted with ``validar_knn_train`` and scored with ``func``;
    only the third value returned by ``func`` (the combined score) is kept.
    All arguments are passed on as copies.
    """
    predicted = validar_knn_train(X_train.copy(), y_train.copy(), w_new.copy())
    _, _, fitness = func(y_train.copy(), predicted.copy(), w_new.copy())
    return fitness

def validar_knn(X_train, y_train, x_test, w_true):
    """Predict the label of every test row with a weighted 1-nearest-neighbour rule.

    Distances are weighted Euclidean distances (``w`` passed to SciPy) between
    each test row and each training row; the label of the closest training row
    is returned. Ties are resolved in favour of the first training row.

    Parameters
    ----------
    X_train : pandas.DataFrame or array-like, shape (n_train, n_features)
    y_train : pandas.Series or array-like, length n_train
        Labels, matched to ``X_train`` rows by position.
    x_test : pandas.DataFrame or array-like, shape (n_test, n_features)
    w_true : sequence of float, length n_features
        Non-negative feature weights.

    Returns
    -------
    numpy.ndarray
        Predicted labels, one per test row.
    """
    w = np.asarray(w_true, dtype=float)

    # Keep the by-name column alignment that concatenating the frames provided.
    if isinstance(X_train, pd.DataFrame) and isinstance(x_test, pd.DataFrame):
        x_test = x_test[X_train.columns]

    train_points = np.asarray(X_train, dtype=float)
    test_points = np.asarray(x_test, dtype=float)

    # Only the test-vs-train block is needed, so use cdist rather than the
    # full pairwise matrix over train and test together.
    distances = dt.cdist(test_points, train_points, metric="euclidean", w=w)
    nearest = distances.argmin(axis=1)

    # `nearest` holds row positions, so select labels by position, not by
    # index label (label lookup breaks when y_train has a non-default index).
    return np.asarray(y_train)[nearest]

def validar_knn_train(X_train, y_train, w_true):
    """Leave-one-out weighted 1-NN prediction over the training set.

    For every training row, returns the label of its nearest *other* training
    row under the weighted Euclidean distance (``w`` passed to SciPy). Ties
    are resolved in favour of the first row.

    Parameters
    ----------
    X_train : pandas.DataFrame or array-like, shape (n, n_features)
    y_train : pandas.Series or array-like, length n
        Labels, matched to ``X_train`` rows by position.
    w_true : sequence of float, length n_features
        Non-negative feature weights.

    Returns
    -------
    numpy.ndarray
        Predicted label for each training row.
    """
    w = np.asarray(w_true, dtype=float)
    points = np.asarray(X_train, dtype=float)

    # squareform returns a fresh, writable ndarray, so the diagonal can be
    # masked directly instead of writing through DataFrame.values (which
    # pandas documents as read-only under Copy-on-Write).
    distances = dt.squareform(dt.pdist(points, metric="euclidean", w=w))

    # A row must never select itself; +inf is safe for any data scale.
    np.fill_diagonal(distances, np.inf)
    nearest = distances.argmin(axis=1)

    # Positional selection: `nearest` holds row positions, not index labels.
    return np.asarray(y_train)[nearest]
    
def func(y_true, y_pred, w_true):
    """Score a weight vector from its predictions.

    Returns
    -------
    tuple of float
        ``(tasa_class, tasa_red, fitness)`` where ``tasa_class`` is the
        percentage of positions with ``y_true - y_pred == 0``, ``tasa_red`` is
        the percentage of weights strictly below 0.1, and ``fitness`` is
        ``0.8 * tasa_class + 0.2 * tasa_red``.
    """
    weights = np.array(w_true.copy())

    n_hits = np.count_nonzero((y_true - y_pred) == 0)
    n_small_weights = np.count_nonzero(weights < 0.1)

    tasa_class = 100.0 * (n_hits / len(y_true))
    tasa_red = 100.0 * (n_small_weights / len(weights))
    fitness = 0.8 * tasa_class + 0.2 * tasa_red

    return tasa_class, tasa_red, fitness

In [37]:
# Experiment driver: 5-fold evaluation of the FSS weight search.
# NOTE: `FSS`, `load_df`, `validar_knn` and `func` must already be defined in
# the kernel; in this file the `FSS`/`Fish` classes appear in a later cell, so
# that cell has to be executed before this one.
datasets_names = ['diabetes', 'ozone-320', 'spectf-heart']

# Seed
seed = 0
np.random.seed(seed)

# Only FSS is run below. The previous list (BMB, ILS1, ILS2, VNS, ES) named
# algorithms that this cell never executes, so the same expensive FSS
# experiment was simply repeated five times under unrelated labels.
algs = ["FSS"]

for alg in algs:

    df_output = pd.DataFrame()

    for name in ['diabetes']:
        print("******** Exp :", name, "**************")
        print("Partition", "%_class", "%_red", "Fit", "T")

        # The label column is lower-case only in the diabetes files.
        label_col = 'class' if name == 'diabetes' else 'Class'

        # One [%class, %red, fit, T] row per partition, plus the mean row.
        final_df = []

        for i in range(5):
            train, test = load_df(name, i + 1)

            # `X_train` and `y_train` must stay module-level names with
            # exactly these spellings: Fish.evaluate reads them as globals.
            y_train = train[label_col].astype(int)
            X_train = train.drop(columns = [label_col])

            y_test = test[label_col].astype(int)
            x_test = test.drop(columns = [label_col])

            inicio = time.time()

            fss = FSS(iterations_number = 5000, num_of_individuos = 50, probability_of_recombination = 0.6, dimensions = X_train.shape[1])
            fss.search()
            w_bl = fss.global_best_position

            # Only the search is timed, not the evaluation on the test split.
            tiempo = time.time() - inicio

            y_pred = validar_knn(X_train.copy(), y_train.copy(), x_test.copy(), w_bl)
            class_v, red_v, f_value = func(y_test.copy(), y_pred.copy(), w_bl)

            print(i + 1, ";", class_v , ";" , red_v, ";", f_value, ";", tiempo)
            final_df.append([class_v, red_v, f_value, tiempo])

        # Column-wise means over the five partitions.
        mean_class, mean_red, mean_fit, mean_t = [np.array(col).mean() for col in zip(*final_df)]
        print("Media;", mean_class, ";", mean_red, ";", mean_fit, ";", mean_t)
        final_df.append([mean_class, mean_red, mean_fit, mean_t])

        df = pd.DataFrame(final_df, columns = ["%class", "%red", "fit", "T"], index = ["P1", "P2", "P3", "P4", "P5", "MEDIA"])
        df_output = pd.concat([df_output, df], axis= 1)

    # Optional export of the summary table, one sheet per algorithm:
    #with pd.ExcelWriter('MetricasMH.xlsx', engine="openpyxl", mode='a') as writer:
        #df_output.to_excel(writer, sheet_name=alg)

    #print(df_output)



******** Exp : diabetes **************
Partition %_class %_red Fit T
iter: 0 = cost: 63.52605863192182
iter: 200 = cost: 70.87133550488599
iter: 400 = cost: 73.24104234527687
iter: 600 = cost: 76.39250814332247
iter: 800 = cost: 76.39250814332247
iter: 1000 = cost: 76.78338762214983
iter: 1200 = cost: 78.08631921824104
iter: 1400 = cost: 79.15309446254072
iter: 1600 = cost: 79.15309446254072
iter: 1800 = cost: 79.15309446254072
iter: 2000 = cost: 79.15309446254072
iter: 2200 = cost: 79.15309446254072
iter: 2400 = cost: 79.15309446254072
iter: 2600 = cost: 79.15309446254072
iter: 2800 = cost: 79.15309446254072
iter: 3000 = cost: 79.15309446254072
iter: 3200 = cost: 79.15309446254072
iter: 3400 = cost: 79.15309446254072
iter: 3600 = cost: 79.15309446254072
iter: 3800 = cost: 79.15309446254072
iter: 4000 = cost: 79.15309446254072
iter: 4200 = cost: 79.15309446254072
iter: 4400 = cost: 79.15309446254072
iter: 4600 = cost: 79.15309446254072
iter: 4800 = cost: 79.15309446254072
1 ; 66.233766

KeyboardInterrupt: 

In [34]:


class Fish():
    """A single individual of the school: one candidate weight vector.

    Fitness evaluation goes through the module-level ``funcion_eval`` and
    reads the module-level names ``X_train`` and ``y_train``; both must exist
    before ``evaluate`` or ``update_position_individual_movement`` is called.
    Every coordinate is kept inside [0, 1] after each movement.
    """

    def __init__(self, positions, iterations_number):
        self.current_position = positions
        # Starting weight is half the number of iterations of the run.
        self.weight = iterations_number / 2.0
        self.fitness = 0
        self.delta_fitness = 0
        self.delta_position = []

    @staticmethod
    def _clip_unit(value):
        """Clamp a coordinate to [0, 1]; in-range values are returned as is."""
        if value > 1:
            return 1
        if value < 0:
            return 0
        return value

    def evaluate(self):
        """Recompute ``fitness`` for the current position."""
        self.fitness = funcion_eval(X_train, y_train, self.current_position)

    def update_position_individual_movement(self, step_ind):
        """Try a random step of size up to ``step_ind`` on every coordinate.

        The move is kept only if it strictly improves the fitness; otherwise
        the fish stays put and its deltas are reset to zero.
        """
        candidate = [
            self._clip_unit(coord + (step_ind * np.random.uniform(-1, 1)))
            for coord in self.current_position
        ]
        assert len(candidate) == len(self.current_position)

        candidate_fitness = funcion_eval(X_train, y_train, candidate)
        if not candidate_fitness > self.fitness:
            self.delta_position = [0] * len(self.current_position)
            self.delta_fitness = 0
            return

        self.delta_fitness = abs(candidate_fitness - self.fitness)
        self.fitness = candidate_fitness
        self.delta_position = [x - y for x, y in zip(candidate, self.current_position)]
        self.current_position = list(candidate)

    def feed(self, max_delta_fitness):
        """Update the weight from the last fitness gain, relative to the best gain in the school."""
        if max_delta_fitness == 0:
            # NOTE(review): the weight is reset to 1 when no fish improved,
            # discarding whatever had been accumulated - confirm intended.
            self.weight = 1
            return
        self.weight = self.weight + (self.delta_fitness / max_delta_fitness)

    def update_position_collective_movement(self, sum_delta_fitness):
        """Shift the fish by its own last displacement scaled by its own fitness gain.

        The shift is divided by ``sum_delta_fitness`` unless that sum is zero.
        """
        # NOTE(review): only this fish's displacement is used here, not an
        # average over the whole school - confirm this matches the intended
        # variant of the algorithm.
        instinct = [step * self.delta_fitness for step in self.delta_position]
        if sum_delta_fitness != 0:
            instinct = [value / sum_delta_fitness for value in instinct]

        moved = [
            self._clip_unit(coord + instinct[axis])
            for axis, coord in enumerate(self.current_position)
        ]

        assert len(moved) == len(self.current_position)
        self.current_position = list(moved)

    def update_position_volitive_movement(self, barycenter, step_vol, search_operator):
        """Move along the line joining the fish and ``barycenter``.

        Each coordinate moves by ``(pos - barycenter) * step_vol * U(0, 1)``
        multiplied by ``search_operator``, so the sign of ``search_operator``
        selects moving away from (+) or towards (-) the barycenter.
        """
        moved = [
            self._clip_unit(
                coord + (((coord - barycenter[axis]) * step_vol * np.random.uniform(0, 1)) * search_operator)
            )
            for axis, coord in enumerate(self.current_position)
        ]

        assert len(moved) == len(self.current_position)
        self.current_position = list(moved)



class FSS():
    """Fish School Search driver that maximises the fitness of its ``Fish``.

    After ``search()`` the best fitness seen is in ``global_best``, the
    corresponding position in ``global_best_position`` and the per-iteration
    history of ``global_best`` in ``list_global_best_values``.
    """

    def __init__(self, iterations_number, num_of_individuos, probability_of_recombination, dimensions):
        """Store the run configuration.

        Parameters
        ----------
        iterations_number : int
            Number of iterations performed by ``search``.
        num_of_individuos : int
            Number of fish in the school.
        probability_of_recombination : float
            Accepted for interface compatibility; not used by this class.
        dimensions : int
            Length of each fish position vector.
        """
        self.dimensions = dimensions
        self.iterations_number = iterations_number
        self.num_of_individuos = num_of_individuos
        self.cluster = []
        self.global_best = float(0)
        self.global_best_position = []

        # Params
        self.total_weight = 1 * self.num_of_individuos
        self.initial_step_ind = 0.1
        self.final_step_ind = 0.0001
        self.step_ind = self.initial_step_ind
        self.initial_step_vol = 0.01
        self.final_step_vol = 0.001
        self.step_vol = self.initial_step_vol
        self.list_global_best_values = []

    def search(self):
        """Run the optimisation for ``iterations_number`` iterations.

        The school is evaluated once up front and once at the end of every
        iteration (after the collective movements changed the positions).
        No other evaluation is needed: the individual movement stores the
        fitness of any move it accepts, and positions do not change between
        the end of one iteration and the start of the next, so re-evaluating
        at those points would only repeat identical, expensive computations.
        """
        self._initialize_cluster()

        self.evaluate_cluster()
        self.updates_optimal_solution()

        for i in range(self.iterations_number):
            self.apply_individual_movement()
            self.updates_optimal_solution()

            self.apply_feeding()

            self.apply_instintive_collective_movement()
            self.apply_collective_volitive_movement()

            self.update_step(i)
            self.update_total_weight()

            self.evaluate_cluster()
            self.updates_optimal_solution()
            self.list_global_best_values.append(self.global_best)
            if i % 200 == 0:
                print("iter: {} = cost: {}".format(i, self.global_best))

    def update_total_weight(self):
        """Remember the current total school weight for the next volitive step."""
        self.total_weight = sum([fish.weight for fish in self.cluster])

    def _initialize_cluster(self):
        """Create ``num_of_individuos`` fish at uniformly random positions in [0, 1)."""
        self.cluster = []
        for _ in range(self.num_of_individuos):
            fish = Fish(
                positions=[self._get_random_number() for _ in range(self.dimensions)],
                iterations_number = self.iterations_number
            )
            self.cluster.append(fish)

    def evaluate_cluster(self):
        """Recompute the fitness of every fish."""
        for fish in self.cluster:
            fish.evaluate()

    def updates_optimal_solution(self):
        """Record the best fitness/position seen so far (higher is better)."""
        for fish in self.cluster:
            # The second condition makes sure a position is always recorded,
            # even if no fish ever scores above the initial global_best of 0.
            if fish.fitness > self.global_best or not self.global_best_position:
                self.global_best = fish.fitness
                self.global_best_position = list(fish.current_position)

    def apply_individual_movement(self):
        """Let every fish attempt its individual random move."""
        for fish in self.cluster:
            fish.update_position_individual_movement(self.step_ind)

    def apply_feeding(self):
        """Update every fish weight relative to the largest fitness gain in the school."""
        max_delta_fitness = max([fish.delta_fitness for fish in self.cluster])
        for fish in self.cluster:
            fish.feed(max_delta_fitness)

    def apply_instintive_collective_movement(self):
        """Apply the collective-instinctive move, normalised by the total fitness gain."""
        sum_delta_fitness = sum([fish.delta_fitness for fish in self.cluster])

        for fish in self.cluster:
            fish.update_position_collective_movement(sum_delta_fitness)

    def _calculate_barycenter(self):
        """Return the weight-averaged position of the school as a list."""
        sum_weights = sum([fish.weight for fish in self.cluster])
        sum_position_and_weights = [[x * fish.weight for x in fish.current_position] for fish in self.cluster]
        sum_position_and_weights = np.sum(sum_position_and_weights, 0)
        return [s / sum_weights for s in sum_position_and_weights]

    def apply_collective_volitive_movement(self):
        """Contract or expand the school around its barycenter.

        ``search_operator`` is -1 when the total weight grew since the last
        ``update_total_weight`` call and +1 otherwise.
        """
        barycenter = self._calculate_barycenter()
        current_total_weight = sum([fish.weight for fish in self.cluster])
        search_operator = -1 if current_total_weight > self.total_weight else 1
        for fish in self.cluster:
            fish.update_position_volitive_movement(barycenter, self.step_vol, search_operator)

    def update_step(self, current_i):
        """Linearly decay both step sizes from their initial towards their final values."""
        self.step_ind = self.initial_step_ind - current_i * float(
            self.initial_step_ind - self.final_step_ind) / self.iterations_number
        self.step_vol = self.initial_step_vol - current_i * float(
            self.initial_step_vol - self.final_step_vol) / self.iterations_number

    def _get_random_number(self):
        """Draw one coordinate uniformly from [0, 1)."""
        return np.random.uniform(0, 1)

