In [13]:
# differential evolution search of the two-dimensional sphere objective function
from numpy.random import rand
from numpy.random import choice
from numpy import asarray
from numpy import clip
from numpy import argmax
from numpy import max
from numpy import around
import random
from matplotlib import pyplot
from scipy.spatial.distance import cdist
import numpy as np
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.preprocessing import LabelEncoder

def Evaluate_State(state, data, target):
    #finding the distance between centroids and all the data points
    distances = cdist(data, state,'euclidean') #Step 2

    k = len(state)

    #Centroid with the minimum Distance
    points = np.array([np.argmin(i) for i in distances]) #Step 3

    #Repeating the above steps for a defined number of iterations
    #Step 4
    for _ in range(10):
        _state = state
        state = []
        for idx in range(k):
            #Updating Centroids by taking mean of Cluster it belongs to
            if(len(data[points == idx]) == 0):
                state.append(_state[idx])
            else:
                state.append(data[points == idx].mean(axis=0))

        state = np.vstack(state) #Updated Centroids

        distances = cdist(data, state,'euclidean')
        points = np.array([np.argmin(i) for i in distances])

    return accuracy_score(target, points), state

# define mutation operation
def mutation(x, F):
    return x[0] + F * (x[1] - x[2])


# define boundary check operation
def check_bounds(mutated, bounds):
    array = []
    for i in range(len(mutated)):
        auxiliar = []
        for g in range(len(bounds)):
            auxiliar.append(clip(mutated[i][g],bounds[g][0],bounds[g][1]))
        if not np.array_equal(array, []):
            array = np.vstack([array, auxiliar])
        else:
            array.append(auxiliar)
    mutated_bound = array
    return mutated_bound


# define crossover operation
def crossover(mutated, target, dims, cr):
    # generate a uniform random value for every dimension
    p = rand(len(mutated))
    # generate trial vector by binomial crossover
    trial = np.array([mutated[i] if p[i] < cr else target[i] for i in range(len(mutated))])
    return trial

def differential_evolution(pop_size, iter, F, cr, data, target, centroids):
    # initialise population of candidate solutions randomly within the specified bounds
    bounds = np.array([[min(data[:,i]),max(data[:,i])] for i in range(data.shape[1])])

    pop = np.array([[[random.uniform(bounds[i][0], bounds[i][1]) for i in range(data.shape[1])] for _ in range(centroids)] for _ in range(pop_size)])
    pop = np.squeeze(pop)
    # evaluate initial population of candidate solutions
    obj_all = np.array([Evaluate_State(ind, data, target) for ind in pop])
    # find the best performing vector of initial population
    best_vector = obj_all[argmax(obj_all[:,0]),1]
    best_obj = max(obj_all[:,0])
    prev_obj = best_obj
    # initialise list to store the objective function value at each iteration
    obj_iter = list()
    # run iterations of the algorithm
    for i in range(iter):
        # iterate over all candidate solutions
        for j in range(pop_size):
            # choose three candidates, a, b and c, that are not the current one
            candidates = [candidate for candidate in range(pop_size) if candidate != j]
            a, b, c = pop[choice(candidates, 3, replace=False)]
            # perform mutation
            mutated = mutation([a, b, c], F)
            # check that lower and upper bounds are retained after mutation
            mutated = check_bounds(mutated, bounds)
            # perform crossover
            trial = crossover(mutated, pop[j], len(bounds), cr)
            # compute objective function value for target vector
            obj_target = Evaluate_State(pop[j], data, target)
            # compute objective function value for trial vector
            obj_trial = Evaluate_State(trial, data, target)
            # perform selection
            if obj_trial[0] > obj_target[0]:
                # replace the target vector with the trial vector
                pop[j] = trial[1]
                # store the new objective function value
                obj_all[j, 0] = obj_trial[0]
        # find the best performing vector at each iteration
        best_obj = max(obj_all[:,0])

        if best_obj > prev_obj:
            best_vector = obj_all[argmax(obj_all[:,0]),1]
            prev_obj = best_obj
            obj_iter.append(best_obj)
            # report progress at each iteration
         #   print('Iteration: %d f([%s]) = %.5f' % (i, around(best_vector, decimals=5), best_obj))
    return [best_vector, prev_obj, obj_iter]


# define population size
pop_size = 100

# define number of iterations
iter = 100
# define scale factor for mutation
F = 0.5
# define crossover rate for recombination
cr = 0.7

In [14]:

# evolution strategy (mu + lambda) of the ackley objective function
from numpy import asarray
from numpy import argsort
from numpy.random import randn
from numpy.random import rand
from scipy.spatial.distance import cdist
from numpy.random import seed
from sklearn.metrics import accuracy_score
import pandas as pd
import random
import numpy as np

# objective function
def objective(state, data, target):
    #finding the distance between centroids and all the data points
    distances = cdist(data, state,'euclidean') #Step 2

    k = len(state)

    #Centroid with the minimum Distance
    points = np.array([np.argmin(i) for i in distances]) #Step 3

    #Repeating the above steps for a defined number of iterations
    #Step 4
    for _ in range(10):
        _state = state
        state = []
        for idx in range(k):
            #Updating Centroids by taking mean of Cluster it belongs to
            if(len(data[points == idx]) == 0):
                state.append(_state[idx])
            else:
                state.append(data[points == idx].mean(axis=0))

        state = np.vstack(state) #Updated Centroids

        distances = cdist(data, state,'euclidean')
        points = np.array([np.argmin(i) for i in distances])

    return accuracy_score(target, points), state

# check if a point is within the bounds of the search
def in_bounds(point, bounds):
    #print("Point:", point)
    #print("Bounds:", bounds)

    # enumerate all dimensions of the point
    for d in range(len(point)):
        # check if out of bounds for this dimension
        for j in range(len(point[d])):
            if point[d][j] < bounds[j][0] or point[d][j] > bounds[j][1]:
                return False

    return True

# evolution strategy (mu + lambda) algorithm
def es_plus(objective, bounds, n_iter, step_size, mu, lam, centroids, data, target):
    bounds = np.array([[min(data[:,i]),max(data[:,i])] for i in range(data.shape[1])])

    best, best_eval = 0, 0
    # calculate the number of children per parent
    n_children = int(lam / mu)
    # initial population
    population = [[[random.uniform(bounds[i][0], bounds[i][1]) for i in range(data.shape[1])] for _ in range(centroids)] for _ in range(lam)]
    # perform the search

    population = np.array(population)

    for epoch in range(n_iter):
        # evaluate fitness for the population
        scores = np.array([objective(c, data, target) for c in population])
        # rank scores in ascending order
        ranks = argsort(argsort(scores[:, 0]))[::-1]
        # select the indexes for the top mu ranked solutions
        selected = [i for i,_ in enumerate(ranks) if ranks[i] < mu]

        # create children from parents
        children = list()

        for i in selected:
            # check if this parent is the best solution ever seen
            if scores[i][0] > best_eval:
                best, best_eval = scores[i][1], scores[i][0]
            #  print('%d, Best: f(%s) = %.5f' % (epoch, best, best_eval))
            # keep the parent
            children.append(scores[i][1])
            # create children for parent
            for _ in range(n_children):
                child = None
                while child is None or not in_bounds(child, bounds):
                    child = scores[i][1] + randn(np.array(scores[i][1]).shape[0], np.array(scores[i][1]).shape[1]) * step_size
                children.append(child)
        # replace population with children
        population = np.array(children)

    return [best, best_eval]

# seed the pseudorandom number generator
seed(1)
# define range for input
bounds = asarray([0, 8.0])
# define the total iterations
n_iter = 100
# define the maximum step size
step_size = 0.15
# number of parents selected
mu = 20
# the number of children generated by parents
lam = 100

In [15]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split

labelencoder = LabelEncoder()

def Start_Iris(b_ES = True):
    data = pd.read_csv('databases/iris.data', names = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class'])
    target = labelencoder.fit_transform(data['class'].values)
    data = data.drop('class', axis = 1).values

    scores = []
    vectors = []

    centroids = len(set(target))

    if(b_ES):
        for i in range(10):
            best, score = es_plus(objective, bounds, n_iter, step_size, mu, lam, centroids, data, target)
            scores.append(score)
            vectors.append(best)

    else:
        for i in range(10):
            solution = differential_evolution(pop_size, iter, F, cr, data, target, centroids)
            scores.append(solution[1])
            vectors.append(solution[0])

    scores = np.array(scores)

    print("\n Iris \n")
    print("Mean: ", scores.mean())
    print("Standard Deviation: ", scores.std())
    print("Min: ", scores.min())
    print("Max: ", scores.max())
    print("Best vector", vectors[scores.argmax()])

def Start_Wine(b_ES = True):
    data = pd.read_csv('databases/wine.data', names = ['class', 'alcohol', 'malic acid', 'ash', 'alcalinity of ash', 'magnesium', 'total phenols', 'flavanoids', 'nonflavanoid phenols', 'proanthocyanins', 'color intensity', 'hue', 'diluted', 'proline'])

    target = data['class'].values
    data_drop = data.drop('class',axis=1)
    data = data_drop.values

    sc = StandardScaler()
    data = sc.fit_transform(data)

    scores = []
    vectors = []

    centroids  = len(set(target))

    if(b_ES):
        for i in range(10):
            best, score = es_plus(objective, bounds, n_iter, step_size, mu, lam, centroids, data, target)
            scores.append(score)
            vectors.append(best)
    else:
        for i in range(10):
            solution = differential_evolution(pop_size, iter, F, cr, data, target, centroids)
            scores.append(solution[1])
            vectors.append(solution[0])

    scores = np.array(scores)

    print("\n Wine \n")
    print("Mean: ", scores.mean())
    print("Standard Deviation: ", scores.std())
    print("Min: ", scores.min())
    print("Max: ", scores.max())
    print("Best vector", vectors[scores.argmax()])

def Start_Breast_Cancer(b_ES = True):
    data = pd.read_csv('databases/breast-cancer.data', names = ['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
                                                                'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
                                                                'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
                                                                'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
                                                                'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
                                                                'fractal_dimension_se', 'radius_worst', 'texture_worst',
                                                                'perimeter_worst', 'area_worst', 'smoothness_worst',
                                                                'compactness_worst', 'concavity_worst', 'concave points_worst',
                                                                'symmetry_worst', 'fractal_dimension_worst'])

    data = data.drop('id',axis=1)

    data['diagnosis'] = data['diagnosis'].map({'M':1,'B':0})

    datas = pd.DataFrame(preprocessing.scale(data.iloc[:,1:31]))
    datas.columns = list(data.iloc[:,1:31].columns)
    target = data['diagnosis']
    data = datas.values

    scores = []
    vectors = []

    centroids = len(set(target))

    if(b_ES):
        for i in range(10):
            best, score = es_plus(objective, bounds, n_iter, step_size, mu, lam, centroids, data, target)
            scores.append(score)
            vectors.append(best)
    else:
        for i in range(10):
            solution = differential_evolution(pop_size, iter, F, cr, data, target, centroids)
            scores.append(solution[1])
            vectors.append(solution[0])

    scores = np.array(scores)

    print("\n Breast Cancer \n")
    print("Mean: ", scores.mean())
    print("Standard Deviation: ", scores.std())
    print("Min: ", scores.min())
    print("Max: ", scores.max())
    print("Best vector", vectors[scores.argmax()])


In [16]:
# Fazer o grafico de fitness

Start_Iris(False)
Start_Wine(False)
Start_Breast_Cancer(False)

Start_Iris()
Start_Wine()
Start_Breast_Cancer()


 Iris 

Mean:  0.916
Standard Deviation:  0.005333333333333367
Min:  0.9066666666666666
Max:  0.92
Best vector [[5.006      3.418      1.464      0.244     ]
 [5.885      2.74       4.37666667 1.41833333]
 [6.8275     3.07       5.7        2.0625    ]]

 Wine 

Mean:  0.702247191011236
Standard Deviation:  0.0
Min:  0.702247191011236
Max:  0.702247191011236
Best vector [[-0.33349644  0.30017529 -0.09537268  0.45322359 -0.32993388 -0.73111596
  -0.76399035  0.5491078  -0.58870948  0.06870293 -0.47300399 -0.64699538
  -0.5877751 ]
 [ 0.3731984  -0.33591045  0.10672657 -0.50717878  0.36921172  0.81815357
   0.85494159 -0.61447777  0.65879394 -0.07688185  0.52931399  0.72401864
   0.65774832]
 [ 2.22848131  1.85092841 -2.09940778 -0.0317066   4.28821918  2.48225422
   1.36503869 -1.55821276 -0.47920888  0.75260239  1.12407707 -1.83813446
   2.86261373]]

 Breast Cancer 

Mean:  0.9291739894551846
Standard Deviation:  0.014892931671269129
Min:  0.9156414762741653
Max:  0.9578207381370826
B