# Preliminaries

Importing python packages and setting display parameters

In [1]:
import numpy as np
import pandas as pd
import itertools as it
import scipy.stats as stats

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

from thesis_EAfunc import *
from thesis_visfunc import *

In [2]:
plt.style.use('bmh')
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

pd.set_option('display.latex.repr', True)
pd.set_option('display.latex.longtable', True)

# Fitness Landscape Definition

In [3]:
#Problem domain
x_min = -15
x_max = 15
y_min = -15
y_max = 15

#Known minimum
x_point = -6.01717
y_point = 9.06022

domain = (x_min, x_max, y_min, y_max)
point = (x_point, y_point)
img_size = (8.5, 4.25)

#Problem definition
import math
def g(x,y):
    mag = np.sqrt(x**2. + y**2.)
    return -(50.*np.sinc(mag/np.pi) - mag)

def f(x, y):
    x_min = -6.01717
    y_min = 9.06022
    f_min = g(x_min+11., y_min+9.) + g(x_min-11., y_min-3.) + g(x_min+6., y_min-9.)
    tripsinc = g(x+11., y+9.) + g(x-11., y-3.) + g(x+6., y-9.) - (f_min)
    return tripsinc

In [4]:
#Testing the minimum
print(f(-6.01717,9.06022))

0.0


In [5]:
#Testing the function
print(f(-1.,-1.), f(-11.,-9.), f(11.,3.), f(-6.,9.))

50.62059878583003 5.177364279021976 6.107247239602234 0.031278340140559635


# Setting up the experiment
64 Experiments
>L-> In each experiment, one set of parameters is used.
>>L-> 40 Replicates per experiment.
>>>L-> Each replicate is different due to randomness effects.

In [6]:
#starting seed
np.random.seed(42)

## Initializing data storage

In [7]:
mult_fit_cols = ['exp'] + ['pop_s'] + ['b'] + ['mut_p'] + ['mut_s'] + ['p_sel'] + ['s_sel'] + ['run', 'generation', 'fitness_min', 'fitness_max', 'fitness_mean', 'fitness_std']
multi_fit = pd.DataFrame(columns=mult_fit_cols)
multi_fit = multi_fit.infer_objects()

## Parameter space for the experiment

### Initializing

In [8]:
#Algorithm parameters
## Number of replicates, and generations per experiment
rep_n = 30
gen_f = 200

## Population size
pop_s = [40, 160]

## Parent subpopulation's selection method and size
par_selection = ['uniform','tournament_k3']
b = [0.5, 5]
par_s = [z*y for z in pop_s for y in b]

## Progeny subpopulation's size
prog_s = par_s

### Crossover Method
crossover = 'uniform'
### Mutation method, probability and size
mutation = 'random_all_gau_dis'
mut_p = [0.1, 0.5]
mut_s = [2.5, 7.5]

## New population selection method
sur_selection = ['uniform','tournament_k3']

### 2-Level Factors encoded values

In [9]:
inputs_labels = {'pop_s' : 'Population size',
                 'b'     : 'Progeny-to-population ratio',
                 'mut_p' : 'Mutation Probability',
                 'mut_s' : 'Mutation size',
                 'p_sel' : 'Parent selection',
                 's_sel' : 'Survivor selection method'
                }

dat = [('pop_s',  40, 160, -1, 1, 'Numerical'),
       ('b'    , 0.5, 5, -1, 1, 'Numerical'),
       ('mut_p', 0.1, 0.5, -1, 1, 'Numerical (<1)'),
       ('mut_s', 2.5, 7.5, -1, 1, 'Numerical'),
       ('p_sel', 'uniform', 'tournament k3', -1, 1, 'Categorical'),
       ('s_sel', 'uniform', 'tournament k3', -1, 1, 'Categorical')
      ]

inputs_df = pd.DataFrame(dat,columns=['index', 'Value_low', 'Value_high', 'encoded_low', 'encoded_high', 'Variable type'])
inputs_df = inputs_df.set_index(['index'])
inputs_df['Label'] = inputs_df.index.map( lambda z : inputs_labels[z] )
inputs_df = inputs_df[['Label', 'Variable type', 'Value_low', 'Value_high', 'encoded_low', 'encoded_high' ]]

inputs_df

Unnamed: 0_level_0,Label,Variable type,Value_low,Value_high,encoded_low,encoded_high
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
pop_s,Population size,Numerical,40,160,-1,1
b,Progeny-to-population ratio,Numerical,0.5,5,-1,1
mut_p,Mutation Probability,Numerical (<1),0.1,0.5,-1,1
mut_s,Mutation size,Numerical,2.5,7.5,-1,1
p_sel,Parent selection,Categorical,uniform,tournament k3,-1,1
s_sel,Survivor selection method,Categorical,uniform,tournament k3,-1,1


### Combining the 2-level Factors

We create a list with all the possible combinations of the 2-level factors

In [10]:
exp_par = list(it.product(pop_s, b, mut_p, mut_s, par_selection, sur_selection))
print('Cantidad de combinaciones de parametros en "exp_par" :'+str(len(exp_par)))
print()
print('Primera y última combinación de parametros en "exp_par":')
print('Secuencia (pop_s, b, mut_p, mut_s, p_sel, s_sel)')
print(exp_par[0])
print(exp_par[63])

Cantidad de combinaciones de parametros en "exp_par" :64

Primera y última combinación de parametros en "exp_par":
Secuencia (pop_s, b, mut_p, mut_s, p_sel, s_sel)
(40, 0.5, 0.1, 2.5, 'uniform', 'uniform')
(160, 5, 0.5, 7.5, 'tournament_k3', 'tournament_k3')


# Experiment execution

In [11]:
%%time
exp_n = 1
for (zz, yy, xx, vv, uu, tt) in exp_par:
    sur_selection = tt
    par_selection = uu
    mut_s = vv
    mut_p = xx
    b = yy
    pop_s = zz
    prog_s = int(b * pop_s)
    par_s = prog_s
    
    fitness_res = EA_exp_only_fitness(rep_n, gen_f, f, domain, pop_s, par_s, prog_s, mut_p, mut_s, par_selection, crossover, mutation, sur_selection)
    
    fitness_res.insert(0, 's_sel', tt)
    fitness_res.insert(0, 'p_sel', uu)
    fitness_res.insert(0, 'mut_s', vv)
    fitness_res.insert(0, 'mut_p', xx)
    fitness_res.insert(0, 'b', yy)
    fitness_res.insert(0, 'pop_s', zz)
    fitness_res.insert(0, 'exp', exp_n)
    multi_fit = multi_fit.append(fitness_res, ignore_index=True, sort=False)
    multi_fit = multi_fit.infer_objects()
    
    exp_n += 1

CPU times: user 2h 13min 24s, sys: 35.9 s, total: 2h 14min
Wall time: 2h 14min 9s


## Data storage

Writing the Data Frame to a pickle file

In [12]:
multi_fit.to_pickle('./TEST_B_2L_FitData.gz', compression='gzip')

Reading the Data Frame from a pickle file

In [33]:
multi_fit = pd.read_pickle('./TEST_B_2L_FitData.gz', compression='gzip')

# Processing data for DOE Analysis

In [35]:
multi_fit.tail()

Unnamed: 0,exp,pop_s,b,mut_p,mut_s,p_sel,s_sel,run,generation,fitness_min,fitness_max,fitness_mean,fitness_std
232315,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,116,7.193797,71.236066,45.619641,14.213987
232316,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,117,11.467743,73.680178,44.388044,15.458068
232317,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,118,7.543423,64.100916,39.732343,17.950929
232318,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,119,7.100332,66.17139,42.403105,17.924494
232319,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,120,7.100332,61.400337,41.825771,16.10021


In [23]:
# Storing the latest generation's population of each replicate
query = (multi_fit['generation']==gen_f)
multi_final_fitness_res = multi_fit[query]

# Reordering columns
multi_final_fitness_res.drop(['exp', 'generation'], axis=1, inplace=True)
multi_final_fitness_res.columns = ['pop_s', 'b', 'mut_p', 'mut_s', 'p_sel', 's_sel', 'run', 'f_min', 'f_max', 'f_mean', 'f_std']
multi_final_fitness_res = multi_final_fitness_res[['run', 'pop_s', 'b', 'mut_p', 'mut_s', 'p_sel', 's_sel', 'f_min', 'f_max', 'f_mean', 'f_std']]

# Encoding values for DOE's Factos
multi_final_fitness_res['pop_s'] = multi_final_fitness_res['pop_s'].replace([10, 30], [-1, 1]).infer_objects()
multi_final_fitness_res['b'] = multi_final_fitness_res['b'].replace([.5, 5], [-1, 1]).infer_objects()
multi_final_fitness_res['mut_p'] = multi_final_fitness_res['mut_p'].replace([.1, .9], [-1, 1]).infer_objects()
multi_final_fitness_res['mut_s'] = multi_final_fitness_res['mut_s'].replace([.1, 10], [-1, 1]).infer_objects()
multi_final_fitness_res['p_sel'] = multi_final_fitness_res['p_sel'].replace(['fitness_proportional_selection','tournament_k3'], [-1, 1]).infer_objects()
multi_final_fitness_res['s_sel'] = multi_final_fitness_res['s_sel'].replace(['fitness_proportional_selection','tournament_k3'], [-1, 1]).infer_objects()
multi_final_fitness_res = multi_final_fitness_res.set_index('run')
multi_final_fitness_res.head()

Unnamed: 0_level_0,pop_s,b,mut_p,mut_s,p_sel,s_sel,f_min,f_max,f_mean,f_std
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,-1,-1.0,-1.0,-1.0,-1,-1,5.14329,5.14329,5.14329,0.0
1,-1,-1.0,-1.0,-1.0,-1,-1,42.836483,42.959203,42.898512,0.04453
2,-1,-1.0,-1.0,-1.0,-1,-1,45.071924,45.071924,45.071924,0.0
3,-1,-1.0,-1.0,-1.0,-1,-1,0.024066,0.024066,0.024066,0.0
4,-1,-1.0,-1.0,-1.0,-1,-1,17.900133,24.086404,18.51876,1.956271


In [25]:
inputs_df.to_pickle('./TEST_B_DOE_data.gz', compression='gzip')
multi_final_fitness_res.to_pickle('./TEST_B_DOE_code.gz', compression='gzip')