# 2-Level 6-Factor Full Factorial (With 30 replicates)
# (TESTCASE A)
# Generating Data

<br />

### Table of Contents
* [Preliminaries](#Preliminaries)
 * Importing Python packages
 * [Define Landscape](#Landscape)
* [Running the EA](#EA)
 * [Setting the data storage](#EA_DaSto)
 * [Setting parameter space](#EA_PaSp)
  * [2-Level Factors encoded values](#EA_FaCo)
 * [Iterative experiment](#EA_ItEx)
* [Processing data for DOE Analysis](#Data)
<br />

# Preliminaries <a name="Preliminaries"></a>
## Importing packages

In [1]:
import numpy as np
import pandas as pd
import itertools as it

import scipy.stats as stats

import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('bmh')
%matplotlib inline

from thesis_EAfunc import *
from thesis_visfunc import *

In [2]:
import datetime
# Wall-clock timestamp taken when this cell runs; later cells subtract it
# from their own timestamps to report elapsed time.
time_a = datetime.datetime.now()
print(time_a)

2019-05-22 12:37:12.979952


## Define Landscape <a name="Landscape"></a>

In [3]:
# Search domain: the same [-15, 15] bounds on both axes
x_min, x_max = -15, 15
y_min, y_max = -15, 15

# Coordinates of the known minimum of the landscape
x_point, y_point = -6.01717, 9.06022

# Packed forms of the values above, as (x_min, x_max, y_min, y_max) and (x, y)
domain = (x_min, x_max, y_min, y_max)
point = (x_point, y_point)
# presumably a (width, height) figure size for the plotting helpers -- not used in this section
img_size = (8.5, 4.25)

#Problem definition
import math
def g(x,y):
    """Single radially symmetric well centred on the origin.

    Evaluates ``-(50*sin(r)/r - r)`` with ``r = sqrt(x**2 + y**2)``.
    At the origin the value is -50; the ``+ r`` term makes it grow with distance.
    """
    radius = np.sqrt(x**2. + y**2.)
    # np.sinc(t) is sin(pi*t)/(pi*t), so dividing by pi gives sin(r)/r
    well = 50.*np.sinc(radius/np.pi)
    return -(well - radius)

def f(x, y):
    """Landscape to minimise: three shifted copies of ``g``, offset so the minimum is 0.

    The wells are centred at (-11, -9), (11, 3) and (-6, 9). The summed value at
    the hard-coded point (-6.01717, 9.06022) is subtracted, so ``f`` is exactly
    0.0 there.
    """
    def three_wells(px, py):
        # Un-shifted sum of the three translated wells
        return g(px+11., py+9.) + g(px-11., py-3.) + g(px+6., py-9.)

    best_x = -6.01717
    best_y = 9.06022
    offset = three_wells(best_x, best_y)
    return three_wells(x, y) - offset

# Sanity check of the landscape:
# first line  -> f at an arbitrary point (-1, -1), at the two well centres
#                (-11, -9) and (11, 3), and at (-6, 9), close to the known minimum
# last line   -> f at the exact point f() uses for its offset, so it prints 0.0
print(f(-1,-1), f(-11,-9), f(11,3), f(-6,9))
print()
print(f(-6.01717, 9.06022))

50.62059878583003 5.177364279021976 6.107247239602234 0.031278340140559635

0.0


# Running the Evolutionary Algorithm <a name="EA"></a>
1 Experiment
L-> 64 Runs in the experiment, each run with a different combination of levels of the 6 Factors.
    L-> 30 Replicates per run
        L-> Each replicate is different due to randomness effects.

In [4]:
# Seed NumPy's global random generator so every execution starts from the same state.
# NOTE(review): this only makes the experiment reproducible if thesis_EAfunc draws
# its random numbers from np.random -- confirm against that module.
np.random.seed(42)

## Setting data storage <a name="EA_DaSto"></a>

In [5]:
# Per-generation fitness summary: experiment id, the six factor settings,
# then replicate/generation identifiers and the fitness statistics.
mult_fit_cols = [
    'exp',
    'pop_s', 'b', 'mut_p', 'mut_s', 'p_sel', 's_sel',
    'run', 'generation',
    'fitness_min', 'fitness_max', 'fitness_mean', 'fitness_std',
]
multi_fit = pd.DataFrame(columns=mult_fit_cols).infer_objects()

# Per-individual record: the same leading columns, followed by the
# individual's birthdate, generation, function, fitness and (x, y) genes.
multi_gen_cols = [
    'exp',
    'pop_s', 'b', 'mut_p', 'mut_s', 'p_sel', 's_sel',
    'run', 'birthdate', 'generation', 'function', 'fitness', 'gen_x', 'gen_y',
]
multi_gen = pd.DataFrame(columns=multi_gen_cols).infer_objects()

## Setting parameter space for the experiment <a name="EA_PaSp"></a>

In [6]:
# Algorithm parameters
## Replicates per factor combination (30, matching the section intro),
## and the number of generations each replicate evolves
run_n = 30
gen_f = 60

## Population size: (low, high) level
pop_s = [10, 30]

## Parent selection methods and progeny-to-population ratios: (low, high) level
par_selection = ['fitness_proportional_selection','tournament_k3']
b = [0.5, 5]
## Parent subpopulation size for every (population size, ratio) pair
par_s = [size*ratio for size in pop_s for ratio in b]

## Progeny subpopulation size: the very same list object as par_s
prog_s = par_s

### Crossover method (not varied in this design)
crossover = 'uniform'
### Mutation method (not varied), then mutation probability and size: (low, high) level
mutation = 'random_all_gau_dis'
mut_p = [0.1, 0.9]
mut_s = [.1, 10]

## Survivor (new population) selection methods: (low, high) level
sur_selection = ['fitness_proportional_selection','tournament_k3']

### 2-Level Factors encoded values <a name="EA_FaCo"></a>

In [24]:
# Human-readable label for each factor code
inputs_labels = {
    'pop_s': 'Population size',
    'b': 'Progeny-to-population ratio',
    'mut_p': 'Mutation Probability',
    'mut_s': 'Mutation size',
    'p_sel': 'Parent selection',
    's_sel': 'Survivor selection method',
}

# One row per factor: (code, low value, high value, encoded low, encoded high, variable type)
dat = [
    ('pop_s', 10, 30, -1, 1, 'Numerical'),
    ('b', 0.5, 5, -1, 1, 'Numerical'),
    ('mut_p', 0.1, 0.9, -1, 1, 'Numerical (<1)'),
    ('mut_s', 0.1, 10, -1, 1, 'Numerical'),
    ('p_sel', 'fitness proportional', 'tournament k3', -1, 1, 'Categorical'),
    ('s_sel', 'fitness proportional', 'tournament k3', -1, 1, 'Categorical'),
]

# Index the table by factor code, attach the labels, and put the descriptive
# columns first.
inputs_df = (
    pd.DataFrame(dat, columns=['index', 'Value_low', 'Value_high', 'encoded_low', 'encoded_high', 'Variable type'])
    .set_index('index')
    .assign(Label=lambda frame: frame.index.map(inputs_labels))
    [['Label', 'Variable type', 'Value_low', 'Value_high', 'encoded_low', 'encoded_high']]
)

inputs_df

Unnamed: 0_level_0,Label,Variable type,Value_low,Value_high,encoded_low,encoded_high
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
pop_s,Population size,Numerical,10,30,-1,1
b,Progeny-to-population ratio,Numerical,0.5,5,-1,1
mut_p,Mutation Probability,Numerical (<1),0.1,0.9,-1,1
mut_s,Mutation size,Numerical,0.1,10,-1,1
p_sel,Parent selection,Categorical,fitness proportional,tournament k3,-1,1
s_sel,Survivor selection method,Categorical,fitness proportional,tournament k3,-1,1


## Iterative experiment <a name="EA_ItEx"></a>

In [7]:
# Full factorial design: one tuple per combination of the six factors' levels,
# ordered (pop_s, b, mut_p, mut_s, par_selection, sur_selection).
# The six inputs must still be the two-level lists from the parameter cell.
exp_par = list(it.product(pop_s, b, mut_p, mut_s, par_selection, sur_selection))
# Number of runs in the design (displayed as the cell output)
len(exp_par)




64

In [16]:
%%time
exp_n = 1
for (zz, yy, xx, vv, uu, tt) in exp_par:
    sur_selection = tt
    par_selection = uu
    mut_s = vv
    mut_p = xx
    b = yy
    pop_s = zz
    prog_s = int(b * pop_s)
    par_s = prog_s
    
    genera_res, fitness_res = EA_exp(run_n, gen_f, f, domain, pop_s, par_s, prog_s, mut_p, mut_s, par_selection, crossover, mutation, sur_selection)
    
    fitness_res.insert(0, 's_sel', tt)
    fitness_res.insert(0, 'p_sel', uu)
    fitness_res.insert(0, 'mut_s', vv)
    fitness_res.insert(0, 'mut_p', xx)
    fitness_res.insert(0, 'b', yy)
    fitness_res.insert(0, 'pop_s', zz)
    fitness_res.insert(0, 'exp', exp_n)
    multi_fit = multi_fit.append(fitness_res, ignore_index=True, sort=False)
    multi_fit = multi_fit.infer_objects()
    
    genera_res.insert(0, 's_sel', tt)
    genera_res.insert(0, 'p_sel', uu)
    genera_res.insert(0, 'mut_s', vv)
    genera_res.insert(0, 'mut_p', xx)
    genera_res.insert(0, 'b', yy)
    genera_res.insert(0, 'pop_s', zz)
    genera_res.insert(0, 'exp', exp_n)
    multi_gen = multi_gen.append(genera_res, ignore_index=True, sort=False)
    multi_gen = multi_gen.infer_objects()
        
    exp_n += 1

Wall time: 1h 41min 16s


In [17]:
# Persist both result frames as gzip-compressed pickles in the working directory:
# multi_fit holds the per-generation fitness summary, multi_gen the per-individual records.
multi_fit.to_pickle('./EA_B_05_fit.gz', compression='gzip')
multi_gen.to_pickle('./EA_B_05_gen.gz', compression='gzip')

In [8]:
# Reload the stored result frames from disk, replacing whatever multi_fit / multi_gen hold.
# Presumably this lets the cells below run without repeating the experiment loop -- confirm.
# NOTE: unpickling can execute arbitrary code; only load files produced by this notebook.
multi_fit = pd.read_pickle('./EA_B_05_fit.gz', compression='gzip')
multi_gen = pd.read_pickle('./EA_B_05_gen.gz', compression='gzip')

In [9]:
# Timestamp after the results are available, plus the time elapsed since time_a
time_b = datetime.datetime.now()
print(time_b)
print(time_b-time_a)

2019-05-22 12:37:32.024519
0:00:19.044567


# Processing data for DOE Analysis <a name="Data"></a>

In [10]:
# Inspect the last rows of the per-generation fitness summary
multi_fit.tail()

Unnamed: 0,exp,pop_s,b,mut_p,mut_s,p_sel,s_sel,run,generation,fitness_min,fitness_max,fitness_mean,fitness_std
117115,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,56.0,3.989594,68.500789,30.678328,21.681171
117116,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,57.0,3.989594,60.166577,40.362543,16.885566
117117,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,58.0,3.989594,64.375162,41.128973,16.680773
117118,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,59.0,3.989594,62.164109,42.597829,16.653509
117119,64,30,5.0,0.9,10.0,tournament_k3,tournament_k3,29,60.0,1.5599,62.524584,38.240462,18.753256


In [23]:
# Keep only the final-generation row of every replicate.
# The explicit .copy() makes the result an independent frame, so the edits
# below neither write into multi_fit nor raise SettingWithCopyWarning.
query = (multi_fit['generation']==gen_f)
multi_final_fitness_res = multi_fit[query].copy()

# Drop the bookkeeping columns and shorten the fitness column names.
# Renaming by name (not by position) stays correct if the column order changes.
multi_final_fitness_res = (
    multi_final_fitness_res
    .drop(['exp', 'generation'], axis=1)
    .rename(columns={'fitness_min': 'f_min',
                     'fitness_max': 'f_max',
                     'fitness_mean': 'f_mean',
                     'fitness_std': 'f_std'})
)
# Reordering columns: replicate number first, then the factors, then the responses
multi_final_fitness_res = multi_final_fitness_res[['run', 'pop_s', 'b', 'mut_p', 'mut_s', 'p_sel', 's_sel', 'f_min', 'f_max', 'f_mean', 'f_std']]

# Encoding values for DOE's Factors: [low, high] level of each factor -> [-1, 1]
factor_levels = {
    'pop_s': [10, 30],
    'b': [.5, 5],
    'mut_p': [.1, .9],
    'mut_s': [.1, 10],
    'p_sel': ['fitness_proportional_selection','tournament_k3'],
    's_sel': ['fitness_proportional_selection','tournament_k3'],
}
for factor, levels in factor_levels.items():
    multi_final_fitness_res[factor] = multi_final_fitness_res[factor].replace(levels, [-1, 1]).infer_objects()

# Any value that is not one of the two listed levels would slip through
# replace() unencoded and silently corrupt the DOE analysis, so fail loudly.
assert multi_final_fitness_res[list(factor_levels)].isin([-1, 1]).all().all(), "unencoded factor level found"

multi_final_fitness_res = multi_final_fitness_res.set_index('run')
multi_final_fitness_res.head()

Unnamed: 0_level_0,pop_s,b,mut_p,mut_s,p_sel,s_sel,f_min,f_max,f_mean,f_std
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,-1,-1.0,-1.0,-1.0,-1,-1,5.14329,5.14329,5.14329,0.0
1,-1,-1.0,-1.0,-1.0,-1,-1,42.836483,42.959203,42.898512,0.04453
2,-1,-1.0,-1.0,-1.0,-1,-1,45.071924,45.071924,45.071924,0.0
3,-1,-1.0,-1.0,-1.0,-1,-1,0.024066,0.024066,0.024066,0.0
4,-1,-1.0,-1.0,-1.0,-1,-1,17.900133,24.086404,18.51876,1.956271


In [25]:
# Persist the inputs for the DOE analysis as gzip-compressed pickles:
# the factor coding table, and the encoded final-generation results.
inputs_df.to_pickle('./EA_B_05_code.gz', compression='gzip')
multi_final_fitness_res.to_pickle('./EA_B_05_data.gz', compression='gzip')

In [26]:
# Final timestamp, plus the total time elapsed since time_a
time_c = datetime.datetime.now()
print(time_c)
print(time_c-time_a)

2019-05-23 10:43:01.423546
22:05:48.443594
