This notebook contains the code needed to process the data that tracks the POIS and diversity, as generated by the SOS framework.

This code is part of the paper "The Importance of Being Restrained". **TODO:** DETAILS + CITATION INFO TO ADD HERE.

In [1]:
import numpy as np
import pickle
import pandas as pd
from functools import partial
import glob 

import seaborn as sbs
import matplotlib.pyplot as plt

from scipy.stats import kendalltau, rankdata

# Default font settings applied to every matplotlib figure in this notebook.
font = dict(size=20)

plt.rc('font', **font)

In [None]:
# Root directory of the raw run output. The loader functions build their glob
# patterns by plain string concatenation, so the trailing slash is required.
base_folder = "/mnt/e/Research/DE/"

In [None]:
# Directory prefix for the CSV files written by the export cells below. It is
# concatenated directly with the file name, so the trailing slash is required.
output_location = "Datatables/"

In [None]:
def get_merged_dt(cross, sdis, F, CR, popsize):
    """Load and stack all run files of one configuration into a single table.

    Every file matching
    ``{base_folder}Runs_only/DEro{cross}{sdis}p{popsize}D30*F{F}Cr{CR}.txt``
    is read as a space-separated table without header row (the first line of
    each file is skipped) and the tables are stacked vertically.

    Parameters
    ----------
    cross : str
        Crossover code as it appears in the file name (callers pass 'b' or 'e').
    sdis : str
        Single-letter code that follows the crossover code in the file name.
    F, CR : str
        F and Cr settings exactly as written in the file name (e.g. '005').
    popsize : int
        Population size as written in the file name.

    Returns
    -------
    pandas.DataFrame
        The stacked rows (each file keeps its own row index) with the columns
        'cross', 'sdis', 'F', 'CR' and 'popsize' added. When no file matches,
        an empty frame holding only these five columns is returned.
    """
    pattern = f"{base_folder}Runs_only/DEro{cross}{sdis}p{popsize}D30*F{F}Cr{CR}.txt"
    # Read everything first and concatenate once: DataFrame.append was removed
    # in pandas 2.0, and re-copying the growing frame per file is quadratic.
    frames = [
        pd.read_csv(f, sep=' ', header=None, skiprows=1)
        for f in glob.glob(pattern)
    ]
    # pd.concat raises on an empty list, so fall back to an empty frame.
    dt_large = pd.concat(frames) if frames else pd.DataFrame()
    dt_large['cross'] = cross
    dt_large['sdis'] = sdis
    dt_large['F'] = F
    dt_large['CR'] = CR
    dt_large['popsize'] = popsize
    return dt_large

In [None]:
def get_full_dt():
    """Merge the run data of every configuration in the fixed parameter grid.

    Calls ``get_merged_dt`` for each combination of crossover ('b', 'e'),
    sdis code ('c', 'h', 'm', 's', 't', 'u'), CR ('005', '099'),
    F ('0916', '005') and population size (5, 20, 100), in that nesting
    order, and stacks the results.

    Returns
    -------
    pandas.DataFrame
        All per-configuration tables concatenated vertically.
    """
    # Build the list of frames first and concatenate once: DataFrame.append
    # was removed in pandas 2.0 and copies the growing frame on every call.
    frames = [
        get_merged_dt(cross, sdis, F, CR, popsize)
        for cross in ['b', 'e']
        for sdis in ['c', 'h', 'm', 's', 't', 'u']
        for CR in ['005', '099']
        for F in ['0916', '005']
        for popsize in [5, 20, 100]
    ]
    return pd.concat(frames)

In [None]:
def get_merged_dt_v2(cross, sdis, F, CR, popsize):
    """Load and stack the cosine-similarity files of one configuration.

    Every file matching
    ``{base_folder}CosineSimilarity-MoreData/CosineSimilarity-MoreData/7/DEro{cross}{sdis}p{popsize}D30*F{F}Cr{CR}.txt``
    is read as a space-separated table without header row (the first line of
    each file is skipped) and the tables are stacked vertically.

    Parameters
    ----------
    cross : str
        Crossover code as it appears in the file name (callers pass 'b' or 'e').
    sdis : str
        Single-letter code that follows the crossover code in the file name.
    F, CR : str
        F and Cr settings exactly as written in the file name (e.g. '005').
    popsize : int
        Population size as written in the file name.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns 'cosine', 'applied', 'accept', 'cross', 'sdis',
        'F', 'CR', 'popsize'; a completely empty frame (no columns) when no
        file matches.

    Raises
    ------
    ValueError
        If the files do not contain exactly three data columns, because the
        column renaming then has a length mismatch.
    """
    files = glob.glob(f"{base_folder}CosineSimilarity-MoreData/CosineSimilarity-MoreData/7/DEro{cross}{sdis}p{popsize}D30*F{F}Cr{CR}.txt")
    if not files:
        return pd.DataFrame()
    # Concatenate once instead of appending per file: DataFrame.append was
    # removed in pandas 2.0 and is quadratic in the number of files.
    dt_large = pd.concat(
        [pd.read_csv(f, sep=' ', header=None, skiprows=1) for f in files]
    )
    dt_large['cross'] = cross
    dt_large['sdis'] = sdis
    dt_large['F'] = F
    dt_large['CR'] = CR
    dt_large['popsize'] = popsize
    dt_large.columns = ['cosine', 'applied', 'accept', 'cross', 'sdis', 'F', 'CR', 'popsize']
    return dt_large

In [None]:
# Write one cosine CSV per parameter combination. Combinations for which
# get_merged_dt_v2 finds no input files still produce an (empty) CSV.
settings = (
    (cross, sdis, F, CR, popsize)
    for cross in ['b', 'e']
    for sdis in ['c', 'h', 'm', 's', 't', 'u']
    for CR in ['005', '0285', '052', '0755', '099']
    for F in ['005', '0285', '052', '0755', '099']
    for popsize in [5, 20, 100]
)
for cross, sdis, F, CR, popsize in settings:
    dt = get_merged_dt_v2(cross, sdis, F, CR, popsize)
    csv_path = f"{output_location}DEro{cross}_{sdis}_p{popsize}_F{F}CR{CR}_cosine.csv"
    dt.to_csv(csv_path)

In [None]:
def get_merged_dt_v3(sdis, F, CR, popsize):
    """Load and stack the 'LookingCloser' cosine files of one configuration.

    Every file matching
    ``{base_folder}CosineSimilarity-LookingCloser/7/DErob{sdis}p{popsize}D30*F{F}Cr{CR}.txt``
    is read as a space-separated table without header row (the first line of
    each file is skipped) and the tables are stacked vertically. Only files
    with the fixed ``DErob`` prefix are considered.

    Parameters
    ----------
    sdis : str
        Single-letter code that follows ``DErob`` in the file name.
    F, CR : str
        F and Cr settings exactly as written in the file name (e.g. '005').
    popsize : int
        Population size as written in the file name.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns 'cosine', 'nr_mut', 'nr_exceed', 'accept',
        'sdis', 'F', 'CR', 'popsize'; a completely empty frame (no columns)
        when no file matches.

    Raises
    ------
    ValueError
        If the files do not contain exactly four data columns, because the
        column renaming then has a length mismatch.
    """
    files = glob.glob(f"{base_folder}CosineSimilarity-LookingCloser/7/DErob{sdis}p{popsize}D30*F{F}Cr{CR}.txt")
    if not files:
        return pd.DataFrame()
    # Concatenate once instead of appending per file: DataFrame.append was
    # removed in pandas 2.0 and is quadratic in the number of files.
    dt_large = pd.concat(
        [pd.read_csv(f, sep=' ', header=None, skiprows=1) for f in files]
    )
    dt_large['sdis'] = sdis
    dt_large['F'] = F
    dt_large['CR'] = CR
    dt_large['popsize'] = popsize
    dt_large.columns = ['cosine', 'nr_mut', 'nr_exceed', 'accept', 'sdis', 'F', 'CR', 'popsize']
    return dt_large

In [None]:
# Write one "v3" cosine CSV per (sdis, population size, F, CR) combination.
# Combinations for which get_merged_dt_v3 finds no input files still produce
# an (empty) CSV. The loops iterate the values directly; the enumerate
# counters used previously were never read.
for sdis in ['c', 'h', 'm', 's', 't', 'u']:
    for popsize in [5, 20, 100]:
        for F in ['099', '0755', '052', '0285', '005']:
            for CR in ['0041', '0081', '0121', '0161', '0201']:
                dt = get_merged_dt_v3(sdis, F, CR, popsize)
                dt.to_csv(f"{output_location}DE_{sdis}_p{popsize}_F{F}CR{CR}_cosine_v3.csv")

In [None]:
def get_merged_dt_v4(sdis, F, CR, popsize):
    """Load and stack the 'Div_cos_sim' cosine files of one configuration.

    Every file matching
    ``{base_folder}Div_cos_sim/CosineSimilarity/7/DErob{sdis}p{popsize}D30f0*_F{F}Cr{CR}.txt``
    is read as a space-separated table without header row (the first line of
    each file is skipped) and the tables are stacked vertically. Only files
    with the fixed ``DErob`` prefix are considered.

    Parameters
    ----------
    sdis : str
        Single-letter code that follows ``DErob`` in the file name.
    F, CR : str
        F and Cr settings exactly as written in the file name (e.g. '005').
    popsize : int
        Population size as written in the file name.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns 'cosine', 'nr_mut', 'nr_exceed', 'accept',
        'sdis', 'F', 'CR', 'popsize'; a completely empty frame (no columns)
        when no file matches.

    Raises
    ------
    ValueError
        If the files do not contain exactly four data columns, because the
        column renaming then has a length mismatch.
    """
    files = glob.glob(f"{base_folder}Div_cos_sim/CosineSimilarity/7/DErob{sdis}p{popsize}D30f0*_F{F}Cr{CR}.txt")
    if not files:
        return pd.DataFrame()
    # Concatenate once instead of appending per file: DataFrame.append was
    # removed in pandas 2.0 and is quadratic in the number of files.
    dt_large = pd.concat(
        [pd.read_csv(f, sep=' ', header=None, skiprows=1) for f in files]
    )
    dt_large['sdis'] = sdis
    dt_large['F'] = F
    dt_large['CR'] = CR
    dt_large['popsize'] = popsize
    dt_large.columns = ['cosine', 'nr_mut', 'nr_exceed', 'accept', 'sdis', 'F', 'CR', 'popsize']
    return dt_large

In [None]:
# Write one "v4" cosine CSV per (F, CR, population size, sdis) combination.
# F = '0755' is deliberately left out of the grid. Combinations for which
# get_merged_dt_v4 finds no input files still produce an (empty) CSV.
settings = (
    (sdis, F, CR, popsize)
    for F in ['0285', '099', '052', '005']
    for CR in ['0755', '0285', '099', '052', '005', '00891', '01283', '01675', '02067', '02458']
    for popsize in [5, 20, 100]
    for sdis in ['t', 'h', 'm', 's', 'c', 'u']
)
for sdis, F, CR, popsize in settings:
    dt = get_merged_dt_v4(sdis, F, CR, popsize)
    csv_path = f"{output_location}DE_{sdis}_p{popsize}_F{F}CR{CR}_cosine_v4.csv"
    dt.to_csv(csv_path)

In [None]:
def get_diversity_dt(sdis, F, CR, popsize):
    """Load and stack the diversity files of one configuration.

    Every file matching
    ``{base_folder}Div_cos_sim/CosineSimilarity/7/Diversity-DErob{sdis}p{popsize}D30f0*_F{F}Cr{CR}.txt``
    is read as a space-separated table without header row (the first line of
    each file is skipped) and the tables are stacked vertically. Only files
    with the fixed ``Diversity-DErob`` prefix are considered.

    Parameters
    ----------
    sdis : str
        Single-letter code that follows ``DErob`` in the file name.
    F, CR : str
        F and Cr settings exactly as written in the file name (e.g. '005').
    popsize : int
        Population size as written in the file name.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns 'div0', 'div1', 'sdis', 'F', 'CR', 'popsize';
        a completely empty frame (no columns) when no file matches.

    Raises
    ------
    ValueError
        If the files do not contain exactly two data columns, because the
        column renaming then has a length mismatch.
    """
    # Use the shared base_folder setting instead of repeating the absolute
    # path, so relocating the data only requires changing one place.
    files = glob.glob(f"{base_folder}Div_cos_sim/CosineSimilarity/7/Diversity-DErob{sdis}p{popsize}D30f0*_F{F}Cr{CR}.txt")
    if not files:
        return pd.DataFrame()
    # Concatenate once instead of appending per file: DataFrame.append was
    # removed in pandas 2.0 and is quadratic in the number of files.
    dt_large = pd.concat(
        [pd.read_csv(f, sep=' ', header=None, skiprows=1) for f in files]
    )
    dt_large['sdis'] = sdis
    dt_large['F'] = F
    dt_large['CR'] = CR
    dt_large['popsize'] = popsize
    dt_large.columns = ['div0', 'div1', 'sdis', 'F', 'CR', 'popsize']
    return dt_large

In [None]:
# Write one diversity CSV per (F, CR, population size, sdis) combination.
# F = '0755' is deliberately left out of the grid. Combinations for which
# get_diversity_dt finds no input files still produce an (empty) CSV.
settings = (
    (sdis, F, CR, popsize)
    for F in ['0285', '099', '052', '005']
    for CR in ['0755', '0285', '099', '052', '005', '00891', '01283', '01675', '02067', '02458']
    for popsize in [5, 20, 100]
    for sdis in ['t', 'h', 'm', 's', 'c', 'u']
)
for sdis, F, CR, popsize in settings:
    dt = get_diversity_dt(sdis, F, CR, popsize)
    csv_path = f"{output_location}DE_{sdis}_p{popsize}_F{F}CR{CR}_diversity.csv"
    dt.to_csv(csv_path)