In [2]:

import pandas as pd 
import numpy as np
import pickle
import os

In [2]:
def create_processed_landscape_data():
    """Pivot the raw long-format ELA tables into one feature matrix per dimension.

    For each dimension in (5, 30) this reads
    ``./data/raw_data/ELA_data/ELA_Sobol_100D_<dim>dim.csv``, discards its last
    column and writes ``./data/landscape_data/ELA_<dim>D.csv`` with one row per
    distinct ``problem_instance`` and one column per distinct
    ``ela_feature_class`` (both in sorted order).

    Row labels are ``<function>_<instance>``, built from the 4th and 6th
    ``_``-separated tokens of ``problem_instance``. Column labels keep only the
    text after the last ``/`` of the feature name. The cell value is read from
    the third column of the raw file and must be numeric. (problem, feature)
    pairs that never occur stay at 0.0; when a pair occurs several times the
    last occurrence wins.

    The output directory is created when it does not exist yet.

    Returns:
        None. The result is only written to disk.
    """
    for dim in [5, 30]:
        df_ELA = pd.read_csv("./data/raw_data/ELA_data/ELA_Sobol_100D_"+str(dim)+"dim.csv")
        # The trailing column is not part of the export.
        df_ELA = df_ELA.drop(columns=df_ELA.columns[-1])

        # np.unique returns sorted values, which fixes the row and column order.
        ela_features = np.unique(df_ELA['ela_feature_class'])
        problems = np.unique(df_ELA['problem_instance'])

        # Constant-time position lookups instead of a list search per raw row.
        row_of = {problem: pos for pos, problem in enumerate(problems)}
        col_of = {feature: pos for pos, feature in enumerate(ela_features)}

        # Keep the matrix numeric; the labels live in the index/columns only.
        values = np.zeros((len(problems), len(ela_features)))
        records = zip(
            df_ELA['problem_instance'],
            df_ELA['ela_feature_class'],
            df_ELA.iloc[:, 2],  # value column, addressed by position
        )
        for problem, feature, value in records:
            values[row_of[problem], col_of[feature]] = value

        row_labels = []
        for problem in problems:
            tokens = problem.split("_")
            row_labels.append(tokens[3] + "_" + tokens[5])
        column_labels = [feature.split("/")[-1] for feature in ela_features]

        df_descriptive = pd.DataFrame(values, index=row_labels, columns=column_labels)
        os.makedirs("./data/landscape_data", exist_ok=True)
        # Spell missing values as 'nan' rather than pandas' default empty field.
        df_descriptive.to_csv(f"./data/landscape_data/ELA_{dim}D.csv", na_rep="nan")
   
# Run the ELA export: reads the raw CSVs for 5 and 30 dimensions and writes
# ./data/landscape_data/ELA_<dim>D.csv for each of them.
create_processed_landscape_data()

In [None]:
def create_processed_performance_data_modCMA(log_mode):
    """Write the median modCMA target per (function, instance) for each budget.

    Scans ``./data/raw_data/performance_modCMA`` for files ending in
    ``<dim>D.rds.csv`` (dim 5 and 30), prints each matching file name, and for
    every budget in ``{50, 100, 300, 500, 1000, 1500} * dim`` writes a
    one-column CSV (``target``) indexed by ``<funcId>_<instance>`` for
    functions 1..24 and instances 1..5 into
    ``./data/performance_data/modCMA/<log_mode>/``.

    Args:
        log_mode: with ``'log'`` the median of ``f(x)`` is replaced by its
            log10, where medians at or below 1e-8 are floored to log10(1e-8);
            with any other value the median is stored unchanged. The value
            also names the output sub-directory, which must already exist.

    Note:
        A later cell of this notebook defines another function under the same
        name; once that cell has run, this version is no longer reachable.
    """
    modAlgo = 'modCMA'
    source_dir = f"./data/raw_data/performance_{modAlgo}"
    use_log = log_mode == 'log'
    floor = 10**(-8)

    for dim in [5, 30]:
        suffix = str(dim)+"D.rds.csv"
        budgets = [factor*dim for factor in (50, 100, 300, 500, 1000, 1500)]
        for file in os.listdir(source_dir):
            if not file.endswith(suffix):
                continue
            print(file)
            all_runs = pd.read_csv(os.path.join(source_dir, file), index_col=0)
            out_suffix = file.replace(".rds", "")

            for budget in budgets:
                at_budget = all_runs[all_runs['runtime']==budget]
                target_by_label = {}
                for fid in range(1, 25):
                    of_function = at_budget[at_budget['funcId']==fid]
                    for iid in range(1, 6):
                        of_instance = of_function[of_function['instance']==iid]
                        value = np.median(of_instance['f(x)'])
                        if use_log:
                            # Floor tiny medians before taking the logarithm.
                            value = np.log10(floor) if value <= floor else np.log10(value)
                        target_by_label[f'{fid}_{iid}'] = value

                df_processed = pd.DataFrame(
                    list(target_by_label.values()),
                    columns=['target'],
                    index=list(target_by_label),
                )
                df_processed.to_csv(
                    f"./data/performance_data/{modAlgo}/{log_mode}/budget_"+str(budget)+"_"+out_suffix
                )
               

# Export the median modCMA targets twice: on the raw scale ('unlog') and
# log10-transformed ('log'); log_mode also selects the output sub-directory.
create_processed_performance_data_modCMA(log_mode='unlog')
create_processed_performance_data_modCMA(log_mode='log')

In [None]:
# "","DIM","funcId","ID","runtime","run.1","run.2","run.3","run.4","run.5","run.6","run.7","run.8","run.9","run.10","algid"
import pandas as pd
import numpy as np

def create_processed_performance_data_modCMA(log_mode):
    """Export per-run modCMA results in the wide layout used for statistical tests.

    Every file in ``./data/raw_data/performance_modCMA`` whose name ends with
    ``<dim>D.rds.csv`` (dim 5 and 30) is read; the configuration id is the
    integer between the first and second ``_`` of the file name. For each
    budget in ``{50, 100, 300, 500, 1000, 1500} * dim`` one CSV is written to
    ``./data/performance_data_stat_tests/modCMA/<log_mode>/`` (created if
    missing) with one row per (funcId 1..24, instance 1..5) and the columns
    ``DIM, funcId, ID, runtime, run.1 ... run.10, algid``.

    A (function, instance) pair with fewer than ten runs is padded with NaN so
    that ``algid`` always lands in its own column.

    Args:
        log_mode: ``'log'`` stores log10 of every run, with values at or below
            1e-8 floored to log10(1e-8); any other value stores the raw
            ``f(x)``. The value is also used as the output sub-directory name.

    Raises:
        ValueError: if a (function, instance) pair has more than ten runs at
            one budget.

    Note:
        This definition reuses the name of the median export defined in an
        earlier cell of this notebook and replaces it once this cell runs.
    """
    modAlgo = 'modCMA'
    raw_dir = f"./data/raw_data/performance_{modAlgo}"
    out_dir = f"./data/performance_data_stat_tests/{modAlgo}/{log_mode}"
    run_columns = ["run.1","run.2","run.3","run.4","run.5","run.6","run.7","run.8","run.9","run.10"]
    columns = ["DIM","funcId","ID","runtime"] + run_columns + ["algid"]

    for dim in [5, 30]:
        for file in os.listdir(raw_dir):
            if not file.endswith(str(dim)+"D.rds.csv"): continue
            conf_id = int(file.split("_")[1])
            df_conf = pd.read_csv(os.path.join(raw_dir, file), index_col=0)
            for budget in [50*dim, 100*dim, 300*dim, 500*dim, 1000*dim, 1500*dim]:
                df_conf_budget = df_conf[df_conf['runtime']==budget]
                data = []
                for fid in range(1,25):
                    df_conf_fid = df_conf_budget[df_conf_budget['funcId']==fid]
                    for iid in range(1, 6):
                        runs = df_conf_fid.loc[df_conf_fid['instance']==iid, 'f(x)'].tolist()
                        if len(runs) > len(run_columns):
                            raise ValueError(
                                f"{file}: {len(runs)} runs for function {fid}, instance {iid} "
                                f"at budget {budget}; at most {len(run_columns)} are supported"
                            )
                        if log_mode == 'log':
                            runs = [np.log10(10**(-8)) if r <= 10**(-8) else np.log10(r) for r in runs]
                        # Pad short groups so the trailing algid stays aligned.
                        runs = runs + [np.nan] * (len(run_columns) - len(runs))
                        data.append([dim, fid, iid, budget] + runs + [conf_id])

                df_processed = pd.DataFrame(data, columns=columns)
                os.makedirs(out_dir, exist_ok=True)
                df_processed.to_csv(f"./data/performance_data_stat_tests/{modAlgo}/{log_mode}/budget_"+str(budget)+"_"+file.replace(".rds", ""))
               

# Export the per-run modCMA tables twice: on the raw scale ('unlog') and
# log10-transformed ('log'); log_mode also selects the output sub-directory.
create_processed_performance_data_modCMA(log_mode='unlog')
create_processed_performance_data_modCMA(log_mode='log')

In [5]:
def create_processed_performance_data_modDE(log_mode):
    """Export per-run modDE results in the wide layout used for statistical tests.

    Every file in ``./data/raw_data/performance_modDE`` whose name ends with
    ``<dim>D.csv`` (dim 5 and 30) is read together with its companion
    ``<stem>_I5.csv`` (``stem`` is the file name up to its first ``.``); the
    companion's rows are appended after the main file's rows. The
    configuration id is the integer between the first and second ``_`` of the
    file name. For each budget in ``{50, 100, 300, 500, 1000, 1500} * dim`` one
    CSV is written to ``./data/performance_data_stat_tests/modDE/<log_mode>/``
    (created if missing) with one row per (funcId 1..24, instance 1..5) and
    the columns ``DIM, funcId, ID, runtime, run.1 ... run.10, algid``.

    Only the first ten runs of a (function, instance) pair are kept. A pair
    with fewer than ten runs is padded with NaN so that ``algid`` always lands
    in its own column.

    Args:
        log_mode: ``'log'`` stores log10 of every run, with values at or below
            1e-8 floored to log10(1e-8); any other value stores the raw
            ``f(x)``. The value is also used as the output sub-directory name.

    Note:
        A later cell of this notebook defines another function under the same
        name; once that cell has run, this version is no longer reachable.
    """
    modAlgo = 'modDE'
    raw_dir = f"./data/raw_data/performance_{modAlgo}"
    out_dir = f"./data/performance_data_stat_tests/{modAlgo}/{log_mode}"
    run_columns = ["run.1","run.2","run.3","run.4","run.5","run.6","run.7","run.8","run.9","run.10"]
    columns = ["DIM","funcId","ID","runtime"] + run_columns + ["algid"]

    for dim in [5, 30]:
        for file in os.listdir(raw_dir):
            if not file.endswith(str(dim)+"D.csv"): continue
            conf_id = int(file.split("_")[1])
            file_path = os.path.join(raw_dir, file)
            file_path_i5 = os.path.join(raw_dir, file.split(".")[0]+"_I5.csv")
            df_conf = pd.concat([
                pd.read_csv(file_path, index_col=0),
                pd.read_csv(file_path_i5, index_col=0),
            ])
            for budget in [50*dim, 100*dim, 300*dim, 500*dim, 1000*dim, 1500*dim]:
                df_conf_budget = df_conf[df_conf['runtime']==budget]
                data = []
                for fid in range(1,25):
                    df_conf_fid = df_conf_budget[df_conf_budget['funcId']==fid]
                    for iid in range(1, 6):
                        # Positional slice: keep at most the first ten runs.
                        runs = df_conf_fid.loc[df_conf_fid['instance']==iid, 'f(x)'].iloc[:len(run_columns)].tolist()
                        if log_mode == 'log':
                            runs = [np.log10(10**(-8)) if r <= 10**(-8) else np.log10(r) for r in runs]
                        # Pad short groups so the trailing algid stays aligned.
                        runs = runs + [np.nan] * (len(run_columns) - len(runs))
                        data.append([dim, fid, iid, budget] + runs + [conf_id])

                df_processed = pd.DataFrame(data, columns=columns)
                os.makedirs(out_dir, exist_ok=True)
                df_processed.to_csv(f"./data/performance_data_stat_tests/{modAlgo}/{log_mode}/budget_"+str(budget)+"_"+file.replace(".rds", ""))
# Export the per-run modDE tables twice: on the raw scale ('unlog') and
# log10-transformed ('log'); log_mode also selects the output sub-directory.
create_processed_performance_data_modDE(log_mode='unlog')
create_processed_performance_data_modDE(log_mode='log')

In [6]:
def create_processed_performance_data_modDE(log_mode):
    """Write the median modDE target per (function, instance) for each budget.

    Scans ``./data/raw_data/performance_modDE`` for files ending in
    ``<dim>D.csv`` (dim 5 and 30). Each one is combined with its companion
    ``<stem>_I5.csv`` (``stem`` is the file name up to its first ``.``), whose
    rows are appended after the main file's rows. For every budget in
    ``{50, 100, 300, 500, 1000, 1500} * dim`` a one-column CSV (``target``)
    indexed by ``<funcId>_<instance>`` for functions 1..24 and instances 1..5
    is written into ``./data/performance_data/modDE/<log_mode>/``.

    The median is taken over the first ten rows of each (function, instance)
    group only.

    Args:
        log_mode: with ``'log'`` the median of ``f(x)`` is replaced by its
            log10, where medians at or below 1e-8 are floored to log10(1e-8);
            with any other value the median is stored unchanged. The value
            also names the output sub-directory, which must already exist.

    Note:
        This definition reuses the name of the per-run export defined in an
        earlier cell of this notebook and replaces it once this cell runs.
    """
    modAlgo = 'modDE'
    source_dir = f"./data/raw_data/performance_{modAlgo}"
    use_log = log_mode == 'log'
    floor = 10**(-8)

    for dim in [5, 30]:
        suffix = str(dim)+"D.csv"
        budgets = [factor*dim for factor in (50, 100, 300, 500, 1000, 1500)]
        for file in os.listdir(source_dir):
            if not file.endswith(suffix):
                continue
            main_path = os.path.join(source_dir, file)
            companion_path = os.path.join(source_dir, file.split(".")[0]+"_I5.csv")
            main_runs = pd.read_csv(main_path, index_col=0)
            companion_runs = pd.read_csv(companion_path, index_col=0)
            all_runs = pd.concat([main_runs, companion_runs])
            out_suffix = file.replace(".rds", "")

            for budget in budgets:
                at_budget = all_runs[all_runs['runtime']==budget]
                target_by_label = {}
                for fid in range(1, 25):
                    of_function = at_budget[at_budget['funcId']==fid]
                    for iid in range(1, 6):
                        # Only the first ten rows of the group enter the median.
                        of_instance = of_function[of_function['instance']==iid][:10]
                        value = np.median(of_instance['f(x)'])
                        if use_log:
                            # Floor tiny medians before taking the logarithm.
                            value = np.log10(floor) if value <= floor else np.log10(value)
                        target_by_label[f'{fid}_{iid}'] = value

                df_processed = pd.DataFrame(
                    list(target_by_label.values()),
                    columns=['target'],
                    index=list(target_by_label),
                )
                df_processed.to_csv(
                    f"./data/performance_data/{modAlgo}/{log_mode}/budget_"+str(budget)+"_"+out_suffix
                )
# Export the median modDE targets twice: on the raw scale ('unlog') and
# log10-transformed ('log'); log_mode also selects the output sub-directory.
create_processed_performance_data_modDE(log_mode='unlog')
create_processed_performance_data_modDE(log_mode='log')