# Function

In [None]:
from pathlib import Path
import subprocess
import pickle
import numpy as np
import pandas as pd
import time
import multiprocessing
import concurrent.futures
import os
import jax
import jax.numpy as jnp
from evosax import BIPOP_CMA_ES
from datetime import datetime
import shutil


In [None]:
def random_choose_index(n):
    """Draw ``n`` distinct entries from ``df_indexes``.

    Sampling is done without replacement; the entry at position ``i`` of
    the module-level ``df_indexes`` is drawn with probability
    ``log_weight[i]``.
    """
    global df_indexes, log_weight
    return np.random.choice(a=df_indexes, size=n, replace=False, p=log_weight)

In [None]:
def run_command(command):
    """Run ``command`` as a subprocess and return the completed process.

    Both stdout and stderr are captured.

    Args:
        command: Sequence of program arguments passed to ``subprocess.run``.

    Returns:
        The ``subprocess.CompletedProcess`` of a run that exited with
        status 0.

    Raises:
        AssertionError: If the process exits with a non-zero status. The
            message contains the command line, the return code and the
            captured stderr/stdout.
    """
    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Raised explicitly instead of through an ``assert`` statement so
        # the check is not stripped when Python runs with ``-O``.
        command_text = ' '.join(map(str, command))
        # Undecodable bytes must not hide the real failure behind a
        # UnicodeDecodeError, hence errors="replace".
        stderr_text = result.stderr.decode(errors='replace')
        stdout_text = result.stdout.decode(errors='replace')
        raise AssertionError(f"""
command:
{command_text}
returncode: {result.returncode}
stderr:
{stderr_text}
stdout:
{stdout_text}
""")
    return result

In [None]:
def run_a_instance(chain_flags, instance_name, acopp_profit, seed):
    """Run ``run.py`` once on one instance and return the gain in percent.

    The subprocess is expected to print a single integer profit on stdout.
    The return value is the relative difference between that profit and
    ``acopp_profit``, expressed in percent.

    Args:
        chain_flags: Flag string forwarded through ``--chain_flags``.
        instance_name: Value passed to ``--instance_name``.
        acopp_profit: Reference profit the run is compared against.
        seed: Value passed to ``--random_seed``.
    """
    global debug_mode, debug_time, acopp_dir, sol_dir

    base_command = [
        'python3', f'{acopp_dir}/run.py',
        '--acopp_dir', str(acopp_dir),
        '--instance_name', instance_name,
        '--run_only',
        '--experiment',
        # '--no_log',
        '--sol_dir', str(sol_dir),
        '--silent', '1',
        # The current timestamp is used as the postfix of this run.
        "--postfix", str(time.time()),
        "--random_seed", str(seed),
        '--no_default',
        "--chain_flags", str(chain_flags),
    ]
    # In debug mode an explicit ``--time`` value is appended.
    time_option = ["--time", str(debug_time)] if debug_mode else []

    completed = run_command(base_command + time_option)
    profit = int(completed.stdout.decode())

    return (profit - acopp_profit) / acopp_profit * 100

In [None]:
def to_arr_flag(a_list):
    """Join the string form of every item of ``a_list`` with ``":"``."""
    return ":".join(str(item) for item in a_list)

In [None]:
def make_starting_min_max_values(name, params):
    """Decode the starting value and the [min, max] window of ``name``.

    ``params`` holds three shares (``left_<name>``, ``_mid_<name>``,
    ``right_<name>``). They are normalised by their sum and scaled to the
    global range ``[min_min_<name>, max_max_<name>]``: the left share gives
    the offset of the window's minimum and the middle share gives its width.
    Both bounds are rounded to ``n_decimal`` places.

    Returns:
        Tuple ``(starting_value, min_value, max_value)``.
    """
    global index_dict, n_decimal
    lower_bound = globals()["min_min_" + name]
    upper_bound = globals()["max_max_" + name]

    def lookup(key):
        return params[index_dict[key]]

    starting_value = lookup(name)
    left_share = lookup(f"left_{name}")
    mid_share = lookup(f"_mid_{name}")
    right_share = lookup(f"right_{name}")

    # The right share only contributes to the normalising total.
    total = left_share + mid_share + right_share
    offset = left_share / total * (upper_bound - lower_bound)
    width = mid_share / total * (upper_bound - lower_bound)

    min_value = np.round(lower_bound + offset, n_decimal)
    max_value = np.round(min_value + width, n_decimal)
    return starting_value, min_value, max_value

In [None]:
def to_chain_flags(params):
    """Render one parameter vector as the ``--chain_flags`` string.

    Reads the module-level ``index_dict`` (name -> position in ``params``)
    and the ordering lists ``mean_order``, ``std_order``, ``rho_order`` and
    ``indv_ants_order``.

    Args:
        params: Indexable parameter vector of one individual.

    Returns:
        The flag string containing ``--lambda``, ``--mean_ary``,
        ``--std_ary``, ``--adpt_rho`` and ``--indv_ants`` values.

    Raises:
        KeyError: If an ordering list names a value that is not available.
    """
    global index_dict, mean_order, std_order, rho_order, indv_ants_order

    pop_size = params[index_dict["pop_size"]]

    rho, min_rho, max_rho = make_starting_min_max_values("rho", params)
    indv_ants, min_indv_ants, max_indv_ants = make_starting_min_max_values("indv_ants", params)

    # Explicit name -> value table for the entries of ``rho_order`` and
    # ``indv_ants_order``; this replaces a lookup through ``locals()``,
    # which silently depended on the spelling of local variable names.
    derived = {
        "rho": rho,
        "min_rho": min_rho,
        "max_rho": max_rho,
        "indv_ants": indv_ants,
        "min_indv_ants": min_indv_ants,
        "max_indv_ants": max_indv_ants,
    }

    mean_arr = to_arr_flag([params[index_dict[x]] for x in mean_order])
    std_arr = to_arr_flag([params[index_dict[x]] for x in std_order])
    rho_arr = to_arr_flag([derived[x] for x in rho_order])
    indv_ants_arr = to_arr_flag([derived[x] for x in indv_ants_order])

    chain_flags = f"--adapt_evap --cmaes --lambda {pop_size} --mean_ary {mean_arr} --std_ary {std_arr} --adpt_rho {rho_arr} --indv_ants {indv_ants_arr}"
    return chain_flags

In [None]:
def make_priority_order(row):
    """Return the sort key of one row; larger keys are sorted first.

    Rows with a negative ``gain_percent`` get
    ``gain_percent - min_gain + 1``, where ``min_gain`` is the module-level
    value set by ``sort_by_priority``. All other rows get
    ``-gain_percent``.
    """
    global min_gain
    if row.gain_percent < 0:
        return row.gain_percent - min_gain + 1
    return -row.gain_percent


def sort_by_priority(df):
    """Sort ``df`` in place by descending priority and renumber its index.

    Sets the module-level ``min_gain`` to the smallest ``gain_percent`` in
    ``df``, ranks the rows with ``make_priority_order`` and reorders them.
    The temporary ``priority_order`` column is removed again.

    The index is reset to ``0..len(df)-1`` afterwards so that a label-based
    lookup (``df.loc[i]``) addresses the row at rank ``i``. Without the
    reset, labels keep pointing at the rows they belonged to before sorting.
    """
    global min_gain
    min_gain = df.gain_percent.min()

    df["priority_order"] = df.apply(make_priority_order, axis=1)
    df.sort_values(by="priority_order", ascending=False, inplace=True)
    df.drop(columns=["priority_order"], inplace=True)
    # Make labels equal to positions again (see docstring).
    df.reset_index(drop=True, inplace=True)


def update_df(picked_idxes, gain_percents):
    """Store new gains for the picked rows of ``df``, then re-sort it.

    ``gain_percents[i]`` is written to the ``gain_percent`` column of the
    row labelled ``picked_idxes[i]``; both sequences must have the same
    length.
    """
    global df

    assert len(picked_idxes) == len(gain_percents)

    for df_idx, gain in zip(picked_idxes, gain_percents):
        df.loc[df_idx, "gain_percent"] = gain
    sort_by_priority(df)


In [None]:
def evaluate_pop(pop_params):
    """Evaluate every individual on a random sample of instances.

    ``n_run_each_trail`` rows of ``df`` are picked and every individual is
    run on each of them through ``executor``. The per-instance mean gain is
    written back to ``df``. ``eval_call_count`` grows by one per run and its
    new value is used as that run's seed.

    Returns:
        A ``jnp`` array with one fitness value per individual: the negated
        mean of its gains after positive gains have been replaced by 0.
    """
    global n_run_each_trail, executor, eval_call_count, df

    flag_strings = [to_chain_flags(individual) for individual in pop_params]
    picked_idxes = random_choose_index(n_run_each_trail)

    # Instance-major order: all individuals for the first picked instance,
    # then all individuals for the second one, and so on.
    jobs = []
    for df_idx in picked_idxes:
        row = df.loc[df_idx]
        for flags in flag_strings:
            eval_call_count += 1
            jobs.append((flags, row.instance, row.acopp_profit, eval_call_count))

    results = executor.map(run_a_instance, *zip(*jobs))
    # Rows are instances and columns are individuals.
    gain_percents = np.array(list(results)).reshape(
        (n_run_each_trail, len(pop_params))
    )

    update_df(picked_idxes, gain_percents.mean(axis=1))

    # Positive gains count as 0, so only losses affect the fitness.
    capped = np.where(gain_percents > 0, 0, gain_percents)
    per_individual = capped.T.mean(axis=1)
    return jnp.array(-per_individual)

In [None]:
def save_study():
    """Persist the optimiser state and ``df``, plus a timestamped backup.

    The study dictionary is pickled to ``save_path`` and ``df`` is written
    to ``csv_path`` without its index. Copies of both files, named after
    the current ``time.time()`` value, are written to ``backup_dir``.
    """
    global state, rng, strategy, es_params
    global eval_call_count, index_dict
    global save_path, backup_dir
    global prev_best, prev_prev_best
    global df, csv_path

    study = dict(
        strategy=strategy,
        es_params=es_params,
        state=state,
        rng=rng,
        index_dict=index_dict,
        prev_prev_best=prev_prev_best,
        prev_best=prev_best,
        eval_call_count=eval_call_count,
    )
    # Readable flag strings are stored only for the bests that exist.
    readable = (
        ("best_chain_flags", prev_best),
        ("second_best_chain_flags", prev_prev_best),
    )
    for key, individual in readable:
        if individual is not None:
            study[key] = to_chain_flags(individual)

    def dump_study(target):
        with open(target, "wb") as handle:
            pickle.dump(study, handle)

    dump_study(save_path)
    df.to_csv(csv_path, index=False)

    # Backup
    stamp = time.time()
    df.to_csv(backup_dir / f"{stamp}.csv", index=False)
    dump_study(backup_dir / f"{stamp}.pkl")


In [None]:
def load_study():
    """Restore the module-level optimiser state written by ``save_study``.

    Unpickles the study dictionary stored at ``save_path``, copies its
    entries into the matching globals, and reloads ``df`` from ``csv_path``.
    """
    global state, rng, strategy, es_params
    global eval_call_count, index_dict
    global save_path
    global prev_best, prev_prev_best
    global df, csv_path

    snapshot = pickle.loads(Path(save_path).read_bytes())

    # Optimiser objects.
    state = snapshot["state"]
    rng = snapshot["rng"]
    es_params = snapshot["es_params"]
    strategy = snapshot["strategy"]

    # Parameter layout.
    index_dict = snapshot["index_dict"]

    # Bookkeeping of the tuning loop.
    prev_best = snapshot["prev_best"]
    prev_prev_best = snapshot["prev_prev_best"]
    eval_call_count = snapshot["eval_call_count"]

    df = pd.read_csv(csv_path)


In [None]:
def modify_pop(pop_params):
    """Inject the previous bests into the population and round it.

    Two distinct random rows are chosen. The first is overwritten with
    ``prev_best`` and the second with ``prev_prev_best``, each only when it
    is not ``None``. Every value is then rounded to ``n_decimal`` places,
    and the ``indv_ants`` and ``pop_size`` columns are rounded to whole
    numbers.

    Returns:
        The modified population as a ``jnp`` array.
    """
    global index_dict, prev_best, prev_prev_best, n_decimal

    population = np.array(pop_params)
    slots = np.random.choice(np.arange(len(population)), 2, replace=False)
    for slot, elite in zip(slots, (prev_best, prev_prev_best)):
        if elite is not None:
            population[slot] = elite

    population = np.round(population, n_decimal)
    for integer_name in ("indv_ants", "pop_size"):
        column = index_dict[integer_name]
        population[:, column] = np.round(population[:, column])

    return jnp.array(population)

In [None]:
def win_percent():
    """Return the percentage of rows in ``df`` whose ``gain_percent`` is >= 0."""
    global df
    not_losing = df["gain_percent"].ge(0)
    return not_losing.sum() / len(not_losing) * 100

In [None]:
def clean():
    """Delete leftover files in the working directory and the solution directory.

    Removes ``./errcmaes.err`` and ``./actparcmaes.par`` when present, and
    deletes the whole ``sol_dir`` tree when it exists.
    """
    global sol_dir

    for leftover in ("./errcmaes.err", "./actparcmaes.par"):
        if os.path.exists(leftover):
            os.remove(leftover)

    if os.path.exists(sol_dir):
        shutil.rmtree(sol_dir)

In [None]:
def set_init(name, min_value, max_value, mean_value, std_value=None, std_factor=5):
    """Fill the initialisation arrays at the slot of variable ``name``.

    Writes the clip bounds, the initial value (used as both ``init_min`` and
    ``init_max``) and the initial sigma. When ``std_value`` is ``None`` the
    sigma defaults to ``(max_value - min_value) / std_factor``.
    """
    global clip_min, clip_max, init_min, init_max, sigma_init, index_dict

    sigma = (max_value - min_value) / std_factor if std_value is None else std_value

    slot = index_dict[name]
    clip_min[slot] = min_value
    clip_max[slot] = max_value
    # Identical lower and upper init bounds pin the start to mean_value.
    init_min[slot] = mean_value
    init_max[slot] = mean_value
    sigma_init[slot] = sigma
        

In [None]:
def set_init_min_max(name, min_value, max_value):
    """Initialise the ``left_``/``_mid_``/``right_`` share variables of ``name``.

    ``min_value`` and ``max_value`` are first mapped to fractions of the
    global range ``[min_min_<name>, max_max_<name>]``. The three shares are
    then initialised to the fraction below the window, inside it and above
    it. Each share is clipped to ``[0, 1]``.
    """
    floor = globals()["min_min_" + name]
    ceiling = globals()["max_max_" + name]

    low_fraction = (min_value - floor) / (ceiling - floor)
    high_fraction = (max_value - floor) / (ceiling - floor)
    share_min, share_max = 0, 1

    initial_shares = (
        (f"left_{name}", low_fraction - share_min),
        (f"_mid_{name}", high_fraction - low_fraction),
        (f"right_{name}", share_max - high_fraction),
    )
    for variable, share in initial_shares:
        set_init(
            name=variable,
            min_value=share_min,
            max_value=share_max,
            mean_value=share,
        )

In [None]:
def set_init_with_std(
    name,
    min_value,
    max_value,
    mean_value,
    std_value=None,
    std_factor=5,
    min_std_value=0.01,
    max_std_value=None,
    max_std_factor=2,
    std_std_factor=5,
):
    """Initialise the ``<name>_mean`` and ``<name>_std`` variables.

    ``<name>_mean`` is bounded by ``[min_value, max_value]`` and starts at
    ``mean_value``. ``<name>_std`` is bounded by
    ``[min_std_value, max_std_value]`` and starts at ``std_value``.

    Missing values are derived from the range of the mean:
    ``std_value`` defaults to ``(max_value - min_value) / std_factor`` and
    ``max_std_value`` to ``(max_value - min_value) / max_std_factor``.
    """
    value_range = max_value - min_value

    if std_value is None:
        std_value = value_range / std_factor
    set_init(f"{name}_mean", min_value, max_value, mean_value, std_value, std_factor)

    if max_std_value is None:
        max_std_value = value_range / max_std_factor
    # The sigma of the std variable itself is left to set_init's default,
    # scaled by std_std_factor.
    set_init(
        name=f"{name}_std",
        min_value=min_std_value,
        max_value=max_std_value,
        mean_value=std_value,
        std_value=None,
        std_factor=std_std_factor,
    )
        

# Config

In [None]:
# Switch between the real experiment and a quick debug run.
debug_mode = False
# debug_mode = True
# Value passed to run.py through ``--time`` when debug_mode is on.
debug_time = 7

# Use half of the available CPUs, but at least one worker.
n_jobs = max(1, multiprocessing.cpu_count() // 2)
acopp_dir = Path("../")
# File polled by the tuning loop for the stop request.
signal_path = Path("./signal.txt")
sol_dir = Path("/home/user2/temp/evosax_tuning/solutions")
# Number of decimals kept when rounding parameters and printed values.
n_decimal = 2

if debug_mode:
    n_run_each_trail = 2
    optim_popsize = 4
    experiment_name = "temp_evosax_tuning"
    experiment_dir = Path("./") / experiment_name
else:
    n_run_each_trail = 10
    experiment_name = "evosax_tuning"
    experiment_dir = Path("/home/user2/experiments") / experiment_name

# Files and folders kept inside the experiment directory.
csv_path, save_path, backup_dir = (
    experiment_dir / leaf for leaf in ("gain_percent.csv", "study.pkl", "backup")
)

# Prepare

In [None]:
# Create every directory the run writes to; existing ones are left alone.
for required_dir in (experiment_dir, sol_dir, backup_dir, save_path.parent):
    os.makedirs(required_dir, exist_ok=True)

In [None]:
# Build
# Run run.py once with --build_only before tuning starts.
assert os.path.isdir(acopp_dir)
command = ['python3', f'{acopp_dir}/run.py', '--acopp_dir', str(acopp_dir)]
command += ['--build_only', '--experiment']
result = run_command(command)
print(result.stdout.decode())

In [None]:
# Outer limits of the tunable [min, max] windows. They are looked up by name
# as ``min_min_<name>`` / ``max_max_<name>`` through ``globals()``.
min_min_indv_ants, max_max_indv_ants = 2, 105
min_min_rho, max_max_rho = 0.01, 0.99

In [None]:
# Base names of the parameters that each have a ``_mean`` and a ``_std``
# variable. Their order is the order of the values in the
# ``--mean_ary`` / ``--std_ary`` flags.
_distribution_params = (
    "alpha",
    "beta",
    "par_a",
    "par_b",
    "par_c",
    "q0",
    "neighbour",
)
mean_order = [f"{base}_mean" for base in _distribution_params]
std_order = [f"{base}_std" for base in _distribution_params]

# Order of the values in the ``--adpt_rho`` and ``--indv_ants`` flags:
# starting value, then minimum, then maximum.
rho_order = [prefix + "rho" for prefix in ("", "min_", "max_")]
indv_ants_order = [prefix + "indv_ants" for prefix in ("", "min_", "max_")]

In [None]:
# Load the per-instance results table. Other cells read its ``instance``,
# ``acopp_profit`` and ``gain_percent`` columns.
df = pd.read_csv(csv_path)
n_instance = len(df.instance)
# Sanity check: the table must contain exactly 432 instances.
assert n_instance == 432

In [None]:
# Candidate row indexes and their sampling probabilities. The weight of
# index ``i`` is ``log(n_instance + 1) - log(i + 1)``, so it decreases as the
# index grows; the weights are normalised to sum to 1.
df_indexes = np.arange(n_instance)

log_weight = np.log(n_instance + 1) - np.log(df_indexes + 1)
log_weight = log_weight / log_weight.sum()

# IMPORTANT: One-time preparation

In [None]:
# variable_list = [
#     "pop_size",

#     "rho",
#     "left_rho",
#     "_mid_rho",
#     "right_rho",

#     "indv_ants",
#     "left_indv_ants",
#     "_mid_indv_ants",
#     "right_indv_ants",

# ] + mean_order + std_order

# index_dict = dict()
# for idx, key in enumerate(variable_list):
#     index_dict[key] = idx

# dim = len(index_dict)
# if not debug_mode:
#     optim_popsize = int(np.floor(4 + 3 * np.log(dim)))
# rng = jax.random.PRNGKey(int(time.time()))
# strategy = BIPOP_CMA_ES(popsize=optim_popsize, num_dims=dim)
# es_params = strategy.default_params
# eval_call_count = 0
# clip_max= np.zeros((dim,))
# clip_min= np.zeros((dim,))
# init_min= np.zeros((dim,))
# init_max= np.zeros((dim,))
# sigma_init= np.zeros((dim,))
# prev_best = None
# prev_prev_best = None

# set_init(
#     name="pop_size",
#     min_value= 8,
#     max_value= 40,
#     mean_value= 17,
# )

# set_init(
#     name="indv_ants",
#     min_value= 2,
#     max_value= 25,
#     mean_value= 7.0,
# )
# set_init_min_max(
#     name="indv_ants",
#     min_value= 15.919399,
#     max_value= 65.6589,
# )

# set_init(
#     name="rho",
#     min_value= 0.01,
#     max_value= 0.99,
#     mean_value= 0.5558,
# )
# set_init_min_max(
#     name="rho",
#     min_value= 0.235,
#     max_value= 0.68009996,
# )

# set_init_with_std(
#     name="alpha",
#     min_value= 0.01,
#     max_value= 10,
#     mean_value= 0.7373,
#     std_value= 0.01,
# )
# set_init_with_std(
#     name="beta",
#     min_value= 0.01,
#     max_value= 10,
#     mean_value= 4.8772,
#     std_value= 2.3687,
# )

# set_init_with_std(
#     name="par_a",
#     min_value= 0.01,
#     max_value= 1,
#     mean_value= 0.7586,
#     std_value= 0.2307,
# )
# set_init_with_std(
#     name="par_b",
#     min_value= 0.01,
#     max_value= 1,
#     mean_value= 0.918,
#     std_value= 0.0819,
# )
# set_init_with_std(
#     name="par_c",
#     min_value= 0.01,
#     max_value= 1,
#     mean_value= 0.1964,
#     std_value= 0.1697,
# )

# set_init_with_std(
#     name="q0",
#     min_value= 0,
#     max_value= 0.99,
#     mean_value= 0.5,
#     std_value= 0.2,
# )
# set_init_with_std(
#     name="neighbour",
#     min_value= 0,
#     max_value= 1,
#     mean_value= 0.5,
#     std_value= 0.2,
# )

# es_params = es_params.replace(
#         strategy_params=es_params.strategy_params.replace(
#             clip_min=jnp.array(clip_min),
#             clip_max=jnp.array(clip_max),
#             init_min=jnp.array(init_min),
#             init_max=jnp.array(init_max),
#             sigma_init=jnp.array(sigma_init),
#             )
#         )
# state = strategy.initialize(rng, es_params)

# assert not os.path.exists(save_path)
# assert not os.path.exists(save_path)
# assert not os.path.exists(save_path)
# save_study()

# Tuning

In [None]:
load_study()


def _stop_requested(path):
    """Return True when the signal file at ``path`` contains ``BREAK``.

    Surrounding whitespace is ignored, so a file written with a trailing
    newline still stops the loop. A missing file means "keep running"
    rather than crashing the loop after a finished generation.
    """
    try:
        with open(path, "rt") as f:
            return f.read().strip() == "BREAK"
    except FileNotFoundError:
        return False


with concurrent.futures.ThreadPoolExecutor(max_workers=n_jobs) as executor:
    # minimize
    while True:
        rng, rng_ask = jax.random.split(rng, 2)

        # Remember whether this generation starts after a restart so the
        # new population size can be reported once ``ask`` has run.
        if state.restart_state.restart_next:
            print(f"--> Restarted Strategy: {eval_call_count} eval calls")
            print_update = True
        else:
            print_update = False

        x, state = strategy.ask(rng_ask, state, es_params)

        if print_update:
            print(f"--> New Popsize: {state.restart_state.active_popsize}")

        # Inject the previous bests and round the candidates before they
        # are evaluated; the strategy is told about the modified population.
        x = modify_pop(x)
        fitness = evaluate_pop(x)
        state = strategy.tell(x, fitness, state, es_params)

        # Collapse ``restart_next`` to a single boolean via ``.any()``.
        # NOTE(review): presumably needed because ``tell`` can return an
        # array-valued flag here - confirm against the evosax version used.
        state = state.replace(
            restart_state = state.restart_state.replace(
                restart_next = state.restart_state.restart_next.any()
            )
        )
        prev_prev_best = prev_best
        prev_best = x[fitness.argmin()] # minimize

        clean()
        save_study()

        print(f"{datetime.now()} | eval_call_count: {eval_call_count} | gen_mean_gain_percent: {np.round(- fitness.mean(), n_decimal)} | win_percent: {np.round(win_percent(), n_decimal)} | best_chain_flags: {to_chain_flags(prev_best)}")
        # The state has just been saved, so stopping here loses nothing.
        if _stop_requested(signal_path):
            break