# Function

In [None]:
from pathlib import Path
import subprocess
import pickle
import numpy as np
import pandas as pd
import time
import multiprocessing
import concurrent.futures
import os
import jax
import jax.numpy as jnp
from evosax import BIPOP_CMA_ES

In [None]:
def random_choose_index(n):
    """Draw ``n`` distinct entries of ``df_indexes`` without replacement.

    The draw is weighted by the module-level ``log_weight`` probabilities,
    so entries with a larger weight are picked more often.
    """
    global df_indexes, log_weight
    return np.random.choice(
        df_indexes,
        size=n,
        replace=False,
        p=log_weight,
    )

In [None]:
def run_command(command):
    """Run ``command`` as a subprocess and return the completed process.

    Args:
        command: Sequence of program arguments passed to ``subprocess.run``.

    Returns:
        The ``subprocess.CompletedProcess`` with captured stdout/stderr.

    Raises:
        AssertionError: If the process exits with a non-zero return code.
            The message contains the command line, return code, stderr
            and stdout.
    """
    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Raised explicitly instead of via ``assert`` so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        # ``map(str, ...)`` lets the message render path-like arguments and
        # ``errors='replace'`` keeps undecodable output from masking the
        # real failure.
        command_line = ' '.join(map(str, command))
        stderr_text = result.stderr.decode(errors='replace')
        stdout_text = result.stdout.decode(errors='replace')
        raise AssertionError(f"""
command:
{command_line}
returncode: {result.returncode}
stderr:
{stderr_text}
stdout:
{stdout_text}
""")
    return result

In [None]:
def run_a_instance(chain_flags, instance_name, acopp_profit, seed):
    """Run the solver once on one instance and return the gain in percent.

    Args:
        chain_flags: Flag string forwarded through ``--chain_flags``.
        instance_name: Value forwarded through ``--instance_name``.
        acopp_profit: Reference profit the result is compared against.
        seed: Value forwarded through ``--random_seed``.

    Returns:
        ``(profit - acopp_profit) / acopp_profit * 100`` where ``profit`` is
        the integer parsed from the subprocess stdout
        (assumes run.py prints only that integer — TODO confirm).
    """
    global debug_mode, debug_time, acopp_dir, sol_dir

    fixed_args = [
        'python3',
        f'{acopp_dir}/run.py',
        '--acopp_dir',
        str(acopp_dir),
        '--instance_name',
        instance_name,
        '--run_only',
        '--experiment',
        # '--no_log',
        '--sol_dir',
        str(sol_dir),
        '--silent',
        '1',
    ]
    per_run_args = [
        "--postfix",
        str(time.time()),
        "--random_seed",
        str(seed),
        '--no_default',
        "--chain_flags",
        str(chain_flags),
    ]
    # In debug mode a --time argument is appended after all other flags.
    debug_args = ["--time", str(debug_time)] if debug_mode else []

    completed = run_command(fixed_args + per_run_args + debug_args)
    profit = int(completed.stdout.decode())

    return (profit - acopp_profit) / acopp_profit * 100

In [None]:
def to_arr_flag(a_list):
    """Join the string form of every item in ``a_list`` with ``:``."""
    return ":".join(str(item) for item in a_list)

In [None]:
def to_chain_flags(params):
    """Translate one parameter vector into the solver's flag string.

    Values are looked up in ``params`` by the positions stored in
    ``index_dict``. The ``left``/``_mid``/``right`` triples are treated as
    shares of the ``[min_min, max_max]`` interval: the left share fixes the
    lower bound and the middle share the width above it.

    Returns:
        The string passed to the solver via ``--chain_flags``.
    """
    global index_dict, max_max_rho, min_min_rho, max_max_indv_ants, min_min_indv_ants

    def value_of(name):
        return params[index_dict[name]]

    def bounds_from_shares(left_key, mid_key, right_key, lower_limit, upper_limit):
        # Normalise the three shares, then scale left and middle to the span.
        left, mid, right = value_of(left_key), value_of(mid_key), value_of(right_key)
        total = left + mid + right
        span = upper_limit - lower_limit
        low = left / total * span + lower_limit
        high = low + mid / total * span
        return low, high

    min_rho, max_rho = bounds_from_shares(
        "left_rho", "_mid_rho", "right_rho", min_min_rho, max_max_rho
    )
    min_indv_ants, max_indv_ants = bounds_from_shares(
        "left_indv_ants", "_mid_indv_ants", "right_indv_ants",
        min_min_indv_ants, max_max_indv_ants,
    )

    mean_keys = ("alpha_mean", "beta_mean", "par_a_mean", "par_b_mean", "par_c_mean")
    std_keys = ("alpha_std", "beta_std", "par_a_std", "par_b_std", "par_c_std")

    pop_size = value_of("pop_size")
    mean_arr = to_arr_flag([value_of(key) for key in mean_keys])
    std_arr = to_arr_flag([value_of(key) for key in std_keys])
    rho_arr = to_arr_flag([value_of("rho"), min_rho, max_rho])
    indv_ants_arr = to_arr_flag([value_of("indv_ants"), min_indv_ants, max_indv_ants])

    return f"--adapt_evap --cmaes --lambda {pop_size} --mean_ary {mean_arr} --std_ary {std_arr} --adpt_rho {rho_arr} --indv_ants {indv_ants_arr}"

In [None]:
def make_priority_order(row):
    """Return the sort key of one row (larger key = earlier in the table).

    Rows with a negative ``gain_percent`` get a key >= 1 that grows as the
    gain approaches zero; all other rows get ``-gain_percent`` (<= 0).
    Reads the global ``min_gain`` set by ``sort_by_priority``.
    """
    global min_gain
    gain = row.gain_percent
    if gain < 0:
        return gain - min_gain + 1
    return -gain


def sort_by_priority(df):
    """Sort ``df`` in place by priority and renumber its index.

    Negative gains come first (closest to zero on top), followed by the
    non-negative gains in ascending order.

    The index is reset after sorting: callers pick rows with label-based
    ``df.loc[idx]`` using indices drawn with rank-based weights, so the
    labels must match the sorted positions. Without the reset the labels
    stay tied to the pre-sort order.

    Args:
        df: DataFrame with a ``gain_percent`` column; modified in place.
    """
    global min_gain
    min_gain = df.gain_percent.min()

    df["priority_order"] = df.apply(make_priority_order, axis=1)
    df.sort_values(by="priority_order", ascending=False, inplace=True)
    df.drop(columns=["priority_order"], inplace=True)
    # Make label i refer to the i-th highest-priority row again.
    df.reset_index(drop=True, inplace=True)


def update_df(picked_idxes, gain_percents):
    """Store the mean gain of each evaluated instance and re-sort the table.

    Args:
        picked_idxes: Labels of the rows in the global ``df`` that were run.
        gain_percents: 2-D array whose columns line up with ``picked_idxes``;
            each column is averaged over all rows.
    """
    global df

    mean_per_instance = gain_percents.T.mean(axis=1)
    for row_label, mean_gain in zip(picked_idxes, mean_per_instance):
        df.loc[row_label, "gain_percent"] = mean_gain
    sort_by_priority(df)


In [None]:
def evaluate_pop(pop_params):
    """Evaluate every candidate of a population and return its fitness.

    Each candidate is run on the same ``n_run_each_trail`` sampled
    instances through the global ``executor``. Every run gets the next
    value of ``eval_call_count`` as its random seed. The per-instance mean
    gains are written back to the global ``df``.

    Args:
        pop_params: Iterable of parameter vectors, one per candidate.

    Returns:
        A ``jnp`` array with one value per candidate (lower is better):
        the negated mean of the gains, where every non-negative gain
        counts as 100.
    """
    global n_run_each_trail, executor, eval_call_count, df

    flag_strings = [to_chain_flags(candidate) for candidate in pop_params]
    picked_idxes = random_choose_index(n_run_each_trail)

    jobs = []
    for flags in flag_strings:
        for df_idx in picked_idxes:
            row = df.loc[df_idx]
            eval_call_count += 1
            jobs.append((flags, row.instance, row.acopp_profit, eval_call_count))

    # Transpose the job tuples into one argument sequence per parameter.
    outcomes = executor.map(run_a_instance, *zip(*jobs))
    gain_percents = np.array(list(outcomes)).reshape(
        (len(pop_params), n_run_each_trail)
    )

    update_df(picked_idxes, gain_percents)

    # Any win (gain >= 0) scores a flat 100; losses keep their negative gain.
    objective_values = np.where(gain_percents >= 0, 100, gain_percents)
    objective_values = objective_values.mean(axis=1)

    return jnp.array(- objective_values)

In [None]:
def save_study():
    """Persist the optimiser state and the gain table, plus a backup copy.

    Writes the pickled study to ``save_path`` and the global ``df`` to
    ``csv_path``, then writes both again into ``backup_dir`` under a name
    derived from the current timestamp.
    """
    global state, rng, strategy, es_params
    global eval_call_count, index_dict
    global save_path, backup_dir
    global prev_best, prev_prev_best
    global df, csv_path

    study = dict(
        strategy=strategy,
        es_params=es_params,
        state=state,
        rng=rng,
        index_dict=index_dict,
        prev_prev_best=prev_prev_best,
        prev_best=prev_best,
        eval_call_count=eval_call_count,
    )
    # Human-readable flag strings are stored only for candidates that exist.
    readable_entries = (
        ("best_chain_flags", prev_best),
        ("second_best_chain_flags", prev_prev_best),
    )
    for key, candidate in readable_entries:
        if candidate is not None:
            study[key] = to_chain_flags(candidate)

    def dump_study(target):
        with open(target, "wb") as out:
            pickle.dump(study, out)

    dump_study(save_path)
    df.to_csv(csv_path, index=False)

    # Backup
    stamp = time.time()
    df.to_csv(backup_dir / f"{stamp}.csv", index=False)
    dump_study(backup_dir / f"{stamp}.pkl")


In [None]:
def load_study():
    """Restore the optimiser state from ``save_path`` and ``df`` from ``csv_path``.

    All restored values are assigned to module-level globals.
    """
    global state, rng, strategy, es_params
    global eval_call_count, index_dict
    global save_path
    global prev_best, prev_prev_best
    global df, csv_path

    # NOTE: pickle executes arbitrary code on load; only open study files
    # produced by this notebook.
    with open(save_path, "rb") as study_file:
        saved = pickle.load(study_file)

    eval_call_count = saved["eval_call_count"]
    state = saved["state"]
    rng = saved["rng"]
    strategy = saved["strategy"]
    prev_best = saved["prev_best"]
    prev_prev_best = saved["prev_prev_best"]
    index_dict = saved["index_dict"]
    es_params = saved["es_params"]

    df = pd.read_csv(csv_path)


In [None]:
def modify_pop(pop_params):
    """Inject the previous best candidates and round the integer parameters.

    Two distinct random population slots are chosen; the first is
    overwritten with ``prev_best`` and the second with ``prev_prev_best``
    when those exist. The ``indv_ants`` and ``pop_size`` columns are then
    rounded to whole numbers.

    Args:
        pop_params: 2-D array-like, one row per candidate.

    Returns:
        The modified population as a ``jnp`` array.
    """
    global index_dict, eval_call_count, prev_best, prev_prev_best

    population = np.array(pop_params)
    first_slot, second_slot = np.random.choice(
        np.arange(len(population)), 2, replace=False
    )
    if prev_best is not None:
        population[first_slot] = prev_best
    if prev_prev_best is not None:
        population[second_slot] = prev_prev_best

    for integer_param in ("indv_ants", "pop_size"):
        column = index_dict[integer_param]
        population[:, column] = np.round(population[:, column])

    return jnp.array(population)

In [None]:
def win_percent():
    """Return the percentage of rows in ``df`` whose ``gain_percent`` is >= 0."""
    global df
    gains = df["gain_percent"]
    n_wins = (gains >= 0).sum()
    return n_wins / len(gains) * 100

# Config

In [None]:
# Debug mode uses fewer runs per trial, a tiny population, a local
# experiment directory and an extra --time argument for each solver run.
debug_mode = False
# debug_mode = True
debug_time = 9

# Use half of the available CPUs, but at least one worker.
n_jobs = max(1, multiprocessing.cpu_count() // 2)
acopp_dir = Path("../")
signal_path = Path("./signal.txt")

if debug_mode:
    n_run_each_trail = 2
    optim_popsize = 4
    experiment_name = "temp_evosax_tuning"
    experiment_dir = Path("./") / experiment_name
else:
    n_run_each_trail = 10
    experiment_name = "evosax_tuning"
    experiment_dir = Path("/home/user2/experiments") / experiment_name

# Every artefact of the experiment lives below experiment_dir.
csv_path = experiment_dir / "gain_percent.csv"
save_path = experiment_dir / "study.pkl"
backup_dir = experiment_dir / "backup"
sol_dir = experiment_dir / "solutions"

In [None]:
# Outer limits of the intervals onto which to_chain_flags scales the
# left/mid/right shares.
min_min_indv_ants, max_max_indv_ants = 2, 105
min_min_rho, max_max_rho = 0.01, 0.99

# Prepare

In [None]:
# Create every output directory up front; existing directories are kept.
experiment_dir.mkdir(parents=True, exist_ok=True)
sol_dir.mkdir(parents=True, exist_ok=True)
backup_dir.mkdir(parents=True, exist_ok=True)
save_path.parent.mkdir(parents=True, exist_ok=True)

In [None]:
# Build: invoke run.py once with --build_only before any tuning run and
# show what it printed.
assert os.path.isdir(acopp_dir)
command = ['python3', f'{acopp_dir}/run.py']
command += ['--acopp_dir', str(acopp_dir)]
command += ['--build_only', '--experiment']
result = run_command(command)
print(result.stdout.decode())

In [None]:
# Sampling weights for random_choose_index: index i gets a weight
# proportional to log(n_instance + 1) - log(i + 1), so low indices are
# drawn more often. The weights are normalised to sum to 1.
n_instance = 432

df_indexes = np.arange(n_instance)

log_weight = np.log(n_instance + 1) - np.log(df_indexes + 1)
log_weight = log_weight / log_weight.sum()

# IMPORTANT: One-time preparation

In [None]:
# df = pd.read_csv(csv_path)

# variable_list = (
#     "pop_size",

#     "alpha_mean",
#     "beta_mean",
#     "par_a_mean",
#     "par_b_mean",
#     "par_c_mean",

#     "alpha_std",
#     "beta_std",
#     "par_a_std",
#     "par_b_std",
#     "par_c_std",

#     "rho",
#     "left_rho",
#     "_mid_rho",
#     "right_rho",

#     "indv_ants",
#     "left_indv_ants",
#     "_mid_indv_ants",
#     "right_indv_ants",
# )

# index_dict = dict()
# for idx, key in enumerate(variable_list):
#     index_dict[key] = idx

# dim = len(index_dict)
# if not debug_mode:
#     optim_popsize = int(np.floor(4 + 3 * np.log(dim)))
# rng = jax.random.PRNGKey(int(time.time()))
# strategy = BIPOP_CMA_ES(popsize=optim_popsize, num_dims=dim)
# es_params = strategy.default_params
# eval_call_count = 0
# clip_max= np.zeros((dim,))
# clip_min= np.zeros((dim,))
# init_min= np.zeros((dim,))
# init_max= np.zeros((dim,))
# sigma_init= np.zeros((dim,))
# prev_best = None
# prev_prev_best = None

# idx = index_dict["pop_size"]
# clip_min[idx] = 8
# clip_max[idx] = 25
# init_min[idx] = init_max[idx] = 20
# sigma_init[idx] = (clip_max[idx] - clip_min[idx]) / 5

# idx = index_dict["indv_ants"]
# clip_min[idx] = 2
# clip_max[idx] = 15
# init_min[idx] = init_max[idx] = 2
# sigma_init[idx] = (clip_max[idx] - clip_min[idx]) / 5

# left_idx = index_dict["left_indv_ants"]
# _mid_idx = index_dict["_mid_indv_ants"]
# right_idx = index_dict["right_indv_ants"]
# clip_min[left_idx] = clip_min[_mid_idx] = clip_min[right_idx] = 0
# clip_max[left_idx] = clip_max[_mid_idx] = clip_max[right_idx] = 1
# sigma_init[left_idx] = sigma_init[_mid_idx] = sigma_init[right_idx] = (1 - 0) / 5
# min_indv_ants = 10
# max_indv_ants = 100
# min_indv_ants = (min_indv_ants - min_min_indv_ants) / (max_max_indv_ants - min_min_indv_ants)
# max_indv_ants = (max_indv_ants - min_min_indv_ants) / (max_max_indv_ants - min_min_indv_ants)
# init_min[left_idx] = init_max[left_idx] = min_indv_ants - 0
# init_min[_mid_idx] = init_max[_mid_idx] = max_indv_ants - min_indv_ants
# init_min[right_idx] = init_max[right_idx] = 1 - max_indv_ants

# idx = index_dict["rho"]
# clip_min[idx] = 0.01
# clip_max[idx] = 0.99
# init_min[idx] = init_max[idx] = 0.5
# sigma_init[idx] = 0.253226

# left_idx = index_dict["left_rho"]
# _mid_idx = index_dict["_mid_rho"]
# right_idx = index_dict["right_rho"]
# clip_min[left_idx] = clip_min[_mid_idx] = clip_min[right_idx] = 0
# clip_max[left_idx] = clip_max[_mid_idx] = clip_max[right_idx] = 1
# sigma_init[left_idx] = sigma_init[_mid_idx] = sigma_init[right_idx] = (1 - 0) / 5
# min_rho = 0.1
# max_rho = 0.99
# min_rho = (min_rho - min_min_rho) / (max_max_rho - min_min_rho)
# max_rho = (max_rho - min_min_rho) / (max_max_rho - min_min_rho)
# init_min[left_idx] = init_max[left_idx] = min_rho - 0
# init_min[_mid_idx] = init_max[_mid_idx] = max_rho - min_rho
# init_min[right_idx] = init_max[right_idx] = 1 - max_rho

# idx = index_dict["alpha_mean"]
# std_idx = index_dict["alpha_std"]
# clip_min[idx] = 0.01
# clip_max[idx] = 10
# init_min[idx] = init_max[idx] = 1.55
# sigma_init[idx] = 1.507
# clip_min[std_idx] = 0.01
# clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
# init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
# sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

# idx = index_dict["beta_mean"]
# std_idx = index_dict["beta_std"]
# clip_min[idx] = 0.01
# clip_max[idx] = 10
# init_min[idx] = init_max[idx] = 4.89
# sigma_init[idx] = 2.046
# clip_min[std_idx] = 0.01
# clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
# init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
# sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

# idx = index_dict["par_a_mean"]
# std_idx = index_dict["par_a_std"]
# clip_min[idx] = 0.01
# clip_max[idx] = 1
# init_min[idx] = init_max[idx] = 0.3
# sigma_init[idx] = 0.2
# clip_min[std_idx] = 0.01
# clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
# init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
# sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

# idx = index_dict["par_b_mean"]
# std_idx = index_dict["par_b_std"]
# clip_min[idx] = 0.01
# clip_max[idx] = 1
# init_min[idx] = init_max[idx] = 0.7
# sigma_init[idx] = 0.2
# clip_min[std_idx] = 0.01
# clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
# init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
# sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

# idx = index_dict["par_c_mean"]
# std_idx = index_dict["par_c_std"]
# clip_min[idx] = 0.01
# clip_max[idx] = 1
# init_min[idx] = init_max[idx] = 0.9
# sigma_init[idx] = 0.2
# clip_min[std_idx] = 0.01
# clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
# init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
# sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

# es_params = es_params.replace(
#         strategy_params=es_params.strategy_params.replace(
#             clip_min=jnp.array(clip_min),
#             clip_max=jnp.array(clip_max),
#             init_min=jnp.array(init_min),
#             init_max=jnp.array(init_max),
#             sigma_init=jnp.array(sigma_init),
#             )
#         )
# state = strategy.initialize(rng, es_params)

# assert not os.path.exists(save_path)
# assert not os.path.exists(save_path)
# assert not os.path.exists(save_path)
# save_study()

# Tuning

In [None]:
# Resume from the saved study, then run ask/evaluate/tell generations until
# the signal file requests a stop.
load_study()

with concurrent.futures.ThreadPoolExecutor(max_workers=n_jobs) as executor:
    # minimize
    while True:
        rng, rng_ask = jax.random.split(rng, 2)

        # Remember whether this generation starts with a restart so the new
        # population size can be reported after ask().
        if state.restart_state.restart_next:
            print(f"--> Restarted Strategy: {eval_call_count} eval calls")
            print_update = True
        else:
            print_update = False

        x, state = strategy.ask(rng_ask, state, es_params)

        if print_update:
            print(f"--> New Popsize: {state.restart_state.active_popsize}")

        x = modify_pop(x)
        fitness = evaluate_pop(x)

        # Keep the best candidates of the last two generations; modify_pop
        # re-injects them into the next population.
        prev_prev_best = prev_best
        prev_best = x[fitness.argmin()] # minimize

        state = strategy.tell(x, fitness, state, es_params)
        # Collapse restart_next to a single flag via .any().
        state = state.replace(
            restart_state = state.restart_state.replace(
                restart_next = state.restart_state.restart_next.any()
            )
        )
        save_study()

        print(f"eval_call_count: {eval_call_count} | gen_mean_fitness: {fitness.mean():.4f} | win_percent: {win_percent()} | best_chain_flags: {to_chain_flags(prev_best)}")

        # Cooperative stop: the loop ends once the signal file contains
        # "BREAK". Surrounding whitespace is ignored so a trailing newline
        # (e.g. from `echo BREAK > signal.txt`) still counts, and a missing
        # file means "keep running" instead of crashing the loop.
        try:
            with open(signal_path, "rt") as f:
                stop_requested = f.read().strip() == "BREAK"
        except FileNotFoundError:
            stop_requested = False
        if stop_requested:
            break