In [1]:
# Functions

In [2]:
from pathlib import Path
import subprocess
import pickle
import numpy as np
import pandas as pd
import time
import multiprocessing
import concurrent.futures
import os
import jax
import jax.numpy as jnp
from evosax import BIPOP_CMA_ES

In [3]:
def random_choose_index():
    """Draw one entry of ``df_indexes`` at random, weighted by ``df.prob``.

    Both ``df_indexes`` and ``df`` are read from module scope.

    Returns:
        The element of ``df_indexes`` selected by ``np.random.choice``.
    """
    candidates = df_indexes
    weights = df.prob
    chosen = np.random.choice(candidates, p=weights)
    return chosen

In [4]:
def run_command(command):
    """Run an external command and return its completed-process result.

    Args:
        command: Sequence of program arguments, handed to ``subprocess.run``
            as-is (no shell is involved).

    Returns:
        The ``subprocess.CompletedProcess``; ``stdout`` and ``stderr`` are
        captured as bytes.

    Raises:
        AssertionError: If the command exits with a non-zero return code. The
            message lists the command line, the return code, stderr and stdout.
    """
    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Undecodable bytes are replaced so that building the diagnostic can
        # never fail and mask the real error.
        stderr_text = result.stderr.decode(errors="replace")
        stdout_text = result.stdout.decode(errors="replace")
        # map(str, ...) lets the message be built even for path-like arguments.
        command_text = ' '.join(map(str, command))
        # Raised explicitly instead of via `assert` so the check still runs
        # under `python -O`; the exception type is unchanged for callers.
        raise AssertionError(f"""
command:
{command_text}
returncode: {result.returncode}
stderr:
{stderr_text}
stdout:
{stdout_text}
""")
    return result

In [5]:
def run_a_instance(chain_flags, instance_name, acopp_profit, seed):
    """Run the solver once on one instance and score it against a baseline.

    Reads ``acopp_dir``, ``sol_dir``, ``debug_mode`` and ``debug_time`` from
    module scope.

    Args:
        chain_flags: Extra solver flags, forwarded as one ``--chain_flags`` value.
        instance_name: Value passed to ``--instance_name``.
        acopp_profit: Baseline profit the run is compared with.
        seed: Value passed to ``--random_seed``.

    Returns:
        Gain of this run's profit over ``acopp_profit``, in percent.
    """
    launcher = ['python3', f'{acopp_dir}/run.py']
    # ('--no_log' is currently not passed.)
    options = [
        '--acopp_dir', str(acopp_dir),
        '--instance_name', instance_name,
        '--run_only',
        '--experiment',
        '--sol_dir', str(sol_dir),
        '--silent', '1',
        # The current timestamp is used as the postfix value.
        "--postfix", str(time.time()),
        "--random_seed", str(seed),
        '--no_default',
        "--chain_flags", str(chain_flags),
    ]
    # Debug mode appends an explicit --time value.
    time_option = ["--time", str(debug_time)] if debug_mode else []

    completed = run_command(launcher + options + time_option)

    # The whole stdout is parsed as a single integer: the profit of the run.
    profit = int(completed.stdout.decode())

    return (profit - acopp_profit) / acopp_profit * 100

In [6]:
def to_arr_flag(a_list):
    """Serialize a sequence as a colon-separated string, e.g. ``[1, 2] -> "1:2"``."""
    return ":".join(str(item) for item in a_list)

In [7]:
def to_chain_flags(params):
    """Turn one parameter vector into the solver's ``--chain_flags`` string.

    Entries of ``params`` are looked up by name through the module-level
    ``index_dict``. For ``rho`` and ``indv_ants`` the three
    ``left_* / _mid_* / right_*`` entries are normalized by their sum and used
    to place a [min, max] interval inside the hard limits
    (``min_min_*`` .. ``max_max_*``): the left share is the offset from the
    hard minimum, the middle share is the interval width.

    Args:
        params: Indexable parameter vector laid out according to ``index_dict``.

    Returns:
        The flag string understood by the solver.
    """
    def value_of(name):
        return params[index_dict[name]]

    def interval_within(name, hard_min, hard_max):
        # NOTE(review): if all three shares are 0 the sum is 0 and this
        # divides by zero -- confirm the optimizer cannot produce that.
        left = value_of(f"left_{name}")
        middle = value_of(f"_mid_{name}")
        right = value_of(f"right_{name}")
        total = left + middle + right
        left_span = left / total * (hard_max - hard_min)
        middle_span = middle / total * (hard_max - hard_min)
        lower = left_span + hard_min
        upper = lower + middle_span
        return lower, upper

    pop_size = value_of("pop_size")

    hyper_names = ("alpha", "beta", "par_a", "par_b", "par_c")
    mean_arr = to_arr_flag([value_of(f"{name}_mean") for name in hyper_names])
    std_arr = to_arr_flag([value_of(f"{name}_std") for name in hyper_names])

    rho_low, rho_high = interval_within("rho", min_min_rho, max_max_rho)
    rho_arr = to_arr_flag([value_of("rho"), rho_low, rho_high])

    ants_low, ants_high = interval_within(
        "indv_ants", min_min_indv_ants, max_max_indv_ants
    )
    indv_ants_arr = to_arr_flag([value_of("indv_ants"), ants_low, ants_high])

    return f"--adapt_evap --cmaes --lambda {pop_size} --mean_ary {mean_arr} --std_ary {std_arr} --adpt_rho {rho_arr} --indv_ants {indv_ants_arr}"

In [8]:
def evaluate_pop(pop_params):
    """Score every candidate of a population by running the solver.

    Each candidate is run ``n_run_each_trail`` times, each time on an instance
    drawn with ``random_choose_index()``. Runs are dispatched through the
    module-level ``executor``. The module-level ``eval_call_count`` is
    incremented once per run, and its new value is used as that run's seed.

    Args:
        pop_params: Sequence of parameter vectors, one per candidate.

    Returns:
        1-D array with, per candidate, the negated mean gain percent over its
        runs (negated so that a minimizer can be used).
    """
    global eval_call_count

    jobs = []
    for candidate in pop_params:
        flags = to_chain_flags(candidate)
        for _ in range(n_run_each_trail):
            row = df.loc[random_choose_index()]
            next_seed = eval_call_count + 1
            jobs.append((flags, row.instance, row.acopp_profit, next_seed))
            eval_call_count = next_seed

    # zip(*jobs) transposes the job tuples into one iterable per argument.
    gains = list(executor.map(run_a_instance, *zip(*jobs)))
    per_candidate = np.array(gains).reshape((len(pop_params), n_run_each_trail))

    return -per_candidate.mean(axis=1)

In [9]:
def save_study():
    """Checkpoint the tuning study to ``save_path``.

    Pickles a dict holding the module-level ``strategy``, ``es_params``,
    ``state``, ``rng``, ``index_dict``, ``prev_prev_best``, ``prev_best`` and
    ``eval_call_count``.

    The data is first written to ``<save_path>.tmp`` and then moved over
    ``save_path`` with ``os.replace``. If pickling fails or is interrupted,
    the previous checkpoint is therefore left intact instead of truncated.

    Raises:
        Whatever ``open``, ``pickle.dump`` or ``os.replace`` raise; the
        temporary file is removed before the exception propagates.
    """
    study = {
        "strategy": strategy,
        "es_params": es_params,
        "state": state,
        "rng": rng,
        "index_dict": index_dict,
        "prev_prev_best": prev_prev_best,
        "prev_best": prev_best,
        "eval_call_count": eval_call_count,
    }

    # os.fspath accepts both str and pathlib.Path values of save_path.
    tmp_path = os.fspath(save_path) + ".tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(study, f)
        os.replace(tmp_path, save_path)
    except BaseException:
        # Also covers KeyboardInterrupt from interrupting the kernel mid-save.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

In [10]:
def load_study():
    """Restore a pickled study from ``save_path`` into module-level variables.

    Sets ``eval_call_count``, ``state``, ``rng``, ``strategy``, ``prev_best``,
    ``prev_prev_best``, ``index_dict`` and ``es_params`` from the entries of
    the same names in the unpickled dict.

    Note: ``pickle.load`` can execute arbitrary code; only load files written
    by ``save_study``.
    """
    with open(save_path, "rb") as f:
        study = pickle.load(f)

    restored_names = (
        "eval_call_count",
        "state",
        "rng",
        "strategy",
        "prev_best",
        "prev_prev_best",
        "index_dict",
        "es_params",
    )
    # Each study entry is bound to the module-level variable of the same name.
    module_vars = globals()
    for name in restored_names:
        module_vars[name] = study[name]

In [11]:
def modify_pop(pop_params):
    """Post-process a sampled population before it is evaluated.

    Works on a NumPy copy of ``pop_params``:

    * the last row is replaced by the module-level ``prev_best`` and the
      second-to-last row by ``prev_prev_best`` (each only when not ``None``);
    * the ``indv_ants`` and ``pop_size`` columns are rounded with ``np.round``.

    Args:
        pop_params: 2-D array-like, one row per candidate.

    Returns:
        The modified NumPy array.
    """
    population = np.array(pop_params)

    # Re-inject earlier best candidates at the tail of the population.
    for row, elite in ((-1, prev_best), (-2, prev_prev_best)):
        if elite is not None:
            population[row] = elite

    # Rounding runs after the injection, so injected rows are rounded too.
    for name in ("indv_ants", "pop_size"):
        column = index_dict[name]
        population[:, column] = np.round(population[:, column])

    return population

# Config

In [24]:
# Location of the solver checkout (contains run.py), relative to this notebook.
acopp_dir = Path("../")
experiment_name = "evosax_tuning"
# pathlib does not expand "~" by itself: without expanduser() every path below
# would point at a literal "~" directory under the current working directory.
experiment_dir = Path("~/experiments").expanduser() / experiment_name
save_path = experiment_dir / "study.pkl"  # pickled study checkpoint
sol_dir = experiment_dir / "solutions"    # passed to run.py as --sol_dir

# Worker count for the executor: half of the CPUs, but at least one.
n_jobs = max(1, multiprocessing.cpu_count() // 2)
# Solver runs per candidate; their gains are averaged.
n_run_each_trail = 10

# Toggle: set to False for a real tuning run.
# debug_mode = False
debug_mode = True
debug_time = 9  # passed to run.py as --time when debug_mode is on
if debug_mode:
    # Small settings so one generation finishes quickly.
    n_run_each_trail = 2
    optim_popsize = 4

2


In [13]:
# Hard outer limits used by to_chain_flags when it places the
# [min, max] intervals for indv_ants and rho.
min_min_indv_ants, max_max_indv_ants = 2, 105
min_min_rho, max_max_rho = 0.01, 0.99

# Prepare

In [None]:
# The experiment directory and the solver checkout must both already exist.
assert all(os.path.isdir(required) for required in (experiment_dir, acopp_dir))

In [14]:
# Build the solver once up front (run.py --build_only) and show the build log.
command = ['python3', f'{acopp_dir}/run.py']
command += ['--acopp_dir', str(acopp_dir)]
command += ['--build_only', '--experiment']
result = run_command(command)
print(result.stdout.decode())

$ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE -G Unix Makefiles -S.. -B../temp_build_experiment -DCMAKE_BUILD_TYPE:STRING=Release
-- Configuring done
-- Generating done
-- Build files have been written to: /mnt/c/home/vincent/data/mega/Projects/Work/Public/acoplusplus_thop_modified/src/aco++/temp_build_experiment

$ cmake --build ../temp_build_experiment -j 3
[35m[1mConsolidate compiler generated dependencies of target acothop[0m
[100%] Built target acothop




In [15]:
# Thread pool used by evaluate_pop; each task (run_a_instance) waits on a
# solver subprocess.
executor = concurrent.futures.ThreadPoolExecutor(n_jobs)

In [16]:
# Per-instance table; the code below reads its `instance`, `prob` and
# `acopp_profit` columns.
gain_table_path = "./es_ant_gain_percent.csv"
df = pd.read_csv(gain_table_path)
df.describe()

Unnamed: 0,gain_percent,prob,acopp_profit
count,432.0,432.0,432.0
mean,-0.672013,0.002315,587456.2
std,1.388989,0.00066,1127111.0
min,-7.43447,0.0,2425.0
25%,-1.303017,0.001963,62491.12
50%,-0.36865,0.002171,172022.5
75%,0.06793,0.002615,538701.5
max,4.20111,0.005527,9760000.0


In [17]:
# Row positions 0..len-1 that random_choose_index samples from.
df_indexes = np.arange(len(df["instance"]))

# IMPORTANT: One-time preparation

In [18]:
# Run this cell exactly once per experiment: it creates a fresh study and
# writes the first checkpoint.
# Both guards run up front, so an existing study is detected before any
# in-memory state is reset.
assert not os.path.exists(sol_dir)
assert not os.path.exists(save_path)

# Order of the entries defines the layout of every parameter vector.
variable_list = (
    "pop_size",

    "alpha_mean",
    "beta_mean",
    "par_a_mean",
    "par_b_mean",
    "par_c_mean",

    "alpha_std",
    "beta_std",
    "par_a_std",
    "par_b_std",
    "par_c_std",

    "rho",
    "left_rho",
    "_mid_rho",
    "right_rho",

    "indv_ants",
    "left_indv_ants",
    "_mid_indv_ants",
    "right_indv_ants",
)

# name -> position in the parameter vector
index_dict = {key: idx for idx, key in enumerate(variable_list)}

dim = len(index_dict)
if not debug_mode:
    # In debug mode optim_popsize comes from the config cell instead.
    optim_popsize = int(np.floor(4 + 3 * np.log(dim)))
# Seeded from the wall clock; the key is stored in the study checkpoint.
rng = jax.random.PRNGKey(int(time.time()))
strategy = BIPOP_CMA_ES(popsize=optim_popsize, num_dims=dim)
es_params = strategy.default_params
eval_call_count = 0
prev_best = None
prev_prev_best = None

# Per-dimension settings, forwarded below to the strategy parameters of the
# same names.
clip_max = np.zeros((dim,))
clip_min = np.zeros((dim,))
init_min = np.zeros((dim,))
init_max = np.zeros((dim,))
sigma_init = np.zeros((dim,))


def _configure(key, lower, upper, init, sigma):
    """Fill the clip/init/sigma arrays for the variable named ``key``."""
    pos = index_dict[key]
    clip_min[pos] = lower
    clip_max[pos] = upper
    init_min[pos] = init_max[pos] = init
    sigma_init[pos] = sigma


def _configure_range_split(name, lower, upper, hard_lower, hard_upper):
    """Configure left_/_mid_/right_ shares so they start at [lower, upper].

    The three shares live in [0, 1]; their initial values are the fractions
    of the hard range below ``lower``, between ``lower`` and ``upper``, and
    above ``upper`` (matching how to_chain_flags decodes them).
    """
    lower_frac = (lower - hard_lower) / (hard_upper - hard_lower)
    upper_frac = (upper - hard_lower) / (hard_upper - hard_lower)
    initial_shares = (
        (f"left_{name}", lower_frac - 0),
        (f"_mid_{name}", upper_frac - lower_frac),
        (f"right_{name}", 1 - upper_frac),
    )
    for key, share in initial_shares:
        _configure(key, 0, 1, share, (1 - 0) / 5)


# Scalar variables: sigma is one fifth of the clip range, except for rho.
_configure("pop_size", 8, 25, 20, (25 - 8) / 5)
_configure("indv_ants", 2, 15, 2, (15 - 2) / 5)
_configure("rho", 0.01, 0.99, 0.5, 0.253226)

# Initial [min, max] intervals inside the hard limits.
_configure_range_split("indv_ants", 10, 100, min_min_indv_ants, max_max_indv_ants)
_configure_range_split("rho", 0.1, 0.99, min_min_rho, max_max_rho)

# (name, clip_max of the mean, initial mean, sigma_init of the mean)
for name, mean_upper, mean_init, mean_sigma in (
    ("alpha", 10, 1.55, 1.507),
    ("beta", 10, 4.89, 2.046),
    ("par_a", 1, 0.3, 0.2),
    ("par_b", 1, 0.7, 0.2),
    ("par_c", 1, 0.9, 0.2),
):
    _configure(f"{name}_mean", 0.01, mean_upper, mean_init, mean_sigma)
    # The *_std variable is capped at half the mean's clip range, starts at
    # the mean's sigma_init, and gets one fifth of its own range as sigma.
    std_upper = (mean_upper - 0.01) / 2
    _configure(f"{name}_std", 0.01, std_upper, mean_sigma, (std_upper - 0.01) / 5)

es_params = es_params.replace(
    strategy_params=es_params.strategy_params.replace(
        clip_min=jnp.array(clip_min),
        clip_max=jnp.array(clip_max),
        init_min=jnp.array(init_min),
        init_max=jnp.array(init_max),
        sigma_init=jnp.array(sigma_init),
    )
)
state = strategy.initialize(rng, es_params)

save_study()

# Tuning

In [23]:
# Resume from the last checkpoint, then run generations until a stop is
# requested by writing BREAK into signal.txt.
load_study()
# minimize
while True:
    rng, rng_ask = jax.random.split(rng, 2)

    # Remember whether this ask() starts a restart, so the new population
    # size can be reported once ask() has run.
    if state.restart_state.restart_next:
        print(f"--> Restarted Strategy: {eval_call_count} eval calls")
        print_update = True
    else:
        print_update = False

    x, state = strategy.ask(rng_ask, state, es_params)

    if print_update:
        print(f"--> New Popsize: {state.restart_state.active_popsize}")

    x = modify_pop(x)
    fitness = evaluate_pop(x)

    # Keep the best candidates of the last two generations; modify_pop
    # re-injects them into the next population.
    prev_prev_best = prev_best
    prev_best = x[fitness.argmin()] # minimize

    x = jnp.array(x)
    fitness = jnp.array(fitness)
    
    state = strategy.tell(x, fitness, state, es_params)
    # Collapse restart_next to a single boolean before the next iteration
    # tests it (presumably tell() can return it as an array -- TODO confirm).
    state = state.replace(
        restart_state = state.restart_state.replace(
            restart_next = state.restart_state.restart_next.any()
        )
    )
    save_study()

    print(f"total # eval: {eval_call_count} | gen_mean_fitness: {- fitness.mean():.4f} | gen_best: {to_chain_flags(prev_best)}")

    # Stop request: signal.txt containing BREAK. A missing file means
    # "keep going", and surrounding whitespace (e.g. the newline added by
    # `echo BREAK > signal.txt`) is ignored.
    try:
        with open("signal.txt", "rt") as f:
            stop_requested = f.read().strip() == "BREAK"
    except FileNotFoundError:
        stop_requested = False
    if stop_requested:
        break

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


total # eval: 12 | gen_mean_fitness: -0.7915 | gen_best: --lambda 22.0 --adapt_evap --cmaes --mean_ary 0.56131095:4.0676594:0.45615897:0.4967922:1.0 --std_ary 1.0265352:2.3701658:0.24934661:0.3137654:0.07623401 --adpt_rho 0.683076:0.2053757041692734:0.9900000292062759 --indv_ants 5.0:12.557607345283031:78.68981616944075
