In [17]:
# Functions

In [49]:
import concurrent.futures
import multiprocessing
import os
import pickle
import subprocess
import time
from math import floor
from pathlib import Path

import jax
import jax.numpy as jnp
import numpy as np
import pandas as pd

# NOTE(review): BIPOP_CMA_ES (used by the one-time preparation cell) is not imported
# by any visible cell -- presumably `from evosax import BIPOP_CMA_ES`; confirm and
# add it here.

In [19]:
def random_choose_index():
    """Draw one row position of the benchmark table at random.

    Positions are sampled with the weights stored in the ``prob`` column of the
    notebook-global ``df``. The candidate positions are derived from ``df`` itself
    instead of the notebook-global ``idx``: ``idx`` is a generic name that the
    parameter-bounds cell rebinds to single integers, which would make the draw
    fail (size mismatch with ``p``) or silently sample from the wrong range.

    Returns:
        An integer position in ``range(len(df.prob))``.
    """
    weights = np.asarray(df.prob, dtype=float)
    # choice(n, p=...) samples from arange(n), i.e. the same population the
    # original ``idx = np.arange(len(df.instance))`` described.
    return np.random.choice(len(weights), p=weights)

In [20]:
def run_command(command):
    """Run ``command`` and return the completed process, failing loudly on error.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell involved).

    Returns:
        The ``subprocess.CompletedProcess``; ``stdout`` and ``stderr`` are captured
        as bytes.

    Raises:
        AssertionError: If the process exits with a non-zero return code. The
            message contains the command line, the return code and both captured
            streams.
    """
    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Decode leniently: undecodable bytes in the child's output must not turn
        # the failure report into a UnicodeDecodeError.
        stderr_text = result.stderr.decode(errors="replace")
        stdout_text = result.stdout.decode(errors="replace")
        command_text = ' '.join(map(str, command))
        # Raised explicitly (same exception type as before) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError(f"""
command:
{command_text}
returncode: {result.returncode}
stderr:
{stderr_text}
stdout:
{stdout_text}
""")
    return result

In [21]:
def run_a_instance(chain_flags, instance_name, acopp_profit, seed):
    """Run ``run.py`` once on one instance and score it against a reference profit.

    The command line is assembled from the notebook-level settings ``acopp_dir``,
    ``sol_dir``, ``postfix``, ``debug_mode`` and ``debug_time``.

    Args:
        chain_flags: Flag string forwarded through ``--chain_flags``.
        instance_name: Value passed as ``--instance_name``.
        acopp_profit: Reference profit the run is compared with.
        seed: Value passed as ``--random_seed``.

    Returns:
        The gain of the profit printed by ``run.py`` over ``acopp_profit``, in percent.
    """
    argv = ["python3", f"{acopp_dir}/run.py"]
    argv += ["--acopp_dir", str(acopp_dir)]
    argv += ["--instance_name", instance_name]
    argv += ["--run_only", "--experiment"]
    argv += ["--sol_dir", str(sol_dir)]
    argv += ["--silent", "1"]
    argv += ["--postfix", str(postfix)]
    argv += ["--random_seed", str(seed)]
    argv += ["--no_default", "--chain_flags", str(chain_flags)]
    if debug_mode:
        argv.extend(["--time", str(debug_time)])

    # The script's whole stdout is parsed as the integer profit.
    profit = int(run_command(argv).stdout.decode())
    return (profit - acopp_profit) / acopp_profit * 100

In [22]:
def to_arr_flag(a_list):
    """Render a sequence as one colon-separated string.

    Every element is converted with ``str``; for example ``[1, 2.5]`` becomes
    ``"1:2.5"`` and an empty sequence becomes ``""``.
    """
    return ":".join(str(item) for item in a_list)

In [23]:
def to_chain_flags(params):
    """Build the solver flag string for one candidate parameter vector.

    Values are picked out of ``params`` by name through the notebook-global
    ``index_dict`` and grouped into colon-separated arrays with ``to_arr_flag``.

    Args:
        params: Indexable parameter vector laid out according to ``index_dict``.

    Returns:
        The flag string (``--lambda ... --indv_ants ...``) for this candidate.
    """
    def joined(*names):
        # Look each name up via index_dict and colon-join the selected values.
        return to_arr_flag([params[index_dict[name]] for name in names])

    mean_arr = joined("alpha_mean", "beta_mean", "par_a_mean", "par_b_mean", "par_c_mean")
    std_arr = joined("alpha_std", "beta_std", "par_a_std", "par_b_std", "par_c_std")
    rho_arr = joined("rho", "min_rho", "max_rho")
    indv_ants_arr = joined("indv_ants", "min_indv_ants", "max_indv_ants")
    pop_size = params[index_dict["pop_size"]]

    return f"--lambda {pop_size} --adapt_evap --cmaes --mean_ary {mean_arr} --std_ary {std_arr} --adpt_rho {rho_arr} --indv_ants {indv_ants_arr}"

In [24]:
def evaluate_pop(pop_params):
    """Evaluate a population and return one value to minimise per candidate.

    Each candidate in ``pop_params`` is run ``n_run_each_trail`` times. Every run
    uses an instance row drawn with ``random_choose_index()`` and the running
    ``eval_call_count`` as its random seed. Runs are dispatched on the
    notebook-global ``executor``.

    Args:
        pop_params: Sequence of parameter vectors accepted by ``to_chain_flags``.

    Returns:
        1-D NumPy array with one entry per candidate: the mean gain percentage of
        its runs, negated so that larger gains give smaller values.

    Side effects:
        Increments the global ``eval_call_count`` once per scheduled run.
    """
    global eval_call_count

    tasks = []
    for i in range(len(pop_params)):
        chain_flags = to_chain_flags(pop_params[i])
        for j in range(n_run_each_trail):
            row = df.loc[random_choose_index()]

            # Tasks must stay candidate-major so the reshape below groups the
            # runs of one candidate into one row.
            assert len(tasks) == i * n_run_each_trail + j

            eval_call_count += 1
            tasks.append((
                chain_flags,
                row.instance,
                row.acopp_profit,
                eval_call_count,
            ))

    # One future per task, each receiving its own 4-tuple as positional arguments.
    # Results are collected in submission order regardless of completion order.
    futures = [executor.submit(run_a_instance, *task) for task in tasks]
    objective_values = np.array([future.result() for future in futures])
    objective_values = objective_values.reshape((len(pop_params), n_run_each_trail))

    return -objective_values.mean(axis=1)

In [None]:
def save_study():
    """Pickle the current optimisation state to ``save_path``.

    The checkpoint is a dict holding the notebook-globals ``eval_call_count``,
    ``state``, ``rng``, ``strategy``, ``prev_best`` and ``prev_prev_best``.
    ``es_params`` is not included, so it must be recreated by the preparation
    cells before a saved study can be resumed.
    """
    study = {
        "eval_call_count": eval_call_count,
        "state": state,
        "rng": rng,
        "strategy": strategy,
        "prev_best": prev_best,
        "prev_prev_best": prev_prev_best,
    }

    with open(save_path, "wb") as f:
        pickle.dump(study, f)

In [None]:
def load_study():
    """Restore the optimisation state pickled at ``save_path``.

    Rebinds the notebook-globals ``eval_call_count``, ``state``, ``rng``,
    ``strategy``, ``prev_best`` and ``prev_prev_best`` from the checkpoint dict.

    Raises:
        FileNotFoundError: If ``save_path`` does not exist.
        KeyError: If the checkpoint lacks one of the expected entries.
    """
    # `strategy` must be declared global too; otherwise the restored object is
    # bound to a local name and silently discarded.
    global eval_call_count, state, rng, strategy, prev_best, prev_prev_best

    # NOTE: pickle.load executes arbitrary code from the file; only load
    # checkpoints written by this notebook.
    with open(save_path, "rb") as f:
        study = pickle.load(f)

    eval_call_count = study["eval_call_count"]
    state = study["state"]
    rng = study["rng"]
    strategy = study["strategy"]
    prev_best = study["prev_best"]
    prev_prev_best = study["prev_prev_best"]

In [None]:
def repair_pop(pop_params):
    """Repair a sampled population before it is evaluated (not implemented yet).

    The tuning loop calls this on the candidates returned by ``strategy.ask``.
    The body was only a ``??`` placeholder, which is not valid Python and kept the
    cell from defining the function at all.

    Args:
        pop_params: Population of candidate parameter vectors.

    Raises:
        NotImplementedError: Always, until the repair rule is written.
    """
    # TODO: implement the repair step; the intended rule is not recorded anywhere
    # in this notebook.
    raise NotImplementedError(
        "repair_pop() is still a placeholder; implement the repair rule before tuning."
    )

# Config

In [25]:
# Paths handed to run.py and used for the checkpoint file.
acopp_dir = Path("../")
save_path = Path("./study.pkl")
sol_dir = Path("./solutions")
# Passed to run.py as --postfix; taken from the clock when this cell runs.
postfix = time.time()

# Full-size settings. `total_trial` is only defined in debug mode below;
# earlier values tried for it were 1000 and 100.
n_run_each_trail = 10
n_jobs = max(1, multiprocessing.cpu_count() // 2)
save_each_n_trial = n_jobs

# Debug switch: set debug_mode to False for a full-length run.
debug_mode = True
debug_time = 10
if debug_mode:
    # Shrink everything to two workers / two runs per candidate.
    n_jobs = n_run_each_trail = save_each_n_trial = 2
    total_trial = n_run_each_trail + 1

print(n_jobs)

2


# Prepare

In [None]:
# Build: invoke run.py once with --build_only before any tuning run
# (presumably this compiles the solver -- confirm against run.py).
command = ["python3", f"{acopp_dir}/run.py"]
command += ["--acopp_dir", str(acopp_dir)]
command += ["--build_only", "--experiment"]
result = run_command(command)
print(result.stdout.decode())

In [None]:
# Shared worker pool used by evaluate_pop(). Each task only waits on a subprocess
# started through run_command(), so threads are enough to run them concurrently.
executor =  concurrent.futures.ThreadPoolExecutor(max_workers=n_jobs)

In [None]:
# Benchmark table. The functions above read its `instance`, `acopp_profit` and
# `prob` columns (`prob` is used as the sampling weight for each row).
df = pd.read_csv("./es_ant_gain_percent.csv")
df.describe()

In [None]:
# Row positions of `df`, read by random_choose_index() as the sampling population.
# NOTE(review): `idx` is a very generic global name -- make sure no later cell
# rebinds it before sampling starts.
idx = np.arange(len(df.instance))

In [None]:
# Position of every tunable parameter inside a candidate vector, in declaration
# order (first name -> 0, second -> 1, ...).
index_dict = {
    name: position
    for position, name in enumerate(
        [
            # means of the sampled solver parameters
            "alpha_mean",
            "beta_mean",
            "par_a_mean",
            "par_b_mean",
            "par_c_mean",
            # matching standard deviations
            "alpha_std",
            "beta_std",
            "par_a_std",
            "par_b_std",
            "par_c_std",
            # rho with its lower / upper limit
            "rho",
            "min_rho",
            "max_rho",
            # indv_ants with its lower / upper limit
            "indv_ants",
            "min_indv_ants",
            "max_indv_ants",
            # value passed as --lambda
            "pop_size",
        ]
    )
}

# IMPORTANT: One-time preparation

In [None]:
# Bookkeeping that the tuning loop carries across generations.
eval_call_count = 0
prev_best = prev_prev_best = None

# Search-space size and optimiser population (4 + 3*ln(dim), rounded down).
dim = len(index_dict)
optim_popsize = floor(4 + 3 * np.log(dim))

# The PRNG key is seeded from the wall clock, so every fresh study differs.
rng = jax.random.PRNGKey(int(time.time()))
strategy = BIPOP_CMA_ES(popsize=optim_popsize, num_dims=dim)
es_params = strategy.default_params

# Per-dimension bounds / initialisation vectors, filled in by the next cell.
# Five independent zero arrays (one per name).
clip_max, clip_min, init_min, init_max, sigma_init = (
    np.zeros((dim,)) for _ in range(5)
)

In [None]:
idx = index_dict["pop_size"]
clip_min[idx] = 8
clip_max[idx] = 20
init_min[idx] = init_max[idx] = 10
sigma_init[idx] = (clip_max[idx] - clip_min[idx]) / 5

idx = index_dict["indv_ants"]
clip_min[idx] = 0.10
clip_max[idx] = 15
init_min[idx] = init_max[idx] = ??
sigma_init[idx] = (clip_max[idx] - clip_min[idx]) / 5

idx = index_dict["rho"]
clip_min[idx] = 0.10
clip_max[idx] = 0.94
init_min[idx] = init_max[idx] = ??
sigma_init[idx] = (clip_max[idx] - clip_min[idx]) / 5

left_idx = index_dict["left_indv_ants"]
mid_idx = index_dict["mid_indv_ants"]
right_idx = index_dict["right_indv_ants"]
clip_min[left_idx] = clip_min[mid_idx] = clip_min[right_idx] = 0
clip_max[left_idx] = clip_max[mid_idx] = clip_max[right_idx] = 1
sigma_init[left_idx] = sigma_init[mid_idx] = sigma_init[right_idx] = (1 - 0) / 5
min_indv_ants = ??
max_indv_ants = ??
min_min_indv_ants = 2
max_max_indv_ants = 60
min_indv_ants = min_indv_ants - min_min_indv_ants
max_indv_ants = max_indv_ants - min_min_indv_ants
max_max_indv_ants = max_max_indv_ants - min_min_indv_ants
min_indv_ants = min_indv_ants / max_max_indv_ants
max_indv_ants = max_indv_ants / max_max_indv_ants
init_min[left_idx] = init_max[left_idx] = min_indv_ants - 0
init_min[mid_idx] = init_max[mid_idx] = max_indv_ants - min_indv_ants
init_min[right_idx] = init_max[right_idx] = 1 - max_indv_ants

left_idx = index_dict["left_rho"]
mid_idx = index_dict["mid_rho"]
right_idx = index_dict["right_rho"]
clip_min[left_idx] = clip_min[mid_idx] = clip_min[right_idx] = 0
clip_max[left_idx] = clip_max[mid_idx] = clip_max[right_idx] = 1
sigma_init[left_idx] = sigma_init[mid_idx] = sigma_init[right_idx] = (1 - 0) / 5
min_rho = ??
max_rho = ??
min_min_rho = 0.01
max_max_rho = 0.99
min_rho = min_rho - min_min_rho
max_rho = max_rho - min_min_rho
max_max_rho = max_max_rho - min_min_rho
min_rho = min_rho / max_max_rho
max_rho = max_rho / max_max_rho
init_min[left_idx] = init_max[left_idx] = min_rho - 0
init_min[mid_idx] = init_max[mid_idx] = max_rho - min_rho
init_min[right_idx] = init_max[right_idx] = 1 - max_rho

idx = index_dict["alpha_mean"]
std_idx = index_dict["alpha_std"]
clip_min[idx] = 0.01
clip_max[idx] = 10
init_min[idx] = init_max[idx] = ??
sigma_init[idx] = ??
clip_min[std_idx] = 0.01
clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

idx = index_dict["beta_mean"]
std_idx = index_dict["beta_std"]
clip_min[idx] = 0.01
clip_max[idx] = 10
init_min[idx] = init_max[idx] = ??
sigma_init[idx] = ??
clip_min[std_idx] = 0.01
clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

idx = index_dict["par_a_mean"]
std_idx = index_dict["par_a_std"]
clip_min[idx] = 0.01
clip_max[idx] = 1
init_min[idx] = init_max[idx] = ??
sigma_init[idx] = ??
clip_min[std_idx] = 0.01
clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

idx = index_dict["par_b_mean"]
std_idx = index_dict["par_b_std"]
clip_min[idx] = 0.01
clip_max[idx] = 1
init_min[idx] = init_max[idx] = ??
sigma_init[idx] = ??
clip_min[std_idx] = 0.01
clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

idx = index_dict["par_c_mean"]
std_idx = index_dict["par_c_std"]
clip_min[idx] = 0.01
clip_max[idx] = 1
init_min[idx] = init_max[idx] = ??
sigma_init[idx] = ??
clip_min[std_idx] = 0.01
clip_max[std_idx] = (clip_max[idx] - clip_min[idx]) / 2
init_min[std_idx] = init_max[std_idx] = sigma_init[idx]
sigma_init[std_idx] = (clip_max[std_idx] - clip_min[std_idx]) / 5

In [None]:
# Copy the per-dimension NumPy vectors into the strategy parameters as JAX
# arrays, then create the initial optimiser state from them.
es_params = es_params.replace(
    strategy_params=es_params.strategy_params.replace(
        **{
            field: jnp.array(values)
            for field, values in (
                ("clip_min", clip_min),
                ("clip_max", clip_max),
                ("init_min", init_min),
                ("init_max", init_max),
                ("sigma_init", sigma_init),
            )
        }
    )
)
state = strategy.initialize(rng, es_params)

In [None]:
# Write the initial checkpoint exactly once. Refuse to overwrite an existing
# study; an explicit raise (unlike assert) also runs under `python -O` and tells
# the user what to do.
if os.path.exists(save_path):
    raise FileExistsError(
        f"{save_path} already exists; move or delete it before starting a new study."
    )
save_study()

# Tuning

In [None]:
# Resume from the checkpoint and run ask -> repair -> evaluate -> tell until the
# file "signal.txt" contains BREAK. `es_params` is not part of the checkpoint, so
# the preparation cells must have been run in this kernel first.
load_study()
# minimize
while True:
    rng, rng_ask = jax.random.split(rng, 2)

    # Remember whether this generation starts a restart; the new population size
    # is only known after ask().
    print_update = bool(state.restart_state.restart_next)
    if print_update:
        print(f"--> Restarted Strategy: {eval_call_count} eval calls")

    x, state = strategy.ask(rng_ask, state, es_params)

    if print_update:
        print(f"--> New Popsize: {state.restart_state.active_popsize}")

    # Work on a writable NumPy copy: JAX arrays do not support item assignment.
    x = np.array(repair_pop(x))

    # Re-inject the best candidates of the two previous generations. Each slot is
    # guarded separately because after the first generation only `prev_best` is
    # set (`prev_prev_best` is still None).
    if prev_best is not None:
        x[-1] = prev_best
    if prev_prev_best is not None:
        x[-2] = prev_prev_best

    fitness = evaluate_pop(x)

    prev_prev_best = prev_best
    # Copy so the stored vector does not alias this generation's array.
    prev_best = x[fitness.argmin()].copy()

    x = jnp.array(x)
    fitness = jnp.array(fitness)

    state = strategy.tell(x, fitness, state, es_params)
    save_study()

    print(f"total # eval: {eval_call_count} | gen_mean_fitness: {- fitness.mean():.4f} | gen_best: {to_chain_flags(prev_best)}")

    # Cooperative stop. A missing signal file means "keep going"; surrounding
    # whitespace (e.g. a trailing newline from `echo`) is ignored.
    stop_requested = False
    if os.path.exists("signal.txt"):
        with open("signal.txt", "rt") as f:
            stop_requested = f.read().strip() == "BREAK"
    if stop_requested:
        break

In [3]:
import numpy as np

In [4]:
# Scratch cell left over from experimenting with NaN; nothing else in the
# notebook depends on it.
# NOTE(review): the recorded output below ("False") does not look like the repr
# of this expression -- presumably the cell was edited after it last ran.
np.nan

False

In [6]:
# Scratch check: a non-empty list is truthy whatever its elements are, so this
# prints "haha" even though both elements are NaN.
if [np.nan, np.nan]:
    print("haha")

haha
