In [1]:
import re
import sys
from time import sleep
from pprint import pprint
from pathlib import Path
from functools import partial
import subprocess
from subprocess import PIPE

import numpy as np
import optuna
import pandas as pd
import matplotlib.pyplot as plt

FILENAME = "main.cpp"

In [2]:
!cp ../answer/{FILENAME} ./{FILENAME}

In [3]:
!clang++ {FILENAME} -std=gnu++17 -Wall -Wextra -O2 -o {FILENAME}.out -I/

In [4]:
SEED = 0
N_TESTCASES = 500

In [5]:
!seq {SEED} {SEED+N_TESTCASES-1} > seeds.txt && cargo run --release --manifest-path ../tools/Cargo.toml --bin gen seeds.txt
!ls in

[0m[0m[1m[32m    Finished[0m release [optimized] target(s) in 0.19s
[0m[0m[1m[32m     Running[0m `/home/user/ahc_httf_2022_qual/tools/target/release/gen seeds.txt`
0000.txt  0063.txt  0126.txt  0189.txt	0252.txt  0315.txt  0378.txt  0441.txt
0001.txt  0064.txt  0127.txt  0190.txt	0253.txt  0316.txt  0379.txt  0442.txt
0002.txt  0065.txt  0128.txt  0191.txt	0254.txt  0317.txt  0380.txt  0443.txt
0003.txt  0066.txt  0129.txt  0192.txt	0255.txt  0318.txt  0381.txt  0444.txt
0004.txt  0067.txt  0130.txt  0193.txt	0256.txt  0319.txt  0382.txt  0445.txt
0005.txt  0068.txt  0131.txt  0194.txt	0257.txt  0320.txt  0383.txt  0446.txt
0006.txt  0069.txt  0132.txt  0195.txt	0258.txt  0321.txt  0384.txt  0447.txt
0007.txt  0070.txt  0133.txt  0196.txt	0259.txt  0322.txt  0385.txt  0448.txt
0008.txt  0071.txt  0134.txt  0197.txt	0260.txt  0323.txt  0386.txt  0449.txt
0009.txt  0072.txt  0135.txt  0198.txt	0261.txt  0324.txt  0387.txt  0450.txt
0010.txt  0073.txt  0136.txt  0199.txt	0262.tx

In [None]:
# %%time
# # もとのスコアの確認
# scores = []
# for i in range(N_TESTCASES):
#     i = f"{i:04d}"
#     score = !../tools/target/release/tester $(pwd)/{FILENAME}.out < in/{i}.txt 2>&1 | grep Score
#     print(i, *score)
#     score = int(score[0].split()[-1])
#     scores.append(score)
# np.mean(scores), np.mean(scores) * 50, np.std(scores)

In [6]:
# ここでエラー出力は何かおかしいかもしれない


# パラメータ抽出
with open(FILENAME) as f:
    answer = f.read()


variables_optimize = []

for left, value, right, options in re.findall(r"^([^/\n]*=\s*)(.+?)(\s*;\s*//(?:.*\W)?OPTIMIZE(\W.*))$", answer, re.MULTILINE):
    name = left.replace("=", "").strip().split()[-1]
    
    searched = re.search(r".*\[(?P<min>.*),(?P<max>.*)\].*", options)
    if searched:
        min_value = max_value = None
        try:
            min_value = eval(searched.group("min"))
            max_value = eval(searched.group("max"))
            assert min_value <= max_value
        except Exception as e:
            print(f"searched={searched}", file=sys.stderr)
            print(e, file=sys.stderr)
            continue
        log = "LOG" in options  # 雑、直したほうが良い
        if type(min_value) != type(max_value):
            print(f"searched={searched}", file=sys.stderr)
            print("types not matched", file=sys.stderr)
            continue
        if isinstance(min_value, int):
            method = "suggest_int"
        elif isinstance(min_value, float):
            method = "suggest_float"
        else:
            print(f"searched={searched}", file=sys.stderr)
            print(f"unknown type ({type(min_value)})", file=sys.stderr)
            continue
        variables_optimize.append({
            "name": name,
            "method": method,
            "min": min_value,
            "max": max_value,
            "log": log,
            "left": left,
            "right": right,
        })
    elif searched := re.search(r".*\{(?P<choices>.*?)\}.*", options):
        choices = list(map(lambda x: x.strip(), searched.group("choices").split(",")))
        variables_optimize.append({
            "name": name,
            "method": "suggest_categorical",
            "choices": choices,
            "left": left,
            "right": right,
        })
    else:
        print(f"searched={searched}", file=sys.stderr)
        print(f"pattern was matched but options are incorrect.", file=sys.stderr)

print(len(variables_optimize), "variables were found.")
if globals().get("pd"):
    display(pd.DataFrame(variables_optimize))
else:
    pprint(variables_optimize)



6 variables were found.


Unnamed: 0,name,method,min,max,log,left,right
0,MCMC_Q_L2_NORM_RANGE,suggest_float,2.0,20.0,True,constexpr auto MCMC_Q_L2_NORM_RANGE =,"; // OPTIMIZE LOG [2.0, 20.0]"
1,MCMC_Q_RANGE,suggest_float,0.2,10.0,True,constexpr auto MCMC_Q_RANGE =,"; // OPTIMIZE LOG [0.2, 10.0]"
2,EXPECTED_SKILL_EMA_ALPHA_COEF,suggest_float,0.02,2.0,True,constexpr auto EXPECTED_SKILL_EMA_ALPHA_COEF =,"; // OPTIMIZE LOG [0.02, 2.0]"
3,MAX_N_NOT_OPEN_TASKS_IN_QUEUE,suggest_int,60.0,100.0,False,constexpr auto MAX_N_NOT_OPEN_TASKS_IN_QUEUE =,"; // OPTIMIZE [60, 100]"
4,PRIORITY_DAY_OFFSET,suggest_int,400.0,1200.0,False,constexpr auto PRIORITY_DAY_OFFSET =,"; // OPTIMIZE [400, 1200]"
5,PRIORITY_COEF,suggest_float,0.002,2.0,True,constexpr auto PRIORITY_COEF =,"; // OPTIMIZE LOG [0.002, 2.0]"


In [7]:
def escape(string):  # 正規表現の中でそのまま使いたい文字列をエスケープ
    res = !echo '{string}' | sed -e 's/[]\/$*.^[]/\\&/g'
    return res[0]

def escape_sed(string):  # sed の置換後の文字列用のエスケープ
    res = !echo '{string}' | sed -e 's/[\/&]/\\&/g'
    return res[0]

def run(cmd):
    p = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=isinstance(cmd, str)
    )
    for line in iter(p.stdout.readline, b''):
        print(line.decode("utf8"), end="")

def objective_with_index(trial, n_internal_parallel):
    index_parallel = f"{trial.number:04d}"
    print(f"{index_parallel=}")
    
    parameters_changed_filename = f"{index_parallel}_{FILENAME}"
    directory_input = Path("./in")  # 中のすべてのファイルに対して実行される
    
    run(["mkdir", f"{index_parallel}_out"])
    run(["mkdir", f"{index_parallel}_score"])
    
    # ファイル作成
    run(f"cp {FILENAME} {parameters_changed_filename}")
    sed_options = [f"-i {parameters_changed_filename}"]
    for variable in variables_optimize:
        if variable["method"] == "suggest_categorical":
            val =  trial.suggest_categorical(variable["name"], variable["choices"])
        else:
            val = getattr(trial, variable["method"])(variable["name"], variable["min"], variable["max"], log=variable["log"])
        left = variable["left"]
        right = variable["right"]
        sed_options.append(f"""-e 's/^{escape(left)}.*{escape(right)}$/{escape_sed(left)}{val}{escape_sed(right)}/'""")
    command_sed = f"sed {' '.join(sed_options)}"
    print(command_sed)
    run(command_sed)
    
    # コンパイル
    command_compile = f"clang++ {parameters_changed_filename} -std=gnu++17 -O2 -DONLINE_JUDGE -I/ -o {parameters_changed_filename}.out"
    print(command_compile)
    run(command_compile)
    # 実行・採点コマンド (@ はファイル名)
    command_exec = (
        #f"../tools/target/release/tester $(pwd)/{parameters_changed_filename}.out < {directory_input}/@ 2>&1 | grep Score"
        f"../tools/target/release/tester $(pwd)/{parameters_changed_filename}.out < {directory_input}/@ 2>&1 | grep Score | sed -E s/[^0-9]+// > ./{index_parallel}_score/@;"
        #f"cargo run --release --manifest-path ../tools/Cargo.toml --bin tester {directory_input}/@ $(pwd)/{parameters_changed_filename}.out 2>&1 | grep Score | sed -E s/[^0-9]+// > ./{index_parallel}_score/@;"
        #f"./{parameters_changed_filename}.out < {directory_input}/@ > ./{index_parallel}_out/@;"
        #f"cargo run --release --manifest-path ./tools/Cargo.toml --bin vis {directory_input}/@ ./{index_parallel}_out/@ 2> /dev/null > ./{index_parallel}_score/@;"
    )
    # 並列実行 (sed はパスのディレクトリ部分を消してファイル名にしてる)
    run(f"find {directory_input}/* | sed 's!^.*/!!' | xargs -I@ -P {n_internal_parallel} sh -c '{command_exec}'")
    
    # 集計
    sum_score = 0
    for file_path in Path(f"./{index_parallel}_score/").iterdir():  
        with open(file_path) as f:
            sum_score += int(f.read())
    
    # 後始末
    run(f"rm -rf {index_parallel}_out")
    run(f"rm -rf {index_parallel}_score")
#     run(f"rm {parameters_changed_filename}")
    run(f"rm {parameters_changed_filename}.out")
    
    return sum_score


In [8]:
storage_path = f"study.db"
storage = f"sqlite:///{storage_path}"
study_name = "study"
study = optuna.create_study(storage=storage, load_if_exists=True, study_name=study_name, direction="maximize")

def callback(study, trial):
    try:
        if study.best_value == trial.value:
            print(f"Updated! {study.best_value}")
            index_parallel = f"{trial.number:04d}"
            parameters_changed_filename = f"{index_parallel}_{FILENAME}"
            !cp {parameters_changed_filename} 00000_best_parameters.cpp
        !rm {parameters_changed_filename}
        print("callback ok")
    except:
        print(":(")

[32m[I 2021-11-13 00:36:35,081][0m A new study created in RDB with name: study[0m


In [None]:
objective = partial(objective_with_index, n_internal_parallel=8)
study.optimize(objective, n_trials=3000, timeout=86400, n_jobs=3, callbacks=[callback])

index_parallel='0000'
index_parallel='0001'
index_parallel='0002'




sed -i 0000_main.cpp -e 's/^constexpr auto MCMC_Q_L2_NORM_RANGE = .*;           \/\/ OPTIMIZE LOG \[2\.0, 20\.0\]$/constexpr auto MCMC_Q_L2_NORM_RANGE = 12.94946521491701;           \/\/ OPTIMIZE LOG [2.0, 20.0]/' -e 's/^constexpr auto MCMC_Q_RANGE = .*;                   \/\/ OPTIMIZE LOG \[0\.2, 10\.0\]$/constexpr auto MCMC_Q_RANGE = 0.39550430018137583;                   \/\/ OPTIMIZE LOG [0.2, 10.0]/' -e 's/^constexpr auto EXPECTED_SKILL_EMA_ALPHA_COEF = .*; \/\/ OPTIMIZE LOG \[0\.02, 2\.0\]$/constexpr auto EXPECTED_SKILL_EMA_ALPHA_COEF = 0.2827244843106127; \/\/ OPTIMIZE LOG [0.02, 2.0]/' -e 's/^constexpr auto MAX_N_NOT_OPEN_TASKS_IN_QUEUE = .*; \/\/ OPTIMIZE \[60, 100\]$/constexpr auto MAX_N_NOT_OPEN_TASKS_IN_QUEUE = 79; \/\/ OPTIMIZE [60, 100]/' -e 's/^constexpr auto PRIORITY_DAY_OFFSET = .*;          \/\/ OPTIMIZE \[400, 1200\]$/constexpr auto PRIORITY_DAY_OFFSET = 461;          \/\/ OPTIMIZE [400, 1200]/' -e 's/^constexpr auto PRIORITY_COEF = .*; \/\/ OPTIMIZE LOG \[0\.002, 2\

[32m[I 2021-11-13 00:40:44,240][0m Trial 1 finished with value: 1004560.0 and parameters: {'MCMC_Q_L2_NORM_RANGE': 5.1808347426158425, 'MCMC_Q_RANGE': 0.5544895374948828, 'EXPECTED_SKILL_EMA_ALPHA_COEF': 0.2716678423621172, 'MAX_N_NOT_OPEN_TASKS_IN_QUEUE': 67, 'PRIORITY_DAY_OFFSET': 1005, 'PRIORITY_COEF': 0.056736490804161877}. Best is trial 1 with value: 1004560.0.[0m


Updated! 1004560.0
callback ok
index_parallel='0003'
sed -i 0003_main.cpp -e 's/^constexpr auto MCMC_Q_L2_NORM_RANGE = .*;           \/\/ OPTIMIZE LOG \[2\.0, 20\.0\]$/constexpr auto MCMC_Q_L2_NORM_RANGE = 6.4628090661349376;           \/\/ OPTIMIZE LOG [2.0, 20.0]/' -e 's/^constexpr auto MCMC_Q_RANGE = .*;                   \/\/ OPTIMIZE LOG \[0\.2, 10\.0\]$/constexpr auto MCMC_Q_RANGE = 2.3510492985738862;                   \/\/ OPTIMIZE LOG [0.2, 10.0]/' -e 's/^constexpr auto EXPECTED_SKILL_EMA_ALPHA_COEF = .*; \/\/ OPTIMIZE LOG \[0\.02, 2\.0\]$/constexpr auto EXPECTED_SKILL_EMA_ALPHA_COEF = 0.05054255079709201; \/\/ OPTIMIZE LOG [0.02, 2.0]/' -e 's/^constexpr auto MAX_N_NOT_OPEN_TASKS_IN_QUEUE = .*; \/\/ OPTIMIZE \[60, 100\]$/constexpr auto MAX_N_NOT_OPEN_TASKS_IN_QUEUE = 79; \/\/ OPTIMIZE [60, 100]/' -e 's/^constexpr auto PRIORITY_DAY_OFFSET = .*;          \/\/ OPTIMIZE \[400, 1200\]$/constexpr auto PRIORITY_DAY_OFFSET = 1170;          \/\/ OPTIMIZE [400, 1200]/' -e 's/^constexpr 

[32m[I 2021-11-13 00:40:50,108][0m Trial 0 finished with value: 1006491.0 and parameters: {'MCMC_Q_L2_NORM_RANGE': 12.94946521491701, 'MCMC_Q_RANGE': 0.39550430018137583, 'EXPECTED_SKILL_EMA_ALPHA_COEF': 0.2827244843106127, 'MAX_N_NOT_OPEN_TASKS_IN_QUEUE': 79, 'PRIORITY_DAY_OFFSET': 461, 'PRIORITY_COEF': 0.11713065138540142}. Best is trial 0 with value: 1006491.0.[0m


Updated! 1006491.0


[32m[I 2021-11-13 00:40:50,431][0m Trial 2 finished with value: 996169.0 and parameters: {'MCMC_Q_L2_NORM_RANGE': 2.8078774233660715, 'MCMC_Q_RANGE': 1.2423942257924114, 'EXPECTED_SKILL_EMA_ALPHA_COEF': 0.06005636744499221, 'MAX_N_NOT_OPEN_TASKS_IN_QUEUE': 83, 'PRIORITY_DAY_OFFSET': 598, 'PRIORITY_COEF': 0.6789506847419249}. Best is trial 0 with value: 1006491.0.[0m


callback ok
index_parallel='0004'
rm: cannot remove '{parameters_changed_filename}': No such file or directory
callback ok
index_parallel='0005'
sed -i 0004_main.cpp -e 's/^constexpr auto MCMC_Q_L2_NORM_RANGE = .*;           \/\/ OPTIMIZE LOG \[2\.0, 20\.0\]$/constexpr auto MCMC_Q_L2_NORM_RANGE = 6.2166385669602855;           \/\/ OPTIMIZE LOG [2.0, 20.0]/' -e 's/^constexpr auto MCMC_Q_RANGE = .*;                   \/\/ OPTIMIZE LOG \[0\.2, 10\.0\]$/constexpr auto MCMC_Q_RANGE = 5.080899803655884;                   \/\/ OPTIMIZE LOG [0.2, 10.0]/' -e 's/^constexpr auto EXPECTED_SKILL_EMA_ALPHA_COEF = .*; \/\/ OPTIMIZE LOG \[0\.02, 2\.0\]$/constexpr auto EXPECTED_SKILL_EMA_ALPHA_COEF = 0.7475306255661518; \/\/ OPTIMIZE LOG [0.02, 2.0]/' -e 's/^constexpr auto MAX_N_NOT_OPEN_TASKS_IN_QUEUE = .*; \/\/ OPTIMIZE \[60, 100\]$/constexpr auto MAX_N_NOT_OPEN_TASKS_IN_QUEUE = 65; \/\/ OPTIMIZE [60, 100]/' -e 's/^constexpr auto PRIORITY_DAY_OFFSET = .*;          \/\/ OPTIMIZE \[400, 1200\]$/constex