In [48]:
import re
import sys
from uuid import uuid1
from time import sleep
from pprint import pprint
from pathlib import Path
from tempfile import gettempdir
from threading import Thread
from functools import partial
from subprocess import Popen, PIPE

from tqdm.notebook import tqdm
import numpy as np
import optuna
import pandas as pd
import matplotlib.pyplot as plt

FILENAME = "main.cpp"


In [24]:
!cp ../{FILENAME} ./{FILENAME}

In [25]:
!g++ {FILENAME} -std=gnu++17 -Wall -Wextra -O2 -o {FILENAME}.out

In [5]:
%cd ../

/home/user/ahc013


In [8]:
!wget https://img.atcoder.jp/ahc013/a42302b3af.zip
!unzip a42302b3af.zip

--2022-08-15 19:23:35--  https://img.atcoder.jp/ahc013/a42302b3af.zip
Resolving img.atcoder.jp (img.atcoder.jp)... 18.160.200.96, 18.160.200.25, 18.160.200.128, ...
Connecting to img.atcoder.jp (img.atcoder.jp)|18.160.200.96|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60162 (59K) [application/zip]
Saving to: ‘a42302b3af.zip’


2022-08-15 19:23:36 (4.54 MB/s) - ‘a42302b3af.zip’ saved [60162/60162]

Archive:  a42302b3af.zip
   creating: tools/
  inflating: tools/Cargo.toml        
   creating: tools/in/
  inflating: tools/in/0007.txt       
  inflating: tools/in/0013.txt       
  inflating: tools/in/0012.txt       
  inflating: tools/in/0006.txt       
  inflating: tools/in/0038.txt       
  inflating: tools/in/0010.txt       
  inflating: tools/in/0004.txt       
  inflating: tools/in/0005.txt       
  inflating: tools/in/0011.txt       
  inflating: tools/in/0039.txt       
  inflating: tools/in/0015.txt       
  inflating: tools/in/0001.txt       
  infla

In [12]:
%cd -

/home/user/ahc013/tuning


In [14]:
SEED = 10000
N_TESTCASES_ALL = 3200

In [17]:
!seq {SEED} {SEED + N_TESTCASES_ALL - 1} > seeds.txt && cargo run --release --manifest-path ../tools/Cargo.toml --bin gen seeds.txt
!ls in

[0m[0m[1m[32m    Finished[0m release [optimized] target(s) in 0.01s
[0m[0m[1m[32m     Running[0m `/home/user/ahc013/tools/target/release/gen seeds.txt`
0000.txt  0400.txt  0800.txt  1200.txt	1600.txt  2000.txt  2400.txt  2800.txt
0001.txt  0401.txt  0801.txt  1201.txt	1601.txt  2001.txt  2401.txt  2801.txt
0002.txt  0402.txt  0802.txt  1202.txt	1602.txt  2002.txt  2402.txt  2802.txt
0003.txt  0403.txt  0803.txt  1203.txt	1603.txt  2003.txt  2403.txt  2803.txt
0004.txt  0404.txt  0804.txt  1204.txt	1604.txt  2004.txt  2404.txt  2804.txt
0005.txt  0405.txt  0805.txt  1205.txt	1605.txt  2005.txt  2405.txt  2805.txt
0006.txt  0406.txt  0806.txt  1206.txt	1606.txt  2006.txt  2406.txt  2806.txt
0007.txt  0407.txt  0807.txt  1207.txt	1607.txt  2007.txt  2407.txt  2807.txt
0008.txt  0408.txt  0808.txt  1208.txt	1608.txt  2008.txt  2408.txt  2808.txt
0009.txt  0409.txt  0809.txt  1209.txt	1609.txt  2009.txt  2409.txt  2809.txt
0010.txt  0410.txt  0810.txt  1210.txt	1610.txt  2010.txt 

In [21]:
from bisect import bisect_right

In [23]:
thresholds = [
    None,
    None,
    [21, 27, 33],
    [24, 30, 36],
    [27, 33, 39],
    [30, 36, 42],
]
bisect_right(thresholds[2], 20), bisect_right(thresholds[2], 21)

(0, 1)

In [27]:
!ls

Untitled.ipynb	in  main.cpp  main.cpp.out  seeds.txt


In [30]:
def get_n_k_nclass(file):
    # k: [2, 5]
    # n_class: [0, 3]
    with open(file) as f:
        n, k = map(int, f.readline().split())
    n_class = bisect_right(thresholds[k], n)
    return n, k, n_class

In [31]:
file = Path("in") / "0000.txt"
get_n_k_nclass(file)

(24, 2, 1)

In [32]:
for k in range(2, 6):
    for n_class in range(4):
        !mkdir in_{k}_{n_class}

In [55]:
for k in range(2, 6):
    for n_class in range(4):
        !mkdir work_{k}_{n_class}

In [34]:
for file in tqdm(sorted(Path("in").iterdir())):
    assert file.suffix == ".txt"
    n, k, n_class = get_n_k_nclass(file)
    !cp {file} ./in_{k}_{n_class}/

  0%|          | 0/3200 [00:00<?, ?it/s]

In [26]:
# ここでエラー出力は何かおかしいかもしれない


# パラメータ抽出
with open(FILENAME) as f:
    answer = f.read()


variables_optimize = []

for left, value, right, options in re.findall(r"^([^/\n]*=\s*)(.+?)(\s*;\s*//(?:.*\W)?OPTIMIZE(\W.*))$", answer, re.MULTILINE):
    name = left.replace("=", "").strip().split()[-1]
    
    searched = re.search(r".*\[(?P<min>.*),(?P<max>.*)\].*", options)
    if searched:
        min_value = max_value = None
        try:
            min_value = eval(searched.group("min"))
            max_value = eval(searched.group("max"))
            assert min_value <= max_value
        except Exception as e:
            print(f"searched={searched}", file=sys.stderr)
            print(e, file=sys.stderr)
            continue
        log = "LOG" in options  # 雑、直したほうが良い
        if type(min_value) != type(max_value):
            print(f"searched={searched}", file=sys.stderr)
            print("types not matched", file=sys.stderr)
            continue
        if isinstance(min_value, int):
            method = "suggest_int"
        elif isinstance(min_value, float):
            method = "suggest_float"
        else:
            print(f"searched={searched}", file=sys.stderr)
            print(f"unknown type ({type(min_value)})", file=sys.stderr)
            continue
        variables_optimize.append({
            "name": name,
            "method": method,
            "min": min_value,
            "max": max_value,
            "log": log,
            "left": left,
            "right": right,
        })
    elif searched := re.search(r".*\{(?P<choices>.*?)\}.*", options):
        choices = list(map(lambda x: x.strip(), searched.group("choices").split(",")))
        variables_optimize.append({
            "name": name,
            "method": "suggest_categorical",
            "choices": choices,
            "left": left,
            "right": right,
        })
    else:
        print(f"searched={searched}", file=sys.stderr)
        print(f"pattern was matched but options are incorrect.", file=sys.stderr)

print(len(variables_optimize), "variables were found.")
if globals().get("pd"):
    display(pd.DataFrame(variables_optimize))
else:
    pprint(variables_optimize)

10 variables were found.


Unnamed: 0,name,method,min,max,log,left,right
0,kErase,suggest_int,1.0,5.0,False,static constexpr auto kErase =,"; // OPTIMIZE [1, 5]"
1,kRadius,suggest_int,2.0,6.0,False,static constexpr auto kRadius =,"; // OPTIMIZE [2, 6]"
2,kAnnealingA,suggest_float,-15.0,15.0,False,static constexpr auto kAnnealingA =,"; // OPTIMIZE [-15.0, 15.0]"
3,kAnnealingB,suggest_float,0.0,3.0,False,static constexpr auto kAnnealingB =,"; // OPTIMIZE [0.0, 3.0]"
4,kAnnealingStart,suggest_float,1.0,100.0,True,static constexpr auto kAnnealingStart =,"; // OPTIMIZE LOG [1.0, 100.0]"
5,kSkipRatio,suggest_float,0.2,0.8,False,static constexpr auto kSkipRatio =,"; // OPTIMIZE [0.2, 0.8]"
6,kTargetDeterminationTrials,suggest_int,1.0,20.0,True,static constexpr auto kTargetDeterminationTria...,"; // OPTIMIZE LOG [1, 20]"
7,kAttractionRatio,suggest_float,0.01,0.9,True,static constexpr auto kAttractionRatio =,"; // OPTIMIZE LOG [0.01, 0.9]"
8,kMaxAttractionDistance,suggest_int,4.0,99.0,True,static constexpr auto kMaxAttractionDistance =,"; // OPTIMIZE LOG [4, 99]"
9,kStartAttraction,suggest_float,0.001,0.9,True,static constexpr auto kStartAttraction =,"; // OPTIMIZE LOG [0.001, 0.9]"


In [56]:
def escape(string):  # 正規表現の中でそのまま使いたい文字列をエスケープ
    res = !echo '{string}' | sed -e 's/[]\/$*.^[]/\\&/g'
    return res[0]

def escape_sed(string):  # sed の置換後の文字列用のエスケープ
    res = !echo '{string}' | sed -e 's/[\/&]/\\&/g'
    return res[0]

def read_stream(name, in_file, out_file):
    for line in in_file:
        print(f"[{name}] {line.strip()}", file=out_file)

def run(cmd, name):
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=isinstance(cmd, str))
    stdout_thread = Thread(target=read_stream, args=(name, proc.stdout, sys.stdout))
    stderr_thread = Thread(target=read_stream, args=(name, proc.stderr, sys.stderr))
    stdout_thread.start()
    stderr_thread.start()
    proc.wait()
    return proc

def objective(trial, in_dir, work_dir):
    n_internal_parallel = 3
    
    index_parallel = f"{trial.number:04d}"
    print(f"{index_parallel=}")
    
    work_dir = Path(work_dir)
    directory_input = Path(in_dir)  # 中のすべてのファイルに対して実行される
    #parameters_changed_filename = Path(gettempdir()) / str(uuid1())
    parameters_changed_filename = work_dir / f"{index_parallel}_{FILENAME}"
    
    run(["mkdir", f"{work_dir / index_parallel}_out"], "mkdir")
    run(["mkdir", f"{work_dir / index_parallel}_score"], "mkdir")
    
    # ファイル作成
    run(f"cp {FILENAME} {parameters_changed_filename}", "cp")
    sed_options = [f"-i {parameters_changed_filename}"]
    for variable in variables_optimize:
        if variable["method"] == "suggest_categorical":
            val =  trial.suggest_categorical(variable["name"], variable["choices"])
        else:
            val = getattr(trial, variable["method"])(variable["name"], variable["min"], variable["max"], log=variable["log"])
        left = variable["left"]
        right = variable["right"]
        sed_options.append(f"""-e 's/^{escape(left)}.*{escape(right)}$/{escape_sed(left)}{val}{escape_sed(right)}/'""")
    command_sed = f"sed {' '.join(sed_options)}"
    #print(command_sed)
    run(command_sed, "sed")
    
    # コンパイル
    command_compile = f"g++ {parameters_changed_filename} -std=gnu++17 -O2 -DONLINE_JUDGE -o {parameters_changed_filename}.out"
    #print(command_compile)
    run(command_compile, "compile")
    # 実行・採点コマンド (@ はファイル名)
    command_exec = (
        #f"./a.out < ./tools/in/{i:04d}.txt > {out_file} && ./tools/target/release/vis ./tools/in/{i:04d}.txt {out_file}"
        #f"../tools/target/release/tester $(pwd)/{parameters_changed_filename}.out < {directory_input}/@ 2>&1 | grep Score"
        #f"../tools/target/release/tester $(pwd)/{parameters_changed_filename}.out < {directory_input}/@ 2>&1 | grep Score | sed -E s/[^0-9]+// > ./{index_parallel}_score/@;"
        #f"cargo run --release --manifest-path ../tools/Cargo.toml --bin tester {directory_input}/@ $(pwd)/{parameters_changed_filename}.out 2>&1 | grep Score | sed -E s/[^0-9]+// > ./{index_parallel}_score/@;"
        f"./{parameters_changed_filename}.out < {directory_input}/@ > {work_dir / index_parallel}_out/@;"
        f"../tools/target/release/vis {directory_input}/@ {work_dir / index_parallel}_out/@ 2> /dev/null > {work_dir / index_parallel}_score/@;"
    )
    # 並列実行 (sed はパスのディレクトリ部分を消してファイル名にしてる)
    run(f"find {directory_input}/* | sed 's!^.*/!!' | xargs -I@ -P {n_internal_parallel} sh -c '{command_exec}'", "exec")
    
    # 集計
    scores = []
    for file_path in Path(f"{work_dir / index_parallel}_score/").iterdir():  
        with open(file_path) as f:
            scores.append(int(f.readline().strip().split()[-1]))
    mean_score = sum(scores) / len(scores)
    
    # 後始末
    run(f"rm -rf {work_dir / index_parallel}_out", "rm")
    run(f"rm -rf {work_dir / index_parallel}_score", "rm")
    #run(f"rm {parameters_changed_filename}", "rm")
    run(f"rm {parameters_changed_filename}.out", "rm")
    
    return mean_score

In [57]:
K = 2
N_CLASS = 0

storage_path = f"test_study.db"
storage = f"sqlite:///{storage_path}"
study_name = "study"
study = optuna.create_study(storage=storage, load_if_exists=True, study_name=study_name, direction="maximize")

in_dir = Path(f"in_{K}_{N_CLASS}")
work_dir = Path(f"work_{K}_{N_CLASS}")

def callback(study, trial):
    try:
        index_parallel = f"{trial.number:04d}"
        parameters_changed_filename = work_dir / f"{index_parallel}_{FILENAME}"
        if study.best_value == trial.value:
            print(f"Updated! {study.best_value}")
            !cp {parameters_changed_filename} {work_dir / "best_parameters.cpp"}
        !rm {parameters_changed_filename}
    except:
        print(":(")


[32m[I 2022-08-15 21:07:33,339][0m Using an existing study with name 'study' instead of creating a new one.[0m


In [58]:
study.optimize(
    partial(objective, in_dir=in_dir, work_dir=work_dir),
    n_trials=3000,
    timeout=86400,
    callbacks=[callback]
)

[exec] 413188 iterations
[exec] 373641 iterations
[exec] 384472 iterations
[exec] 396267 iterations
[exec] 380289 iterations
[exec] 371339 iterations
[exec] 366048 iterations
[exec] 369709 iterations
[exec] 377723 iterations
[exec] 341808 iterations
[exec] 366764 iterations
[exec] 366732 iterations
[exec] 382926 iterations
[exec] 372054 iterations
[exec] 412923 iterations
[exec] 388410 iterations
[exec] 381991 iterations
[exec] 390625 iterations
[exec] 422019 iterations
[exec] 382104 iterations
[exec] 376644 iterations
[exec] 347639 iterations
[exec] 382319 iterations
[exec] 386241 iterations
[exec] 383269 iterations
[exec] 387693 iterations
[exec] 367692 iterations
[exec] 359295 iterations
[exec] 368299 iterations
[exec] 340603 iterations
[exec] 361103 iterations
[exec] 367963 iterations
[exec] 383600 iterations
[exec] 374134 iterations
[exec] 381757 iterations
[exec] 379396 iterations
[exec] 387163 iterations
[exec] 382898 iterations
[exec] 340465 iterations
[exec] 381330 iterations


Updated! 6094.264705882353
cp: cannot stat '0006_main.cpp': No such file or directory
rm: cannot remove '0006_main.cpp': No such file or directory


[32m[I 2022-08-15 21:14:14,645][0m Trial 7 finished with value: 5890.229411764706 and parameters: {'kErase': 5, 'kRadius': 5, 'kAnnealingA': -6.701858967453656, 'kAnnealingB': 1.6480678037035361, 'kAnnealingStart': 34.278158160040135, 'kSkipRatio': 0.2386839669757565, 'kTargetDeterminationTrials': 5, 'kAttractionRatio': 0.8878020038213953, 'kMaxAttractionDistance': 5, 'kStartAttraction': 0.30268360646865367}. Best is trial 6 with value: 6094.264705882353.[0m


rm: cannot remove '0007_main.cpp': No such file or directory


KeyboardInterrupt: 

# 結果を取得

In [2]:
import re
import sys
from uuid import uuid1
from time import sleep
from pprint import pprint
from pathlib import Path
from tempfile import gettempdir
from threading import Thread
from functools import partial
from subprocess import Popen, PIPE

from tqdm.notebook import tqdm
import numpy as np
import optuna
import pandas as pd
import matplotlib.pyplot as plt

loaded_study = optuna.load_study(study_name="study", storage="sqlite:///test_study.db")

In [71]:
studies = [[None] * 4 for _ in range(6)]
for k in range(2, 6):
    for n_class in range(4):
        in_dir = Path(f"in_{k}_{n_class}")
        work_dir = Path(f"work_{k}_{n_class}")

        study_name = f"study_{k}_{n_class}"
        storage_path = work_dir / "study.db"
        storage = f"sqlite:///{storage_path}"

        studies[k][n_class] = optuna.load_study(study_name=study_name, storage=storage)

In [72]:
best_values = []
for k in range(2, 6):
    for n_class in range(4):
        best_values.append(studies[k][n_class].best_value)
sum(best_values) / len(best_values)

7659.633913180649

In [70]:
param_names = [
    "kErase",
    "kRadius",
    "kAnnealingA",
    "kAnnealingB",
    "kAnnealingStart",
    "kSkipRatio",
    "kTargetDeterminationTrials",
    "kAttractionRatio",
    "kMaxAttractionDistance",
    "kStartAttraction",
]

print("array<array<Parameters, 4>, 6>{array<Parameters, 4>{}, {},")
for k in range(2, 6):
    print("    {")
    for n_class in range(4):
        p = studies[k][n_class].best_params
        print("        Parameters{" + ",".join(str(p[name]) for name in param_names) + "},")
    print("    },")
print("};")

array<array<Parameters, 4>, 6>{array<Parameters, 4>{}, {},
    {
        Parameters{3,2,-1.5473559790003477,2.394679104775665,49.113776624126906,0.7036113146961994,16,0.036126246564059016,77,0.002536726332503413},
        Parameters{2,3,-2.360743455920828,0.8410800563870859,51.14954978170781,0.7076718990460277,13,0.01745078050787936,29,0.03209401045907427},
        Parameters{3,3,-0.8599083914393143,2.925181215107081,34.252726962443575,0.7658343930163709,14,0.011372323886580011,80,0.1773136876531248},
        Parameters{2,2,1.0216275683640035,1.6589228593332568,32.22277523281214,0.7572275213669666,12,0.3387849907268061,27,0.6912284040206147},
    },
    {
        Parameters{1,2,-0.6935734187927314,2.3319332246782767,44.31452703280603,0.5039548990326592,10,0.5782158264971925,86,0.0014289958779786388},
        Parameters{2,4,6.746480615222879,1.5901862644204725,22.664344989910102,0.7302122360707419,5,0.3794791595976484,55,0.5733449839402591},
        Parameters{2,4,12.801970168802779,0.3

In [63]:
optuna.visualization.plot_optimization_history(studies[2][2])

In [64]:
optuna.visualization.plot_param_importances(studies[2][2])