# rustを実行し，最適なパラメータを探索

In [1]:
import subprocess
import os
import glob
import re
import math
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
# 3桁区切りのカンマ形式での表示設定
pd.options.display.float_format = '{:,.0f}'.format
from scipy.stats import gmean

from tqdm import tqdm
import optuna

In [2]:
def extract_data_from_log(log_content):
    data = {}

    # data['input'] = re.search(r'Processing input: (\d+)', log_content).group(1) if re.search(r'Processing input: (\d+)', log_content) else None
    data['L'] = int(re.search(r'L=(\d+)', log_content).group(1)) if re.search(r'L=(\d+)', log_content) else None
    data['N'] = int(re.search(r'N=(\d+)', log_content).group(1)) if re.search(r'N=(\d+)', log_content) else None
    data['S'] = int(re.search(r'S=(\d+)', log_content).group(1)) if re.search(r'S=(\d+)', log_content) else None
    # data['pass_flg'] = re.search(r'pass_flg=(\w+)', log_content).group(1) == 'True' if re.search(r'pass_flg=(\w+)', log_content) else None
    # temp_search = re.search(r'max_temp(erture|erature)=(\d+)', log_content)
    # data['max_temperature'] = int(temp_search.group(2)) if temp_search else None

    data['interval'] = int(re.search(r'interval=(\d+)', log_content).group(1)) if re.search(r'interval=(\d+)', log_content) else None
    data['interval_num'] = int(re.search(r'interval_num=(\d+)', log_content).group(1)) if re.search(r'interval_num=(\d+)', log_content) else None

    data['Score'] = int(re.search(r'Score = (\d+)', log_content).group(1)) if re.search(r'Score = (\d+)', log_content) else None
    data['Number of wrong answers'] = int(re.search(r'Number of wrong answers = (\d+)', log_content).group(1)) if re.search(r'Number of wrong answers = (\d+)', log_content) else None
    data['Placement cost'] = int(re.search(r'Placement cost = (\d+)', log_content).group(1)) if re.search(r'Placement cost = (\d+)', log_content) else None
    data['Measurement cost'] = int(re.search(r'Measurement cost = (\d+)', log_content).group(1)) if re.search(r'Measurement cost = (\d+)', log_content) else None
    data['Measurement count'] = int(re.search(r'Measurement count = (\d+)', log_content).group(1)) if re.search(r'Measurement count = (\d+)', log_content) else None

    return data

In [3]:
def run_rust_script(S="0000", timeout_duration=10, rust_directory="./tools/", seed=1234, interval=10, interval_num=10):
    cmd = f"cargo run --release --bin tester python ../ahc022_a.py --seed {seed} --interval {interval} --interval_num {interval_num} < in/{S}.txt > out/{S}.txt"

    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=rust_directory)

    try:
        stdout, stderr = process.communicate(timeout=timeout_duration)
        stdout = stdout.decode('utf-8')
        stderr = stderr.decode('utf-8')
        return stdout, stderr

    except subprocess.TimeoutExpired:  
        process.terminate()  
        print(f"Command timed out after {timeout_duration} seconds.")
        return None, None

In [4]:
# 使用例
sample = "0000"
timeout_duration = 10  # 10秒
rust_directory = "./tools/"
stdout, stderr = run_rust_script(sample, timeout_duration, rust_directory, seed=1234, interval=5, interval_num=10)

if stdout:
    print("Standard Output:")
    print(stdout)
if stderr:
    print("Error Output:")
    print(stderr)

Error Output:
    Finished release [optimized] target(s) in 0.04s
     Running `target/release/tester python ../ahc022_a.py --seed 1234 --interval 5 --interval_num 10`
L=50 N=95 S=36
z=1.96 interval=5 interval_num=10 max_temperature=45 max_dist=5 n_observations=19
# iter=9 best_cost = 13931800
# Placement cost = 1042668
# predict Measurement count = 9975 Measurement cost = 11913000
Score = 9461
Number of wrong answers = 30
Placement cost = 1042668
Measurement cost = 11943000
Measurement count = 10000



In [5]:
extract_data_from_log(stderr)

{'L': 50,
 'N': 95,
 'S': 36,
 'interval': 5,
 'interval_num': 10,
 'Score': 9461,
 'Number of wrong answers': 30,
 'Placement cost': 1042668,
 'Measurement cost': 11913000,
 'Measurement count': 9975}

In [100]:
def objective(trial, sample):
    # ステップ1: ハイパーパラメータを指定
    interval = trial.suggest_int("interval", 1, 1000, log=True)
    # interval_num = trial.suggest_int("interval_num", 2, 1000//interval + 1)
    interval_num = trial.suggest_int("interval_num", 2, 4)

    # S = "0069"
    # S = "0010"
    timeout_duration = 10  # 10秒
    rust_directory = "./tools/"
    stdout, stderr = run_rust_script(sample, timeout_duration, rust_directory, seed=1234, interval=interval, interval_num=interval_num)
    data = extract_data_from_log(stderr)
    return data['Score']

In [102]:
# ステップ3: Studyオブジェクトの作成
study = optuna.create_study(direction="maximize")

# OptunaのログレベルをWARNINGに設定
# optuna.logging.set_verbosity(optuna.logging.WARNING)
# ログレベルをINFOに設定
optuna.logging.set_verbosity(optuna.logging.INFO)

# ステップ4: 最適化プロセスの実行
# study.optimize(objective, n_trials=100)
# lambda関数を使って、objectiveにsome_argumentを渡す
study.optimize(lambda trial: objective(trial, sample="0081"), n_trials=100, show_progress_bar=True)

print(f"Best trial Score: {study.best_value:,.0f}")
print(f"Params: {study.best_params}")

[I 2023-08-15 17:06:44,971] A new study created in memory with name: no-name-d569ba8d-4303-4e4f-9ebb-871ed0228c05


  0%|          | 0/100 [00:00<?, ?it/s]

[I 2023-08-15 17:06:45,827] Trial 0 finished with value: 437599.0 and parameters: {'interval': 830, 'interval_num': 3}. Best is trial 0 with value: 437599.0.
[I 2023-08-15 17:06:47,523] Trial 1 finished with value: 6772.0 and parameters: {'interval': 42, 'interval_num': 4}. Best is trial 0 with value: 437599.0.
[I 2023-08-15 17:06:49,163] Trial 2 finished with value: 1.0 and parameters: {'interval': 1, 'interval_num': 3}. Best is trial 0 with value: 437599.0.
[I 2023-08-15 17:06:50,282] Trial 3 finished with value: 579036.0 and parameters: {'interval': 736, 'interval_num': 2}. Best is trial 3 with value: 579036.0.
[I 2023-08-15 17:06:51,138] Trial 4 finished with value: 439705.0 and parameters: {'interval': 828, 'interval_num': 3}. Best is trial 3 with value: 579036.0.
[I 2023-08-15 17:06:51,923] Trial 5 finished with value: 1189483.0 and parameters: {'interval': 221, 'interval_num': 4}. Best is trial 5 with value: 1189483.0.
[I 2023-08-15 17:06:52,706] Trial 6 finished with value: 993

In [87]:
results = []
for i in tqdm(range(100)):
# for i in tqdm(range(2)):
    sample = f"{i:04}"

    with open(f"./tools/in/{sample}.txt") as f:
        L, N, S = map(int, f.readline().split())
    # ステップ3: Studyオブジェクトの作成
    study = optuna.create_study(direction="maximize")

    # OptunaのログレベルをWARNINGに設定
    optuna.logging.set_verbosity(optuna.logging.WARNING)

    # ステップ4: 最適化プロセスの実行
    # lambda関数を使って、objectiveにsome_argumentを渡す
    n_trials = 100
    study.optimize(lambda trial: objective(trial, sample=sample), n_trials=n_trials)
    result = {"sample": sample, "L": L, "N": N, "S": S, "best_value": study.best_value, "interval": study.best_params["interval"], "interval_num": study.best_params["interval_num"]}
    results.append(result)

    # print(f"{sample} L: {L} N:{N} S:{S} Best trial Score: {study.best_value:,.0f} Params: {study.best_params}")


100%|██████████| 100/100 [1:53:09<00:00, 67.90s/it]


In [88]:
results

[{'sample': '0000',
  'L': 50,
  'N': 95,
  'S': 36,
  'best_value': 7585960.0,
  'interval': 16,
  'interval_num': 5},
 {'sample': '0001',
  'L': 35,
  'N': 66,
  'S': 289,
  'best_value': 1393282.0,
  'interval': 1,
  'interval_num': 380},
 {'sample': '0002',
  'L': 23,
  'N': 89,
  'S': 324,
  'best_value': 1452119.0,
  'interval': 93,
  'interval_num': 6},
 {'sample': '0003',
  'L': 35,
  'N': 61,
  'S': 4,
  'best_value': 75017179.0,
  'interval': 14,
  'interval_num': 2},
 {'sample': '0004',
  'L': 33,
  'N': 81,
  'S': 529,
  'best_value': 280656.0,
  'interval': 566,
  'interval_num': 2},
 {'sample': '0005',
  'L': 35,
  'N': 93,
  'S': 529,
  'best_value': 241340.0,
  'interval': 348,
  'interval_num': 3},
 {'sample': '0006',
  'L': 30,
  'N': 89,
  'S': 784,
  'best_value': 156022.0,
  'interval': 838,
  'interval_num': 2},
 {'sample': '0007',
  'L': 49,
  'N': 71,
  'S': 36,
  'best_value': 8019226.0,
  'interval': 2,
  'interval_num': 55},
 {'sample': '0008',
  'L': 30,
  '

In [89]:
df = pd.DataFrame(results)

In [90]:
df

Unnamed: 0,sample,L,N,S,best_value,interval,interval_num
0,0,50,95,36,7585960,16,5
1,1,35,66,289,1393282,1,380
2,2,23,89,324,1452119,93,6
3,3,35,61,4,75017179,14,2
4,4,33,81,529,280656,566,2
5,5,35,93,529,241340,348,3
6,6,30,89,784,156022,838,2
7,7,49,71,36,8019226,2,55
8,8,30,87,784,144999,870,2
9,9,10,61,196,10522311,14,23


In [91]:
df.to_csv('ahc022_best_params.csv', index=False)

In [92]:
results = []
for i in tqdm(range(100, 580)):
    sample = f"{i:04}"

    with open(f"./tools/in/{sample}.txt") as f:
        L, N, S = map(int, f.readline().split())
    # ステップ3: Studyオブジェクトの作成
    study = optuna.create_study(direction="maximize")

    # OptunaのログレベルをWARNINGに設定
    optuna.logging.set_verbosity(optuna.logging.WARNING)

    # ステップ4: 最適化プロセスの実行
    # lambda関数を使って、objectiveにsome_argumentを渡す
    n_trials = 100
    study.optimize(lambda trial: objective(trial, sample=sample), n_trials=n_trials)
    result = {"sample": sample, "L": L, "N": N, "S": S, "best_value": study.best_value, "interval": study.best_params["interval"], "interval_num": study.best_params["interval_num"]}
    results.append(result)

    # print(f"{sample} L: {L} N:{N} S:{S} Best trial Score: {study.best_value:,.0f} Params: {study.best_params}")


100%|██████████| 480/480 [8:49:29<00:00, 66.19s/it]  


In [93]:
df = pd.DataFrame(results)

In [94]:
df

Unnamed: 0,sample,L,N,S,best_value,interval,interval_num
0,100,15,65,1,119681647,5,15
1,101,15,75,1,141034176,8,5
2,102,15,85,1,102606198,10,7
3,103,15,95,1,95339064,12,3
4,104,25,65,1,104969685,9,6
5,105,25,75,1,37644480,47,2
6,106,25,85,1,114992227,8,3
7,107,25,95,1,19304677,1,152
8,108,35,65,1,99373945,11,3
9,109,35,75,1,131971389,1,29


In [95]:
df.to_csv('ahc022_best_params_480.csv', index=False)