In [1]:
import json
import os
import shutil
import subprocess
import sys

import optuna

from train import main as train_main

def objective(trial):
    """Optuna objective: train one model, then score it with ``inference.py``.

    Samples ``gamma``, ``lr`` and ``episodes`` from the trial, trains via
    ``train_main`` (which is asked to save the checkpoint to a per-trial path),
    records that path on the trial as the ``"model_path"`` user attribute, and
    finally runs ``inference.py <model_path>`` in a subprocess.

    Args:
        trial: The Optuna trial used to suggest hyperparameters and to store
            the checkpoint path.

    Returns:
        float: The value printed by ``inference.py`` on the last non-empty
        line of its stdout.

    Raises:
        RuntimeError: If the inference subprocess exits with a non-zero status.
        ValueError: If the subprocess output does not end with a parsable float.
    """
    # Suggest hyperparameters.
    gamma = trial.suggest_float("gamma", 0.90, 0.99)
    lr = trial.suggest_float("lr", 1e-7, 1e-4, log=True)
    episodes = trial.suggest_int("episodes", 100, 300, step=20)  # 100, 120, 140, ..., 300

    # Per-trial checkpoint path. lr spans 1e-7..1e-4, so fixed-point ".5f"
    # would render almost every value as 0.00000; use scientific notation.
    model_path = f"save/model_trial_{trial.number}_lr{lr:.2e}_g{gamma:.3f}.pth"
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    # Train with the sampled hyperparameters; the save path is passed dynamically.
    train_main(
        lr=lr,
        gamma=gamma,
        episodes=episodes,
        save_path=model_path,
    )

    # Record the checkpoint path on the trial so the best model can be found later.
    trial.set_user_attr("model_path", model_path)

    # Evaluate the checkpoint in a separate process, using the same interpreter
    # as this kernel rather than whatever "python" resolves to on PATH.
    result = subprocess.run(
        [sys.executable, "inference.py", model_path],
        capture_output=True,
        text=True,
    )
    # Debug output.
    print("STDOUT:\n", result.stdout)
    print("STDERR:\n", result.stderr)

    if result.returncode != 0:
        raise RuntimeError(
            f"inference.py exited with status {result.returncode} for {model_path}"
        )

    # Tolerate extra lines on stdout: the score is the last non-empty line.
    output_lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    if not output_lines:
        raise ValueError(f"inference.py produced no output for {model_path}")
    try:
        return float(output_lines[-1])
    except ValueError as exc:
        raise ValueError(
            f"could not parse a distance from inference.py output: {output_lines[-1]!r}"
        ) from exc

In [2]:
# Launch the hyperparameter search; the study minimizes the objective value.
study = optuna.create_study(direction="minimize")
study.optimize(objective, show_progress_bar=True, n_trials=3)

# Checkpoint path stored as a user attribute on the best trial.
best_model_path = study.best_trial.user_attrs["model_path"]

# Report the outcome.
print("Best trial:", study.best_trial, sep="\n")
print("Best model path:", best_model_path)

[I 2025-06-07 18:04:31,918] A new study created in memory with name: no-name-6c8ee476-5856-4061-8a11-4731de069fff


  0%|          | 0/3 [00:00<?, ?it/s]

Episode: 0, Mean: -14.444, Std: 0.000, Min: -14.443862825632095, Max: -14.443862825632095
Episode: 50, Mean: -10.917, Std: 0.000, Min: -10.917299658060074, Max: -10.917299658060074
Episode: 100, Mean: -11.972, Std: 0.000, Min: -11.97197949886322, Max: -11.97197949886322
Episode: 150, Mean: -11.400, Std: 0.000, Min: -11.399938814342022, Max: -11.399938814342022
Episode: 200, Mean: -12.581, Std: 0.000, Min: -12.580994971096516, Max: -12.580994971096516
Episode: 250, Mean: -11.104, Std: 0.000, Min: -11.104011751711369, Max: -11.104011751711369
STDOUT:
 9.086280014365911

STDERR:
  checkpoint = torch.load(path, map_location=self.device)

[I 2025-06-07 18:04:59,187] Trial 0 finished with value: 9.086280014365911 and parameters: {'gamma': 0.9059336978836815, 'lr': 2.5962949207346446e-06, 'episodes': 280}. Best is trial 0 with value: 9.086280014365911.
Episode: 0, Mean: -11.654, Std: 0.000, Min: -11.654260069131851, Max: -11.654260069131851
Episode: 50, Mean: -12.239, Std: 0.000, Min: -12.239

In [None]:
from inference import main as inference_main

# Run inference on the best model from the study and plot the result.
inference_main(best_model_path, plot=True, episodes=1000)

In [2]:
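# Disabled: one-off manual training run with fixed hyperparameters (kept for reference).
# from train import main as train_main

# # Model path definition
# model_test_path = f"save/model_test.pth"

# # Start training
# train_main(
#     lr=0.0001,
#     gamma=0.95,
#     episodes=1000,
#     save_path=model_test_path  # <- passed dynamically
# )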

In [1]:
from inference import main as inference_main

# Hard-coded checkpoint to replay (file is named like a trial-2 model;
# presumably produced by an earlier study run -- TODO confirm).
model_test_path = "save/model_trial_2_lr0.00000_g0.950.pth"

# Run inference on that checkpoint and plot the result.
inference_main(model_test_path, plot=True, episodes=10000)

  checkpoint = torch.load(path, map_location=self.device)


visit_order_list: [5, 9, 13, 24, 11, 0, 10, 15, 19, 8, 6, 21, 16, 17, 4, 23, 1, 2, 12, 18, 3, 22, 14, 7, 20]
best_reward: [-4.79955031]
4.7995503060519695
