In [4]:
import sys
import logging

import torch

import gymnasium as gym

from myAgent import PPOAgent

import os
import glob
from types import SimpleNamespace

In [5]:
# -----------------------------
# 3) Non-blocking key input (press q to quit)
#   - Windows: msvcrt
#   - Unix terminal: termios + select
#   - Jupyter etc.: disabled (stop with Ctrl+C / kernel interrupt)
# -----------------------------
def make_nonblocking_key_reader():
    """Build a non-blocking single-key reader for the current platform.

    Returns:
        tuple: ``(read_key, restore_fn)``.

        * ``read_key()`` returns one pending character as ``str``, or ``None``
          when nothing is waiting. It never blocks.
        * ``restore_fn`` is ``None`` unless the terminal was switched to
          cbreak mode. In that case it is a zero-argument callable that writes
          the saved terminal attributes back; call it once polling is over.

    If no usable terminal is available (``sys.stdin`` is missing, is not a
    TTY, or cannot be switched to cbreak mode) the function does not raise:
    it returns a reader that always yields ``None`` together with
    ``restore_fn = None``.
    """

    def _no_key():
        # Fallback reader for hosts without a controllable terminal.
        return None

    # Windows: poll the console keyboard buffer through msvcrt.
    if os.name == "nt":
        import msvcrt

        def read_key():
            if msvcrt.kbhit():
                return msvcrt.getwch()
            return None

        return read_key, None

    # Unix-like: only usable when stdin is a real terminal.
    stdin = sys.stdin
    try:
        interactive = stdin is not None and stdin.isatty()
    except (AttributeError, ValueError):
        # stdin was replaced by an object without isatty(), or is closed.
        interactive = False

    if not interactive:
        return _no_key, None

    try:
        import select
        import termios
        import tty
    except ImportError:
        return _no_key, None

    try:
        fd = stdin.fileno()
        old = termios.tcgetattr(fd)
        # cbreak: characters become readable immediately, without waiting for Enter.
        tty.setcbreak(fd)
    except (termios.error, OSError, ValueError):
        # The terminal could not be reconfigured; degrade to the no-op reader.
        return _no_key, None

    def restore():
        termios.tcsetattr(fd, termios.TCSADRAIN, old)

    def read_key():
        # A zero timeout turns select() into a pure poll.
        if select.select([stdin], [], [], 0.0)[0]:
            # read() returns "" at EOF; report that as "no key".
            return stdin.read(1) or None
        return None

    return read_key, restore

In [6]:
# -----------------------------
# 4) Model file selection (the newest checkpoint is picked automatically)
# -----------------------------
MODEL_GLOB = "./models/ppo_final_*.pth"
# Matches are ordered by name and the last one is used.
# NOTE(review): this presumes the file-name suffix sorts chronologically — confirm.
candidates = glob.glob(MODEL_GLOB)
candidates.sort()
if not candidates:
    raise FileNotFoundError(f"No checkpoint found: {MODEL_GLOB}")
MODEL_PATH = candidates[-1]
print("Using checkpoint:", MODEL_PATH)


# -----------------------------
# 5) Rebuild the Config from the checkpoint and create the agent
# -----------------------------
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Read the checkpoint first, only to pull out its stored config dict.
# NOTE(review): weights_only=False performs a full unpickle; load trusted files only.
ckpt_for_cfg = torch.load(MODEL_PATH, map_location=device, weights_only=False)
cfg_dict = ckpt_for_cfg["config"]  # the config stored by save_all()

# Wrap the dict so its entries can be read as attributes (cfg.Q_net_sizes, cfg.u_ulim, ...).
cfg = SimpleNamespace(**cfg_dict)

agent = PPOAgent(Config=cfg, device=device)
agent.load_all(MODEL_PATH, map_location=device)

# Report where the agent and its policy network parameters ended up.
print("cuda available:", torch.cuda.is_available())
print("agent device:", agent.device)
print("P_net device:", next(agent.P_net.parameters()).device)


# -----------------------------
# 6) Build the environment and run the inference loop (with rendering)
# -----------------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    stream=sys.stdout,
    datefmt="%H:%M:%S",
)

env = gym.make("Pendulum-v1", render_mode="human")

# Defined before the try block so the finally clause can always reference it,
# even if building the key reader itself fails.
restore_key_state = None

try:
    # Created inside the try so the environment is still closed on failure.
    read_key, restore_key_state = make_nonblocking_key_reader()

    episode = 0
    while True:
        obs, info = env.reset()
        terminated = False
        truncated = False
        ep_return = 0.0
        ep_steps = 0
        episode += 1

        while True:
            # Stop on "q" (the key reader may be inactive outside a real terminal).
            ch = read_key()
            if ch is not None and str(ch).lower() == "q":
                print("\nQuit requested by keypress.")
                # Reuse the Ctrl+C path so both ways of quitting share one cleanup.
                raise KeyboardInterrupt

            # Inference (no exploration noise)
            action = agent.step(obs)

            # One environment step. With render_mode="human" Gymnasium draws the
            # window as part of step(), so no explicit env.render() call is made;
            # an extra call would redraw each step a second time.
            obs, reward, terminated, truncated, info = env.step(action)

            ep_return += float(reward)
            ep_steps += 1

            if terminated or truncated:
                print(f"Episode {episode:4d} | return = {ep_return: .3f} | steps = {ep_steps}")
                break

            # Optional slow-down for easier viewing
            # time.sleep(1.0 / 60.0)

except KeyboardInterrupt:
    # Normal way out of the endless evaluation loop.
    pass

finally:
    if restore_key_state is not None:
        try:
            restore_key_state()
        except Exception as exc:
            # Best effort: report the failure but still close the environment.
            logging.warning("Could not restore terminal state: %s", exc)
    env.close()
    print("Evaluation finished.")


Using checkpoint: ./models/ppo_final_20260206_015253.pth
cuda available: True
agent device: cuda
P_net device: cuda:0
Episode    1 | return = -361.280 | steps = 200
Episode    2 | return = -1.139 | steps = 200
Episode    3 | return = -1.788 | steps = 200
Evaluation finished.


In [None]:
import os
import sys
import glob
import logging
import time
import torch
import gymnasium as gym
import imageio
import numpy as np  # 追加: 暗転フレームを作るために必要
from types import SimpleNamespace

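# Project-local import (adjust the module name to your environment).
# PPOAgent is imported from myAgent in the notebook's first cell; uncomment the
# line below only if this cell has to run on its own.
# from myAgent import PPOAgent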

# -----------------------------
# 4) Model file selection
# -----------------------------
MODEL_GLOB = "./models/ppo_final_*.pth"
# Matches are ordered by name and the last one is used.
# NOTE(review): this presumes the file-name suffix sorts chronologically — confirm.
candidates = glob.glob(MODEL_GLOB)
candidates.sort()
if not candidates:
    raise FileNotFoundError(f"No checkpoint found: {MODEL_GLOB}")
MODEL_PATH = candidates[-1]
print("Using checkpoint:", MODEL_PATH)

# -----------------------------
# 5) Agent setup
# -----------------------------
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The checkpoint is read here only to recover the stored config dict.
# NOTE(review): weights_only=False performs a full unpickle; load trusted files only.
ckpt_for_cfg = torch.load(MODEL_PATH, map_location=device, weights_only=False)
checkpoint_config = ckpt_for_cfg["config"]
cfg = SimpleNamespace(**checkpoint_config)  # attribute-style access to config entries

agent = PPOAgent(Config=cfg, device=device)
agent.load_all(MODEL_PATH, map_location=device)

# -----------------------------
# 6) Build the environment, run inference and merge every episode into one GIF
# -----------------------------
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")

MOVIE_DIR = "./movie"
os.makedirs(MOVIE_DIR, exist_ok=True)

# Start the environment in image-returning mode
env = gym.make("Pendulum-v1", render_mode="rgb_array")

MAX_EPISODES = 5        # number of episodes concatenated into the GIF
GIF_FPS = 30            # playback rate passed to imageio
BLACKOUT_SECONDS = 0.5  # length of the black separator between episodes
# Derived from the playback rate so the separator keeps its duration if GIF_FPS changes.
BLACKOUT_FRAMES = int(round(GIF_FPS * BLACKOUT_SECONDS))
output_path = os.path.join(MOVIE_DIR, "ppo_combined_episodes.gif")

all_frames = []  # frames of all episodes; kept in memory until the GIF is written


def save_frames_as_gif(frames, path, fps=GIF_FPS):
    """Write ``frames`` to ``path`` as an endlessly looping GIF.

    Args:
        frames: Sequence of image arrays in playback order.
        path: Destination file path.
        fps: Playback rate handed to ``imageio.mimsave``.
    """
    imageio.mimsave(path, frames, fps=fps, loop=0)


print(f"Start recording {MAX_EPISODES} episodes...")

try:
    for episode in range(1, MAX_EPISODES + 1):
        obs, info = env.reset()
        terminated = False
        truncated = False
        ep_return = 0.0
        ep_steps = 0

        # --- Loop over a single episode ---
        while not (terminated or truncated):
            # Inference
            action = agent.step(obs)

            # Step
            obs, reward, terminated, truncated, info = env.step(action)

            # Grab the rendered image and keep it
            frame = env.render()
            all_frames.append(frame)

            ep_return += float(reward)
            ep_steps += 1

        print(f"Episode {episode:2d}/{MAX_EPISODES} finished | Return: {ep_return:.2f}")

        # Insert a short blackout between episodes; without it the animation
        # jumps straight into the next episode, which is tiring to watch.
        if episode < MAX_EPISODES:
            black_frame = np.zeros_like(frame)  # black image with the same shape as a frame
            all_frames.extend([black_frame] * BLACKOUT_FRAMES)

    # --- All episodes done: write everything in one go ---
    print(f"Saving GIF to {output_path} ... (This may take a moment)")
    save_frames_as_gif(all_frames, output_path)
    print("Done!")

except KeyboardInterrupt:
    print("Interrupted. Saving what we have so far...")
    if all_frames:
        save_frames_as_gif(all_frames, output_path)

finally:
    env.close()

Using checkpoint: ./models/ppo_final_20260201_234103.pth
Start recording 5 episodes...
Episode  1/5 finished | Return: -1491.11
Episode  2/5 finished | Return: -388.10
Episode  3/5 finished | Return: -129.89
Episode  4/5 finished | Return: -124.99
Episode  5/5 finished | Return: -1.99
Saving GIF to ./movie/ppo_combined_episodes.gif ... (This may take a moment)
Done!
