In [1]:
from deep_reinforcement_learning.agent.ppo_agent import (
    create_ppo_for_hvac,
    actions_to_series,
)

In [2]:
import gymnasium as gym
import numpy as np
import torch
import types
from tianshou.data import Batch

In [3]:
# Identifiers of the 26 A/C units. All four column lists below are derived
# from this single list so they cannot drift out of sync and always share
# the same device order.
device_ids = [
    "A-25",
    "A-26",
    "D-1南1",
    "D-2北1",
    "D-3南2",
    "D-4北2",
    "D-5南1",
    "D-6北1",
    "D-7南2",
    "D-8北2",
    "E-10南2",
    "E-11南3",
    "E-12南4",
    "E-13北1",
    "E-14北2",
    "E-15北3",
    "E-16北4",
    "E-17",
    "E-9南1",
    "F-18",
    "F-19",
    "F-20",
    "G-21",
    "G-22",
    "G-23",
    "G-24",
]

# Column names follow the pattern "<attribute>__<device id>"; one entry per
# device for each controllable attribute.
set_cols = [f"A/C Set Temperature__{dev}" for dev in device_ids]
mode_cols = [f"A/C Mode__{dev}" for dev in device_ids]
fan_cols = [f"A/C Fan Speed__{dev}" for dev in device_ids]
onoff_cols = [f"A/C ON/OFF__{dev}" for dev in device_ids]

In [4]:
# Discrete choices per A/C unit. The MultiDiscrete action space below sizes
# each action component by the length of the corresponding list.
set_temp_list = [18, 20, 22, 24, 26]
set_mode_list = ["cool", "dry", "heat", "auto"]
set_wind_list = ["low", "mid", "high"]
set_activate_list = ["OFF", "ON"]
n_devices = 26

# Fail fast if the column lists and the device count ever drift apart.
assert (
    len(set_cols) == len(mode_cols) == len(fan_cols) == len(onoff_cols) == n_devices
), "column lists must contain exactly one entry per device"

# Fixed seed so that Restart & Run All reproduces the same random observation
# (and the same policy initialisation, as far as it draws from torch's
# global RNG).
SEED = 0
torch.manual_seed(SEED)
rng = np.random.default_rng(SEED)

# Action space: four discrete components (temperature, mode, fan speed,
# on/off) repeated once per device.
action_space = gym.spaces.MultiDiscrete(
    np.array(
        [
            len(set_temp_list),
            len(set_mode_list),
            len(set_wind_list),
            len(set_activate_list),
        ]
        * n_devices
    )
)
# Observation shape must match the real environment; this notebook uses a
# flat 154-dimensional vector.
obs_shape = (154,)
observation_space = gym.spaces.Box(
    low=-np.inf, high=np.inf, shape=obs_shape, dtype=np.float32
)

# Dummy environment exposing only the two attributes passed on to
# create_ppo_for_hvac.
single_env = types.SimpleNamespace(
    observation_space=observation_space,
    action_space=action_space,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1) Build the policy.
policy = create_ppo_for_hvac(
    single_env=single_env,
    device=device,
    lr=3e-4,
    set_temp_list=set_temp_list,
    set_mode_list=set_mode_list,
    set_wind_list=set_wind_list,
    # Reuse the list that sized the action space (it includes "OFF").
    set_on_off_list=set_activate_list,
    n_devices=n_devices,
    deterministic_eval=True,
)

# 2) Run one deterministic forward pass on a random observation.
# Current temperature index for every device (batch size 1): all devices at
# 22 degC. Per the original author's note, devices set to -1 are not masked.
cur = np.full((1, n_devices), set_temp_list.index(22), dtype=np.int64)

obs = rng.standard_normal((1, *obs_shape)).astype(np.float32)
batch = Batch(obs=obs, info=Batch(current_temp_index=cur))

policy.eval()
# Inference only: no gradients are needed.
with torch.no_grad():
    # Per the original author's note, the +/-1 temperature mask takes effect here.
    out = policy.forward(batch, deterministic=True)
act = out.act  # shape is [1, 4*n_devices] or [4*n_devices]

# 3) Map the action vector onto the named columns.
series = actions_to_series(
    act,
    set_temp_list=set_temp_list,
    set_mode_list=set_mode_list,
    set_wind_list=set_wind_list,
    set_on_off_list=set_activate_list,
    n_devices=n_devices,
    set_cols=set_cols,
    mode_cols=mode_cols,
    fan_cols=fan_cols,
    onoff_cols=onoff_cols,
)

In [6]:
import pandas as pd

# Display options applied only while printing, as (name, value) pairs
# flattened in the form pd.option_context expects.
full_display_opts = (
    "display.max_rows", None,  # do not elide rows
    "display.max_colwidth", None,  # do not truncate long strings
    # Original note: wrap automatically to the available width.
    # TODO confirm that 0 (rather than None) has that effect.
    "display.width", 0,
)

# Print the complete action series as plain text; the options revert when
# the with-block exits.
with pd.option_context(*full_display_opts):
    print(series.to_string())

A/C Set Temperature__A-25        20
A/C Mode__A-25                 auto
A/C Fan Speed__A-25             low
A/C ON/OFF__A-25                 ON
A/C Set Temperature__A-26        20
A/C Mode__A-26                 auto
A/C Fan Speed__A-26            high
A/C ON/OFF__A-26                 ON
A/C Set Temperature__D-1南1       20
A/C Mode__D-1南1                cool
A/C Fan Speed__D-1南1            mid
A/C ON/OFF__D-1南1                ON
A/C Set Temperature__D-2北1       20
A/C Mode__D-2北1                auto
A/C Fan Speed__D-2北1            low
A/C ON/OFF__D-2北1               OFF
A/C Set Temperature__D-3南2       20
A/C Mode__D-3南2                auto
A/C Fan Speed__D-3南2           high
A/C ON/OFF__D-3南2                ON
A/C Set Temperature__D-4北2       20
A/C Mode__D-4北2                 dry
A/C Fan Speed__D-4北2           high
A/C ON/OFF__D-4北2                ON
A/C Set Temperature__D-5南1       22
A/C Mode__D-5南1                cool
A/C Fan Speed__D-5南1            low
A/C ON/OFF__D-5南1           