In [1]:
from deep_reinforcement_learning.agent.ppo_agent import (
    create_ppo_for_hvac,
    actions_to_frame,
)

In [2]:
import gymnasium as gym
import numpy as np
import torch
import types
import pandas as pd
from tianshou.data import Batch
from datetime import datetime

In [3]:
# Identifiers of the 26 A/C units, in the column order shared by every list
# below. Keeping them in one place guarantees that set_cols, mode_cols,
# fan_cols and onoff_cols stay positionally aligned (index i always refers to
# the same unit in all four lists).
device_ids = [
    "A-25",
    "A-26",
    "D-1南1",
    "D-2北1",
    "D-3南2",
    "D-4北2",
    "D-5南1",
    "D-6北1",
    "D-7南2",
    "D-8北2",
    "E-10南2",
    "E-11南3",
    "E-12南4",
    "E-13北1",
    "E-14北2",
    "E-15北3",
    "E-16北4",
    "E-17",
    "E-9南1",
    "F-18",
    "F-19",
    "F-20",
    "G-21",
    "G-22",
    "G-23",
    "G-24",
]

# Column names follow the pattern "<signal>__<unit id>".
set_cols = [f"A/C Set Temperature__{unit}" for unit in device_ids]
mode_cols = [f"A/C Mode__{unit}" for unit in device_ids]
fan_cols = [f"A/C Fan Speed__{unit}" for unit in device_ids]
onoff_cols = [f"A/C ON/OFF__{unit}" for unit in device_ids]

In [4]:
from deep_reinforcement_learning.environment.control_env import AirControlEnv
from deep_reinforcement_learning.environment.prediction.model import load_residual_model
from deep_reinforcement_learning.const import (
    set_fan_range,
    set_mode_range,
    set_temp_range,
    set_on_off_range,
)

In [5]:
# Load the hourly data, parse the "Datetime_hour" column as datetimes and use
# it as the index of the frame.
base_df = (
    pd.read_csv("data/base/hourly_filled.csv")
    .assign(Datetime_hour=lambda frame: pd.to_datetime(frame["Datetime_hour"]))
    .set_index("Datetime_hour", drop=True)
)


def split_df(df, term):
    """Split a frame into the rows before ``term`` and the rows from ``term`` on.

    Args:
        df: Frame whose index can be compared against ``term``.
        term: Cutoff value. Rows with ``index < term`` go to the first frame,
            rows with ``index >= term`` go to the second.

    Returns:
        A ``(historical_df, validate_df)`` tuple. The input frame is copied
        first, so it is not modified.
    """
    snapshot = df.copy()
    is_before = snapshot.index < term
    is_from_term = snapshot.index >= term
    return snapshot.loc[is_before], snapshot.loc[is_from_term]


# ===== Forecast window =====
# t0 is the time to predict from; tend is 24 hours later.
t0 = pd.Timestamp("2025-09-10 07:00:00")
tend = pd.Timestamp("2025-09-11 07:00:00")
# ===== Split at t0: rows before t0 are the "past", rows from t0 on are the "future" =====
# Pass t0 itself as the cutoff instead of re-typing the same date, so the split
# point cannot drift away from the start of the forecast window.
historical_df, validate_df = split_df(base_df, t0)

In [6]:
# Load the residual prediction model from its saved joblib file.
model = load_residual_model("models/xgb_weight.joblib")
# Build the control environment for the window defined by t0 / tend.
Env = AirControlEnv(
    model=model,
    base_df=historical_df,  # rows before the split point
    start_term=t0,
    end_term=tend,
    weather_forecast=validate_df,  # rows from the split point onward
)
# reset() returns the initial observation and an info dict; both are reused
# later to build the policy input.
obs, info = Env.reset()

In [7]:
# Inspect the action space. The recorded output is a MultiDiscrete space that
# repeats the sizes (23, 3, 5, 2) 26 times — presumably one group of four
# sub-actions per A/C unit (confirm against AirControlEnv).
Env.action_space

MultiDiscrete([23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2
 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2
 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2
 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2 23  3  5  2
 23  3  5  2 23  3  5  2])

In [8]:
# Shape of the initial observation returned by Env.reset().
obs.shape

(133,)

In [9]:
# Inspect the info dict returned by Env.reset(); it is later wrapped in a
# tianshou Batch and passed to the policy together with the observation.
info

{'current_temp_index': array([ 0, 14,  4,  9,  8,  9, 10, 10, 10, 10,  6,  6,  6, 10, 10,  8,  8,
        16, 10, 14, 14, 14, 10, 14,  4, 14], dtype=int64),
 'time': Timestamp('2025-09-10 06:00:00')}

In [10]:
# Candidate values for each per-unit control, taken from the project constants.
set_temp_list = set_temp_range
set_mode_list = set_mode_range
set_wind_list = set_fan_range
set_activate_list = set_on_off_range
n_devices = len(onoff_cols)

# Fix the torch RNG so this cell gives the same result on every run.
# NOTE(review): no checkpoint is loaded anywhere in this notebook, so the
# policy presumably starts from randomly initialised weights — confirm, and
# load trained weights here if that is what is intended.
SEED = 0
torch.manual_seed(SEED)

# Observation space: take the shape from the observation the environment
# actually returned rather than hard-coding a dimension.
obs_shape = obs.shape
observation_space = gym.spaces.Box(
    low=-np.inf, high=np.inf, shape=obs_shape, dtype=np.float32
)

# Stand-in environment that carries only the two attributes set here
# (observation_space and action_space).
single_env = types.SimpleNamespace(
    observation_space=observation_space,
    action_space=Env.action_space,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1) Build the policy.
policy = create_ppo_for_hvac(
    single_env=single_env,
    device=device,
    lr=3e-4,
    set_temp_list=set_temp_list,
    set_mode_list=set_mode_list,
    set_wind_list=set_wind_list,
    set_on_off_list=set_activate_list,  # must include the OFF option
    n_devices=n_devices,
    deterministic_eval=True,
)

# 2) Build a single-sample batch: the observation gets a leading batch axis.
obs_batched = np.asarray(obs, dtype=np.float32).reshape(1, -1)

# info is wrapped as-is, so its arrays keep whatever shape Env.reset() returned
# (no batch axis is added here).
# NOTE(review): the original note said a (1, n_devices) current_temp_index
# works unchanged — confirm the policy also accepts the unbatched shape.
info_batched = Batch(**info)
batch = Batch(obs=obs_batched, info=info_batched)

# 3) Inference only: eval mode, and no autograd graph is needed.
policy.eval()
with torch.no_grad():
    # Per the original note, the ±1 mask takes effect inside forward
    # (presumably relative to info["current_temp_index"] — confirm).
    out = policy.forward(batch, deterministic=True)
act = out.act  # shape is [1, 4*n_devices] or [4*n_devices]

# 4) Map the action indices onto the named columns.
# current_time reuses t0 so the output timestamp always matches the start of
# the forecast window.
# NOTE(review): set_on_off_list is the literal ["OFF", "ON"] here, while the
# policy above received set_on_off_range — confirm both have the same order.
series = actions_to_frame(
    act,
    current_time=t0,
    set_temp_list=set_temp_list,
    set_mode_list=set_mode_list,
    set_wind_list=set_wind_list,
    set_on_off_list=["OFF", "ON"],
    n_devices=n_devices,
    set_cols=set_cols,
    mode_cols=mode_cols,
    fan_cols=fan_cols,
    onoff_cols=onoff_cols,
)

In [12]:
# Show which torch device was selected (CUDA when available, otherwise CPU).
device

device(type='cuda')

In [11]:
# Display the column-labelled actions returned by actions_to_frame.
series

Unnamed: 0_level_0,A/C Set Temperature__A-25,A/C Mode__A-25,A/C Fan Speed__A-25,A/C ON/OFF__A-25,A/C Set Temperature__A-26,A/C Mode__A-26,A/C Fan Speed__A-26,A/C ON/OFF__A-26,A/C Set Temperature__D-1南1,A/C Mode__D-1南1,...,A/C Fan Speed__G-22,A/C ON/OFF__G-22,A/C Set Temperature__G-23,A/C Mode__G-23,A/C Fan Speed__G-23,A/C ON/OFF__G-23,A/C Set Temperature__G-24,A/C Mode__G-24,A/C Fan Speed__G-24,A/C ON/OFF__G-24
Datetime_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-09-10 07:00:00,19.5,0,4,OFF,26,0,4,OFF,21.5,0,...,2,ON,21.5,0,4,OFF,26.5,2,0,ON
