In [1]:
import pandas as pd
from pathlib import Path
from datetime import datetime

In [2]:
from deep_reinforcement_learning.environment.prediction.transform_input_data import (
    build_features_residualized,
    build_targets_residualized,
    pick_cols,
    make_time_features,
    InputDataBuilderDP,
)
from deep_reinforcement_learning.environment.prediction.model import (
    load_residual_model,
    predict_full_period_with_residual_model,
)

In [3]:
from datetime import datetime

import pandas as pd
from prettytable import PrettyTable


class DataManager:
    """Stateless helpers for inspecting and filtering pandas DataFrames.

    Every method is a classmethod, so the class is used as a namespace
    (``DataManager.show_status(df)``) and never needs to be instantiated.
    """

    @classmethod
    def show_status(cls, df: pd.DataFrame):
        """Print a per-column data-quality summary of ``df``.

        For each column the table lists:
          * Type           - "Numerical" if pandas reports a numeric dtype, else "Categorical".
          * Missing Values - number of null entries.
          * Duplicates     - number of rows whose value repeats an earlier row's value.
          * Outliers       - numeric columns only: count of values more than three
                             standard deviations from the column mean ("N/A" otherwise).

        The table is printed; nothing is returned.
        """
        table = PrettyTable(
            ["Variable", "Type", "Missing Values", "Duplicates", "Outliers"]
        )

        for col in df.columns:
            series = df[col]

            # Classify the column by dtype.
            is_numeric = pd.api.types.is_numeric_dtype(series)
            col_type = "Numerical" if is_numeric else "Categorical"

            # Null count.
            missing_values = series.isnull().sum()

            # Rows repeating an earlier value of this column.
            duplicates = df.duplicated(subset=[col]).sum()

            # 3-sigma outlier count (numeric columns only).
            if is_numeric:
                mean = series.mean()
                std = series.std()
                outliers = ((series - mean).abs() > 3 * std).sum()
            else:
                outliers = "N/A"

            table.add_row([col, col_type, missing_values, duplicates, outliers])
        print(table)

    @classmethod
    def extract_term_data(
        cls, target_col: str, df: pd.DataFrame, start_time: datetime, end_time: datetime
    ):
        """Return the rows of ``df`` whose ``target_col`` lies in ``[start_time, end_time]``.

        Both bounds are inclusive. ``target_col`` is parsed with ``pd.to_datetime``
        and the returned frame carries the parsed (datetime) column.

        The input ``df`` is left untouched: the parsed column is written only to
        the returned copy, so calling this helper no longer changes the caller's
        frame as a side effect.
        """
        timestamps = pd.to_datetime(df[target_col])
        in_term = (timestamps >= start_time) & (timestamps <= end_time)

        # Copy the selected rows so the datetime column can be attached without
        # touching ``df`` (and without chained-assignment warnings downstream).
        filtered_df = df.loc[in_term].copy()
        filtered_df[target_col] = timestamps.loc[in_term]
        return filtered_df

    @classmethod
    def check_date_sequence(cls, df: pd.DataFrame, date_col: str = "date"):
        """Report which calendar days are absent between the first and last date.

        Timestamps are reduced to their calendar day before comparison, so a day
        counts as present if at least one row falls on it, whatever its time of
        day. Null dates are ignored.

        Returns:
            dict with
              * "is_continuous": True when no day is missing (also for an empty column),
              * "missing_dates": the missing days as "YYYY-MM-DD" strings, ascending.
        """
        days = (
            pd.DatetimeIndex(pd.to_datetime(df[date_col]).dropna())
            .normalize()
            .unique()
        )

        # Nothing to check: date_range cannot be built from NaT bounds.
        if len(days) == 0:
            return {"is_continuous": True, "missing_dates": []}

        expected = pd.date_range(start=days.min(), end=days.max(), freq="D")
        missing = expected.difference(days)
        return {
            "is_continuous": len(missing) == 0,
            "missing_dates": missing.strftime("%Y-%m-%d").tolist(),
        }

In [4]:
import os

# Show the kernel's working directory; the relative data/model paths used in this notebook resolve against it.
os.getcwd()

'/Users/toukouken/Documents/MENTERU/AIrux8_opti_logic'

In [5]:
# Load the hourly table, parse its timestamp column, and use it as the index.
base_df = (
    pd.read_csv("data/base/hourly_filled.csv")
    .assign(Datetime_hour=lambda frame: pd.to_datetime(frame["Datetime_hour"]))
    .set_index("Datetime_hour", drop=True)
)

In [6]:
def split_df(df, term):
    """Split ``df`` by its index into (rows before ``term``, rows at or after ``term``).

    The split is made on a copy of ``df``, so the input frame is never the
    parent of the returned pieces.
    """
    snapshot = df.copy()
    is_before = snapshot.index < term
    is_on_or_after = snapshot.index >= term
    return snapshot[is_before], snapshot[is_on_or_after]

In [7]:
from datetime import datetime
import pandas as pd

# ===== Timestamp to predict =====
t0 = pd.Timestamp("2025-09-10 07:00:00")

# ===== Split at t0: rows before t0 are the "past", rows at/after t0 the "future" =====
# The boundary reuses t0 instead of repeating the date as a second literal,
# so the split point and the prediction time cannot drift apart.
historical_df, validate_df = split_df(base_df, t0)

# ===== Fit the DP builder on the past only (important: prevents leakage) =====
builder = InputDataBuilderDP(
    days=7, lags_hours=(1,), include_weather_raw=True, include_original_controls=True
)
_ = builder.fit(historical_df)
builder.begin_online(historical_df)
# ===== Column names of the weather / control inputs needed at the prediction time =====
weather_cols = ["Outdoor Temp.", "Outdoor Humidity", "Solar Radiation"]

# A/C unit identifiers; every control family below uses the same units in this order.
_ac_units = [
    "A-25",
    "A-26",
    "D-1南1",
    "D-2北1",
    "D-3南2",
    "D-4北2",
    "D-5南1",
    "D-6北1",
    "D-7南2",
    "D-8北2",
    "E-10南2",
    "E-11南3",
    "E-12南4",
    "E-13北1",
    "E-14北2",
    "E-15北3",
    "E-16北4",
    "E-17",
    "E-9南1",
    "F-18",
    "F-19",
    "F-20",
    "G-21",
    "G-22",
    "G-23",
    "G-24",
]

# One column per unit for each control family: "<family>__<unit>".
set_cols = [f"A/C Set Temperature__{unit}" for unit in _ac_units]
mode_cols = [f"A/C Mode__{unit}" for unit in _ac_units]
fan_cols = [f"A/C Fan Speed__{unit}" for unit in _ac_units]
onoff_cols = [f"A/C ON/OFF__{unit}" for unit in _ac_units]
all_control_cols = set_cols + mode_cols + fan_cols + onoff_cols


# t0の行を最優先に取得。無ければ「直近過去で前方埋め」フォールバック。
def _row_at_t0_or_ffill(df: pd.DataFrame, cols, t0):
    """
    cols に 'Indoor Temp.__A-25' / 'indoor_temp__A-25' のどちらが渡されても、
    df 側のカラムがどちらか一方だけでもあれば同じものとして扱う。
    戻り値のカラム名は、cols 側の名前に揃えて返す。
    """
    # 要求カラム名 → 実際に df に存在するカラム名 のマップを作る
    col_map: dict[str, str] = {}
    for c in cols:
        candidates = [c]
        if c.startswith("Indoor Temp.__"):
            candidates.append(c.replace("Indoor Temp.__", "indoor_temp__"))
        if c.startswith("indoor_temp__"):
            candidates.append(c.replace("indoor_temp__", "Indoor Temp.__"))

        found = None
        for cand in candidates:
            if cand in df.columns:
                found = cand
                break
        if found is not None:
            col_map[c] = found

    # マッチする列が1つもなければ空DataFrameを返す
    if not col_map:
        return pd.DataFrame(index=pd.DatetimeIndex([t0]))

    use_cols = list(col_map.values())

    # t0 があればその行、なければ直近過去1行
    if t0 in df.index:
        out = df.loc[[t0], use_cols]
    else:
        out = df.loc[:t0, use_cols].tail(1)

    # df側のカラム名 → 呼び出し元が要求したカラム名 に戻す
    out = out.rename(columns={v: k for k, v in col_map.items()})

    # 要求された順に並べる（存在したものだけ）
    out = out.reindex(columns=list(col_map.keys()))

    return out


# The three inputs for t0 (one row each).
# time_info is only an empty frame indexed by t0; it is not passed to the builder in this cell.
time_info = pd.DataFrame(index=pd.DatetimeIndex([t0]))
weather_df = _row_at_t0_or_ffill(validate_df, weather_cols, t0).copy()
control_df = _row_at_t0_or_ffill(validate_df, all_control_cols, t0).copy()
# ===== Build the feature frame X for t0 (original note: the builder derives baselines/lags from historical_df — TODO confirm against InputDataBuilderDP) =====
X = builder.make_input_next(t0, weather_df, control_df)

# ===== Load the model =====
# NOTE(review): the original comment also mentioned forcing the training-time column order;
# no such step is visible in this cell, presumably it happens inside the helpers — confirm.
model = load_residual_model("models/xgb_weight.joblib")

# ===== Predict (original note: residual prediction, added back onto the original scale) =====
res_all = predict_full_period_with_residual_model(
    model=model,
    X_full=X,
    model_target_names=list(model.y_cols),
    wanted_target_cols=list(model.y_cols),  # request every target; pass a subset here if only some are needed
    bl_prefix="bl__",
    add_back_baseline=True,
)
y_7 = res_all["y_pred"]

In [18]:
# Pull from validate_df the row at t0 for the same columns the model predicted (presumably to compare against y_7).
Real = _row_at_t0_or_ffill(validate_df, y_7.columns, t0).copy()

In [19]:
# List the column names of Real.
Real.columns

Index(['total_kwh__41-1', 'total_kwh__43-1', 'total_kwh__43-2',
       'total_kwh__43-3', 'total_kwh__43-4', 'total_kwh__44-1',
       'total_kwh__44-2', 'total_kwh__44-3', 'total_kwh__44-4',
       'total_kwh__44-5', 'total_kwh__44-6', 'total_kwh__44-7',
       'total_kwh__44-8', 'total_kwh__49-1', 'total_kwh__49-2',
       'total_kwh__49-3', 'total_kwh__49-4', 'total_kwh__49-6',
       'total_kwh__49-7', 'total_kwh__49-8', 'total_kwh__49-9',
       'indoor_temp__A-25', 'indoor_temp__A-26', 'indoor_temp__D-1南1',
       'indoor_temp__D-2北1', 'indoor_temp__D-3南2', 'indoor_temp__D-4北2',
       'indoor_temp__D-5南1', 'indoor_temp__D-6北1', 'indoor_temp__D-7南2',
       'indoor_temp__D-8北2', 'indoor_temp__E-10南2', 'indoor_temp__E-11南3',
       'indoor_temp__E-12南4', 'indoor_temp__E-13北1', 'indoor_temp__E-14北2',
       'indoor_temp__E-15北3', 'indoor_temp__E-16北4', 'indoor_temp__E-17',
       'indoor_temp__E-9南1', 'indoor_temp__F-18', 'indoor_temp__F-19',
       'indoor_temp__F-20', 'indoor_tem

In [21]:
# Show only the columns of Real that pick_cols selects for the key "indoor".
indoor_cols = pick_cols(Real, "indoor")
Real[indoor_cols]

Unnamed: 0_level_0,indoor_temp__A-25,indoor_temp__A-26,indoor_temp__D-1南1,indoor_temp__D-2北1,indoor_temp__D-3南2,indoor_temp__D-4北2,indoor_temp__D-5南1,indoor_temp__D-6北1,indoor_temp__D-7南2,indoor_temp__D-8北2,...,indoor_temp__E-16北4,indoor_temp__E-17,indoor_temp__E-9南1,indoor_temp__F-18,indoor_temp__F-19,indoor_temp__F-20,indoor_temp__G-21,indoor_temp__G-22,indoor_temp__G-23,indoor_temp__G-24
Datetime_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-09-10 07:00:00,25.916667,27.0,42.25,31.166667,29.916667,28.916667,26.833333,28.666667,26.083333,29.166667,...,25.0,27.0,25.75,26.166667,29.583333,32.0,28.5,30.0,41.333333,39.5


In [12]:
# Display the prediction frame y_7.
y_7

Unnamed: 0,total_kwh__41-1,total_kwh__43-1,total_kwh__43-2,total_kwh__43-3,total_kwh__43-4,total_kwh__44-1,total_kwh__44-2,total_kwh__44-3,total_kwh__44-4,total_kwh__44-5,...,indoor_temp__E-16北4,indoor_temp__E-17,indoor_temp__E-9南1,indoor_temp__F-18,indoor_temp__F-19,indoor_temp__F-20,indoor_temp__G-21,indoor_temp__G-22,indoor_temp__G-23,indoor_temp__G-24
2025-09-10 07:00:00,78443.648438,52070.695312,38895.34375,1183.956543,39375.148438,23930.113281,11187.183594,3641.900879,2379.361328,23103.982422,...,25.427946,26.833538,26.216682,25.58349,29.526522,31.909819,28.853558,29.912533,42.044949,41.28896


In [None]:
# Feed the t0 result back into the builder before building the next step's input.
# NOTE(review): this passes the model's predictions (y_7), not the observed row (Real), to a method
# named accumulate_actuals — confirm that a closed-loop rollout on predictions is intended.
builder.accumulate_actuals(y_7)

In [24]:
# Next step: rebuild the single-row inputs for t1, one hour after the t0 used earlier.
t1 = pd.Timestamp("2025-09-10 08:00:00")
time_info = pd.DataFrame(index=pd.DatetimeIndex([t1]))
weather_df = _row_at_t0_or_ffill(validate_df, weather_cols, t1).copy()
control_df = _row_at_t0_or_ffill(validate_df, all_control_cols, t1).copy()
# ===== Build the feature frame X for t1 (original note: the builder derives baselines/lags from historical_df — TODO confirm against InputDataBuilderDP) =====
X = builder.make_input_next(t1, weather_df, control_df)

In [25]:
# Show the columns of X that pick_cols selects for the key "lag1h__indoor".
# NOTE(review): despite its name, Duration_cols holds these lag columns, not durations.
Duration_cols = pick_cols(X, "lag1h__indoor")
X[Duration_cols]

Unnamed: 0,lag1h__indoor_temp__A-25,lag1h__indoor_temp__A-26,lag1h__indoor_temp__D-1南1,lag1h__indoor_temp__D-2北1,lag1h__indoor_temp__D-3南2,lag1h__indoor_temp__D-4北2,lag1h__indoor_temp__D-5南1,lag1h__indoor_temp__D-6北1,lag1h__indoor_temp__D-7南2,lag1h__indoor_temp__D-8北2,...,lag1h__indoor_temp__E-16北4,lag1h__indoor_temp__E-17,lag1h__indoor_temp__E-9南1,lag1h__indoor_temp__F-18,lag1h__indoor_temp__F-19,lag1h__indoor_temp__F-20,lag1h__indoor_temp__G-21,lag1h__indoor_temp__G-22,lag1h__indoor_temp__G-23,lag1h__indoor_temp__G-24
2025-09-10 08:00:00,27.75,27.0,44.916667,28.75,29.833333,28.916667,27.333333,29.25,27.5,29.833333,...,25.333333,26.916667,26.0,26.0,28.0,32.0,27.727273,29.727273,44.272727,41.454545


In [10]:
# Print the per-column type / missing / duplicate / outlier summary for the feature frame X.
DataManager.show_status(X)

+------------------------------+-----------+----------------+------------+----------+
|           Variable           |    Type   | Missing Values | Duplicates | Outliers |
+------------------------------+-----------+----------------+------------+----------+
|             hour             | Numerical |       0        |     0      |    0     |
|            month             | Numerical |       0        |     0      |    0     |
|           weekday            | Numerical |       0        |     0      |    0     |
|          is_weekend          | Numerical |       0        |     0      |    0     |
|   lag1h__indoor_temp__A-25   | Numerical |       0        |     0      |    0     |
|   lag1h__indoor_temp__A-26   | Numerical |       0        |     0      |    0     |
|  lag1h__indoor_temp__D-1南1  | Numerical |       0        |     0      |    0     |
|  lag1h__indoor_temp__D-2北1  | Numerical |       0        |     0      |    0     |
|  lag1h__indoor_temp__D-3南2  | Numerical |       0     

In [11]:
# Display the current single-row weather input.
weather_df

Unnamed: 0_level_0,Outdoor Temp.,Outdoor Humidity,Solar Radiation
Datetime_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-09-10 07:00:00,28.5,80.0,82.4


In [12]:
# Display time_info (an empty frame carrying only the timestamp index).
time_info

2025-09-10 07:00:00


In [13]:
# Display the predicted value of a single target column.
y_7["total_kwh__49-9"]

2025-09-10 07:00:00    3969.956787
Name: total_kwh__49-9, dtype: float32

In [14]:
# Display the pre-t0 history frame produced by split_df.
historical_df

Unnamed: 0_level_0,A/C Set Temperature__A-25,A/C Set Temperature__A-26,A/C Set Temperature__D-1南1,A/C Set Temperature__D-2北1,A/C Set Temperature__D-3南2,A/C Set Temperature__D-4北2,A/C Set Temperature__D-5南1,A/C Set Temperature__D-6北1,A/C Set Temperature__D-7南2,A/C Set Temperature__D-8北2,...,total_kwh__49-3,total_kwh__49-4,total_kwh__49-6,total_kwh__49-7,total_kwh__49-8,total_kwh__49-9,datetime,Outdoor Temp.,Outdoor Humidity,Solar Radiation
Datetime_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-06-30 15:00:00,24.0,25.0,27.0,27.000000,27.0,27.000000,27.0,27.0,27.0,27.0,...,325.0,420.0,520.0,695.0,610.0,770.0,2024-06-30 15:00:00,27.3,77.88,420.7
2024-06-30 16:00:00,24.0,25.0,27.0,27.000000,27.0,27.000000,27.0,27.0,27.0,27.0,...,260.0,240.0,455.0,700.0,575.0,1050.0,2024-06-30 16:00:00,27.1,79.76,188.7
2024-06-30 17:00:00,24.0,25.0,27.0,27.000000,27.0,27.000000,27.0,27.0,27.0,27.0,...,65.0,185.0,455.0,420.0,380.0,1820.0,2024-06-30 17:00:00,26.5,81.63,109.5
2024-06-30 18:00:00,24.0,25.0,27.0,27.000000,27.0,27.000000,27.0,27.0,27.0,27.0,...,325.0,480.0,130.0,400.0,605.0,1260.0,2024-06-30 18:00:00,26.5,81.14,34.6
2024-06-30 19:00:00,24.0,25.0,27.0,27.000000,27.0,27.000000,27.0,27.0,27.0,27.0,...,390.0,360.0,260.0,485.0,480.0,1190.0,2024-06-30 19:00:00,25.2,91.41,8.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-09-10 02:00:00,18.0,26.0,21.0,23.000000,23.0,23.750000,24.0,24.0,24.0,24.0,...,360.0,14780.0,20570.0,9805.0,10340.0,5780.0,2025-09-10 02:00:00,27.5,85.63,0.0
2025-09-10 03:00:00,18.0,26.0,21.0,23.250000,23.0,23.333333,24.0,24.0,24.0,24.0,...,420.0,26715.0,9590.0,17530.0,8935.0,7690.0,2025-09-10 03:00:00,27.5,85.32,0.0
2025-09-10 04:00:00,18.0,26.0,21.0,24.000000,23.0,24.000000,24.0,24.0,24.0,24.0,...,240.0,21940.0,24670.0,15465.0,9940.0,8230.0,2025-09-10 04:00:00,27.5,85.62,0.0
2025-09-10 05:00:00,18.0,26.0,21.0,23.583333,23.0,23.583333,24.0,24.0,24.0,24.0,...,12385.0,18620.0,29820.0,12025.0,14725.0,6505.0,2025-09-10 05:00:00,27.4,85.97,0.0


In [15]:
# Display time_info again (an empty frame carrying only the timestamp index).
time_info

2025-09-10 07:00:00
