In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

import numpy as np
import random
from math import sqrt
import pandas as pd
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torch.nn.utils.rnn import pad_sequence

## データ準備

In [2]:
# ====== User settings ======
# This cell defines the input files, the column layout and the window lengths,
# then loads the hourly data and the list of flood intervals used for training.

excel_path = r"C:\Users\ryoya\MasterThesis\MT_Furuie\data\Miwa_LSTM_Data\CrossVal_4\Miwa_hourlyAve_for_LSTM_CrossVal_4.xlsx"
flood_idx_path_train = r"C:\Users\ryoya\MasterThesis\MT_Furuie\data\Miwa_LSTM_Data\CrossVal_4\Miwa_flood_idx_CrossVal_4_train.xlsx"

# Columns are selected by position (0-based) in the Excel sheet.
enc_cols   = [5, 3, 4]      # column positions fed to the encoder (here: 3 variables)
dec_cols   = [5, 3]  # column positions fed to the decoder (here: 2 variables)
y_cols      = [4]          # column positions of the target variable(s) (here: 1 variable)

Te = 72    # encoder window length in time steps
Td = 24    # decoder window length in time steps


# Flood intervals: the Excel file holds 1-based (start, end) row numbers;
# they are converted to 0-based positions below.
df_ranges_train = pd.read_excel(flood_idx_path_train, header=0)
flood_ranges_train_1based = [tuple(x) for x in df_ranges_train.to_numpy()]

# Convert to 0-based positions (pandas positional indexing is 0-based).
# Both ends stay inclusive; later cells slice with s:e+1.
flood_ranges_train = [(s-1, e-1) for (s, e) in flood_ranges_train_1based]



# ====== Load the data ======
df = pd.read_excel(excel_path, header=0)

# Keep only the required columns, in the fixed order encoder | decoder | target.
# NOTE: a column position that appears in more than one list is duplicated in
# `data`, so `data` has len(enc_cols) + len(dec_cols) + len(y_cols) columns and
# later cells address them by position rather than by name.
use_cols = enc_cols + dec_cols + y_cols
data = df.iloc[:, use_cols].copy()


In [3]:
# ====== Standardization: mean / std are computed from the training flood intervals ======

# Collect the rows of every training flood interval (both ends inclusive).
flood_data_train_parts = [data.iloc[s:e+1, :] for (s, e) in flood_ranges_train]
flood_data_train = pd.concat(flood_data_train_parts, axis=0)

# Per-column statistics; a zero standard deviation is replaced by 1.0 so the
# division below cannot produce inf/NaN for a constant column.
mean = flood_data_train.mean(numeric_only=True)
std  = flood_data_train.std(numeric_only=True).replace(0, 1.0)
data_norm = (data - mean) / std # standardize the data of the whole period

# Because `data` contains duplicated columns (see use_cols), the printed
# Series repeat the same labels.
print('---平均（train）----')
print(mean)

print('----標準偏差（train）----')
print(std)

---平均（train）----
CumRain_24h     13.542308
Qin(m3/s)       41.130710
Tur(ppm)       386.520820
CumRain_24h     13.542308
Qin(m3/s)       41.130710
Tur(ppm)       386.520820
dtype: float64
----標準偏差（train）----
CumRain_24h     25.957971
Qin(m3/s)       40.794214
Tur(ppm)       935.757710
CumRain_24h     25.957971
Qin(m3/s)       40.794214
Tur(ppm)       935.757710
dtype: float64


## 各出水ごとにDataSetを作成

In [4]:
class FloodSeq2SeqDataset(Dataset):
    """In-memory dataset holding the pre-built samples of one flood event.

    Args:
        name: label stored on the instance as ``self.name``.
        triplets: indexable collection of ``(enc_X, dec_X, y)`` samples; it is
            stored as-is (no copy) and indexed directly by ``__getitem__``.
    """

    def __init__(self, name, triplets):
        self.triplets = triplets  # list of (enc_X, dec_X, y)
        self.name = name

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""
        sample = self.triplets[idx]
        return sample

    def __len__(self):
        """Number of stored samples."""
        n_samples = len(self.triplets)
        return n_samples

def collate_fn(batch):
    """Stack a list of ``(enc_X, dec_X, y)`` samples into batched tensors.

    All samples are expected to have identical shapes (fixed Te / Td), so the
    tensors are simply stacked along a new leading batch dimension.

    Returns:
        ``(enc_x, dec_x, y, mask)`` where ``mask`` is an all-ones float32
        tensor of shape ``[B, Td, 1]`` (kept for a future variable-length
        extension).
    """
    # Transpose the list of samples into three per-field groups and stack each.
    enc_x, dec_x, y = (torch.stack(field, dim=0) for field in zip(*batch))

    # y is [B, Td, Fo]; the mask carries one flag per (sample, time step).
    n_batch, n_steps, _ = y.shape
    mask = torch.ones((n_batch, n_steps, 1), dtype=torch.float32)
    return enc_x, dec_x, y, mask
    

In [5]:
# Build one FloodSeq2SeqDataset per training flood interval.
# Every sample is a sliding window of Te encoder steps followed by Td decoder
# steps taken from the standardized data of a single flood.
datasets = []
idx = 0  # running count of datasets created so far (used to name them)
Fe, Fd, Fo = len(enc_cols), len(dec_cols), len(y_cols)

for (s, e) in flood_ranges_train:
    seg = data_norm.iloc[s:e+1]  # rows of this flood (both ends inclusive)
    n = len(seg)

    samples_train = []

    if n < Te + Td:  # a single sample needs Te + Td consecutive rows
        continue

    # Convert the whole segment to float32 once instead of slicing the
    # DataFrame and converting three times per window. Columns are laid out
    # as [encoder (Fe) | decoder (Fd) | target (Fo)], see use_cols.
    seg_values = torch.tensor(seg.to_numpy(dtype=np.float32))  # [n, Fe+Fd+Fo]
    enc_all = seg_values[:, 0:Fe].contiguous()                 # [n, Fe]
    dec_all = seg_values[:, Fe:Fe+Fd].contiguous()             # [n, Fd]
    y_all   = seg_values[:, Fe+Fd:Fe+Fd+Fo].contiguous()       # [n, Fo]

    for start in range(0, n - (Te + Td) + 1):
        split = start + Te  # first decoder row of this window
        # The windows are views into the per-flood tensors (no per-window
        # copy); collate_fn stacks them into fresh batch tensors.
        enc_X = enc_all[start:split]        # [Te, Fe]
        dec_X = dec_all[split:split + Td]   # [Td, Fd]
        y     = y_all[split:split + Td]     # [Td, Fo]
        samples_train.append((enc_X, dec_X, y))

    idx += 1
    datasets.append(FloodSeq2SeqDataset(f"Dataset_{idx}", samples_train))


## 損失関数

In [6]:
def split_batch(batch):
    """Unpack a batch and move every tensor to the global ``device``.

    Accepts both ``(enc, dec, y)`` and ``(enc, dec, y, mask)``. When no mask
    is supplied an all-ones mask of shape ``[B, Td, 1]`` is created from ``y``.

    Raises:
        ValueError: if the batch has neither 3 nor 4 elements.
    """
    n_fields = len(batch)
    if n_fields not in (3, 4):
        raise ValueError("Unexpected batch format")

    enc_x, dec_x, y = batch[0], batch[1], batch[2]
    # Fall back to "everything is valid" when the loader provides no mask.
    mask = batch[3] if n_fields == 4 else torch.ones_like(y[..., :1])  # [B,Td,1]
    return tuple(t.to(device) for t in (enc_x, dec_x, y, mask))

def masked_mse(pred, target, mask):
    """Masked mean squared error, averaged over the batch.

    Args:
        pred, target: tensors of shape ``[B, T, Out]``.
        mask: tensor of shape ``[B, T, 1]`` (1 = valid, 0 = padding).

    Returns:
        Scalar tensor: for each sample the masked squared error is summed and
        divided by that sample's mask sum (at least 1), then the per-sample
        values are averaged.
    """
    sq_err = (pred - target).pow(2) * mask
    n_valid = mask.sum(dim=(1, 2)).clamp_min(1.0)  # per-sample denominator
    return (sq_err.sum(dim=(1, 2)) / n_valid).mean()

def masked_rmse(pred, target, mask):
    """Square root of ``masked_mse`` (i.e. the root of the batch-mean MSE)."""
    mse = masked_mse(pred, target, mask)
    return mse.sqrt()

def masked_r2(pred, target, mask):
    """Masked coefficient of determination, averaged over the batch.

    R^2 = 1 - SSE / SST is computed separately for every sample (over its
    valid time steps and outputs) and the per-sample values are averaged.

    Note: SST is clamped to at least 1e-12, so a sample whose target is
    (almost) constant yields a very large negative value and can dominate the
    batch mean.

    Args:
        pred, target: tensors of shape ``[B, T, Out]``.
        mask: tensor of shape ``[B, T, 1]`` (1 = valid, 0 = padding).
    """
    reduce_dims = (1, 2)
    n_valid = mask.sum(dim=reduce_dims, keepdim=True).clamp_min(1.0)
    target_mean = (target * mask).sum(dim=reduce_dims, keepdim=True) / n_valid

    residual_ss = ((pred - target) ** 2 * mask).sum(dim=reduce_dims)
    total_ss = ((target - target_mean) ** 2 * mask).sum(dim=reduce_dims).clamp_min(1e-12)
    return (1.0 - residual_ss / total_ss).mean()

def masked_corr(pred, target, mask, eps: float = 1e-12):
    """Masked Pearson correlation coefficient, averaged over the batch.

    This notebook defines ``masked_corr`` again in a later cell with an
    ``eps`` parameter; the signature and the computation here are kept in line
    with that definition so the result does not depend on which of the two
    cells was executed last.

    Args:
        pred, target: tensors of shape ``[B, T, Out]``.
        mask: tensor of shape ``[B, T, 1]`` (1 = valid, 0 = padding).
        eps: small constant added to the denominator so a zero variance gives
            a correlation of 0 instead of a division by zero.

    Returns:
        Scalar tensor: mean of the per-sample correlations.
    """
    # Number of valid points per sample (at least 1 to avoid dividing by 0).
    valid = mask.sum(dim=(1, 2)).clamp_min(1.0)  # [B]
    valid_b = valid.view(-1, 1, 1)

    # Per-sample means over the valid points only.
    mean_pred = (pred * mask).sum(dim=(1, 2), keepdim=True) / valid_b
    mean_target = (target * mask).sum(dim=(1, 2), keepdim=True) / valid_b

    # Deviations; masked positions contribute exactly 0.
    diff_pred = (pred - mean_pred) * mask
    diff_target = (target - mean_target) * mask

    # Covariance and variances per sample.
    cov = (diff_pred * diff_target).sum(dim=(1, 2)) / valid
    var_pred = (diff_pred ** 2).sum(dim=(1, 2)) / valid
    var_target = (diff_target ** 2).sum(dim=(1, 2)) / valid

    corr = cov / (torch.sqrt(var_pred * var_target) + eps)  # [B]
    return corr.mean()

## モデル定義（Seq2SeqLSTM）

In [7]:
class StateBridge(nn.Module):
    """
    Bridge that maps the encoder's final LSTM state (h, c) onto the decoder's
    initial state. Encoder and decoder may differ in depth and hidden width.

    mode:
        - "zero_pad":     depth is matched by appending zero states (or by
                          truncating); the hidden width is then mapped by one
                          linear layer shared between h and c.
        - "repeat_top":   like "zero_pad", but missing layers are filled with
                          copies of the last encoder state instead of zeros.
        - "linear_stack": the whole [B, L_enc * H_enc] state is mapped by a
                          linear layer to [B, L_dec * H_dec], so layers are
                          mixed by a learned map (separate layers for h and c).

    Args:
        enc_layers: size of the leading (layer) dimension of the encoder state
        dec_layers: number of decoder layers
        enc_hidden: encoder hidden width
        dec_hidden: decoder hidden width

    Raises:
        ValueError: if ``mode`` is not one of the three names above.
    """
    def __init__(self, enc_layers, dec_layers, enc_hidden, dec_hidden, mode="zero_pad"):
        super().__init__()
        self.enc_layers = enc_layers
        self.dec_layers = dec_layers
        self.enc_hidden = enc_hidden
        self.dec_hidden = dec_hidden
        self.mode = mode

        if mode == "linear_stack":
            # Flattened (layers x width) in / out sizes; h and c get their own map.
            flat_in = enc_layers * enc_hidden
            flat_out = dec_layers * dec_hidden
            self.proj_h = nn.Linear(flat_in, flat_out, bias=True)
            self.proj_c = nn.Linear(flat_in, flat_out, bias=True)
        elif mode in ("zero_pad", "repeat_top"):
            # One width projection shared by h and c.
            self.proj = nn.Linear(enc_hidden, dec_hidden, bias=True)
        else:
            raise ValueError(f"Unknown bridge mode: {mode}")

    def _match_layers(self, x, how="zero_pad"):
        """
        Adjust only the layer dimension of ``x`` (hidden width untouched).

        x: [L_enc, B, H_enc]  ->  returns [L_dec, B, H_enc]

        - same depth:       ``x`` is returned unchanged
        - decoder shallower: the first L_dec entries along dim 0 are kept and
                             the remaining ones are dropped
        - decoder deeper:    entries are appended: copies of the last entry for
                             how="repeat_top", zeros otherwise
        """
        n_enc, batch, width = x.shape
        n_dec = self.dec_layers
        missing = n_dec - n_enc

        if missing == 0:
            return x
        if missing < 0:
            return x[:n_dec, :, :]

        if how == "repeat_top":
            filler = x[-1:, :, :].expand(missing, batch, width)  # replicate last entry
        else:  # zero_pad
            filler = x.new_zeros(missing, batch, width)
        return torch.cat([x, filler], dim=0)

    def forward(self, h_enc, c_enc):
        """
        h_enc, c_enc: [L_enc, B, H_enc]
        returns: (h0_dec, c0_dec), each [L_dec, B, H_dec]
        """
        if self.mode not in ("zero_pad", "repeat_top"):
            # linear_stack: [L_enc, B, H_enc] -> [B, L_enc * H_enc]
            n_enc, batch, width = h_enc.shape
            flat_size = n_enc * width
            h_flat = h_enc.permute(1, 0, 2).reshape(batch, flat_size)
            c_flat = c_enc.permute(1, 0, 2).reshape(batch, flat_size)

            # learned map to [B, L_dec * H_dec], then back to [L_dec, B, H_dec]
            out_shape = (batch, self.dec_layers, self.dec_hidden)
            h_dec = self.proj_h(h_flat).view(*out_shape).permute(1, 0, 2).contiguous()
            c_dec = self.proj_c(c_flat).view(*out_shape).permute(1, 0, 2).contiguous()
            return h_dec, c_dec

        # zero_pad / repeat_top: match the depth first (still H_enc wide) ...
        h_matched = self._match_layers(h_enc, self.mode)
        c_matched = self._match_layers(c_enc, self.mode)
        # ... then project the last dimension: [L, B, H_enc] -> [L, B, H_dec]
        return self.proj(h_matched), self.proj(c_matched)

In [8]:
class Seq2SeqLSTM(nn.Module):
    """
    Encoder-decoder LSTM with an optional temporal Conv1d layer before the
    linear output head.

    Data flow: encoder LSTM -> (optional) StateBridge -> decoder LSTM ->
    [Conv1d -> activation -> dropout] -> Linear head.

    The bridge is only created when the encoder state cannot be handed to the
    decoder directly (different depth or width, bidirectional encoder) or when
    ``bridge_mode == "linear_stack"``; otherwise the encoder's final (h, c) is
    used as the decoder's initial state as-is.

    Args:
        in_enc: number of encoder input variables
        in_dec: number of decoder input variables
        out_dim: number of output variables
        enc_hidden / dec_hidden: hidden width of the encoder / decoder LSTM
        enc_layers / dec_layers: depth of the encoder / decoder LSTM
        bridge_mode: "zero_pad" | "repeat_top" | "linear_stack"
        dropout: inter-layer LSTM dropout (only applied when an LSTM has more
            than one layer); also used after the convolution
        bidirectional_enc: make the encoder (only) bidirectional. The bridge is
            then built with ``enc_layers * 2`` state entries of width
            ``enc_hidden``, i.e. every direction's final state is treated as
            its own layer.
        head_activation: "identity" | "relu" | "tanh" | "sigmoid". With
            ``use_conv=True`` an unknown name falls back to ReLU; with
            ``use_conv=False`` an unknown name raises ``KeyError``.
        use_conv: insert the Conv1d block between decoder and head
        conv_kernel: kernel width along time. Any width >= 1 keeps the output
            at Td steps (see ``forward``).
        conv_channels: Conv1d output channels; defaults to ``dec_hidden``
    """
    def __init__(
        self,
        in_enc: int,
        in_dec: int,
        out_dim: int,
        enc_hidden: int = 128,
        dec_hidden: int = 128,
        enc_layers: int = 2,
        dec_layers: int = 3,
        bridge_mode: str = "zero_pad",  # "zero_pad" | "repeat_top" | "linear_stack"
        dropout: float = 0.0,
        bidirectional_enc: bool = False,
        head_activation="relu",
        use_conv: bool = True,
        conv_kernel: int = 3,
        conv_channels: int = None,
    ):
        super().__init__()
        self.bidirectional_enc = bidirectional_enc
        enc_dir = 2 if bidirectional_enc else 1

        self.use_conv = bool(use_conv)

        self.enc = nn.LSTM(
            input_size=in_enc,
            hidden_size=enc_hidden,
            num_layers=enc_layers,
            batch_first=True,
            dropout=dropout if enc_layers > 1 else 0.0,
            bidirectional=bidirectional_enc
        )

        self.dec = nn.LSTM(
            input_size=in_dec,
            hidden_size=dec_hidden,
            num_layers=dec_layers,
            batch_first=True,
            dropout=dropout if dec_layers > 1 else 0.0
        )

        # A bridge is needed whenever the encoder state [L_enc * dir, B, H_enc]
        # does not already have the decoder's [L_dec, B, H_dec] layout, or when
        # the learned "linear_stack" mapping is requested explicitly.
        needs_bridge = (
            enc_layers != dec_layers
            or enc_dir != 1
            or enc_hidden != dec_hidden
            or bridge_mode == "linear_stack"
        )
        self.bridge = StateBridge(
            enc_layers=enc_layers * enc_dir,
            dec_layers=dec_layers,
            enc_hidden=enc_hidden,
            dec_hidden=dec_hidden,
            mode=bridge_mode
        ) if needs_bridge else None

        # Activation lookup shared by both head variants.
        activations = {
            "identity": nn.Identity(),
            "relu": nn.ReLU(),
            "tanh": nn.Tanh(),
            "sigmoid": nn.Sigmoid(),
        }

        # ---------- intermediate layer (1-D convolution over time) ----------
        if conv_channels is None:
            conv_channels = dec_hidden
        self.conv_channels = conv_channels
        if self.use_conv:
            # Conv1d expects [B, C=features, T=time]; forward() transposes.
            # padding = k // 2 gives exactly T outputs for odd k and T + 1 for
            # even k; forward() trims the surplus step in the even case.
            self.conv1d = nn.Conv1d(
                in_channels=dec_hidden,
                out_channels=conv_channels,
                kernel_size=conv_kernel,
                padding=conv_kernel // 2
            )
            self.conv_act = activations.get(head_activation, nn.ReLU())
            self.conv_dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

            # Output head: conv_channels -> out_dim
            self.head = nn.Linear(conv_channels, out_dim)
            # The activation is applied after the convolution, so nothing is
            # applied directly in front of the head on this path.
            self.act = nn.Identity()
        else:
            # Without convolution: dec_hidden -> out_dim
            self.head = nn.Linear(dec_hidden, out_dim)
            self.act = activations[head_activation]

    def _extract_final_states(self, out, hc):
        """
        Return the final ``(h, c)`` pair of an LSTM call unchanged.

        ``hc`` is the state tuple returned by ``nn.LSTM``; each element has
        shape [num_layers * num_directions, B, H]. ``out`` is ignored.
        """
        h, c = hc
        return h, c

    def forward(self, enc_x, dec_x):
        """
        enc_x: [B, Te, in_enc]
        dec_x: [B, Td, in_dec]
        return: yhat [B, Td, out_dim]
        """
        # ----- Encoder -----
        _, (h_T, c_T) = self.enc(enc_x)  # h_T, c_T: [L_enc * dir, B, H_enc]

        # ----- Bridge -----
        if self.bridge is not None:
            h0_dec, c0_dec = self.bridge(h_T, c_T)  # [L_dec, B, H_dec]
        else:
            h0_dec, c0_dec = h_T, c_T

        # ----- Decoder -----
        dec_out, _ = self.dec(dec_x, (h0_dec, c0_dec))  # [B, Td, H_dec]

        if self.use_conv:
            # Convolve along time: [B, H_dec, Td] -> [B, C, Td] -> [B, Td, C]
            x = dec_out.transpose(1, 2)             # [B, H_dec, Td]
            x = self.conv1d(x)                      # [B, C, Td] (Td + 1 for even kernels)
            # Keep exactly Td steps so the output always matches the target
            # length; this is a no-op for odd kernel widths.
            x = x[:, :, :dec_out.size(1)]
            x = self.conv_act(x)
            x = self.conv_dropout(x)
            x = x.transpose(1, 2)                   # [B, Td, C]
            yhat = self.head(x)                     # [B, Td, out_dim]
        else:
            # Plain path: activation -> linear
            yhat = self.head(self.act(dec_out))     # [B, Td, out_dim]

        return yhat
        

In [9]:
# 重み初期化用の関数

def init_weights(m):
    """Per-module weight initializer, meant to be used with ``model.apply``.

    - ``nn.Linear`` / ``nn.Conv1d``: He (Kaiming) normal weights for ReLU,
      zero bias.
    - ``nn.LSTM``: Xavier-uniform input-to-hidden weights, orthogonal
      hidden-to-hidden weights, zero biases.
    - any other module is left untouched.
    """
    if isinstance(m, (nn.Linear, nn.Conv1d)):
        # Conv1d is initialized exactly like Linear.
        # (Alternative: init.xavier_normal_(m.weight))
        init.kaiming_normal_(m.weight, nonlinearity="relu")
        if m.bias is not None:
            init.zeros_(m.bias)
        return

    if not isinstance(m, nn.LSTM):
        return

    # LSTM: walk the named parameters and pick the scheme from the name.
    with torch.no_grad():
        for name, param in m.named_parameters():
            if "weight_ih" in name:
                # input -> hidden
                init.xavier_uniform_(param)
            elif "weight_hh" in name:
                # hidden -> hidden
                init.orthogonal_(param)
            elif "bias" in name:
                init.zeros_(param)
                # Optionally the forget-gate bias could be set to +1:
                # hidden_size = param.shape[0] // 4
                # param[hidden_size:2*hidden_size] = 1.0



## ハイパーパラメータの設定
### ※保存Excelファイルのパスも指定

In [10]:
# Use the GPU when one is available; split_batch() and the model are moved
# to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper-parameters of one experiment. Entries marked "adjust per experiment"
# are the ones that are varied between runs.
cfg = {
    # Model
    "in_enc": 3,
    "in_dec": 2,
    "out_dim": 1,
    "enc_hidden": 16, # adjust per experiment
    "dec_hidden": 16, # adjust per experiment
    "enc_layers": 2, # adjust per experiment
    "dec_layers": 2, # adjust per experiment
    "bridge_mode": "zero_pad",   # "zero_pad" | "repeat_top" | "linear_stack"
    "batch_size": 512, # adjust per experiment
    "dropout": 0.1,
    "bidirectional_enc": False,
    "head_activation": "relu", # "identity", "relu", "tanh", "sigmoid", ... (adjust per experiment)
    # Train
    "epochs": 100,
    "lr": 1e-2,
    "weight_decay": 0.0, # L2 regularization coefficient; 0 disables it
    "grad_clip": 1.0, # gradient-norm clipping threshold; 0 or None disables it
    "print_every": 1, # print the training progress every this many epochs
    "patience": 10, # early stopping
    "use_conv": True # whether to use the convolution layer (adjust per experiment)
}

# Excel file the cross-validation results are written to.
save_path = r"C:\Users\ryoya\MasterThesis\MT_Furuie\results\Miwa_LSTM\CrossVal_4/CrossVal_4_result_3_6_2.xlsx" # adjust per experiment


## 学習関数・評価関数の定義

In [11]:
# 学習関数・評価関数の定義

def train_one_epoch(model, loader, optimizer):
    """Run one optimization pass over ``loader``.

    The loss is ``masked_mse``; gradients are norm-clipped to
    ``cfg["grad_clip"]`` when that value is truthy.

    Returns:
        The sample-weighted mean of the per-batch losses (0.0 for an empty
        loader).
    """
    model.train()
    loss_sum, n_seen = 0.0, 0
    for batch in loader:
        enc_x, dec_x, y, mask = split_batch(batch)
        loss = masked_mse(model(enc_x, dec_x), y, mask)

        optimizer.zero_grad()
        loss.backward()
        clip = cfg["grad_clip"]
        if clip:
            nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        # Weight each batch loss by its size so a smaller last batch does not
        # distort the epoch mean.
        batch_size = enc_x.size(0)
        loss_sum += loss.item() * batch_size
        n_seen += batch_size
    return loss_sum / max(n_seen, 1)

@torch.no_grad()
def evaluate(model, loader):
    """Evaluate ``model`` on ``loader`` in the standardized scale.

    Every metric is computed per batch and then averaged with the batch size
    as weight.

    Returns:
        dict with the keys ``"loss"`` (masked MSE), ``"rmse"``, ``"r2"`` and
        ``"corr"``.
    """
    model.eval()
    metric_fns = (
        ("loss", masked_mse),
        ("rmse", masked_rmse),
        ("r2", masked_r2),
        ("corr", masked_corr),
    )
    totals = {"loss": 0.0, "rmse": 0.0, "r2": 0.0, "corr": 0.0}
    n_seen = 0

    for batch in loader:
        enc_x, dec_x, y, mask = split_batch(batch)
        yhat = model(enc_x, dec_x)

        batch_size = enc_x.size(0)
        for key, metric in metric_fns:
            totals[key] += metric(yhat, y, mask).item() * batch_size
        n_seen += batch_size

    denom = max(n_seen, 1)
    return {key: total / denom for key, total in totals.items()}

In [12]:
def inverse_standardize_y_by_index(y_std, mean, std, y_pos):
    """
    Undo the standardization of the output variables.

    y_std:  output tensor in the standardized scale, [B, T, Fo]
    mean, std: pandas.Series fitted on the training data; they are indexed by
               position, so duplicated labels are not a problem
    y_pos: list of column positions of the outputs (e.g. [Fe+Fd, Fe+Fd+1, ...])
    return: y in the original scale, [B, T, Fo]
    """
    def _as_row(stats):
        # Pick the output columns by position and shape them as [1, 1, Fo] so
        # they broadcast over batch and time.
        values = stats.iloc[y_pos].to_numpy(dtype=float)
        return torch.tensor(values, dtype=y_std.dtype, device=y_std.device).view(1, 1, -1)

    return y_std * _as_row(std) + _as_row(mean)


def masked_corr(pred, target, mask, eps: float = 1e-12):
    """
    Masked Pearson correlation coefficient (batch mean).

    pred/target: [B, T, Fo], mask: [B, T, 1] (1 = valid, 0 = invalid)
    eps: added to the denominator so that a zero variance yields 0
    returns: scalar tensor, the mean of the per-sample correlations
    """
    dims = (1, 2)

    # Number of valid points per sample, as [B] and as a broadcastable [B, 1, 1].
    n_valid = mask.sum(dim=dims).clamp_min(1.0)
    n_valid_b = n_valid.view(-1, 1, 1)

    def _centered(x):
        # Deviation from the per-sample masked mean; masked positions become 0.
        x_mean = (x * mask).sum(dim=dims, keepdim=True) / n_valid_b
        return (x - x_mean) * mask

    dev_p = _centered(pred)
    dev_t = _centered(target)

    # Per-sample covariance and variances, each of shape [B].
    cov = (dev_p * dev_t).sum(dim=dims) / n_valid
    var_p = (dev_p ** 2).sum(dim=dims) / n_valid
    var_t = (dev_t ** 2).sum(dim=dims) / n_valid

    return (cov / ((var_p * var_t).sqrt() + eps)).mean()


@torch.no_grad()
def evaluate_original_scale_by_index(model, loader, mean, std, data_columns, y_pos):
    """Evaluate ``model`` on ``loader`` in the original (de-standardized) scale.

    Args:
        model: network called as ``model(enc_x, dec_x)``.
        loader: iterable of ``(enc, dec, y)`` or ``(enc, dec, y, mask)`` batches
            in the standardized scale.
        mean, std: pandas.Series used for the standardization.
        data_columns: accepted for interface compatibility; not used here.
        y_pos: positions of the output columns inside ``mean`` / ``std``.

    Returns:
        dict with
        - ``"mse"`` / ``"rmse"``: computed from the squared error summed over
          all batches divided by the total mask sum, so they do not depend on
          how the data is split into batches;
        - ``"r2"`` / ``"corr"``: batch-size-weighted mean of the per-batch
          ``masked_r2`` / ``masked_corr`` values (which are themselves means
          of per-sample values).

    Raises:
        ValueError: if a batch has neither 3 nor 4 elements.
    """
    model.eval()
    device = next(model.parameters()).device

    total_sse = 0.0   # sum of squared errors over all valid points
    total_cnt = 0.0   # number of valid points (sum of the mask)
    total_r2  = 0.0   # accumulator for the weighted mean of R^2
    total_corr = 0.0  # accumulator for the weighted mean of the correlation
    n = 0             # number of samples seen (sum of the batch sizes)

    for batch in loader:
        if len(batch) == 3:
            enc_x, dec_x, y_std = batch
            mask = torch.ones_like(y_std[..., :1])
        elif len(batch) == 4:
            enc_x, dec_x, y_std, mask = batch
        else:
            raise ValueError("Unexpected batch format")

        # Same device and dtype for everything.
        enc_x = enc_x.to(device=device, dtype=torch.float32)
        dec_x = dec_x.to(device=device, dtype=torch.float32)
        y_std = y_std.to(device=device, dtype=torch.float32)
        mask  = mask.to(device=device, dtype=torch.float32)

        # Prediction in the standardized scale.
        yhat_std = model(enc_x, dec_x)

        # Only the outputs are transformed back to the original scale.
        yhat = inverse_standardize_y_by_index(yhat_std, mean, std, y_pos)
        y    = inverse_standardize_y_by_index(y_std,     mean, std, y_pos)

        # Accumulate the true number of valid points. The division guard is
        # applied once at the end, so a fully masked batch no longer adds a
        # phantom point to the denominator.
        total_sse += ((yhat - y) ** 2 * mask).sum().item()
        total_cnt += mask.sum().item()

        # R^2 and correlation are averaged with the batch size as weight.
        r2    = masked_r2(yhat, y, mask).item()
        corr  = masked_corr(yhat, y, mask).item()
        bs = enc_x.size(0)
        total_r2   += r2   * bs
        total_corr += corr * bs
        n += bs

    mse  = total_sse / max(total_cnt, 1.0)
    rmse = mse ** 0.5
    r2   = total_r2   / max(n, 1)
    corr = total_corr / max(n, 1)

    return {"mse": mse, "rmse": rmse, "r2": r2, "corr": corr}

## 学習（交差検証のループ）
### ※各ループにおいて、重みやバイアスの初期値を変えて、5回繰り返す

In [13]:
# Leave-one-flood-out cross-validation: each flood dataset is used once as the
# validation set while the model is trained on all the others. Every fold is
# repeated once per seed to average out the effect of the initial weights.
num_floods = len(datasets)
seeds = [0, 1, 2, 3, 4]               # seeds that determine the initial model weights
y_pos = [Fe+Fd]                       # position of the target column in mean / std

# One row per fold, one column per seed (sized from `seeds`, not hard-coded).
rmse_list = np.zeros((num_floods, len(seeds)))
r2_list   = np.zeros((num_floods, len(seeds)))
rmse_list_train = np.zeros((num_floods, len(seeds)))
r2_list_train   = np.zeros((num_floods, len(seeds)))

for i in range(num_floods):
    # ----- 1. split into validation and training data -----
    ds_val = datasets[i]
    ds_train = ConcatDataset([datasets[j] for j in range(num_floods) if j != i])

    valLoader = DataLoader(ds_val, batch_size=cfg["batch_size"], shuffle=False, collate_fn=collate_fn)
    trainLoader = DataLoader(ds_train, batch_size=cfg["batch_size"], shuffle=True, collate_fn=collate_fn)

    for s_id, seed in enumerate(seeds):

        # Seed every RNG before the model is built and initialized.
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        model = Seq2SeqLSTM(in_enc=cfg["in_enc"], in_dec=cfg["in_dec"], out_dim=cfg["out_dim"],
                            enc_hidden=cfg["enc_hidden"], dec_hidden=cfg["dec_hidden"],
                            enc_layers=cfg["enc_layers"], dec_layers=cfg["dec_layers"],
                            bridge_mode=cfg["bridge_mode"], dropout=cfg["dropout"],
                            bidirectional_enc=cfg["bidirectional_enc"],
                            head_activation=cfg["head_activation"],
                            use_conv=cfg["use_conv"]
                           ).to(device)
        optimizer = optim.Adam(model.parameters(), lr=cfg["lr"], weight_decay=cfg["weight_decay"])

        # Re-initialize the weights in place (the optimizer keeps referring to
        # the same parameter tensors).
        model.apply(init_weights)

        best_val = float("inf")
        best_epoch = 0
        epochs_no_improve = 0
        best_model_state = None
        best_opt_state = None

        patience = cfg["patience"]
        min_delta = 1e-4  # minimum decrease of val_loss that counts as an improvement

        # Training loop with early stopping.
        for epoch in range(1, cfg["epochs"] + 1):
            # ---- 1. train ----
            train_loss = train_one_epoch(model, trainLoader, optimizer)

            # ---- 2. validate ----
            val_metrics = evaluate(model, valLoader)
            val_loss = float(val_metrics["loss"])

            # ---- 3. log ----
            if epoch % cfg["print_every"] == 0:
                print(f"[{epoch}/{cfg['epochs']}] "
                      f"train_loss={train_loss:.4f} | "
                      f"val_loss={val_loss:.4f} "
                      f"val_rmse={val_metrics['rmse']:.4f} "
                      f"val_r2={val_metrics['r2']:.4f} "
                      f"val_corr={val_metrics['corr']:.4f}")

            # ---- 4. improvement check ----
            if best_val - val_loss > min_delta:
                best_val = val_loss
                best_epoch = epoch
                epochs_no_improve = 0

                # Keep a detached copy of the best weights ...
                best_model_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
                # ... and of the optimizer state at that point.
                best_opt_state = {
                    "state": {
                        k: {kk: (vv.detach().clone() if torch.is_tensor(vv) else vv)
                            for kk, vv in v.items()}
                        for k, v in optimizer.state_dict()["state"].items()
                    },
                    "param_groups": [dict(g) for g in optimizer.state_dict()["param_groups"]],
                }

            else:
                epochs_no_improve += 1

            # ---- 5. early stopping ----
            if epochs_no_improve >= patience:
                print(f"Early stopping at epoch {epoch} "
                      f"(best epoch={best_epoch}, val_loss={best_val:.4f})")
                break

        # Restore the weights of the best validation epoch. Without this the
        # metrics below would be computed with the weights of the last epoch,
        # i.e. after `patience` epochs without improvement.
        # best_model_state stays None only if val_loss never improved (e.g. NaN).
        if best_model_state is not None:
            model.load_state_dict(best_model_state)

        # Final metrics in the original (de-standardized) scale.
        metrics = evaluate_original_scale_by_index(model, valLoader, mean, std, data.columns, y_pos)
        metrics_train = evaluate_original_scale_by_index(model, trainLoader, mean, std, data.columns, y_pos)

        rmse_list[i, s_id] = metrics["rmse"]
        r2_list[i, s_id] = metrics["r2"]
        rmse_list_train[i, s_id] = metrics_train["rmse"]
        r2_list_train[i, s_id] = metrics_train["r2"]

        print(f"{i+1} 番目の {s_id+1} 回目の学習が終了しました。")


print("すべての学習が終了しました。")


[1/100] train_loss=0.9322 | val_loss=0.1953 val_rmse=0.4365 val_r2=-98403.5698val_corr=0.1160
[2/100] train_loss=0.7585 | val_loss=0.1341 val_rmse=0.3525 val_r2=-48662.5853val_corr=0.0940
[3/100] train_loss=0.5183 | val_loss=0.2155 val_rmse=0.4562 val_r2=-94221.2457val_corr=0.0897
[4/100] train_loss=0.4976 | val_loss=0.1237 val_rmse=0.3385 val_r2=-41366.5299val_corr=0.1139
[5/100] train_loss=0.4095 | val_loss=0.1195 val_rmse=0.3392 val_r2=-55366.4548val_corr=0.1299
[6/100] train_loss=0.3714 | val_loss=0.0927 val_rmse=0.2970 val_r2=-27819.9314val_corr=0.1369
[7/100] train_loss=0.3040 | val_loss=0.0647 val_rmse=0.2460 val_r2=-8760.2179val_corr=0.1354
[8/100] train_loss=0.2744 | val_loss=0.0658 val_rmse=0.2492 val_r2=-12126.0094val_corr=0.1258
[9/100] train_loss=0.2578 | val_loss=0.0578 val_rmse=0.2343 val_r2=-10131.5868val_corr=0.1391
[10/100] train_loss=0.2469 | val_loss=0.0517 val_rmse=0.2244 val_r2=-10512.2095val_corr=0.1599
[11/100] train_loss=0.2227 | val_loss=0.0880 val_rmse=0.2899

## 学習結果の平均化・保存

In [14]:
# Mean per row, i.e. per validation fold, averaged over the seeds
rmse_row_mean = rmse_list.mean(axis=1)
r2_row_mean = r2_list.mean(axis=1)

rmse_row_mean_train = rmse_list_train.mean(axis=1)
r2_row_mean_train = r2_list_train.mean(axis=1)

# Overall mean across all folds
rmse_mean = rmse_row_mean.mean()
r2_mean   = r2_row_mean.mean()
rmse_mean_train = rmse_row_mean_train.mean()
r2_mean_train   = r2_row_mean_train.mean()

# Collect the per-fold results in one table
df_result = pd.DataFrame({
    "RMSE_row_mean_val": rmse_row_mean,
    "R2_row_mean_val": r2_row_mean,
    "RMSE_row_mean_train": rmse_row_mean_train,
    "R2_row_mean_train": r2_row_mean_train})

# Append the overall means as an extra row labelled "overall_mean"
df_result.loc["overall_mean"] = [rmse_mean, r2_mean, rmse_mean_train, r2_mean_train]

# --- Excel export ---
# One sheet with the averages plus one sheet per raw (fold x seed) matrix.
with pd.ExcelWriter(save_path, engine="openpyxl") as writer:
    df_result.to_excel(writer, sheet_name="average")
    pd.DataFrame(rmse_list).to_excel(writer, sheet_name="rmse_list", index=False, header=False)
    pd.DataFrame(r2_list).to_excel(writer, sheet_name="r2_list", index=False, header=False)
    pd.DataFrame(rmse_list_train).to_excel(writer, sheet_name="rmse_list_train", index=False, header=False)
    pd.DataFrame(r2_list_train).to_excel(writer, sheet_name="r2_list_train", index=False, header=False)
    
print("結果をExcelファイルに保存しました。")

結果をExcelファイルに保存しました。
