In [1]:
import torch
import torch.nn as nn
import torch.optim as optim


import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

In [None]:

# Sanity-check code (to be removed later).
# Reads the hourly-average workbook and turns it into a float32 matrix.
data_path = r"C:\Users\ryoya\MasterThesis\MT_Furuie\data\Miwa_hourlyAve_2019_2025_processed.xlsx"

df = pd.read_excel(data_path, header=0).apply(
    pd.to_numeric, errors="coerce"  # anything non-numeric becomes NaN
)
all_data = df.to_numpy(dtype=np.float32)

# Preview rows 1 through 9 of the matrix.
print(all_data[1:10])

In [None]:
# Sanity-check code (to be removed later).
# Reads the table of flood start/end row numbers as an integer array.

flood_idx_path = r"C:\Users\ryoya\MasterThesis\MT_Furuie\data\Miwa_LSTM_Data\Trial_3\Miwa_flood_idx.xlsx"
flood_idx = pd.read_excel(flood_idx_path, header=0).to_numpy().astype(int)

print(flood_idx)          # whole table
print(flood_idx[1, 0])    # first column of the second row
print(len(flood_idx))     # number of rows
print(flood_idx[2:10])    # rows 2 through 9

In [None]:
# Sanity-check code (to be removed later).
# For every flood event, cut out the rows from LOOK_BACK_HOURS before its
# start up to and including its end, then stack all windows row-wise.

LOOK_BACK_HOURS = 72  # history prepended in front of each flood start

flood_windows = []
for flood_start, flood_end in flood_idx[:, :2]:
    # Row numbers in flood_idx are 1-based, Python is 0-based: subtract 1.
    # Clamp at 0 so an early flood cannot produce a negative slice start,
    # which NumPy would silently interpret as "count from the end".
    sIdx = max(int(flood_start) - LOOK_BACK_HOURS - 1, 0)
    # The slice end is exclusive, so the 1-based end row is used unchanged.
    eIdx = int(flood_end)

    flood_windows.append(all_data[sIdx:eIdx, :])

    print(sIdx, eIdx)

# Concatenate once instead of re-allocating with vstack on every iteration.
flood_data = np.concatenate(flood_windows, axis=0)

print(flood_data.shape[0])
print(flood_data.shape[1])


In [None]:
# Column-wise statistics of the flood-period rows.
# The loader coerces non-numeric cells to NaN; the NaN-aware reductions keep a
# single missing cell from turning a whole column's mean/std into NaN.
mean = np.nanmean(flood_data, axis=0)
std = np.nanstd(flood_data, axis=0)

# Row 0 holds the means, row 1 the standard deviations.
std_prms = np.vstack((mean, std))

print(std_prms)

print(std_prms.shape)

In [None]:
# Standardise every column with the flood-period mean / std.
# A constant column has std == 0 (and an all-NaN column has std == NaN);
# dividing by it would fill the column with NaN/inf, so those divisors are
# replaced by 1, which leaves such a column as (value - mean).
std_safe = std.copy()
std_safe[~(std > 0)] = 1.0

all_data_std = (all_data - mean) / std_safe

print(all_data_std[0:10,:])
print(all_data_std.shape)

In [None]:
# Pick the (mean, std) pairs of columns 4 and 8 out of the full parameter table
# (row 0 = mean, row 1 = std).
selected_cols = [4, 8]
x_std_prm = std_prms[:, selected_cols]

# Show the full table followed by the extracted subset.
print(std_prms)
print(x_std_prm)

In [None]:
# Toy example of column-wise standardisation.
# The variables carry a `toy_` prefix so that running this cell does not
# overwrite the `mean` / `std` computed from the real flood data.
toy_data = np.array([[1, 2, 3],
                     [1, 3, 5],
                     [1, 5, 7],
                     [1, 7, 9]])

toy_mean = np.mean(toy_data, axis=0)
toy_std = np.std(toy_data, axis=0)

# Column 0 is constant, so its std is 0 and this division yields NaN (0 / 0)
# for that column, together with a RuntimeWarning.
toy_standardized = (toy_data - toy_mean) / toy_std

print(toy_mean)
print(toy_std)
print(toy_standardized)


# 書きかけコード（不採用）

In [None]:
# ==== 1. データセット定義 ====

class FloodSeq2SeqDataset(Dataset):
    """Sliding-window dataset over flood periods for an encoder/decoder model.

    Every sample is anchored at a row ``t`` that lies inside one of the flood
    periods and consists of three float32 tensors:

    * encoder input: rows ``[t - look_back, t)`` of the ``xCol_1`` columns,
    * decoder input: rows ``[t, t + lead_time)`` of the ``xCol_2`` columns,
    * target:        rows ``[t, t + lead_time)`` of the ``yCol`` columns.

    NOTE(review): the original draft stopped before ``__getitem__`` returned
    anything. The sample layout above is inferred from the draft's window
    arithmetic and parameter descriptions; confirm it matches the intended
    model inputs.
    """

    def __init__(self, data_path, flood_idx_path, xCol_1, xCol_2, yCol, x_std_prm_1, x_std_prm_2, y_std_prm, look_back=72, lead_time=240, phase="val"):
        '''
        data_path: Excel file holding the data of the whole period
        flood_idx_path: Excel file holding the start / end row number of each
            target flood (1-based, end row inclusive)
        xCol_1: column index/indices of the encoder input
        xCol_2: column index/indices of the decoder input
        yCol: column index/indices of the output
        x_std_prm_1, x_std_prm_2: mean / std of the encoder and decoder input
            columns (row 0: mean, row 1: std); ignored when phase == 'train'
        y_std_prm: mean / std of the output columns (row 0: mean, row 1: std);
            ignored when phase == 'train'
        look_back: look-back period of the forecast (hours)
        lead_time: lead time of the forecast (hours)
        phase: with 'train' the standardisation parameters are computed from
            the flood periods; otherwise the supplied ones are used

        Raises ValueError when the flood table is empty (train phase) or when
        required standardisation parameters are missing or mis-shaped.
        '''
        # Load the data table; non-numeric cells become NaN.
        df = pd.read_excel(data_path, header=0)
        df = df.apply(pd.to_numeric, errors="coerce")
        self.all_data_raw = df.to_numpy(dtype=np.float32)

        self.flood_idx = pd.read_excel(flood_idx_path, header=0).values.astype('int')

        self.xCol_1 = xCol_1
        self.xCol_2 = xCol_2
        self.yCol = yCol

        self.look_back = look_back
        self.lead_time = lead_time
        # The window code refers to the forecast horizon as `forecast_len`.
        self.forecast_len = lead_time

        self._standardize(phase, x_std_prm_1, x_std_prm_2, y_std_prm)
        self._build_valid_indices()

    @staticmethod
    def _as_std_prm(std_prm, n_cols, name):
        """Validate caller-supplied (mean, std) parameters as a (2, n_cols) float32 array."""
        if std_prm is None:
            raise ValueError(f"{name} is required when phase is not 'train'")
        arr = np.asarray(std_prm, dtype=np.float32).reshape(2, -1)
        if arr.shape[1] != n_cols:
            raise ValueError(
                f"{name} must have shape (2, {n_cols}), got {arr.shape}"
            )
        return arr

    @staticmethod
    def _apply_standardization(values, std_prm):
        """Return (values - mean) / std as float32, dividing by 1 where std is 0 or NaN."""
        mean = std_prm[0]
        safe_std = np.array(std_prm[1], dtype=np.float32, copy=True)
        safe_std[~(safe_std > 0)] = 1.0
        return ((values - mean) / safe_std).astype(np.float32, copy=False)

    def _standardize(self, phase, x_std_prm_1, x_std_prm_2, y_std_prm):
        """Set the standardisation parameters and the standardised enc/dec/label arrays."""
        # Normalise the column selectors so every slice below stays 2-D,
        # whether a single index or a list of indices was given.
        enc_cols = np.atleast_1d(self.xCol_1)
        dec_cols = np.atleast_1d(self.xCol_2)
        y_cols = np.atleast_1d(self.yCol)

        if phase == 'train':
            if self.flood_idx.shape[0] == 0:
                raise ValueError("flood index table is empty; cannot compute statistics")

            # Gather the rows of every flood period including its look-back lead-in.
            windows = []
            for start_row, end_row in self.flood_idx[:, :2]:
                # 1-based -> 0-based start; clamp so an early flood does not
                # wrap around to the end of the table.
                s_idx = max(int(start_row) - self.look_back - 1, 0)
                e_idx = int(end_row)  # slice end is exclusive
                windows.append(self.all_data_raw[s_idx:e_idx, :])
            flood_all = np.concatenate(windows, axis=0)

            # NaN-aware so isolated missing cells do not poison a column.
            mean = np.nanmean(flood_all, axis=0)
            std = np.nanstd(flood_all, axis=0)
            std_prm = np.vstack((mean, std)).astype(np.float32, copy=False)

            self.x_std_prm_1 = std_prm[:, enc_cols]
            self.x_std_prm_2 = std_prm[:, dec_cols]
            self.y_std_prm = std_prm[:, y_cols]
        else:
            self.x_std_prm_1 = self._as_std_prm(x_std_prm_1, enc_cols.size, "x_std_prm_1")
            self.x_std_prm_2 = self._as_std_prm(x_std_prm_2, dec_cols.size, "x_std_prm_2")
            self.y_std_prm = self._as_std_prm(y_std_prm, y_cols.size, "y_std_prm")

        # Standardise each column group with its own parameters, so the
        # train and non-train phases go through exactly the same transform.
        self.enc_data = self._apply_standardization(self.all_data_raw[:, enc_cols], self.x_std_prm_1)
        self.dec_data = self._apply_standardization(self.all_data_raw[:, dec_cols], self.x_std_prm_2)
        self.label_data = self._apply_standardization(self.all_data_raw[:, y_cols], self.y_std_prm)

    def _build_valid_indices(self):
        """Collect the anchor rows whose look-back and forecast windows fit inside the table."""
        n_rows = min(len(self.enc_data), len(self.dec_data), len(self.label_data))

        # Candidate anchors: every 0-based row inside a flood period.
        self.flood_indices = [
            t
            for start_row, end_row in self.flood_idx[:, :2]
            for t in range(int(start_row) - 1, int(end_row))
        ]

        # Keep only the anchors with complete windows.
        self.valid_indices = []
        for idx in self.flood_indices:
            sample_start = idx - self.look_back
            sample_end = idx + self.forecast_len

            if sample_start < 0:
                continue
            if sample_end > n_rows:
                continue
            self.valid_indices.append(idx)

    def __len__(self):
        """Number of samples with complete windows."""
        return len(self.valid_indices)

    def __getitem__(self, index):
        """Return (encoder_input, decoder_input, target) for the index-th valid anchor."""
        idx = self.valid_indices[index]

        sample_start = idx - self.look_back
        sample_end = idx + self.forecast_len

        enc_x = torch.from_numpy(self.enc_data[sample_start:idx])
        dec_x = torch.from_numpy(self.dec_data[idx:sample_end])
        label = torch.from_numpy(self.label_data[idx:sample_end])
        return enc_x, dec_x, label

In [None]:
# ==== 2. Build the DataLoader ====
# NOTE(review): `TimeSeriesDataset` is not defined anywhere in the visible part
# of this notebook (the dataset class defined above is `FloodSeq2SeqDataset`,
# which takes different arguments) — confirm which class is intended before
# running this cell.
excel_file = "timeseries_data.xlsx"  # Excel workbook to read from
dataset = TimeSeriesDataset(excel_file, look_back=72, forecast_len=240)

# Mini-batches of 32 samples, reshuffled on every pass over the dataset.
batch_size = 32
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# ===== モデル定義 =====
class Seq2SeqLSTM(nn.Module):
    """LSTM encoder/decoder that forecasts a sequence autoregressively.

    A single-layer LSTM encodes the input history. Its final hidden and cell
    states initialise the first layer of a (by default two-layer) LSTM
    decoder; the remaining decoder layers start from zeros. The decoder is
    unrolled one step at a time, feeding each prediction back as the next
    input (no teacher forcing).

    Args:
        input_size: number of features per encoder time step.
        hidden_enc: hidden size of the encoder LSTM.
        hidden_dec: hidden size of the decoder LSTM.
        output_size: number of features predicted per time step.
        num_layers_dec: number of stacked decoder LSTM layers.
    """

    def __init__(self, input_size, hidden_enc, hidden_dec, output_size, num_layers_dec=2):
        super().__init__()
        # Encoder (one layer)
        self.encoder = nn.LSTM(input_size, hidden_enc, num_layers=1, batch_first=True)
        # Decoder (num_layers_dec layers); it consumes its own previous output
        self.decoder = nn.LSTM(output_size, hidden_dec, num_layers=num_layers_dec, batch_first=True)
        self.fc = nn.Linear(hidden_dec, output_size)

        # When the hidden sizes differ, project the encoder state to the
        # decoder's hidden size.
        if hidden_enc != hidden_dec:
            self.enc_to_dec_h = nn.Linear(hidden_enc, hidden_dec)
            self.enc_to_dec_c = nn.Linear(hidden_enc, hidden_dec)
        else:
            self.enc_to_dec_h = None
            self.enc_to_dec_c = None

    def forward(self, src, tgt_len):
        """Encode ``src`` and predict ``tgt_len`` future steps.

        Args:
            src: tensor of shape (batch, past_seq_len, input_size).
            tgt_len: number of time steps to predict (non-negative int).

        Returns:
            Tensor of shape (batch, tgt_len, output_size); empty along the
            time axis when ``tgt_len`` is 0.

        Raises:
            ValueError: if ``tgt_len`` is negative.
        """
        if tgt_len < 0:
            raise ValueError(f"tgt_len must be non-negative, got {tgt_len}")

        batch_size = src.size(0)

        # --- Encoder ---
        _, (h, c) = self.encoder(src)  # h, c: (1, batch, hidden_enc)

        # --- Project the encoder state for the decoder ---
        # nn.Linear acts on the last dimension, so no transposes are needed.
        if self.enc_to_dec_h is not None:
            h = self.enc_to_dec_h(h)
            c = self.enc_to_dec_c(c)

        # Match the decoder depth: layer 0 gets the encoder state, the upper
        # layers start from zeros.
        extra_layers = self.decoder.num_layers - 1
        upper = h.new_zeros(extra_layers, batch_size, self.decoder.hidden_size)
        h_dec = torch.cat([h, upper], dim=0)
        c_dec = torch.cat([c, upper], dim=0)

        # torch.cat cannot concatenate an empty list, so handle the
        # zero-step forecast explicitly.
        if tgt_len == 0:
            return h.new_zeros(batch_size, 0, self.fc.out_features)

        # --- Decoder ---
        outputs = []
        dec_input = h.new_zeros(batch_size, 1, self.fc.out_features)  # initial input is 0

        for _ in range(tgt_len):
            out, (h_dec, c_dec) = self.decoder(dec_input, (h_dec, c_dec))
            pred = self.fc(out)  # (batch, 1, output_size)
            outputs.append(pred)
            dec_input = pred  # feed the prediction back in (no teacher forcing)

        return torch.cat(outputs, dim=1)  # (batch, tgt_len, output_size)

In [None]:
# ===== Usage example =====
# Fix the seed so the weight initialisation and the dummy data — and therefore
# the printed loss — are the same on every Restart & Run All.
torch.manual_seed(0)

batch_size = 16
input_size = 10    # number of input features
output_size = 1    # number of output features
hidden_enc = 64
hidden_dec = 64
look_back = 72
forecast_len = 240

model = Seq2SeqLSTM(input_size, hidden_enc, hidden_dec, output_size)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Dummy data
src = torch.randn(batch_size, look_back, input_size)
target = torch.randn(batch_size, forecast_len, output_size)

# One example training step
model.train()
optimizer.zero_grad()
pred = model(src, forecast_len)
loss = criterion(pred, target)
loss.backward()
optimizer.step()

print("Loss:", loss.item())


In [None]:
# Training loop
# `num_epochs` is not defined in any cell visible above, so a fresh kernel
# would stop here with a NameError; define it next to its only use.
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass on the full (dummy) batch and loss against the target.
    pred = model(src, forecast_len)
    loss = criterion(pred, target)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item():.6f}")