In [1]:
import json
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import pandas as pd
from loguru import logger
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Use Microsoft YaHei as the sans-serif font, and disable the Unicode minus
# glyph to work around the minus-sign display problem.
# NOTE(review): the original note glossed this font with the Chinese name of
# Microsoft JhengHei, which is a different font -- confirm which is intended.
rcParams.update(
    {
        "font.sans-serif": ["Microsoft YaHei"],
        "axes.unicode_minus": False,
    }
)

In [5]:
def create_time_window_sequences(
    numerical_features, one_hot_features, target, window_size, step=1
):
    """Build sliding-window samples for next-row price prediction.

    Each sample stacks ``window_size`` consecutive history rows followed by
    one "query" row (the row whose target is to be predicted). Every row is
    laid out as ``[other numerical features, unit price, one-hot features]``.
    The query row keeps its own features but its unit price is set to 0, so
    the value being predicted is not part of the input.

    Args:
        numerical_features: 2-D NumPy array. Column 0 is treated as the unit
            price; the remaining columns are the other numerical features.
        one_hot_features: 2-D array indexed by the same rows as
            ``numerical_features``.
        target: Sequence of target values indexed by the same rows.
        window_size: Number of history rows per sample (must be >= 0).
        step: Stride between the starts of consecutive windows (must be >= 1).

    Returns:
        Tuple ``(sequences, targets)``:
        * ``sequences`` has shape
          ``(num_samples, window_size + 1, num_numerical + num_one_hot)``.
        * ``targets`` holds ``target[i + window_size]`` for every window
          start ``i``.
        When the input is too short for a single window, both arrays are
        empty but keep those trailing dimensions.

    Raises:
        ValueError: If ``window_size`` is negative or ``step`` is below 1.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be >= 0, got {window_size}")
    if step < 1:
        raise ValueError(f"step must be >= 1, got {step}")

    # Column 0 is the unit price; everything after it is "other" numeric data.
    unit_price = numerical_features[:, 0].reshape(-1, 1)
    other_features = numerical_features[:, 1:]

    # The query row's price is unknown at prediction time, so it is zeroed.
    masked_price = 0

    sequences, targets = [], []
    n_rows = len(other_features)
    for start in range(0, n_rows - window_size, step):
        stop = start + window_size  # index of the row being predicted

        # History rows: [other numeric | unit price | one-hot].
        history = np.hstack(
            [
                other_features[start:stop],
                unit_price[start:stop],
                one_hot_features[start:stop],
            ]
        )

        # Query row uses the same column layout, with the price masked.
        query_row = np.hstack(
            [other_features[stop], masked_price, one_hot_features[stop]]
        )

        sequences.append(np.vstack([history, query_row]))
        targets.append(target[stop])

    if not sequences:
        # np.array([]) would collapse to shape (0,); keep the trailing
        # dimensions so callers can still read .shape[1:] safely.
        one_hot_shape = np.shape(one_hot_features)
        one_hot_width = one_hot_shape[1] if len(one_hot_shape) > 1 else 0
        n_features = numerical_features.shape[1] + one_hot_width
        empty_sequences = np.empty((0, window_size + 1, n_features))
        empty_targets = np.empty((0,) + tuple(np.shape(target)[1:]))
        return empty_sequences, empty_targets

    return np.array(sequences), np.array(targets)

class LSTMModel(nn.Module):
    """Stacked LSTM followed by a small three-layer dense head.

    The network reads a batch of sequences, keeps only the LSTM output at the
    last time step, and maps it through ``hidden_dim -> 8 -> 4 -> output_dim``
    (Tanh after the first linear layer, GELU after the second, no activation
    on the output).

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_dim: Size of the final output vector.
        dropout: Dropout probability passed to ``nn.LSTM``; ignored (set to
            0.0) when ``num_layers`` is 1.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Recurrent encoder. Inputs are (batch, time_steps, input_dim).
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            # nn.LSTM applies dropout only between stacked layers, so pass 0.0
            # for a single-layer network.
            dropout=dropout if num_layers > 1 else 0.0,
        )

        # Dense head applied to the last time step's hidden output.
        self.fc1 = nn.Linear(hidden_dim, 8)
        self.fc2 = nn.Linear(8, 4)
        self.fc3 = nn.Linear(4, output_dim)

        # Activations. The attribute name `tahn` (sic) is kept as-is so code
        # that accesses it by name keeps working.
        self.gelu = nn.GELU()
        self.tahn = nn.Tanh()
        # Not used by forward(); retained so the module's attributes are unchanged.
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Run the model on ``x`` of shape (batch, time_steps, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # With no initial state given, nn.LSTM starts from zero hidden/cell
        # states created with the input's dtype and device. This matches the
        # previous explicit zeros but also works for non-float32 models.
        out, _ = self.lstm(x)

        # Keep only the output of the last time step.
        out = out[:, -1, :]

        out = self.tahn(self.fc1(out))
        out = self.gelu(self.fc2(out))
        return self.fc3(out)



In [6]:
# Hyperparameter values (presumably the outcome of a tuning run that is not
# part of this section -- confirm their origin).
best_params = dict(
    hidden_dim=64,
    num_layers=2,
    dropout=0.1,
    learning_rate=1e-4,
    weight_decay=1e-5,
    batch_size=8,
)

# Per-feature mean and standard deviation used for standardization.
# Each holds 16 values and is reshaped to (1, 1, num_features) so it
# broadcasts against (samples, time_steps, num_features) arrays.
x_mean = np.array(
    [
        167.1636969,
        3.389284115,
        1.927202627,
        2.585403711,
        0.982602856,
        25.57844386,
        12582.38437,
        0.001363799,
        0.946743943,
        2.385796727,
        3.582349137,
        5.061554152,
        13.0768519,
        20.77470275,
        2018.693194,
        6.699606985,
    ]
).reshape(1, 1, -1)

x_std = np.array(
    [
        265.9473672,
        1.146308572,
        0.441717588,
        1.178277433,
        0.1307462,
        36.91268605,
        3884.340303,
        0.000676091,
        1.327738937,
        2.735231276,
        4.175769028,
        6.34560646,
        12.99347623,
        20.84464324,
        3.455128533,
        3.394999201,
    ]
).reshape(1, 1, -1)

print("x_mean shape:", x_mean.shape)
print("x_std shape:", x_std.shape)

x_mean shape: (1, 1, 16)
x_std shape: (1, 1, 16)


In [None]:
# New sample: numerical features (a single row with 20 values).
new_numerical_data = np.array([
    [50, 120.5, 5, 3000, 3, 2, 1, 0, 10, 1000, 20, 15, 30, 40, 5, 2, 3, 0, 1, 0]
])

# New sample: one-hot encoded features (a single row).
new_one_hot_data = np.array([
    [1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
])
# Concatenate the numerical and one-hot features column-wise into one row.
X_new = np.hstack([new_numerical_data, new_one_hot_data])
# NOTE(review): x_mean / x_std hold 16 values (see the printed shapes), while
# X_new has 20 numerical columns plus the one-hot columns. The subtraction
# below therefore cannot broadcast and raises ValueError as written. Confirm
# which columns the statistics apply to and align the inputs before running.
X_new = (X_new - x_mean.reshape(-1)) / x_std.reshape(-1)  # standardize
X_new = np.nan_to_num(X_new, nan=0.0)  # replace NaN with 0
# Add a leading (batch) dimension: (1, num_columns) -> (1, 1, num_columns).
X_new_tensor = torch.FloatTensor(X_new).unsqueeze(0)
