In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# 1. 讀取與基本處理
file_path = 'IMPACT.sensors.csv'
data = pd.read_csv(file_path)

timestamp_column = 'createdAt'
data[timestamp_column] = pd.to_datetime(data[timestamp_column])
data = data.sort_values(by=timestamp_column).reset_index(drop=True)

# 2. 選擇特徵（PM2.5），並四捨五入到小數點後兩位
features = ['pm25']
data_features = data[features].round(2)

# 3. 進行 MinMaxScaler
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data_features)

# 4. 定義序列長度與預測步數
sequence_length = 12  # 過去 120 分鐘作為輸入
prediction_steps = [6, 12, 18, 24, 30, 36, 42, 48]  # 預測 1-8 小時

# 建立序列數據
def create_future_sequences(data, sequence_length, prediction_steps):
    X, y = [], []
    for i in range(len(data) - sequence_length - max(prediction_steps)):
        X.append(data[i:i + sequence_length])
        future_values = [
            np.mean(data[i + sequence_length + p : i + sequence_length + p + 6])
            for p in prediction_steps
        ]
        y.append(np.array(future_values).flatten())
    return np.array(X), np.array(y)

X, y = create_future_sequences(data_normalized, sequence_length, prediction_steps)

# 分割訓練與測試集
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5. 建立 GRU 模型
input_layer = Input(shape=(sequence_length, len(features)))  # len(features) = 1
x = GRU(64, activation='relu', return_sequences=True)(input_layer)
x = GRU(32, activation='relu', return_sequences=False)(x)
dense1 = Dense(64, activation='relu')(x)
output_layer = Dense(len(prediction_steps), activation='linear')(dense1)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mse')

# 6. 訓練模型
history = model.fit(
    X_train, y_train,
    epochs=42,
    batch_size=32,
    validation_data=(X_test, y_test)
)

# 7. 在「縮放後」的測試集上先看 MSE
test_loss = model.evaluate(X_test, y_test)
print("Test Loss (MSE, scaled):", test_loss)

# 8. 做預測
y_pred = model.predict(X_test)  # shape=(samples, len(prediction_steps))

# 9. 反轉換 (inverse_transform) 回到原始尺度後再取 2 位小數
y_pred_reshaped = y_pred.reshape(-1, 1)  # (samples * len(prediction_steps), 1)
y_pred_orig = scaler.inverse_transform(y_pred_reshaped)
y_pred_orig = y_pred_orig.reshape(-1, len(prediction_steps))

y_test_reshaped = y_test.reshape(-1, 1)  # (samples * len(prediction_steps), 1)
y_test_orig = scaler.inverse_transform(y_test_reshaped)
y_test_orig = y_test_orig.reshape(-1, len(prediction_steps))

# 最後再 round(2)
y_pred_orig = np.round(y_pred_orig, 2)
y_test_orig = np.round(y_test_orig, 2)

# 10. 評估 (在原始尺度上)
def evaluate_predictions(y_true_orig, y_pred_orig, prediction_steps):
    print("\nStep-wise Performance Evaluation (Original Scale):")
    for idx, step in enumerate(prediction_steps):
        hours = (step * 10) // 60
        minutes = (step * 10) % 60
        y_true_step = y_true_orig[:, idx]
        y_pred_step = y_pred_orig[:, idx]

        r2 = r2_score(y_true_step, y_pred_step)
        mae = mean_absolute_error(y_true_step, y_pred_step)
        print(f"{hours}h{minutes:02d}min: R²={r2:.2f}, MAE={mae:.2f}")

    # 計算整體（所有步數一起展開）R² 與 MAE
    overall_r2 = r2_score(y_true_orig.flatten(), y_pred_orig.flatten())
    overall_mae = mean_absolute_error(y_true_orig.flatten(), y_pred_orig.flatten())
    print("\nOverall Performance (Original Scale):")
    print(f"Overall R² Score: {overall_r2:.2f}")
    print(f"Overall Mean Absolute Error (MAE): {overall_mae:.2f}")

evaluate_predictions(y_test_orig, y_pred_orig, prediction_steps)

# 11. 保存模型
model.save('pm25_prediction_gru_model.keras')


Epoch 1/42
Epoch 2/42
Epoch 3/42
Epoch 4/42
Epoch 5/42
Epoch 6/42
Epoch 7/42
Epoch 8/42
Epoch 9/42
Epoch 10/42
Epoch 11/42
Epoch 12/42
Epoch 13/42
Epoch 14/42
Epoch 15/42
Epoch 16/42
Epoch 17/42
Epoch 18/42
Epoch 19/42
Epoch 20/42
Epoch 21/42
Epoch 22/42
Epoch 23/42
Epoch 24/42
Epoch 25/42
Epoch 26/42
Epoch 27/42
Epoch 28/42
Epoch 29/42
Epoch 30/42
Epoch 31/42
Epoch 32/42
Epoch 33/42
Epoch 34/42
Epoch 35/42
Epoch 36/42
Epoch 37/42
Epoch 38/42
Epoch 39/42
Epoch 40/42
Epoch 41/42
Epoch 42/42
Test Loss (MSE, scaled): 0.0012595310108736157

Step-wise Performance Evaluation (Original Scale):
1h00min: R²=0.90, MAE=1.24
2h00min: R²=0.90, MAE=1.36
3h00min: R²=0.86, MAE=1.54
4h00min: R²=0.84, MAE=1.59
5h00min: R²=0.81, MAE=1.74
6h00min: R²=0.80, MAE=1.84
7h00min: R²=0.79, MAE=1.93
8h00min: R²=0.78, MAE=2.03

Overall Performance (Original Scale):
Overall R² Score: 0.83
Overall Mean Absolute Error (MAE): 1.66
