In [1]:
import os
import warnings

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import load_model

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Ridge
from sklearn.ensemble import StackingRegressor
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import joblib

import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

In [82]:
# Window sizes for the LSTM.
LookBackNum = 12  # number of past time steps the LSTM looks at
ForecastNum = 48  # number of time steps to forecast

# Load the training data.
# The path is built with os.path.join: the previous literal only worked because
# '\E' and '\A' are unknown escape sequences that Python passes through (with a
# warning on recent versions), and it hard-coded Windows separators.
DataName = os.path.join(os.getcwd(), 'ExampleTrainData(AVG)', 'AvgDATA_17.csv')
SourceData = pd.read_csv(DataName, encoding='utf-8')

# Weather columns used both as regression inputs and as LSTM inputs/targets:
# wind speed, air pressure, temperature, humidity, sunlight.
FeatureColumns = ['WindSpeed(m/s)', 'Pressure(hpa)', 'Temperature(°C)', 'Humidity(%)', 'Sunlight(Lux)']

# Regression: inputs are the weather columns, target is the generated power.
Regression_X_train = SourceData[FeatureColumns].values
Regression_y_train = SourceData[['Power(mW)']].values

# LSTM: works on the same weather columns.
AllOutPut = SourceData[FeatureColumns].values

# Min-max normalization. The scaler is fitted exactly once and then only
# applied; calling fit_transform again would silently refit the shared scaler.
LSTM_MinMaxModel = MinMaxScaler().fit(AllOutPut)
AllOutPut_MinMax = LSTM_MinMaxModel.transform(AllOutPut)
Regression_X_train_norm = LSTM_MinMaxModel.transform(Regression_X_train)

# Each sample is the LookBackNum rows preceding row i (X_train) paired with
# row i itself (y_train).
X_train = np.array([
    AllOutPut_MinMax[i - LookBackNum:i, :]
    for i in range(LookBackNum, len(AllOutPut_MinMax))
])
y_train = np.array(AllOutPut_MinMax[LookBackNum:])

# Reshape to (samples, timesteps, features); the feature count follows the
# selected columns instead of a hard-coded 5.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], len(FeatureColumns)))

LSTM model training

In [None]:
# Two stacked LSTM layers followed by dropout and a dense output layer.
# The model maps a look-back window of features to the next time step's
# features, so the input shape and output width are taken from the training
# arrays rather than hard-coded.
regressor = Sequential([
    LSTM(units=128, return_sequences=True, input_shape=X_train.shape[1:]),
    LSTM(units=64),
    Dropout(0.2),
    # Output layer: one unit per feature in y_train.
    Dense(units=y_train.shape[1]),
])
regressor.compile(optimizer='adam', loss='mean_squared_error')

# Train the model.
regressor.fit(X_train, y_train, epochs=100, batch_size=128)

# Save the model; the inference cell loads it back from this file name.
regressor.save('WheatherLSTM.h5')
print('Model Saved')

Stacking model training

In [49]:
# Predict the features of every training window with the LSTM.
lstm_features = regressor.predict(X_train)

# Fuse features: the first LookBackNum rows have no LSTM window, so they are
# dropped to keep the measured rows aligned with the LSTM predictions.
stack_input = np.hstack([Regression_X_train_norm[LookBackNum:], lstm_features])

# Flatten the (n, 1) power column; scikit-learn regressors expect a 1-D target
# and otherwise only convert it with a warning.
stack_target = Regression_y_train[LookBackNum:].ravel()

# Hold out 20% of the rows for evaluation.
X_train_stack, X_test_stack, y_train_stack, y_test_stack = train_test_split(
    stack_input, stack_target, test_size=0.2, random_state=42
)

# Base models.
# NOTE(review): this estimator is registered under the key 'ridge' but is a
# plain LinearRegression; the key is kept unchanged -- confirm which model was
# intended.
linear_model = LinearRegression()
xgb = XGBRegressor(n_estimators=100, learning_rate=0.1)

# Stacking regressor: base model predictions are combined by a Ridge model.
stacking_regressor = StackingRegressor(
    estimators=[('ridge', linear_model), ('xgb', xgb)],
    final_estimator=Ridge()
)

# Fit the stacking regressor.
stacking_regressor.fit(X_train_stack, y_train_stack)

# Evaluate on the held-out rows, then persist the fitted model.
score = stacking_regressor.score(X_test_stack, y_test_stack)
joblib.dump(stacking_regressor, 'stacking_regressor.pkl')
print(f'Stacking Regressor R^2 score: {score}')

[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
Stacking Regressor R^2 score: 0.9613281998677713


First: use LSTM model to predict

In [None]:
# Load the trained LSTM.
regressor = load_model('WheatherLSTM.h5')

# Load the test data; the '序號' column holds the serial numbers to predict.
# os.path.join keeps the path independent of the OS path separator.
DataName = os.path.join(os.getcwd(), 'ExampleTestData', 'upload.csv')
SourceData = pd.read_csv(DataName, encoding='utf-8')
target = ['序號']
EXquestion = SourceData[target].values

inputs = []  # look-back window for the day currently being forecast
# One forecast feature vector per predicted time step, for all days in order.
# The vectors stay in the scaler's normalized space and are not rounded,
# because they are model inputs rather than final answers.
PredictOutput = []

LookBackNum = 12
ForecastNum = 48
expected_features = 5

count = 0
while count < len(EXquestion):
    print('count : ', count)
    # Index the single column explicitly; int() on a 1-element array is
    # deprecated in recent NumPy versions.
    LocationCode = int(EXquestion[count, 0])
    # The last two characters of the serial select the device file; zfill
    # keeps a two-character code for single-digit values.
    strLocationCode = str(LocationCode)[-2:].zfill(2)

    # The previous path literal relied on the unknown escapes '\E' and '\I'.
    DataName = os.path.join(
        os.getcwd(),
        'ExampleTrainData(IncompleteAVG)',
        'IncompleteAvgDATA_' + strLocationCode + '.csv',
    )
    SourceData1 = pd.read_csv(DataName, encoding='utf-8')
    ReferTitle = SourceData1[['Serial']].values
    ReferData = SourceData1[['WindSpeed(m/s)', 'Pressure(hpa)', 'Temperature(°C)', 'Humidity(%)', 'Sunlight(Lux)']].values

    # Reference rows of the same day: the first 8 characters of the serial
    # must match those of the question's serial.
    day_key = str(LocationCode)[:8]
    same_day = np.array(
        [str(int(serial))[:8] == day_key for serial in ReferTitle[:, 0]],
        dtype=bool,
    )
    history = ReferData[same_day]

    # The LSTM was trained on min-max scaled features, so the look-back window
    # must be scaled with the same scaler (LSTM_MinMaxModel is fitted in the
    # data-preparation cell). Raw values would be far outside the trained range.
    if len(history) > 0:
        inputs = list(LSTM_MinMaxModel.transform(history))
    else:
        inputs = []

    # Pad with zero rows when fewer than LookBackNum reference rows exist.
    while len(inputs) < LookBackNum:
        inputs.append(np.zeros(expected_features))

    # Autoregressive forecast: every predicted feature vector is appended to
    # the window and used to predict the following step.
    for _ in range(ForecastNum):
        # Always look at the most recent LookBackNum rows, including the
        # forecasts appended so far.
        window = np.array(inputs[-LookBackNum:], dtype=float)
        NewTest = window.reshape(1, LookBackNum, expected_features)

        predicted = regressor.predict(NewTest, verbose=0)

        # Keep the whole feature vector: feeding back only one scalar would
        # make the next window ragged.
        next_step = predicted[0]
        PredictOutput.append(next_step)
        inputs.append(next_step)

    # Each question day spans ForecastNum rows, so skip ahead to the next day.
    count += ForecastNum

In [None]:
# Debug aid: show the shapes of the normalized regression inputs and of the
# LSTM feature array currently held in memory.
print("Regression_X_train_norm shape: {}".format(Regression_X_train_norm.shape))
print("lstm_features shape: {}".format(lstm_features.shape))

Second: use Stacking model to predict

In [None]:
# The stacking model was saved with joblib.dump, so it must be read back with
# joblib.load; Keras' load_model cannot open this file.
# joblib.load unpickles arbitrary objects: only load files written by this notebook.
stacking_regressor = joblib.load('stacking_regressor.pkl')

if len(PredictOutput) == 0:
    raise ValueError('PredictOutput is empty: run the LSTM forecast cell first.')

# One row per forecast step, one column per value the LSTM produced for it.
lstm_features = np.asarray(PredictOutput, dtype=float).reshape(len(PredictOutput), -1)

# The stacking model was fitted on [normalized measured features | LSTM features].
# Training rows cannot be reused here: they describe other time steps and
# their row count does not match the forecast.
# NOTE(review): no measured features exist for the forecast horizon, so the
# LSTM forecast is used for both halves of the input -- confirm that this is
# the intended inference-time input.
test_features = np.hstack([lstm_features, lstm_features])

# Fail with a clear message instead of an opaque error inside predict().
expected_columns = getattr(stacking_regressor, 'n_features_in_', None)
if expected_columns is not None and test_features.shape[1] != expected_columns:
    raise ValueError(
        f'Stacking model expects {expected_columns} feature columns, '
        f'got {test_features.shape[1]}.'
    )

# Final prediction with the stacking model.
stacking_predictions = stacking_regressor.predict(test_features)

Output the prediction

In [None]:
# Build the result table: predictions go into the '答案' column, next to the
# 'target' column taken from SourceData, with 'target' placed first.
# NOTE(review): SourceData is read with a '序號' column in the inference cell;
# confirm that it really provides a 'target' column as well.
df = (
    pd.DataFrame(stacking_predictions, columns=['答案'])
    .assign(target=SourceData['target'])
    [['target', '答案']]
)

# Write the table to a CSV file with a header row and without the index.
df.to_csv('output.csv', header=True, index=False)
print('Output CSV File Saved')

Output CSV File Saved
