In [None]:
import os
import glob
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from catboost import CatBoostRegressor
import joblib
from datetime import datetime

In [None]:
# Configuration parameters shared by the training and forecasting cells
LookBackNum = 12  # number of past rows the LSTM looks back at
ForecastNum = 48  # number of rows to forecast

In [None]:
# Merge the data of all AvgDATA_*.csv files (17 files in the original data set)
def load_all_data(data_dir=None):
    """Read every ``AvgDATA_*.csv`` file in a directory into one DataFrame.

    Args:
        data_dir: Directory that holds the CSV files. Defaults to the current
            working directory.

    Returns:
        A DataFrame with the rows of all matching files stacked in
        lexicographic file-name order and a fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If no file in ``data_dir`` matches the pattern.
    """
    if data_dir is None:
        data_dir = os.getcwd()

    # os.path.join keeps the path valid on every OS; the previous hand-built
    # backslash concatenation was Windows-only and did not even parse.
    pattern = os.path.join(data_dir, 'AvgDATA_*.csv')

    # glob returns files in arbitrary order; sort so the row order (which the
    # LSTM windowing depends on) is reproducible between runs.
    all_files = sorted(glob.glob(pattern))
    if not all_files:
        raise FileNotFoundError(f"No training files match pattern: {pattern}")

    frames = [pd.read_csv(file, encoding='utf-8') for file in all_files]
    return pd.concat(frames, ignore_index=True)

In [None]:
# Load and concatenate every AvgDATA_*.csv file into a single DataFrame.
all_data = load_all_data()

In [None]:
# Prepare the arrays needed by the LSTM and by the regression model.
# Both models read the same five weather columns; the regressor additionally
# uses the power reading as its target.
_weather_columns = [
    'WindSpeed(m/s)',
    'Pressure(hpa)',
    'Temperature(°C)',
    'Humidity(%)',
    'Sunlight(Lux)',
]

LSTM_X_data = all_data[_weather_columns].values
Regression_X_train = all_data[_weather_columns].values
Regression_y_train = all_data[['Power(mW)']].values

In [None]:
# Normalise the data: fit the min-max scaler on the weather features and
# scale them in one step. The fitted scaler is kept because later cells
# reuse it for the regression inputs and for the history files.
LSTM_MinMaxModel = MinMaxScaler()
LSTM_X_data_scaled = LSTM_MinMaxModel.fit_transform(LSTM_X_data)

In [None]:
# Prepare the LSTM training data with a sliding window:
# each sample is LookBackNum consecutive scaled rows, and its target is the
# scaled row that immediately follows the window.
_window_ends = range(LookBackNum, len(LSTM_X_data_scaled))

X_train = np.array([LSTM_X_data_scaled[end - LookBackNum:end, :] for end in _window_ends])
y_train = np.array([LSTM_X_data_scaled[end, :] for end in _window_ends])

In [None]:
# Reshape to the explicit (samples, timesteps, 5 features) layout.
_n_samples, _n_steps = X_train.shape[0], X_train.shape[1]
X_train = X_train.reshape((_n_samples, _n_steps, 5))

In [None]:
# Build the LSTM model
def build_lstm_model():
    """Create and compile the stacked-LSTM weather forecaster.

    The input shape is read from the module-level ``X_train`` array (its
    second and third dimensions), so ``X_train`` must exist before calling.

    Returns:
        A compiled ``Sequential`` model: three LSTM layers (256, 128 and 64
        units) with 0.3 dropout after the first two, followed by a 5-unit
        Dense output, compiled with the Adam optimizer and MAE loss.
    """
    window_shape = (X_train.shape[1], X_train.shape[2])
    layer_stack = [
        LSTM(units=256, return_sequences=True, input_shape=window_shape),
        Dropout(0.3),
        LSTM(units=128, return_sequences=True),
        Dropout(0.3),
        LSTM(units=64),
        Dense(units=5),
    ]
    model = Sequential(layer_stack)
    # Loss switched to mean absolute error (MAE).
    model.compile(optimizer='adam', loss='mean_absolute_error')
    return model

In [None]:
# Train the LSTM model.
# Inputs are the LookBackNum-row windows in X_train; targets in y_train are the
# scaled feature rows that follow each window.
lstm_model = build_lstm_model()
lstm_model.fit(X_train, y_train, epochs=100, batch_size=128)

In [None]:
# Save the LSTM model.
# NowDateTime is reused by later cells to name and reload the model files.
# NOTE(review): the format ends in a literal 'Z', but datetime.now() returns
# naive local time rather than UTC — confirm which one is intended.
NowDateTime = datetime.now().strftime("%Y-%m-%dT%H_%M_%SZ")
lstm_model.save(f'WeatherLSTM_{NowDateTime}.h5')
print('LSTM Model Saved')

In [None]:
# Configure the CatBoost regressor that maps weather features to power.
CBModel = CatBoostRegressor(
    iterations=500,
    learning_rate=0.05,
    depth=6,
    verbose=0          # do not print the training progress
)

In [None]:
# Scale the regression inputs with the scaler fitted on the LSTM features and
# fit CatBoost on them. This matches the forecasting loop, which passes the
# LSTM's scaled-space predictions directly to CBModel.predict.
X_reg_scaled = LSTM_MinMaxModel.transform(Regression_X_train)
CBModel.fit(X_reg_scaled, Regression_y_train)

In [None]:
# Save the CatBoost model under the same timestamp as the LSTM model file.
CBModel.save_model(f'WeatherCatBoost_{NowDateTime}.cbm')
print('CatBoost Model Saved')

In [None]:
# Report the CatBoost score on the same data the model was fitted on.
# NOTE(review): this is a training-set score, so it does not measure how well
# the model generalises to unseen data.
r2 = CBModel.score(X_reg_scaled, Regression_y_train)
print(f"CatBoost R^2: {r2:.4f}")

In [None]:
# Reload both models from the files written earlier in this run (same
# NowDateTime stamp), replacing the in-memory objects.
lstm_model = load_model(f'WeatherLSTM_{NowDateTime}.h5')
CBModel = CatBoostRegressor()
CBModel.load_model(f'WeatherCatBoost_{NowDateTime}.cbm')

In [None]:
# Read the submission template that lists the serial numbers to predict.
base = os.getcwd()
# os.path.join inserts the path separator; plain string concatenation produced
# "<cwd>upload.csv", which points outside the working directory.
test_path = os.path.join(base, 'upload.csv')
test_df = pd.read_csv(test_path, encoding='utf-8')
EXquestion = test_df[['序號']].values  # device serial numbers to predict, shape (n_rows, 1)

In [None]:
# Forecast the power for every block of ForecastNum consecutive serial numbers.
# For each block the LSTM is rolled forward one step at a time (each prediction
# is fed back as the newest input row) and CatBoost converts every predicted
# weather row into a power value.
FeatureColumns = ['WindSpeed(m/s)', 'Pressure(hpa)', 'Temperature(°C)', 'Humidity(%)', 'Sunlight(Lux)']

# Flatten once so indexing yields scalars; int() on a 1-element array is
# deprecated in NumPy.
serial_numbers = np.ravel(EXquestion)

results = []
for idx in range(0, len(serial_numbers), ForecastNum):
    loc_code = int(serial_numbers[idx])
    # The last two digits of the serial number select the history file.
    suffix = str(loc_code).zfill(2)[-2:]

    # Automatically locate the matching incomplete AVG file
    incomplete_dir = os.path.join(base, '')
    pattern = os.path.join(incomplete_dir, f'IncompleteAvgDATA_{suffix}.csv')
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f"No file matches pattern: {pattern}")
    hist_df = pd.read_csv(matches[0], encoding='utf-8')

    hist_values = hist_df[FeatureColumns].values
    if len(hist_values) < LookBackNum:
        raise ValueError(
            f"{matches[0]} has {len(hist_values)} rows; "
            f"at least {LookBackNum} are needed to seed the LSTM"
        )
    hist_scaled = LSTM_MinMaxModel.transform(hist_values)

    # NOTE(review): the seed window is always the first LookBackNum rows of the
    # history file, regardless of which serial number is being predicted —
    # confirm the file is expected to start at the relevant timestamps.
    inputs = [hist_scaled[i].reshape(1, -1) for i in range(LookBackNum)]

    # A trailing partial block must not yield more answers than serial numbers,
    # otherwise the output DataFrame columns would differ in length.
    steps = min(ForecastNum, len(serial_numbers) - idx)

    PredictOutput, PredictPower = [], []
    for t in range(steps):
        if t > 0:
            # Slide the window: the previous prediction becomes the newest row.
            inputs.append(PredictOutput[-1])
        X_seq = np.array(inputs[t:t + LookBackNum]).reshape((1, LookBackNum, len(FeatureColumns)))
        # verbose=0 avoids printing one progress bar per forecast step.
        lstm_pred = lstm_model.predict(X_seq, verbose=0)
        PredictOutput.append(lstm_pred)

        # The LSTM output is already in scaled feature space, which is what
        # CBModel is fed here without any inverse transform.
        cb_input = lstm_pred.reshape(-1, len(FeatureColumns))
        power_pred = CBModel.predict(cb_input)
        PredictPower.append(np.round(power_pred, 2).flatten())

    results.extend(p[0] for p in PredictPower)

In [None]:
# Assemble the submission table (serial number -> predicted power) and write it.
_serial_column = EXquestion.flatten()
_answer_column = list(map(float, results))

out_df = pd.DataFrame({'序號': _serial_column, '答案': _answer_column})
out_df.to_csv('upload_catboost.csv', index=False)
print('Output CSV File Saved')