In [None]:
#pip install pandas numpy scikit-learn tensorflow matplotlib
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# --- 1. Data loading and initial processing ---
# Read the merged CSV using the 'Date' column as a parsed date index; the
# placeholder strings listed in na_values are treated as missing values.
try:
    df = pd.read_csv('all_merged.csv', index_col='Date', parse_dates=True, na_values=['-', 'N/A', '', ' '])
except FileNotFoundError as err:
    # Stop with an explanatory message (and a non-zero exit status) instead
    # of terminating silently; also avoids relying on the site-provided exit().
    raise SystemExit("Input file 'all_merged.csv' was not found; cannot continue.") from err

# Ensure chronological order before any shift/window operations.
df = df.sort_index()

print("\n原始資料前5行：")
print(df.head())
print("\n各欄位缺失值數量 (初次載入後)：")
print(df.isnull().sum())

# --- 2. Handle missing values ---
# Coerce every column to a numeric dtype; values that cannot be parsed
# become NaN.
df = df.apply(pd.to_numeric, errors='coerce')

# Forward-fill gaps first, then apply linear interpolation.
# NOTE(review): after ffill the only NaNs left are those before a column's
# first valid value; linear interpolation with its default direction
# presumably leaves them untouched, so this second step looks like a no-op.
# Confirm whether interpolate-then-fill was the intended order.
df_processed = df.ffill().interpolate(method='linear')

# Derive calendar features from the date index.
calendar_attrs = {
    'DayOfWeek': 'dayofweek',
    'Month': 'month',
    'Quarter': 'quarter',
    'Year': 'year',
}
for column_name, index_attr in calendar_attrs.items():
    df_processed[column_name] = getattr(df_processed.index, index_attr)

print(df_processed.head())

# --- Add lagged values of the target ---
# shift(lag) moves the series by `lag` rows, so these are calendar-day lags
# only if the index has exactly one row per day -- TODO confirm.
lag_days = [1, 7, 30]
for lag in lag_days:
    df_processed[f'TWD_USD_Lag_{lag}'] = df_processed['TWD_USD'].shift(lag)
print(df_processed.head())

# Drop every row that still contains a NaN (at least the first max(lag_days)
# rows, whose lag columns are undefined) and report how many were removed.
original_rows = df_processed.shape[0]
df_processed.dropna(inplace=True)
rows_after_drop = df_processed.shape[0]
print(f"\n移除含缺失值的資料列：{original_rows} -> {rows_after_drop} 行")

# A column that is entirely NaN (e.g. a non-numeric column coerced above)
# makes dropna() discard every row; fail here with a clear message rather
# than later inside the scaler.
if rows_after_drop == 0:
    raise ValueError(
        "No rows left after dropping NaN values; check for columns that are "
        "entirely missing or non-numeric."
    )

# --- 3. Feature selection and scaling ---
# Every column except the target is used as a model input.
features = [col for col in df_processed.columns if col != 'TWD_USD']
X = df_processed[features].values
y = df_processed['TWD_USD'].values.reshape(-1, 1)

# Fit the scalers on the chronologically earliest rows only, then apply them
# to the whole series. Fitting on all rows would let the min/max of the
# held-out tail leak into the scaling of the training data. The fraction
# mirrors the 80% training share used when the sequences are split.
# Values in the later rows may therefore fall outside [0, 1].
scaler_fit_fraction = 0.8
n_fit_rows = max(1, int(len(X) * scaler_fit_fraction))

# MinMaxScaler maps the fitted range of each feature / the target to [0, 1].
scaler_X = MinMaxScaler(feature_range=(0, 1))
scaler_X.fit(X[:n_fit_rows])
X_scaled = scaler_X.transform(X)

scaler_y = MinMaxScaler(feature_range=(0, 1))
scaler_y.fit(y[:n_fit_rows])
y_scaled = scaler_y.transform(y)

# --- 4. Prepare LSTM input data (build sequences) ---
# Number of consecutive rows that make up one input window.
look_back = 30 
# Number of consecutive target values predicted after each input window.
forecast_horizon = 7

def create_sequences_multi_step(data, target, look_back, forecast_horizon):
    """Build sliding-window samples for multi-step forecasting.

    Window ``i`` pairs the rows ``data[i : i + look_back]`` with the
    ``forecast_horizon`` values of the first target column that
    immediately follow that window.

    Args:
        data: 2-D array-like of shape (n_rows, n_features).
        target: 2-D array-like with at least one column; only column 0
            is used.
        look_back: Number of rows in each input window.
        forecast_horizon: Number of target values per sample.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        (n_windows, look_back, n_features) and ``y`` has shape
        (n_windows, forecast_horizon), with
        ``n_windows = n_rows - look_back - forecast_horizon + 1``.
        When there are too few rows for a single window, correctly shaped
        empty arrays are returned so callers can still read ``shape[1:]``.
    """
    data = np.asarray(data)
    target = np.asarray(target)

    n_windows = len(data) - look_back - forecast_horizon + 1
    if n_windows <= 0:
        # Keep the trailing dimensions even when no window fits.
        return (
            np.empty((0, look_back, data.shape[1]), dtype=data.dtype),
            np.empty((0, forecast_horizon), dtype=target.dtype),
        )

    X = [data[start:start + look_back, :] for start in range(n_windows)]
    y = [
        target[start + look_back:start + look_back + forecast_horizon, 0]
        for start in range(n_windows)
    ]
    return np.array(X), np.array(y)

# Turn the scaled feature/target arrays into windowed samples.
X_seq, y_seq = create_sequences_multi_step(
    data=X_scaled,
    target=y_scaled,
    look_back=look_back,
    forecast_horizon=forecast_horizon,
)

# Expected layouts: X_seq (samples, time steps, features),
# y_seq (samples, forecast steps).
print(f"\nLSTM 輸入 X_seq 形狀：{X_seq.shape}")
print(f"LSTM 輸入 y_seq 形狀：{y_seq.shape}")

# --- 5. Train / test split ---
# Chronological split: the first 80% of the windows train the model and the
# remaining windows are held out for testing (no shuffling).
train_size = int(len(X_seq) * 0.8)
train_part = slice(None, train_size)
test_part = slice(train_size, None)

X_train, y_train = X_seq[train_part], y_seq[train_part]
X_test, y_test = X_seq[test_part], y_seq[test_part]

print(f"\n訓練集 X_train 形狀：{X_train.shape}, y_train 形狀：{y_train.shape}")
print(f"測試集 X_test 形狀：{X_test.shape}, y_test 形狀：{y_test.shape}")

# --- 6. Build the LSTM model ---
# Two stacked 100-unit LSTM layers, each followed by 30% dropout; the final
# Dense layer emits one value per forecast step.
window_length = X_train.shape[1]
feature_count = X_train.shape[2]

model = Sequential([
    # The first LSTM returns the full sequence so the second one can read it.
    LSTM(units=100, return_sequences=True, input_shape=(window_length, feature_count)),
    Dropout(0.3),
    LSTM(units=100, return_sequences=False),
    Dropout(0.3),
    Dense(units=forecast_horizon),
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

# --- 7. Model training ---
# Stop when the validation loss has not improved for 20 epochs and roll the
# model back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# 10% of the training samples are set aside by Keras for validation.
fit_options = dict(
    epochs=200,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stopping],
    verbose=1,
)

print("\n開始訓練 LSTM 模型...")
history = model.fit(X_train, y_train, **fit_options)
print("模型訓練完成。")

# Loss curves for the training and validation sets.
plt.figure(figsize=(10, 6))
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Model Loss During Training')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# --- 8. Prediction and evaluation ---
from sklearn.metrics import mean_squared_error, mean_absolute_error

y_pred_scaled = model.predict(X_test)

# Convert predictions and ground truth back to the original target scale.
# NOTE(review): scaler_y was fitted on a single column while these arrays
# have forecast_horizon columns; this presumably relies on the scaler's
# parameters broadcasting across columns -- confirm on the installed
# scikit-learn version.
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_true = scaler_y.inverse_transform(y_test)

# One RMSE and one MAE value per forecast step (column).
rmse_per_step = []
mae_per_step = []
for step in range(forecast_horizon):
    actual_column = y_true[:, step]
    predicted_column = y_pred[:, step]
    rmse_per_step.append(np.sqrt(mean_squared_error(actual_column, predicted_column)))
    mae_per_step.append(mean_absolute_error(actual_column, predicted_column))

print(f"\n測試集 RMSE (每個預測步長): {rmse_per_step}")
print(f"測試集 MAE (每個預測步長): {mae_per_step}")


# Plot the full multi-step forecast against the truth for a few test samples.
num_plot_samples = 5
# Never request more samples than exist, and draw without replacement so the
# same sample is not plotted (and listed in the legend) twice.
samples_to_plot = min(num_plot_samples, len(y_test))
sample_indices = np.random.choice(len(y_test), size=samples_to_plot, replace=False)

plt.figure(figsize=(18, 10))
for sample_index in sample_indices:
    plt.plot(range(forecast_horizon), y_true[sample_index], label=f'True Sample {sample_index}', linestyle='--', marker='o')
    plt.plot(range(forecast_horizon), y_pred[sample_index], label=f'Predicted Sample {sample_index}', linestyle='-', marker='x')

# The samples are drawn at random, so the title no longer calls them "First".
plt.title(f'TWD_USD Multi-Step Prediction vs True Values ({samples_to_plot} Random Samples)')
plt.xlabel('Forecast Horizon (Days from first prediction day)')
plt.ylabel('TWD_USD')
plt.legend()
plt.grid(True)
plt.show()

# Predicted vs. true values for one specific forecast step.
plt.figure(figsize=(15, 7))
# Column 0 holds the first day of each forecast.
first_step = 0
first_day_series = (
    (y_true, 'True TWD_USD (Day 1 of Forecast)'),
    (y_pred, 'Predicted TWD_USD (Day 1 of Forecast)'),
)
for values, legend_label in first_day_series:
    plt.plot(values[:, first_step], label=legend_label)
plt.title('TWD_USD Prediction vs True Values (First Day of Forecast)')
plt.xlabel('Time Steps')
plt.ylabel('TWD_USD')
plt.legend()
plt.show()

# --- 9. Example: multi-step forecast beyond the available data ---
# Use the most recent look_back rows of scaled features as a single-sample
# batch of shape (1, look_back, number of features).
scaled_feature_count = X_scaled.shape[1]
last_sequence_for_forecast = np.reshape(
    X_scaled[-look_back:], (1, look_back, scaled_feature_count)
)

# Predict all forecast steps at once and map them back to the target scale.
future_predictions_scaled = model.predict(last_sequence_for_forecast)
future_predictions = scaler_y.inverse_transform(future_predictions_scaled)

print(f"\n基於現有數據，預測未來 {forecast_horizon} 天的 TWD_USD 為:")
for day_number, pred_val in enumerate(future_predictions[0], start=1):
    print(f"  Day {day_number}: {pred_val:.4f}")