In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# 1. Load the data
file_path = r'E:\Pythonworkshop\Solar_data_processing\cleaned_data_final_Yulara.csv'
data = pd.read_csv(file_path)

# Drop the timestamp column when the file has one
if 'timestamp' in data.columns:
    data = data.drop(columns=['timestamp'])

# Add the previous row's power as a new feature; shift(1) leaves the first
# row empty, and missing values in the shifted column are filled with 0
data['Prev_Power'] = data['Active_Power'].shift(1).fillna(0)

# Separate the target from the features
y = data['Active_Power']
X = data.drop(columns=['Active_Power'])

# Split by row order (no shuffling): first 70% train, next 15% validation,
# remainder test
n_rows = len(X)
train_size = int(n_rows * 0.7)
val_size = int(n_rows * 0.15)

train_rows = slice(None, train_size)
val_rows = slice(train_size, train_size + val_size)
test_rows = slice(train_size + val_size, None)

X_train, y_train = X[train_rows], y[train_rows]
X_val, y_val = X[val_rows], y[val_rows]
X_test, y_test = X[test_rows], y[test_rows]

# Standardise features and target; the scalers are fitted on the training
# split only
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))

# Apply the training-set scaling parameters to the validation and test splits
X_val_scaled, X_test_scaled = (scaler_X.transform(part) for part in (X_val, X_test))
y_val_scaled, y_test_scaled = (
    scaler_y.transform(part.values.reshape(-1, 1)) for part in (y_val, y_test)
)

forecast_horizon = 6  # number of future time steps to predict
output_size = forecast_horizon  # number of model outputs

In [2]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import numpy as np

# 定义函数来计算整体和每个时间步的误差并打印结果
def calculate_and_print_errors(y_true, y_pred):
    """Print and return RMSE, MAE and MAPE for a multi-step forecast.

    Metrics are reported three ways: over all forecast steps pooled together,
    for each forecast step separately, and as the mean of the per-step values.
    They are computed on the values exactly as given; no rescaling is applied
    here.

    Args:
        y_true: Array-like of ground-truth values, one row per sample and one
            column per forecast step. A 1-D input is treated as a single
            forecast step.
        y_pred: Array-like of predictions laid out like ``y_true``.

    Returns:
        Tuple ``(overall_rmse, overall_mae, overall_mape, rmse_per_step,
        mae_per_step, mape_per_step)``. The first three are scalars computed
        on the flattened arrays; the last three are lists with one entry per
        forecast step.
    """
    # Accept lists and pandas objects as well as ndarrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Promote 1-D (single-step) input to one column so the per-step indexing
    # below works instead of failing on shape[1].
    if y_true.ndim < 2:
        y_true = y_true.reshape(-1, 1)
    if y_pred.ndim < 2:
        y_pred = y_pred.reshape(-1, 1)

    def _rmse_mae_mape(actual, predicted):
        """Return (RMSE, MAE, MAPE) for one pair of arrays."""
        return (
            np.sqrt(mean_squared_error(actual, predicted)),
            mean_absolute_error(actual, predicted),
            mean_absolute_percentage_error(actual, predicted),
        )

    # Overall errors: flatten the whole forecast range into one vector.
    overall_rmse, overall_mae, overall_mape = _rmse_mae_mape(
        y_true.flatten(), y_pred.flatten()
    )

    print("Overall errors for all steps combined:")
    print(f"Overall RMSE: {overall_rmse:.4f}")
    print(f"Overall MAE: {overall_mae:.4f}")
    print(f"Overall MAPE: {overall_mape:.4%}\n")

    # Errors for each forecast step (column) separately.
    rmse_per_step = []
    mae_per_step = []
    mape_per_step = []

    for step in range(y_true.shape[1]):
        rmse_step, mae_step, mape_step = _rmse_mae_mape(y_true[:, step], y_pred[:, step])

        rmse_per_step.append(rmse_step)
        mae_per_step.append(mae_step)
        mape_per_step.append(mape_step)

        print(f"Step {step + 1} - RMSE: {rmse_step:.4f}, MAE: {mae_step:.4f}, MAPE: {mape_step:.4%}")

    # Mean of the per-step errors.
    avg_rmse = np.mean(rmse_per_step)
    avg_mae = np.mean(mae_per_step)
    avg_mape = np.mean(mape_per_step)

    print("\nAverage errors across all steps:")
    print(f"Average RMSE: {avg_rmse:.4f}")
    print(f"Average MAE: {avg_mae:.4f}")
    print(f"Average MAPE: {avg_mape:.4%}")

    return overall_rmse, overall_mae, overall_mape, rmse_per_step, mae_per_step, mape_per_step


In [4]:
from tensorflow.keras import layers, models
from sklearn.metrics import mean_squared_error
import numpy as np
import tensorflow as tf
# 定义函数来构建和编译 LSTM 模型
def build_lstm_model(input_shape, hidden_size=90, output_units=None,
                     dropout_rate=0.2, learning_rate=0.001):
    """Build and compile a single-layer LSTM regressor.

    The network is ``Input -> LSTM -> Dropout -> Dense`` and is compiled with
    the Adam optimizer and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, ``(window_size, n_features)``.
        hidden_size: Number of units in the LSTM layer.
        output_units: Number of units in the final Dense layer. When ``None``
            the module-level ``output_size`` is used, matching the previous
            behaviour.
        dropout_rate: Rate passed to the Dropout layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.
    """
    if output_units is None:
        output_units = output_size
    model = models.Sequential([
        # An explicit Input layer replaces the ``input_shape=`` layer keyword,
        # which recent Keras versions warn against in Sequential models.
        layers.Input(shape=input_shape),
        layers.LSTM(hidden_size, return_sequences=False),
        layers.Dropout(dropout_rate),
        layers.Dense(output_units),
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='mse')
    return model

# 定义函数来创建滑动窗口数据
def create_sliding_window_features(X, y, window_size, forecast_horizon=12, step_size=1):
    """Cut aligned feature/target series into supervised-learning windows.

    Each sample pairs ``window_size`` consecutive rows of ``X`` with the
    ``forecast_horizon`` rows of ``y`` that immediately follow them.

    Args:
        X: Feature rows ordered along the first axis (ndarray, DataFrame or
            other array-like).
        y: Target values aligned row-for-row with ``X`` (ndarray, Series,
            DataFrame or other array-like).
        window_size: Number of input rows per sample.
        forecast_horizon: Number of target rows per sample.
        step_size: Offset between the starts of consecutive windows.

    Returns:
        Tuple ``(X_windows, y_windows)`` of ndarrays. ``X_windows`` stacks the
        input windows; each row of ``y_windows`` is the flattened target slice
        that follows the corresponding window. Both have ``shape[0] == 0``
        when the series is too short to yield a single window.
    """
    # Convert once up front so pandas objects and plain sequences are sliced
    # positionally, exactly like ndarrays (a Series target has no .reshape).
    X = np.asarray(X)
    y = np.asarray(y)

    # Exclusive upper bound on window starts: the last window still needs
    # forecast_horizon target rows after it.
    stop = len(X) - window_size - forecast_horizon + 1

    X_windows = []
    y_windows = []
    for start in range(0, stop, step_size):
        end = start + window_size
        X_windows.append(X[start:end])
        y_windows.append(y[end:end + forecast_horizon].reshape(-1))
    return np.array(X_windows), np.array(y_windows)

# Candidate sliding-window lengths
window_sizes = [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24]
# Overall validation RMSE for each entry of window_sizes, in the same order
# (NaN where a window size had to be skipped)
validation_errors = []

# Train and evaluate one model per window size
for window_size in window_sizes:
    print(f"Training with window_size={window_size}")

    # Build the windowed training and validation sets
    X_train_multi, y_train_multi = create_sliding_window_features(X_train_scaled, y_train_scaled, window_size, output_size)
    X_val_multi, y_val_multi = create_sliding_window_features(X_val_scaled, y_val_scaled, window_size, output_size)

    # Skip window sizes that leave no samples in either split
    if X_train_multi.shape[0] == 0 or X_val_multi.shape[0] == 0:
        print(f"Window size {window_size} resulted in insufficient training/validation data. Skipping.")
        # Keep validation_errors aligned with window_sizes
        validation_errors.append(np.nan)
        continue

    # Build the model
    lstm_model = build_lstm_model((window_size, X_train_scaled.shape[1]))

    # Train the model
    history = lstm_model.fit(
        X_train_multi,
        y_train_multi,
        epochs=50,  # adjust as needed
        batch_size=64,
        validation_data=(X_val_multi, y_val_multi),
        verbose=0  # 0 keeps the training log quiet
    )

    # Evaluate on the validation set
    y_val_pred = lstm_model.predict(X_val_multi)
    # The f prefix was missing, so the literal "{window_size}" was printed
    print(f"Validation RMSE for window_size={window_size}")
    overall_rmse, overall_mae, overall_mape, rmse_steps, mae_steps, mape_steps = calculate_and_print_errors(y_val_multi, y_val_pred)

    # Record the result so the window sizes can be compared after the loop
    validation_errors.append(overall_rmse)



Training with window_size=2


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 737us/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2237
Overall MAE: 0.1001
Overall MAPE: 33.9878%

Step 1 - RMSE: 0.1589, MAE: 0.0684, MAPE: 20.9886%
Step 2 - RMSE: 0.1930, MAE: 0.0834, MAPE: 27.9097%
Step 3 - RMSE: 0.2165, MAE: 0.0954, MAPE: 32.0289%
Step 4 - RMSE: 0.2364, MAE: 0.1077, MAPE: 36.4698%
Step 5 - RMSE: 0.2523, MAE: 0.1178, MAPE: 41.4602%
Step 6 - RMSE: 0.2671, MAE: 0.1276, MAPE: 45.0695%

Average errors across all steps:
Average RMSE: 0.2207
Average MAE: 0.1001
Average MAPE: 33.9878%
Training with window_size=3


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 945us/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2339
Overall MAE: 0.1102
Overall MAPE: 35.9863%

Step 1 - RMSE: 0.1640, MAE: 0.0769, MAPE: 24.0375%
Step 2 - RMSE: 0.2019, MAE: 0.0935, MAPE: 30.7680%
Step 3 - RMSE: 0.2257, MAE: 0.1053, MAPE: 35.1773%
Step 4 - RMSE: 0.2502, MAE: 0.1234, MAPE: 39.6773%
Step 5 - RMSE: 0.2649, MAE: 0.1271, MAPE: 42.3045%
Step 6 - RMSE: 0.2775, MAE: 0.1348, MAPE: 43.9533%

Average errors across all steps:
Average RMSE: 0.2307
Average MAE: 0.1102
Average MAPE: 35.9863%
Training with window_size=4


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1000us/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2445
Overall MAE: 0.1157
Overall MAPE: 33.4203%

Step 1 - RMSE: 0.1702, MAE: 0.0787, MAPE: 21.1911%
Step 2 - RMSE: 0.2095, MAE: 0.0967, MAPE: 28.3051%
Step 3 - RMSE: 0.2366, MAE: 0.1120, MAPE: 31.9929%
Step 4 - RMSE: 0.2620, MAE: 0.1281, MAPE: 36.2863%
Step 5 - RMSE: 0.2761, MAE: 0.1342, MAPE: 39.4587%
Step 6 - RMSE: 0.2918, MAE: 0.1448, MAPE: 43.2874%

Average errors across all steps:
Average RMSE: 0.2410
Average MAE: 0.1157
Average MAPE: 33.4203%
Training with window_size=5


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2485
Overall MAE: 0.1215
Overall MAPE: 36.7308%

Step 1 - RMSE: 0.1840, MAE: 0.0936, MAPE: 22.8134%
Step 2 - RMSE: 0.2163, MAE: 0.1062, MAPE: 29.9178%
Step 3 - RMSE: 0.2396, MAE: 0.1176, MAPE: 34.0086%
Step 4 - RMSE: 0.2582, MAE: 0.1241, MAPE: 39.9012%
Step 5 - RMSE: 0.2791, MAE: 0.1372, MAPE: 43.8530%
Step 6 - RMSE: 0.2963, MAE: 0.1504, MAPE: 49.8905%

Average errors across all steps:
Average RMSE: 0.2456
Average MAE: 0.1215
Average MAPE: 36.7308%
Training with window_size=6


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2513
Overall MAE: 0.1149
Overall MAPE: 35.3920%

Step 1 - RMSE: 0.1820, MAE: 0.0839, MAPE: 23.8258%
Step 2 - RMSE: 0.2177, MAE: 0.0976, MAPE: 29.3226%
Step 3 - RMSE: 0.2453, MAE: 0.1121, MAPE: 32.0122%
Step 4 - RMSE: 0.2638, MAE: 0.1202, MAPE: 37.1802%
Step 5 - RMSE: 0.2811, MAE: 0.1309, MAPE: 42.7411%
Step 6 - RMSE: 0.2992, MAE: 0.1446, MAPE: 47.2704%

Average errors across all steps:
Average RMSE: 0.2482
Average MAE: 0.1149
Average MAPE: 35.3920%
Training with window_size=7


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2504
Overall MAE: 0.1140
Overall MAPE: 33.4166%

Step 1 - RMSE: 0.1745, MAE: 0.0763, MAPE: 20.7310%
Step 2 - RMSE: 0.2176, MAE: 0.0987, MAPE: 27.3085%
Step 3 - RMSE: 0.2447, MAE: 0.1135, MAPE: 33.2854%
Step 4 - RMSE: 0.2641, MAE: 0.1204, MAPE: 37.3695%
Step 5 - RMSE: 0.2856, MAE: 0.1378, MAPE: 39.6334%
Step 6 - RMSE: 0.2954, MAE: 0.1371, MAPE: 42.1715%

Average errors across all steps:
Average RMSE: 0.2470
Average MAE: 0.1140
Average MAPE: 33.4166%
Training with window_size=8


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2368
Overall MAE: 0.1067
Overall MAPE: 35.4374%

Step 1 - RMSE: 0.1689, MAE: 0.0745, MAPE: 21.8124%
Step 2 - RMSE: 0.2045, MAE: 0.0895, MAPE: 28.7289%
Step 3 - RMSE: 0.2294, MAE: 0.1024, MAPE: 33.6040%
Step 4 - RMSE: 0.2502, MAE: 0.1143, MAPE: 38.1197%
Step 5 - RMSE: 0.2667, MAE: 0.1236, MAPE: 43.2193%
Step 6 - RMSE: 0.2822, MAE: 0.1358, MAPE: 47.1402%

Average errors across all steps:
Average RMSE: 0.2337
Average MAE: 0.1067
Average MAPE: 35.4374%
Training with window_size=9


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2531
Overall MAE: 0.1106
Overall MAPE: 35.5126%

Step 1 - RMSE: 0.1768, MAE: 0.0741, MAPE: 20.9094%
Step 2 - RMSE: 0.2170, MAE: 0.0932, MAPE: 28.0248%
Step 3 - RMSE: 0.2433, MAE: 0.1062, MAPE: 34.5316%
Step 4 - RMSE: 0.2681, MAE: 0.1200, MAPE: 39.4501%
Step 5 - RMSE: 0.2879, MAE: 0.1304, MAPE: 43.6653%
Step 6 - RMSE: 0.3033, MAE: 0.1398, MAPE: 46.4948%

Average errors across all steps:
Average RMSE: 0.2494
Average MAE: 0.1106
Average MAPE: 35.5126%
Training with window_size=10


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2574
Overall MAE: 0.1155
Overall MAPE: 38.2720%

Step 1 - RMSE: 0.1799, MAE: 0.0788, MAPE: 22.7798%
Step 2 - RMSE: 0.2193, MAE: 0.0974, MAPE: 31.6689%
Step 3 - RMSE: 0.2482, MAE: 0.1108, MAPE: 36.6889%
Step 4 - RMSE: 0.2736, MAE: 0.1247, MAPE: 42.2943%
Step 5 - RMSE: 0.2917, MAE: 0.1349, MAPE: 46.8868%
Step 6 - RMSE: 0.3090, MAE: 0.1464, MAPE: 49.3133%

Average errors across all steps:
Average RMSE: 0.2536
Average MAE: 0.1155
Average MAPE: 38.2720%
Training with window_size=12


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2757
Overall MAE: 0.1280
Overall MAPE: 37.4091%

Step 1 - RMSE: 0.1977, MAE: 0.0896, MAPE: 23.9875%
Step 2 - RMSE: 0.2375, MAE: 0.1081, MAPE: 30.9263%
Step 3 - RMSE: 0.2704, MAE: 0.1266, MAPE: 35.6719%
Step 4 - RMSE: 0.2924, MAE: 0.1386, MAPE: 40.5599%
Step 5 - RMSE: 0.3108, MAE: 0.1484, MAPE: 44.4938%
Step 6 - RMSE: 0.3243, MAE: 0.1565, MAPE: 48.8154%

Average errors across all steps:
Average RMSE: 0.2722
Average MAE: 0.1280
Average MAPE: 37.4091%
Training with window_size=16


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2616
Overall MAE: 0.1171
Overall MAPE: 36.2588%

Step 1 - RMSE: 0.1891, MAE: 0.0828, MAPE: 24.8860%
Step 2 - RMSE: 0.2300, MAE: 0.1007, MAPE: 32.7264%
Step 3 - RMSE: 0.2549, MAE: 0.1147, MAPE: 36.1540%
Step 4 - RMSE: 0.2760, MAE: 0.1252, MAPE: 39.1749%
Step 5 - RMSE: 0.2942, MAE: 0.1358, MAPE: 40.8221%
Step 6 - RMSE: 0.3071, MAE: 0.1435, MAPE: 43.7896%

Average errors across all steps:
Average RMSE: 0.2586
Average MAE: 0.1171
Average MAPE: 36.2588%
Training with window_size=24


  super().__init__(**kwargs)


[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Validation RMSE for window_size={window_size}
Overall errors for all steps combined:
Overall RMSE: 0.2726
Overall MAE: 0.1184
Overall MAPE: 36.2375%

Step 1 - RMSE: 0.2054, MAE: 0.0860, MAPE: 25.3313%
Step 2 - RMSE: 0.2456, MAE: 0.1037, MAPE: 31.5703%
Step 3 - RMSE: 0.2721, MAE: 0.1192, MAPE: 36.1281%
Step 4 - RMSE: 0.2884, MAE: 0.1265, MAPE: 39.0760%
Step 5 - RMSE: 0.2995, MAE: 0.1339, MAPE: 41.5994%
Step 6 - RMSE: 0.3106, MAE: 0.1409, MAPE: 43.7197%

Average errors across all steps:
Average RMSE: 0.2703
Average MAE: 0.1184
Average MAPE: 36.2375%
