In [7]:
import os

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional
#from keras.optimizers import adam_v2
from keras.callbacks import EarlyStopping
  
# Name of the target column (power output in MW).
LABEL = '出力(MW)'

# Load the three GBK-encoded CSV inputs: training rows, test rows, and the
# per-site basic-information table.
df_train = pd.read_csv('data/A榜-训练集_海上风电预测_气象变量及实际功率数据.csv', encoding='gbk')
df_test = pd.read_csv('data/A榜-测试集_海上风电预测_气象变量数据.csv', encoding='gbk')
add_df = pd.read_csv('data/A榜-训练集_海上风电预测_基本信息.csv', encoding='gbk')
print(df_test.columns)

# Stack train and test rows, then left-join each site's installed capacity.
df = pd.concat([df_train, df_test]).merge(
    add_df[['站点编号', '装机容量(MW)']],
    how='left',
    on='站点编号',
)

# Numeric site feature: the second character of the site id, cast to int.
# NOTE(review): this only distinguishes sites whose ids differ in that one
# character -- confirm against the actual id format.
df['站点编号_le'] = df['站点编号'].apply(lambda site_id: int(site_id[1]))

# Drop rows whose target is the literal string '<NULL>'. Rows with a missing
# (NaN) target compare unequal to the string and are therefore kept --
# presumably these are the test rows, which have no target column.
df = df.loc[df[LABEL] != '<NULL>'].reset_index(drop=True)
df = df.astype({LABEL: 'float32'})

# Parse the timestamp column; 'mixed' lets pandas infer the format per value.
df = df.assign(time=pd.to_datetime(df['时间'], format='mixed'))

# Re-split by time: rows before 2023-05-01 are used for training, the rest
# are the rows to predict.
df_train = df.loc[df['time'] < '2023-05-01 0:0:0']
df_test = df.loc[df['time'] >= '2023-05-01 0:0:0']

# Features are every column except the target, both time columns and the raw
# site id; the target is the LABEL column.
X = df_train.drop([LABEL, '时间', 'time', '站点编号'], axis=1)
y = df_train.loc[:, LABEL]

print(X)
print(y)
# Min-max scale the features and the target with two separate scalers so the
# target scaling can be inverted on its own later.
# NOTE(review): both scalers are fit on every row of X / y, including the rows
# that are later held out as the validation window.
Xscaler = MinMaxScaler()
Yscaler = MinMaxScaler()
X_scaled = Xscaler.fit(X).transform(X)
y_scaled = Yscaler.fit_transform(y.to_numpy().reshape(-1, 1))  # scaler needs a 2-D column

# Show shape and contents of both scaled arrays.
print(X_scaled.shape, X_scaled, sep='\n')

print(y_scaled.shape, y_scaled, sep='\n')

# # Turn the time series into a supervised-learning problem
# def create_dataset(X, y, time_steps=1):
#     Xs, ys = [], []
#     for i in range(len(X)-time_steps-1):
#         v = X[i:(i+time_steps), :]
#         Xs.append(v)
#         ys.append(y[i + time_steps])
#     return np.array(Xs), np.array(ys)


# time_steps = 10  # adjust this window length to suit the data
# X_train, y_train = create_dataset(X_scaled, y_scaled, time_steps)
# print(X_train.shape)
# print(X_train)

# Shape the inputs for the LSTM as [samples, time steps, features]. No sliding
# window is used: every feature column becomes one "time step" carrying a
# single value, so the sequence length equals the number of feature columns.
Xtrain = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)
ytrain = y_scaled.reshape(y_scaled.shape[0], 1)

# Time-based hold-out instead of a random split:
# X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
# Rows before 2023-02-01 are fitted on, rows from that date on are validation.
train_mask = (df_train['time'] < '2023-02-01 0:0:0').to_numpy()
val_mask = (df_train['time'] >= '2023-02-01 0:0:0').to_numpy()
X_train, y_train = Xtrain[train_mask], ytrain[train_mask]
X_val, y_val = Xtrain[val_mask], ytrain[val_mask]

# print(X_train.shape)
# print(X_train)
# print(y_train.shape)
# print(y_train)
# print(X_test.shape)
# print(X_test)
# print(y_test.shape)
# print(y_test)

# Build the BiLSTM: two stacked bidirectional LSTM layers (50 units each
# direction) followed by a one-unit dense output. The first layer returns the
# full sequence so the second recurrent layer has a sequence to consume.
model = Sequential([
    Bidirectional(LSTM(50, return_sequences=True), input_shape=(X_train.shape[1], 1)),
    Bidirectional(LSTM(50)),
    Dense(1),
])

# Compile with the Adam optimizer and mean-squared-error loss.
model.compile(loss='mean_squared_error', optimizer='adam')

# Early stopping: halt once val_loss has not improved for 10 epochs and roll
# the weights back to the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Train for up to 100 epochs, validating on the held-out window each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=96,
    callbacks=[early_stopping],
    verbose=1,
)
  
# Predict on the validation window; values are still in scaled units here.
y_pred = model.predict(X_val)

print(y_pred.shape, y_pred, sep='\n')

# Undo the target scaling on both predictions and ground truth so the error
# below is expressed in the target's original units.
y_pred = Yscaler.inverse_transform(y_pred)
y_val = Yscaler.inverse_transform(y_val)
print(y_pred.shape, y_pred, sep='\n')
print(y_val.shape, y_val, sep='\n')

# Evaluate: root-mean-squared error, and the derived score 1 / (1 + RMSE).
rmse = np.sqrt(np.square(y_val - y_pred).mean())
print('Root Mean Squared Error:', rmse)
score = 1/(1+rmse)
print('Score:', score)


# Build the test feature matrix with the same column drops used for training.
X = df_test.drop(columns=[LABEL, '时间', 'time', '站点编号'])
# Reuse the scaling already fitted on the training features. Refitting here
# (fit_transform) would map the test features onto a different range than the
# one the model was trained on.
X_scaled = Xscaler.transform(X)

# The network was built for input shaped (samples, n_features, 1), so add the
# trailing axis before predicting -- the same reshape applied to the training data.
y_pred = model.predict(np.reshape(X_scaled, (X_scaled.shape[0], X_scaled.shape[1], 1)))
print(y_pred.shape)
print(y_pred)
# Map the predictions back from scaled units to the target's original units.
y_pred = Yscaler.inverse_transform(y_pred)
print(y_pred)
# Flatten (n, 1) -> (n,) so it can be assigned as a column.
y_pred = np.reshape(y_pred, y_pred.shape[0])

# df_test is a row slice of df; take an explicit copy so the assignment writes
# to an independent frame instead of a view-or-copy of df.
df_test = df_test.copy()
df_test.loc[:, LABEL] = y_pred

# Create the output directory if needed so the export cannot fail after training,
# then write site id, timestamp and predicted output; the validation score is
# embedded in the file name.
os.makedirs('ans', exist_ok=True)
df_test[['站点编号','时间','出力(MW)']].to_csv('ans/bilstm_base_%.5f.csv'%score, index=False)


Index(['站点编号', '时间', '气压(Pa）', '相对湿度（%）', '云量', '10米风速（10m/s）', '10米风向（°)',
       '温度（K）', '辐照强度（J/m2）', '降水（m）', '100m风速（100m/s）', '100m风向（°)'],
      dtype='object')
             气压(Pa）  相对湿度（%）        云量  10米风速（10m/s）  10米风向（°)     温度（K）  \
0       102249.6094  74.8513  0.007812        7.7041   26.5195  286.0695   
1       102252.0355  74.7530  0.000924        7.7710   23.5766  285.8647   
2       102248.5900  74.4995  0.003009        7.8272   21.5451  285.6935   
3       102240.4725  74.1432  0.011402        7.8637   20.2394  285.5512   
4       102228.8828  73.7366  0.023438        7.8781   19.4870  285.4330   
...             ...      ...       ...           ...       ...       ...   
231820  101184.7351  79.6624  0.000000        3.8169   13.2671  290.1312   
231821  101179.6875  76.9828  0.031250        3.8725   11.7285  290.1259   
231822  101179.8332  75.1280  0.186490        3.9129   12.5525  290.0896   
231823  101182.6311  73.8004  0.382890        3.9437   14.7787  290.033

AttributeError: module 'keras.src.backend' has no attribute 'Variable'

In [16]:
# Square root of 0.0069 -- presumably converting an MSE value read off the
# training log into an RMSE; TODO confirm where 0.0069 comes from.
root_value = np.sqrt(0.0069)
print(root_value)

0.08306623862918075


In [3]:
!pip uninstall keras

^C
