In [1]:
import pandas as pd
import numpy as np
# Load the input table from 'data_decomposed.csv' (path relative to the working
# directory); later cells read its `feature` and 'ds' columns.
data = pd.read_csv('data_decomposed.csv')

In [2]:
from sklearn import metrics
def print_metrics(pred, y_vals):
    """Print regression metrics and the share of predictions within 15% of the target.

    Parameters
    ----------
    pred : array-like
        Predicted values (any shape; flattened for the 15% check).
    y_vals : array-like
        True values with the same number of elements as ``pred``.

    Prints MAPE, MAE, MSE, RMSE and R^2 computed by ``sklearn.metrics``,
    followed by the fraction of elements whose absolute error is strictly
    below 15% of the magnitude of the corresponding true value.
    Returns nothing.
    """
    print('mape: ', metrics.mean_absolute_percentage_error(y_vals, pred))
    print('mae: ', metrics.mean_absolute_error(y_vals, pred))
    # Compute MSE once and reuse it for RMSE.
    mse = metrics.mean_squared_error(y_vals, pred)
    print('mse: ', mse)
    print('rmse: ', np.sqrt(mse))
    print('r2: ', metrics.r2_score(y_vals, pred))

    # Flatten BOTH arrays so element i of the error lines up with element i of
    # the target; indexing an unflattened (n, k) target by flat position picked
    # whole rows instead of single values.
    pred_flat = np.asarray(pred, dtype=float).ravel()
    true_flat = np.asarray(y_vals, dtype=float).ravel()
    abs_error = np.abs(pred_flat - true_flat)
    # Tolerance is relative to the target's magnitude so negative targets can
    # also be counted as hits.
    within_tolerance = abs_error < 0.15 * np.abs(true_flat)
    # Divide by the number of compared elements (equals len(pred) for 1-D or
    # single-column input).
    print('15% 准确度: ', np.count_nonzero(within_tolerance) / within_tolerance.size)

In [3]:
import os
# Turn the `feature` column into supervised windows: `window` consecutive
# readings are the inputs (x0..x{window-1}) and the `predict_num` readings
# that follow are the targets (y0..).
feature = "wind"
split_line = 5828  # windows only touch rows below this index; later cells evaluate on rows from here on
window = 8
predict_num = 1

series = data[feature]
n_windows = split_line - window - predict_num + 1
x_row_list = [
    {"x" + str(offset): series[start + offset] for offset in range(window)}
    for start in range(n_windows)
]
y_row_list = [
    {"y" + str(offset): series[start + window + offset] for offset in range(predict_num)}
    for start in range(n_windows)
]
X = pd.DataFrame(x_row_list)
y = pd.DataFrame(y_row_list)
X

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7
0,2.353996,3.061996,2.177998,1.043999,1.125996,3.483997,0.051998,5.151997
1,3.061996,2.177998,1.043999,1.125996,3.483997,0.051998,5.151997,2.963997
2,2.177998,1.043999,1.125996,3.483997,0.051998,5.151997,2.963997,2.063995
3,1.043999,1.125996,3.483997,0.051998,5.151997,2.963997,2.063995,3.645996
4,1.125996,3.483997,0.051998,5.151997,2.963997,2.063995,3.645996,5.103996
...,...,...,...,...,...,...,...,...
5815,6.301998,4.353996,4.503998,4.339996,5.899998,3.467999,2.217998,0.933998
5816,4.353996,4.503998,4.339996,5.899998,3.467999,2.217998,0.933998,0.831997
5817,4.503998,4.339996,5.899998,3.467999,2.217998,0.933998,0.831997,0.051998
5818,4.339996,5.899998,3.467999,2.217998,0.933998,0.831997,0.051998,0.705997


In [4]:
# Display the target frame (one column per predicted step: y0, ...).
y

Unnamed: 0,y0
0,2.963997
1,2.063995
2,3.645996
3,5.103996
4,3.463997
...,...
5815,0.831997
5816,0.051998
5817,0.705997
5818,0.659996


In [5]:
# Sanity check: (number of windows, window length).
X.shape

(5820, 8)

In [6]:
from sklearn.preprocessing import StandardScaler
# Standardise the targets. The fitted scaler is kept in `yscaler` because the
# forecasting cell uses it to map model output back to the original scale.
yscaler = StandardScaler().fit(y.to_numpy())
y_train = yscaler.transform(y.to_numpy())
pd.DataFrame(y_train)

Unnamed: 0,0
0,-0.458198
1,-0.839920
2,-0.168938
3,0.449451
4,-0.246130
...,...
5815,-1.362453
5816,-1.693278
5817,-1.415894
5818,-1.435405


In [7]:
from sklearn.decomposition import KernelPCA

# Feature pipeline: standardise the window columns, then project them with an
# RBF-kernel PCA. Both fitted objects are reused when forecasting.
# NOTE(review): no n_components is passed to KernelPCA; the recorded output
# shape below is (5820, 5810), far wider than the 8 input columns. Confirm
# this is intended.
scaler = StandardScaler()
pca = KernelPCA(kernel='rbf')
X_train = pca.fit_transform(scaler.fit_transform(X.to_numpy()))
X_train.shape

(5820, 5810)

In [8]:
# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features),
# the 3-D layout passed to the GRU layers.
# This cell overwrites X_train, so it is meant to run exactly once per kernel.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_train.shape

(5820, 1, 5810)

In [9]:
from tensorflow import keras
modelname='wind_gru'
if os.path.isdir(modelname):
    model = keras.models.load(modelname)
else:
    model = keras.models.Sequential()
    model.add(keras.layers.GRU(1024,input_shape = (predict_num, X_train.shape[2]),return_sequences = True))
    model.add(keras.layers.GRU(512,activation = 'gelu', recurrent_activation = 'gelu',return_sequences = True))
    model.add(keras.layers.Dense(predict_num))
    es_callback = keras.callbacks.EarlyStopping(patience = 1, restore_best_weights = True, monitor="loss")
    model.compile(loss = keras.losses.Huber(), optimizer = keras.optimizers.Nadam(0.001))
    model.summary()
    keras.utils.plot_model(model, to_file=modelname+'model_plot.pdf', show_shapes=True, show_layer_names=True)
    history=model.fit(X_train, y_train, epochs = 10, verbose = 1, shuffle = True, callbacks = [es_callback])
    import matplotlib.pyplot as plt
    %matplotlib notebook
    plt.rcParams['font.size']=64
    plt.rcParams['font.sans-serif']=['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    pic = plt.figure(figsize=(32,32))
    plt.plot(history.history['loss'])
    plt.title(u'基于风速历史的ICEEDMAN-GRU模型训练损失变化')
    plt.xlabel(u'Epoch')
    plt.ylabel(u'损失')
    pic.savefig(modelname + "_training_loss.pdf")
    model.save(modelname)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 1, 1024)           21000192  
                                                                 
 gru_1 (GRU)                 (None, 1, 512)            2362368   
                                                                 
 dense (Dense)               (None, 1, 1)              513       
                                                                 
Total params: 23,363,073
Trainable params: 23,363,073
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<IPython.core.display.Javascript object>



INFO:tensorflow:Assets written to: wind_gru\assets


INFO:tensorflow:Assets written to: wind_gru\assets


In [10]:
# --- In-sample predictions, mapped back to the original scale ---------------
# The model emits (samples, 1, predict_num). StandardScaler.inverse_transform
# is documented for 2-D (n_samples, n_features) input, so collapse the
# length-1 time axis first.
train_pred_scaled = model.predict(X_train).reshape(-1, predict_num)
predicted = yscaler.inverse_transform(train_pred_scaled).flatten()
predicted[predicted < 0] = 0  # negative outputs are floored at zero

# --- Recursive one-step-ahead forecast for rows split_line..end -------------
# Start from the known training targets; every new prediction is appended and
# becomes part of the input window for the following steps.
row_list = y_row_list.copy()
n_test = len(data) - split_line
for pointer in range(X.shape[0], X.shape[0] + n_test):
    # Last `window` values (true targets first, then earlier predictions).
    recent = np.array([row_list[row]['y0'] for row in range(pointer - window, pointer)])
    # Same preprocessing as training: scale -> kernel PCA -> add time axis.
    step_input = pca.transform(scaler.transform(recent.reshape(1, -1)))
    step_input = step_input.reshape(step_input.shape[0], 1, step_input.shape[1])
    step_scaled = model.predict(step_input).reshape(-1, predict_num)
    step_pred = yscaler.inverse_transform(step_scaled)[0]
    row_list.append({"y" + str(index): step_pred[index] for index in range(predict_num)})

# Keep only the forecast rows and floor negatives at zero. `clip` returns a new
# frame, so nothing is assigned into a slice of another frame.
predictions = pd.DataFrame(row_list)[X.shape[0]:].clip(lower=0)

In [11]:
# Rebind y_train to the UNSCALED targets so the metrics and plots below compare
# values on the original scale.
# NOTE(review): this shadows the standardised y_train that the training cell
# fits on; re-running the training cell after this one would train on unscaled
# targets.
y_train = y.to_numpy()

In [12]:
# Metrics on the training windows: in-sample predictions vs. unscaled targets.
print_metrics(predicted, y_train)

mape:  0.4052007097440886
mae:  0.8617081686842347
mse:  1.4221749494201408
rmse:  1.1925497681103883
r2:  0.7441643084132976
15% 准确度:  0.43350515463917527


In [18]:
# Metrics on the rows from split_line onward: recursive forecasts vs. the
# recorded values of `feature`.
print_metrics(predictions.to_numpy(), data[feature][split_line:].to_numpy())

mape:  0.16705489208714336
mae:  0.3000073288671494
mse:  0.20550836976509262
rmse:  0.4533303097798476
r2:  -0.27278081509514807
15% 准确度:  0.75


In [14]:
# Export timestamp, prediction and ground truth side by side. The first
# `window` rows are skipped: they have no prediction because they only ever
# served as model input. In-sample predictions come first, then the forecasts.
generated = data[['ds']].iloc[window:].assign(
    y_pred=np.append(predicted, predictions),
    y_real=data[feature].to_numpy()[window:],
)
generated.to_csv(modelname + ".csv")

In [15]:
import matplotlib.pyplot as plt
%matplotlib notebook
# Large fonts and a CJK-capable font for the poster-sized figure;
# 'axes.unicode_minus' off so minus signs render with that font.
plt.rcParams.update({
    'font.size': 64,
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Slice of the training windows to display.
fore_line=4100
end_line=4200
pic = plt.figure(figsize=(32,32))
plt.title('基于风速历史的ICEEDMAN-GRU模型训练集结果')
# Draw each series exactly once, with its legend label. The earlier extra
# unlabeled plot call drew the same two curves a second time underneath.
plt.plot(y_train[fore_line:end_line],'g',label=u'真实值')
plt.plot(predicted[fore_line:end_line],'r',label=u'预测值')
plt.xlabel(u'时间（3小时）')
plt.ylabel(u'近地面全风速（m/s）')
plt.legend()
pic.savefig(modelname + "_predict_train.pdf")

<IPython.core.display.Javascript object>

In [16]:
import matplotlib.pyplot as plt
%matplotlib notebook
# Large fonts and a CJK-capable font for the poster-sized figure;
# 'axes.unicode_minus' off so minus signs render with that font.
plt.rcParams.update({
    'font.size': 64,
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

pic = plt.figure(figsize=(32,32))
plt.title('基于风速历史的ICEEDMAN-GRU模型测试集结果')
# Draw each series exactly once, with its legend label. The earlier extra
# unlabeled plot call drew the same two curves a second time underneath.
plt.plot(data[feature][split_line:].to_numpy(),'g',label=u'真实值')
plt.plot(predictions.to_numpy(),'r',label=u'预测值')
plt.xlabel(u'时间（3小时）')
plt.ylabel(u'近地面全风速（m/s）')
plt.legend()
pic.savefig(modelname + "_predict_test.pdf")

<IPython.core.display.Javascript object>