## 利用LSTM进行负荷预测

----------

程序架构：
1. 文件读取
2. 数据预处理
    - 转化为df
    - 归一化
    - 转化为监督学习df
    - 数据集分割(6:2:2)
3. 模型

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

import requests
import csv
import os
from datetime import datetime
import time

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Load and prepare data

In [12]:
# 读取数据

file_path = r'C:\Users\KAI\Source\kais_lstm_load_forecasting\A_10000151_1.csv'

# 第一行做列名(header)\第一列做索引(id)\解析第二列为日期
# 参考https://www.cnblogs.com/traditional/p/12514914.html
data_raw = pd.read_csv(file_path, header = 0)

# values = data_raw.values # 转化为array
# values[:,1].astype('float32') #调整数据格式

# 调整时间戳
data_raw['ts'] = pd.to_datetime(data_raw['ts'], unit='ms')
data_raw.index=data_raw['ts']
del data_raw['ts']

# normalize
scaler = MinMaxScaler(feature_range=(0, 1))
dt_scaled = scaler.fit_transform(data_raw)  # dt_scaled is now a numpy array

In [13]:
data_raw

dt_scaled

array([[0.77350946],
       [0.77553396],
       [0.77548335],
       ...,
       [0.79911934],
       [0.80149813],
       [0.79881567]])

In [14]:
# convertseries to supervised learning

def series_to_supervised(data, n_in=1, n_out=1, drop_nan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
        data: Sequence of observations as a list or NumPy array.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    colums, names = [],[]
    
    # 输入序列 (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        colums.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]

    # 预测序列 (t, t+1, ... t+n)
    for i in range(0, n_out):
        colums.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else: 
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]

    # put it all together
    agg = pd.concat(colums, axis=1)
    agg.columns = names

    # drop rows with NaN values
    if drop_nan:
        agg.dropna(inplace=True)

    return agg


In [15]:
# 用前十步预测当前
df_dt_reframed = series_to_supervised(dt_scaled,60,1) 

In [16]:
# 划分数据集
split_idx_1 = int(df_dt_reframed.values.shape[0] * 0.6)
split_idx_2 = int(df_dt_reframed.values.shape[0] * 0.8)
train_set, valid_set, test_set = df_dt_reframed.values[:split_idx_1, :], df_dt_reframed.values[split_idx_1:split_idx_2, :], df_dt_reframed.values[split_idx_2:, :]
'''训练集：train_set | 验证集：valid_set | 测试集：test_set'''

'训练集：train_set | 验证集：valid_set | 测试集：test_set'

In [17]:
# 划分输入输出（最后一列为输出/待预测值）
train_X, train_Y = train_set[:, :-1], train_set[:, -1]
valid_X, valid_Y = valid_set[:, :-1], valid_set[:, -1]
test_X, test_Y = test_set[:, :-1], test_set[:, -1]
train_X.shape

(24420, 60)

In [18]:
# reshape input to be 3D [samples, timesteps, dim]
train_X = train_X.reshape((-1, train_X.shape[1], 1))
valid_X = valid_X.reshape((-1, valid_X.shape[1], 1))
test_X = test_X.reshape((-1, test_X.shape[1], 1))

In [19]:
# 设计网络
model = Sequential()

model.add(LSTM(units = 50, return_sequences = True, input_shape = (train_X.shape[1], 1)))
model.add(Dropout(0.2))

model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))

model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))

model.add(LSTM(units = 50))
model.add(Dropout(0.2))

model.add(Dense(units = 1))

# 修改监控度量（默认 loss）：metrics=['mse']
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

history = model.fit(train_X, train_Y, epochs = 50, batch_size = 100)

# # evaluatethe model
# scores =model.evaluate(test_X, test_Y)
# #print scores
# #lcd print("\n\n\t%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [20]:
# model.summary()
history.history

{'loss': [0.020044738426804543,
  0.006574369966983795,
  0.005642150528728962,
  0.004795796703547239,
  0.004317102953791618,
  0.0039339568465948105,
  0.003585958853363991,
  0.0032764943316578865,
  0.002851820085197687,
  0.0025752047076821327,
  0.0022582109086215496,
  0.0020171424839645624,
  0.0017851293087005615,
  0.001609625993296504,
  0.0014679612359032035,
  0.0013391454704105854,
  0.001189196016639471,
  0.0010916643077507615,
  0.0009741337853483856,
  0.0008762428187765181,
  0.0008334920858033001,
  0.0008078789687715471,
  0.0007480327622033656,
  0.0007036455208435655,
  0.000679460063111037,
  0.0006917577120475471,
  0.0006837324472144246,
  0.0006697788485325873,
  0.00061029102653265,
  0.0006319688982330263,
  0.0006318446830846369,
  0.0005821312661282718,
  0.0006136958836577833,
  0.0005753532750532031,
  0.000590311479754746,
  0.0005672619445249438,
  0.0005493147764354944,
  0.0005518516409210861,
  0.0005324206431396306,
  0.0005433905171230435,
  0.0

In [21]:
#预测
predicted = model.predict(test_X)
#对预测数据还原。
predicted = scaler.inverse_transform(predicted)

real_value = scaler.inverse_transform(test_Y)

ValueError: Expected 2D array, got 1D array instead:
array=[0.8144549  0.80347201 0.8001822  ... 0.79911934 0.80149813 0.79881567].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
plt.plot(real_value, color = 'red' , label = 'real_value')
plt.plot(predicted, color = 'blue', label = 'predicted')
plt.title('load predict')
plt.xlabel('Time')
plt.ylabel('Load Value')
plt.legend()
plt.show()