## 利用LSTM进行负荷预测

----------

程序架构：
1. 文件读取
2. 数据预处理
    - 转化为df
    - 归一化
    - 转化为监督学习df
    - 数据集分割(6:2:2)
3. 模型

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

import requests
import csv
import os
from datetime import datetime
import time

import tensorflow as tf
from keras.models import Sequential

from keras.layers import Dense
from keras.layers import LSTM

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


# Load and prepare data

In [2]:
# 读取数据

file_path = r'C:\Users\KAI\Source\kais_lstm_load_forecasting\真空泵空压机(A_10000151_1).csv'

# 第一行做列名(header)\第一列做索引(id)\解析第二列为日期
# 参考https://www.cnblogs.com/traditional/p/12514914.html
data_raw = pd.read_csv(file_path, header = 0)

values = data_raw.values # 转化为array
values[:,1].astype('float32') #调整数据格式

# 调整时间戳
data_raw['ts'] = pd.to_datetime(data_raw['ts'], unit='ms')
data_raw.index=data_raw['ts']
del data_raw['ts']

# normalize
scaler = MinMaxScaler(feature_range=(0, 1))
dt_scaled = scaler.fit_transform(data_raw)  # dt_scaled is now a numpy array

# 划分数据集
split_idx_1 = int(len(dt_scaled) * 0.6)
split_idx_2 = int(len(dt_scaled) * 0.8)
train_set, valid_set, test_set = dt_scaled[:split_idx_1, :], dt_scaled[split_idx_1:split_idx_2, :], dt_scaled[split_idx_2:, :]

'''训练集：train_set | 验证集：valid_set | 测试集：test_set'''

'训练集：train_set | 验证集：valid_set | 测试集：test_set'

In [3]:
train_set

array([[0.83414313],
       [0.83257415],
       [0.83206802],
       ...,
       [0.79431117],
       [0.79137565],
       [0.79188177]])

In [4]:
# convertseries to supervised learning

def series_to_supervised(data, n_in=1, n_out=1, drop_nan=True):
    """
	Frame a time series as a supervised learning dataset.
	Arguments:
		data: Sequence of observations as a list or NumPy array.
		n_in: Number of lag observations as input (X).
		n_out: Number of observations as output (y).
		dropnan: Boolean whether or not to drop rows with NaN values.
	Returns:
		Pandas DataFrame of series framed for supervised learning.
	"""
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    colums, names = [],[]
    
    # 输入序列 (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        colums.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]

    # 预测序列 (t, t+1, ... t+n)
    for i in range(0, n_out):
        colums.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else: 
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]

    # put it all together
    agg = pd.concat(colums, axis=1)
    agg.columns = names

    # drop rows with NaN values
    if drop_nan:
        agg.dropna(inplace=True)

    return agg


In [5]:
train = series_to_supervised(train_set,2,2)

In [6]:
train.head()

Unnamed: 0,var1(t-2),var1(t-1),var1(t),var1(t+1)
2,0.834143,0.832574,0.832068,0.832878
3,0.832574,0.832068,0.832878,0.787985
4,0.832068,0.832878,0.787985,0.787681
5,0.832878,0.787985,0.787681,0.789149
6,0.787985,0.787681,0.789149,0.790161


In [7]:
# split into train and test sets

values = reframed.values
n_train_hours = 365 * 24 * 4 + 7920 + 150
train = values[:n_train_hours, :]
test = values[n_train_hours:, :]
# splitinto input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshapeinput to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)


NameError: name 'reframed' is not defined

In [None]:
# designnetwork

model =Sequential()
model.add(LSTM(5, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')

# fitnetwork
history =model.fit(train_X, train_y, epochs=50, batch_size=72, validation_data=(test_X,test_y), verbose=2, shuffle=False)

# evaluatethe model
scores =model.evaluate(test_X, test_y)