In [1]:
from keras.models import Sequential, Model
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers.normalization import BatchNormalization
from keras.utils import plot_model
from keras.models import load_model
from keras import metrics
import numpy as np
import os
import json
import keras
import matplotlib.pyplot as plt
import math
from keras import losses
import shutil
from keras import backend as K
from keras import optimizers
from sklearn import preprocessing

Using TensorFlow backend.


In [2]:
# Print floating-point values in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [3]:
# Load the station array from disk.
# NOTE(review): the training-setup cell loads this same file again as
# `station_array`; `part_D_station` itself is not referenced in the cells shown.
part_D_station = np.load('part_D_station.npy')

In [4]:
# Every entry of dataImage/ in lexicographic order, plus its absolute path.
fileNames = sorted(os.listdir('dataImage/'))
image_dir = os.path.abspath('dataImage/')
filePaths = [os.path.join(image_dir, name) for name in fileNames]

In [6]:
# 发送消息
from twilio.rest import Client
def sendMessage(sentInfo):
    """Send `sentInfo` as an SMS through the Twilio REST client.

    The Twilio credentials are read from the environment variables
    ``TWILIO_ACCOUNT_SID`` and ``TWILIO_AUTH_TOKEN`` instead of being embedded
    in the notebook, so sharing or committing the notebook does not leak them.

    Args:
        sentInfo: Value to send; it is rendered with ``"{0}".format`` to build
            the message body.

    Raises:
        KeyError: If either credential environment variable is not set.
    """
    # SECURITY: credentials must never be written into this cell. Any SID/token
    # that was ever stored in the notebook should be revoked in the Twilio console.
    account_sid = os.environ["TWILIO_ACCOUNT_SID"]
    auth_token = os.environ["TWILIO_AUTH_TOKEN"]
    client = Client(account_sid, auth_token)
    client.messages.create(
        # Numbers in China must be prefixed with the country code 86.
        to="8613246857840",
        from_="(425) 341-1571 ",
        body="{0}".format(sentInfo))

In [7]:
# 产生数据，返回数据为(timeStep, 56, 56, 1),内容索引为

def data_part(origin_array, target_array):
    """Keep the rows of `origin_array` whose first value is in `target_array`.

    `origin_array` is flattened to rows of three values. Rows whose first value
    appears in `target_array` are kept in their original order and reshaped to
    a (28, 28, 3) grid.
    (Presumably the first value of a row is a station id -- TODO confirm.)

    Args:
        origin_array: Array-like whose size is a multiple of 3.
        target_array: Array-like of values to match against the first column.

    Returns:
        numpy.ndarray of shape (28, 28, 3) holding the matching rows.

    Raises:
        ValueError: If the number of matching rows is not 28 * 28.
    """
    rows = np.reshape(origin_array, (-1, 3))
    # Vectorised membership test; replaces a Python loop that scanned
    # target_array once per row.
    selected = rows[np.isin(rows[:, 0], target_array)]
    if len(selected) != 28 * 28:
        raise ValueError(
            "expected {0} rows matching target_array, found {1}".format(28 * 28, len(selected)))
    return np.reshape(selected, (28, 28, 3))

def dataGenTimeStep(filePaths, start, timeStep):
    """Load `timeStep` consecutive files beginning at index `start`.

    Each file is read with ``np.load``; only index 1 of its last axis is kept,
    with a trailing singleton axis added.

    Args:
        filePaths: Sequence of paths readable by ``np.load``.
        start: Index in `filePaths` of the first file to load.
        timeStep: Number of consecutive files to load.

    Returns:
        numpy.ndarray stacking the loaded frames along a new leading axis.
    """
    frames = []
    for path in (filePaths[start + step] for step in range(timeStep)):
        frame = np.load(path)
        frames.append(frame[:, :, 1][:, :, np.newaxis])
    return np.array(frames)

# 产生一个batch_size的数据
def dataGenBatchSize(filePaths, timeStep, start, predictFutureTime, batch_size, station_array):
    """Build one batch of (input sequence, target frame) pairs.

    Sample ``i`` (for ``i`` in ``0 .. batch_size - 1``) consists of:
      * input:  ``dataGenTimeStep(filePaths, start + i, timeStep)``
      * target: the file at index
        ``int(start + i + timeStep - 1 + predictFutureTime / 5)``, filtered with
        ``data_part(..., station_array)``, keeping index 1 of the last axis with
        a trailing singleton axis.
    (Presumably consecutive files are 5 time units apart, hence the ``/ 5`` --
    TODO confirm.)

    Returns:
        Tuple ``(data, target)`` of numpy arrays with the samples stacked along
        the leading axis.
    """
    sequences = []
    targets = []
    for offset in range(batch_size):
        sample_start = start + offset
        sequences.append(dataGenTimeStep(filePaths, sample_start, timeStep))

        # The target lies predictFutureTime / 5 files after the last input frame.
        target_index = int(sample_start + timeStep - 1 + predictFutureTime / 5)
        station_frame = data_part(np.load(filePaths[target_index]), station_array)
        targets.append(station_frame[:, :, 1][:, :, np.newaxis])
    return (np.array(sequences), np.array(targets))

# timeStep---预测的时间步
# predictFutureTime--预测未来的时间
# batch_size -- 每次输入的batch，更新一次
def dataGenerate(filePaths, timeStep, predictFutureTime, batch_size, station_array):
    """Yield (data, target) batches forever, cycling over `filePaths`.

    Batches start at sample indices ``0, batch_size, 2 * batch_size, ...``
    (strictly below the last valid sample start). Every batch holds
    `batch_size` samples except the last one of a pass, which holds all
    remaining samples up to and including the last valid start, so that no
    file index past the end of `filePaths` is ever requested.

    Args:
        filePaths: Sequence of file paths, in temporal order.
        timeStep: Number of consecutive files that form one input sequence.
        predictFutureTime: Prediction horizon; the target file lies
            ``int(predictFutureTime / 5)`` files after the last input file
            (the same offset dataGenBatchSize uses).
        batch_size: Number of samples per batch (except the last of a pass).
        station_array: Passed through to dataGenBatchSize.

    Yields:
        The ``(data, target)`` tuple returned by dataGenBatchSize.

    Raises:
        ValueError: On first iteration, if `filePaths` is too short to build a
            single sample (previously this case looped forever without yielding).
    """
    # Offset, in files, between the last input frame and the target frame.
    # For predictFutureTime = 15 this is the 3 that used to be hard-coded.
    lookahead = int(predictFutureTime / 5)
    # Largest sample start whose target index is still inside filePaths.
    end_index = len(filePaths) - timeStep - lookahead
    if end_index < 0:
        raise ValueError(
            "need at least {0} files for timeStep={1} and predictFutureTime={2}, got {3}".format(
                timeStep + lookahead, timeStep, predictFutureTime, len(filePaths)))

    # range() is empty when end_index == 0, yet start 0 is still a valid sample.
    starts = list(range(0, end_index, batch_size)) or [0]
    last_start = starts[-1]
    while True:
        for start in starts:
            if start == last_start:
                # Shrink (or stretch by one) the final batch so it ends exactly
                # at end_index instead of running past the file list.
                current_batch_size = end_index - last_start + 1
            else:
                current_batch_size = batch_size
            yield dataGenBatchSize(filePaths, timeStep, start, predictFutureTime,
                                   current_batch_size, station_array)

In [8]:
# 定义损失函数 MSPE

def mean_squared_percentage_error(y_true, y_pred):
    """Mean squared percentage error (MSPE), used as the training loss.

    Computes ``mean(((y_pred - y_true) / max(|y_true|, epsilon)) ** 2)`` over
    all elements. The result is a fraction; it is not multiplied by 100.

    Args:
        y_true: Ground-truth values; cast to the dtype of `y_pred`.
        y_pred: Predictions; converted to a float64 constant if not already a
            tensor.

    Returns:
        Scalar tensor holding the mean squared relative error.
    """
    if not K.is_tensor(y_pred):
        y_pred = K.constant(y_pred, dtype='float64')
    y_true = K.cast(y_true, y_pred.dtype)
    # Clip the magnitude rather than the signed value: clipping y_true directly
    # turns every negative target into epsilon and blows the loss up. This
    # matches the denominator used by mean_absolute_percentage_error.
    denominator = K.clip(K.abs(y_true), K.epsilon(), None)
    return K.mean(K.square((y_pred - y_true) / denominator))


# 定义评价函数metrics, MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error (MAPE), used as the evaluation metric.

    Computes ``mean(|y_true - y_pred| / max(|y_true|, epsilon))`` over all
    elements. The result is a fraction; it is not multiplied by 100.

    Args:
        y_true: Ground-truth values; cast to the dtype of `y_pred`.
        y_pred: Predictions; converted to a float64 constant if not already a
            tensor.

    Returns:
        Scalar tensor holding the mean absolute relative error.
    """
    if not K.is_tensor(y_pred):
        y_pred = K.constant(y_pred, dtype='float64')
    y_true = K.cast(y_true, y_pred.dtype)
    residual = y_true - y_pred
    # Guard against division by zero for targets that are (close to) zero.
    safe_magnitude = K.clip(K.abs(y_true), K.epsilon(), None)
    return K.mean(K.abs(residual / safe_magnitude))


In [9]:
# Metric functions handed to seq.compile(metrics=...).
# NOTE(review): this rebinds the name `metrics`, shadowing the module imported
# by `from keras import metrics` in the first cell.
metrics = [mean_absolute_percentage_error]

In [16]:
# Three stacked ConvLSTM2D layers with batch normalisation in between.
# Input: sequences of any length of 56x56 single-channel frames.
# The last layer returns only its final time step and uses stride 2, giving the
# (28, 28, 1) output shown by seq.summary().
seq = Sequential([
    ConvLSTM2D(filters=32, kernel_size=(3, 3),
               input_shape=(None, 56, 56, 1),
               padding='same', return_sequences=True),
    BatchNormalization(),
    ConvLSTM2D(filters=32, kernel_size=(3, 3),
               padding='same', return_sequences=True),
    BatchNormalization(),
    ConvLSTM2D(filters=1, kernel_size=(3, 3), strides=(2, 2),
               padding='same', return_sequences=False),
])

# Train on the custom MSPE loss with the AMSGrad variant of Adam.
adam = keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=True)
seq.compile(loss=mean_squared_percentage_error, optimizer=adam, metrics=metrics)

In [17]:
# checkpoint
# File-name template for checkpoints; Keras fills in the epoch number and val_loss.
savename = "{epoch:02d}-{val_loss:.2f}.hdf5"
# exist_ok=True lets this cell be re-run without failing on the existing directory.
os.makedirs('model', exist_ok=True)
# save_best_only=False: a checkpoint is written after every epoch, whether or
# not val_loss improved. Set it to True to keep only improving epochs.
checkpoint = keras.callbacks.ModelCheckpoint(os.path.join('model', savename), monitor='val_loss', verbose=1, save_best_only=False, mode='auto')

In [18]:
# Print the layer-by-layer summary (output shapes and parameter counts).
seq.summary()
# plot_model(seq, show_layer_names = True, show_shapes = True)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d_4 (ConvLSTM2D)  (None, None, 56, 56, 32)  38144     
_________________________________________________________________
batch_normalization_3 (Batch (None, None, 56, 56, 32)  128       
_________________________________________________________________
conv_lst_m2d_5 (ConvLSTM2D)  (None, None, 56, 56, 32)  73856     
_________________________________________________________________
batch_normalization_4 (Batch (None, None, 56, 56, 32)  128       
_________________________________________________________________
conv_lst_m2d_6 (ConvLSTM2D)  (None, 28, 28, 1)         1192      
Total params: 113,448
Trainable params: 113,320
Non-trainable params: 128
_________________________________________________________________


In [14]:
# seq.metrics_names

class LossHistory(keras.callbacks.Callback):
    """Keras callback that records the loss and MAPE metric after every batch.

    Attributes (re-created at the start of every training run):
        losses: List of the ``'loss'`` value logged for each batch.
        mape: List of the ``'mean_absolute_percentage_error'`` value logged for
            each batch.
    """

    def on_train_begin(self, logs=None):
        """Start a training run with empty per-batch histories."""
        self.losses = []
        self.mape = []

    def on_batch_end(self, batch, logs=None):
        """Append this batch's loss and MAPE (``None`` when a key is absent)."""
        # A None default avoids sharing one mutable dict across calls.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.mape.append(logs.get('mean_absolute_percentage_error'))

In [19]:
total = len(filePaths)
EPOCHS = 10
timeStep = 36            # number of consecutive files per input sequence
predictFutureTime = 15   # prediction horizon; converted to a file offset via / 5
batch_size = 16

history_train = LossHistory()
station_array = np.load('part_D_station.npy')

# dataGenerate cannot start a sample in the last `timeStep + 3` files of a split
# and yields ceil((n_files - timeStep - 3) / batch_size) batches per pass. With
# predictFutureTime = 15, int(predictFutureTime / 5) is that 3. Deriving the
# step counts from the same quantity makes one epoch exactly one pass over the
# generator; counting files instead lets epochs drift across passes.
target_offset = int(predictFutureTime / 5)

# Training split: the first 80 % of the files (integer arithmetic, no float rounding).
trainStart = 0
trainEnd = total * 8 // 10
train_file_paths = filePaths[trainStart:trainEnd]
train_steps = max(1, math.ceil((len(train_file_paths) - timeStep - target_offset) / batch_size))
train_generator = dataGenerate(train_file_paths, timeStep, predictFutureTime, batch_size, station_array = station_array)

# Validation split: the next 10 % of the files.
valStart = total * 8 // 10
valEnd = total * 9 // 10
val_file_paths = filePaths[valStart:valEnd]
val_steps = max(1, math.ceil((len(val_file_paths) - timeStep - target_offset) / batch_size))
val_generator = dataGenerate(val_file_paths, timeStep, predictFutureTime, batch_size, station_array = station_array)

In [13]:
# Display the names of the values Keras reports for this model (the loss, then
# the compiled metric); LossHistory reads the per-batch logs using these keys.
seq.metrics_names

['loss', 'mean_absolute_percentage_error']

In [None]:
# Train from the generators; per-batch values are recorded by history_train and
# a checkpoint file is written by the checkpoint callback.
historyTrainEpoch = seq.fit_generator(
    generator=train_generator,
    steps_per_epoch=train_steps,
    epochs=EPOCHS,
    callbacks=[history_train, checkpoint],
    validation_data=val_generator,
    validation_steps=val_steps,
    verbose=1,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/6

Epoch 00001: saving model to weights-improvement-01-139.97.hdf5
Epoch 2/6

Epoch 00002: saving model to weights-improvement-02-5.84.hdf5
Epoch 3/6

In [13]:

## save history
# Show the history dict returned by fit_generator. This needs the training cell
# to have finished; the NameError recorded below this cell shows it was run
# while `historyTrainEpoch` was still undefined.
print (historyTrainEpoch.history)

## transfom the data to json
class MyEncoder(json.JSONEncoder):
    """JSON encoder that accepts NumPy scalars and arrays.

    NumPy numbers are written as JSON numbers (not strings), so the saved
    history can be read back without parsing every value.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of `obj`.

        Args:
            obj: Object the standard encoder could not serialise.

        Returns:
            A native Python number/bool for NumPy scalars, or a (nested) list
            for NumPy arrays.

        Raises:
            TypeError: For any other unsupported type (raised by the base class).
        """
        if isinstance(obj, (np.floating, np.integer, np.bool_)):
            # .item() converts e.g. np.float32 -> float, np.int64 -> int.
            return obj.item()
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

# 保存训练过程中的loss，以及验证过程中的loss、mspe
# Per-epoch history (training and validation values) goes to JSON.
with open('process_data_each_time.json', 'w') as history_file:
    json.dump(historyTrainEpoch.history, history_file, cls=MyEncoder)

# Per-batch training loss and MAPE collected by the LossHistory callback.
for npy_name, batch_values in (
    ('history_train_loss.npy', history_train.losses),
    ('history_train_mape.npy', history_train.mape),
):
    np.save(npy_name, np.array(batch_values))

# 保存模型(使用callback对每轮的迭代进行保存，因此不用该方法保存)
# if os.path.exists('model'):
#     shutil.rmtree('model')
# os.makedirs('model')
# model_path = os.path.join('model','model.h5')
# seq.save(model_path)

NameError: name 'historyTrainEpoch' is not defined

In [None]:
# #  加载模型(模型中自定义的函数，需要使用custom_objects进行指明)
# model_path = os.path.join('model/model' ,'model.h5')
# seq = load_model(model_path, custom_objects={'my_loss': my_loss,'mean_squared_percentage_error':mean_squared_percentage_error})
# history_evaluate = LossHistory()

# # 测试数据
# testStart = math.floor(total/10*9)
# testEnd = math.floor(total)
# test_steps = math.ceil((testEnd - testStart)/batch_size)
# test_file_paths = filePaths[testStart:testEnd]
# test_data = dataGenerate(test_file_paths, timeStep, predictFutureTime, batch_size)

# seq.summary()
# result_evaluate = seq.evaluate_generator(test_data, steps = test_steps, \
#                                               callbacks = [history_evaluate], verbose = 0)

In [14]:
ConvLSTM2D?

[0;31mInit signature:[0m
[0mConvLSTM2D[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mfilters[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkernel_size[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstrides[0m[0;34m=[0m[0;34m([0m[0;36m1[0m[0;34m,[0m [0;36m1[0m[0;34m)[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpadding[0m[0;34m=[0m[0;34m'valid'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdata_format[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdilation_rate[0m[0;34m=[0m[0;34m([0m[0;36m1[0m[0;34m,[0m [0;36m1[0m[0;34m)[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mactivation[0m[0;34m=[0m[0;34m'tanh'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrecurrent_activation[0m[0;34m=[0m[0;34m'hard_sigmoid'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0muse_bias[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkernel_initializer[0m[0;34m=[0m[0;34m'glorot_uniform'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m   