In [14]:
import time
import math
import tensorflow as tf
import keras 
import numpy as np
import pandas as pd
from keras.models import load_model,Model
from keras.engine.topology import Layer

In [15]:
# 定义融合层，将深度学习算法与历史均值算法融合
class Merge_Layer(Layer):
    def __init__(self, **kwargs):
        super(Merge_Layer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.para1 = self.add_weight(shape=(input_shape[0][1], input_shape[0][2]),
                                     initializer='uniform', trainable=True,
                                     name='para1')
        self.para2 = self.add_weight(shape=(input_shape[1][1], input_shape[1][2]),
                                     initializer='uniform', trainable=True,
                                     name='para2')
        super(Merge_Layer, self).build(input_shape)

    def call(self, inputs):
        mat1 = inputs[0]
        mat2 = inputs[1]
        output = mat1 * self.para1 + mat2 * self.para2
        # output = mat1 * 0.1 + mat2 * 0.9
        return output

    def compute_output_shape(self, input_shape):
        return input_shape[0]

In [16]:
#定义精度评价指标。为防止0值附近相对误差过大而导致的异常，定义mask层。
def mape_loss_func(preds, labels):
    mask = labels > 5
    return np.mean(np.fabs(labels[mask]-preds[mask])/labels[mask])

def smape_loss_func(preds, labels):
    mask= labels > 5
    return np.mean(2*np.fabs(labels[mask]-preds[mask])/(np.fabs(labels[mask])+np.fabs(preds[mask])))

def mae_loss_func(preds, labels):
    mask= labels > 5
    return np.fabs((labels[mask]-preds[mask])).mean()

def eliminate_nan(b):
    a = np.array(b)
    c = a[~np.isnan(a)]
    return c

In [17]:
near_road = np.array(pd.read_csv('2small_network_nearest_road_id.csv',header = None))
flow = np.array(pd.read_csv(r"2small_network_speed.csv", header= None)) #注意header=0 or None

# 利用滑动窗口的方式，重构数据为(n，最近路段数，输入时间窗，总路段数)的形式

time3 = time.time()

k = 5 # 参数k为需考虑的最近路段数
t_p = 29 # 参数t_p为总时间序列长度（天）
t_input = 12 #参数t_input为输入时间窗(5min颗粒度)
t_pre = 3 #参数t_pre为预测时间窗(5min颗粒度)
num_links = 30 #参数num_links为总路段数


image = []
for i in range(np.shape(near_road)[0]):
    road_id = []
    for j in range(k):
        road_id.append(near_road[i][j])
    image.append(flow[road_id, :])
image1 = np.reshape(image, [-1, k, len(flow[0,:])])
image2 = np.transpose(image1,(1,2,0))
image3 = []
label = []
day = []

for i in range(1,t_p):
    for j in range(180-t_input-t_pre):
        image3.append(image2[:, i*180+j:i*180+j+t_input, :][:])
        label.append(flow[:, i*180+j+t_input:i*180+j+t_input+t_pre][:])
        day.append(flow[:, (i-1)*180+j+t_input:(i-1)*180+j+t_input+t_pre][:])

image3 = np.asarray(image3)
label = np.asarray(label)
day =  np.asarray(day)

print(np.shape(image3))
print(np.shape(label))
print(np.shape(day))

#划分前80%数据为训练集，最后20%数据为测试集
image_train_source = image3[:np.shape(image3)[0]*1//10]
image_test_source = image3[np.shape(image3)[0]*1//10:]
label_train_source = label[:np.shape(label)[0]*1//10]
label_test_source = label[np.shape(label)[0]*1//10:]

day_train_source = day[:np.shape(day)[0]*1//10]
day_test_source = day[np.shape(day)[0]*1//10:]


time4 = time.time()
print('input done %g' % (time4-time3))

(4620, 5, 12, 30)
(4620, 30, 3)
(4620, 30, 3)
input done 0.0319412


In [18]:
near_road = np.array(pd.read_csv('small_network_nearest_road_id.csv',header = None))
flow = np.array(pd.read_csv(r"small_network_speed.csv", header= None)) #注意header=0 or None
# 利用滑动窗口的方式，重构数据为(n，最近路段数，输入时间窗，总路段数)的形式

time3 = time.time()

k = 5 # 参数k为需考虑的最近路段数
t_p = 29 # 参数t_p为总时间序列长度（天）
t_input = 12 #参数t_input为输入时间窗(5min颗粒度)
t_pre = 3 #参数t_pre为预测时间窗(5min颗粒度)
num_links = 30 #参数num_links为总路段数


image = []
for i in range(np.shape(near_road)[0]):
    road_id = []
    for j in range(k):
        road_id.append(near_road[i][j])
    image.append(flow[road_id, :])
image1 = np.reshape(image, [-1, k, len(flow[0,:])])
image2 = np.transpose(image1,(1,2,0))
image3 = []
label = []
day = []

for i in range(1,t_p):
    for j in range(180-t_input-t_pre):
        image3.append(image2[:, i*180+j:i*180+j+t_input, :][:])
        label.append(flow[:, i*180+j+t_input:i*180+j+t_input+t_pre][:])
        day.append(flow[:, (i-1)*180+j+t_input:(i-1)*180+j+t_input+t_pre][:])

image3 = np.asarray(image3)
label = np.asarray(label)
day =  np.asarray(day)

print(np.shape(image3))
print(np.shape(label))
print(np.shape(day))

#划分前80%数据为训练集，最后20%数据为测试集
image_train_target = image3[:np.shape(image3)[0]*1//10]
image_test_target = image3[np.shape(image3)[0]*1//10:]
label_train_target = label[:np.shape(label)[0]*1//10]
label_test_target = label[np.shape(label)[0]*1//10:]

day_train_target = day[:np.shape(day)[0]*1//10]
day_test_target = day[np.shape(day)[0]*1//10:]


time4 = time.time()
print('input done %g' % (time4-time3))

(4620, 5, 12, 30)
(4620, 30, 3)
(4620, 30, 3)
input done 0.0329137


In [19]:
#模型构建
input_data = keras.Input(shape=(k,t_input,num_links), name='input_data')
input_HA = keras.Input(shape=(num_links, t_pre), name='input_HA')

x = keras.layers.BatchNormalization(input_shape =(k,t_input,num_links))(input_data)

x = keras.layers.Conv2D(
                           filters = 30,
                           kernel_size = 3,
                           strides = 1,
                           padding="SAME",
                           activation='relu')(x)

x = keras.layers.AveragePooling2D(pool_size = (2,2),
                                strides = 1,
                                padding = "SAME",
                                )(x)

x = keras.layers.BatchNormalization()(x)

x = keras.layers.Conv2D(
                       filters = 30,
                       kernel_size = 3,
                       strides = 1,
                       padding="SAME",
                       activation='relu')(x)

x = keras.layers.AveragePooling2D(pool_size = (2,2),
                                strides = 1,
                                padding = "SAME",
                                )(x)
x = keras.layers.Flatten()(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(num_links*t_pre, activation='relu')(x)

output = keras.layers.Reshape((num_links,t_pre))(x)

output_final = Merge_Layer()([output, input_HA])

# construct model
finish_model = keras.models.Model([input_data,input_HA], [output_final])

finish_model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_data (InputLayer)         (None, 5, 12, 30)    0                                            
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 5, 12, 30)    120         input_data[0][0]                 
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 5, 12, 30)    8130        batch_normalization_4[0][0]      
__________________________________________________________________________________________________
average_pooling2d_3 (AveragePoo (None, 5, 12, 30)    0           conv2d_3[0][0]                   
__________________________________________________________________________________________________
batch_norm

In [20]:
#参数加载
finish_model.load_weights('source.h5')


In [21]:
#模型预测
model_pre = finish_model.predict([image_test_target,day_test_target])


In [22]:
#预测结果存储
# model_pre = np.reshape(model_pre,[103, 6])
# model_pre1 = pd.DataFrame(model_pre)
# model_pre1.to_csv('预测值.csv', index = False)

In [23]:
#transfer without FT 预测精度计算

mape_mean = mape_loss_func(model_pre, label_test_target)
smape_mean = smape_loss_func(model_pre, label_test_target)
mae_mean = mae_loss_func(model_pre, label_test_target)

print('mape = ' + str(mape_mean) + '\n' + 'smape = ' + str(smape_mean) + '\n' + 'mae = ' + str(mae_mean))

mape = 0.7780085205753506
smape = 1.3029564437137449
mae = 20.7999441352137


In [24]:

middle = Model(inputs=[input_data, input_HA],outputs=finish_model.get_layer('dense_1').output)

ValueError: No such layer: dense_1

In [12]:
middle_result_source = middle.predict([image_train_source, day_train_source])
middle_result_target = middle.predict([image_train_target, day_train_target])

In [13]:
  
import torch

def CORAL(source, target, **kwargs):
    d = source.data.shape[1]
    ns, nt = source.data.shape[0], target.data.shape[0]
    # source covariance
    xm = torch.mean(source, 0, keepdim=True) - source
    xc = xm.t() @ xm / (ns - 1)

    # target covariance
    xmt = torch.mean(target, 0, keepdim=True) - target
    xct = xmt.t() @ xmt / (nt - 1)

    # frobenius norm between source and target
    loss = torch.mul((xc - xct), (xc - xct))
    loss = torch.sum(loss) / (4*d*d)
    return loss.numpy()

OSError: [WinError 127] 找不到指定的程序。 Error loading "C:\Users\ad4\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\lib\caffe2_detectron_ops_gpu.dll" or one of its dependencies.

In [69]:
CORAL (torch.from_numpy(middle_result_source), torch.from_numpy(middle_result_target))

array(1477.2236, dtype=float32)

In [70]:
from keras import backend as K

In [71]:
loss1 = K.mean(K.square(output_final - label_train_target), axis=-1) 
loss2 = 0.001 * CORAL (torch.from_numpy(middle_result_source), torch.from_numpy(middle_result_target))
overall_loss = loss1 + loss2

In [96]:
def new_loss(output_final, label_train_target):
    middle = Model(inputs=[input_data, input_HA],outputs=finish_model.get_layer('dense_1').output)
    middle_result_source = middle.predict([image_train_source, day_train_source])
    middle_result_target = middle.predict([image_train_target, day_train_target])

    loss1 = K.mean(K.square(output_final - label_train_target), axis=-1) 
    loss2 = 0.05 * CORAL (torch.from_numpy(middle_result_source), torch.from_numpy(middle_result_target))
    overall_loss = loss1 + loss2
    return overall_loss


In [97]:
finish_model.compile(optimizer='adam',loss=new_loss)

In [98]:
finish_model.fit([image_train_target, day_train_target], label_train_target, epochs=200, batch_size=462,
validation_data=([image_test_target,day_test_target], label_test_target))

2435
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.History at 0x1dae52f5148>

In [99]:
model_pre = finish_model.predict([image_test_target,day_test_target])

In [100]:
#transfer with DAN 预测精度计算

mape_mean = mape_loss_func(model_pre, label_test_target)
smape_mean = smape_loss_func(model_pre, label_test_target)
mae_mean = mae_loss_func(model_pre, label_test_target)

print('mape = ' + str(mape_mean) + '\n' + 'smape = ' + str(smape_mean) + '\n' + 'mae = ' + str(mae_mean))

mape = 0.25578248666602604
smape = 0.23014463067407498
mae = 5.700419339360104


In [44]:
mape_list = []
for i in range(num_links):
    a1 = mape_loss_func(model_pre[:,i,:], label_test_target[:,i,:])
    mape_list.append(a1)

mape_pd = pd.Series(mape_list)
mape_pd.sort_values()

25    0.080393
0     0.091972
1     0.097362
7     0.101746
8     0.119456
17    0.121159
20    0.131436
6     0.131801
15    0.135690
3     0.138269
14    0.139419
12    0.156367
5     0.157407
2     0.172393
10    0.185532
21    0.186390
19    0.194352
28    0.214766
9     0.225588
11    0.230525
18    0.264293
13    0.304826
27    0.320274
29    0.334921
24    0.336816
23    0.353137
26    0.361229
4     0.367652
16    0.381255
22    0.415636
dtype: float64