In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold #cross_validation

from sklearn.linear_model import BayesianRidge, LinearRegression, ElasticNet  # 批量导入要实现的回归算法
from sklearn.svm import LinearSVR  # SVM中的回归算法
from sklearn.svm import SVR  # SVM中的回归算法
from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor  # 集成算法
from sklearn.neighbors import KNeighborsRegressor #  knn
from sklearn.model_selection import cross_val_score  # 交叉检验
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score  # 批量导入指标算法
from sklearn.model_selection import GridSearchCV 
import matplotlib.pyplot as plt  # 导入图形展示库
from sklearn.preprocessing import StandardScaler #引入标准化模块
import seaborn as sns
from sklearn.neural_network import MLPRegressor
from sklearn.utils.validation import check_array as check_arrays
from sklearn.model_selection import train_test_split


### Combine reconstructed LongLat data with full table created by step 1.

In [2]:
# Load the reconstructed longitude and latitude tables (read without a header row).
re_Long = pd.read_csv('reconstructed_Longitude.csv', header=None, encoding='utf-8')
re_Lat = pd.read_csv('reconstructed_Lat.csv', header=None, encoding='utf-8')

In [3]:
# Load the original, noise-filtered table (read without a header row).
original_table = pd.read_csv(
    'noise_filtered_Route11Bus51938440.csv',
    header=None,
    encoding='utf-8',
)

re_Long and re_Lat do not have the same length, so we take the smaller of the two lengths before concatenating them with the original table.

In [4]:
# Number of rows shared by both reconstructed tables (the shorter of the two).
min_len = min(len(re_Long), len(re_Lat))

### prepare for the prediction dataset
From above, we know the min_len is the length of re_Lat

In [5]:
# Keep the rows of the original table from position len(original_table) - min_len
# onward, so the table ends on the same row as the reconstructed series.
table_1 = original_table.iloc[len(original_table) - min_len:]

In [6]:
# Trim BOTH reconstructed tables to their last min_len rows so they have the same
# length. Trimming only re_Long is correct only when re_Lat happens to be the
# shorter table; trimming re_Lat as well is a no-op in that case and keeps the
# later head()/tail() shift aligned if the lengths are ever the other way round.
re_Long = re_Long.tail(min_len)
re_Lat = re_Lat.tail(min_len)

### prediction test for re_Long

y has to be shifted by the prediction step, because the state variable in the reconstructed space is used to predict the state variable in the original space at the next step (or several steps ahead).

自己写交叉检验

In [7]:
def prediction_process(X, y, test_size=0.2, random_state=None):
    """Fit five regressors on a chronological hold-out split and print test metrics.

    The rows are split without shuffling, so the leading part of the data is
    used for training and the trailing ``test_size`` fraction for testing.
    Every model is fitted on the training part and evaluated on the test part
    with explained variance, MAE, MSE and R^2; the resulting table is printed.

    Parameters
    ----------
    X : array-like with a ``.shape`` of (n_samples, n_features)
        Feature matrix.
    y : array-like with n_samples rows
        Regression target, aligned row-by-row with ``X``.
    test_size : float, default 0.2
        Fraction of trailing rows held out for testing (forwarded to
        ``train_test_split``). The default reproduces the previous behaviour.
    random_state : int or None, default None
        Seed forwarded to the gradient-boosting model only, which is stochastic
        because of ``subsample=0.8``. ``None`` reproduces the previous
        (unseeded) behaviour. The MLP keeps its fixed ``random_state=1``.

    Returns
    -------
    pre_y_list : list
        Test-set predictions, one entry per model, in the order
        KNN, linear regression, MLP, SVR, gradient boosting.
    y_test
        The held-out part of ``y``.

    Notes
    -----
    NOTE(review): ``y`` is passed to the estimators exactly as given. When the
    caller supplies an (n, 1) column, some estimators appear to flatten it
    (with a warning) while others keep it 2-D, so the entries of
    ``pre_y_list`` may not all share one shape -- confirm before subtracting
    them from ``y_test``.
    """
    # Chronological hold-out split: shuffle=False preserves the row order.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, shuffle=False)

    # Labels are kept exactly as previously printed.
    # NOTE(review): 'KF' labels a plain LinearRegression and 'LS-SVM' labels
    # sklearn's SVR, so these labels do not describe the estimators -- confirm.
    model_names = ['KNN', 'KF', 'ANN', 'LS-SVM', 'GBRT']
    model_dic = [
        KNeighborsRegressor(),  # k-nearest-neighbours regression
        LinearRegression(),  # ordinary least squares
        MLPRegressor(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(5, 2), random_state=1),  # small neural network
        SVR(max_iter=3000),  # support vector regression, iteration-capped
        GradientBoostingRegressor(learning_rate=0.06, n_estimators=600, max_depth=5,
                                  min_samples_leaf=60, min_samples_split=1000,
                                  subsample=0.8, random_state=random_state),  # gradient boosting
    ]

    # Fit every model on the training part and predict the held-out part.
    pre_y_list = [model.fit(X_train, y_train).predict(X_test) for model in model_dic]

    # One row of metrics per model; iterating over the predictions avoids a
    # hard-coded model count.
    n_samples, n_features = X.shape
    model_metrics_name = [explained_variance_score, mean_absolute_error,
                          mean_squared_error, r2_score]
    model_metrics_list = [
        [metric(y_test, y_pred) for metric in model_metrics_name]
        for y_pred in pre_y_list
    ]
    df2 = pd.DataFrame(model_metrics_list, index=model_names,
                       columns=['ev', 'mae', 'mse', 'r2'])

    # Report.
    print('samples: %d \t features: %d' % (n_samples, n_features))
    print(70 * '-')
    print('regression metrics:')
    print(df2)
    print(70 * '-')
    print('short name \t full name')
    print('ev \t explained_variance')
    print('mae \t mean_absolute_error')
    print('mse \t mean_squared_error')
    print('r2 \t r2')
    print(70 * '-')

    return pre_y_list, y_test

### Variable initialization (run it every time)

#### one step for long

In [66]:
# One-step-ahead setup for longitude.
# X: the first (min_len - prediction_step) rows of re_Long.
# y: the last column of the last (min_len - prediction_step) rows, i.e. the
#    last column shifted prediction_step rows ahead of X, as a column vector.
prediction_step = 1
X_Long = np.array(re_Long.head(min_len - prediction_step))
y_Long = np.array(
    re_Long.iloc[:, -1].tail(min_len - prediction_step)
).reshape([min_len - prediction_step, 1])

In [67]:
# Fit and evaluate all models on the one-step longitude data.
pre_y_list, y_test = prediction_process(X_Long, y_Long)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


samples: 96261 	 features: 10
----------------------------------------------------------------------
regression metrics:
                  ev       mae       mse        r2
KNN    -3.202344e-01  0.065579  3.947338 -0.320309
KF     -2.039612e-01  0.062635  3.599656 -0.204016
ANN    -2.456063e-01  0.058923  3.724081 -0.245634
LS-SVM  7.300902e-07  0.107676  2.993053 -0.001119
GBRT   -4.282796e-02  0.119669  3.117880 -0.042871
----------------------------------------------------------------------
short name 	 full name
ev 	 explained_variance
mae 	 mean_absolute_error
mse 	 mean_squared_error
r2 	 r2
----------------------------------------------------------------------


In [68]:
np.set_printoptions(edgeitems=100)
y_test

array([[119.97971 ],
       [119.979496],
       [119.9792  ],
       [119.979004],
       [119.979004],
       [119.978961],
       [119.97858 ],
       [119.977875],
       [119.977731],
       [119.977255],
       [119.977255],
       [119.977143],
       [119.977055],
       [119.976691],
       [119.976548],
       [119.976548],
       [119.976548],
       [119.976548],
       [119.976548],
       [119.976548],
       [119.976433],
       [119.975818],
       [119.975076],
       [119.974245],
       [119.973343],
       [119.973158],
       [119.973158],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.972905],
       [119.972401],
       [119.971726],
       [119.971043],
       [119.971248],
       [119.971736],
       [119.972191],
       [119.972353],
       [119.972498],
       [119.972576],
       [119.972635],
       [119.97289 ],
       [119.9

In [69]:
pre_y_list[0]

array([[119.9795864],
       [119.979606 ],
       [119.979606 ],
       [119.9796636],
       [119.9796474],
       [119.9795852],
       [119.9794596],
       [119.9794596],
       [119.9789184],
       [119.978358 ],
       [119.9783136],
       [119.978375 ],
       [119.9780926],
       [119.9780204],
       [119.9777464],
       [119.9774104],
       [119.9774104],
       [119.9770894],
       [119.9768106],
       [119.9765136],
       [119.9760916],
       [119.975737 ],
       [119.975737 ],
       [119.975208 ],
       [119.9745134],
       [119.9713172],
       [119.9713172],
       [119.9721154],
       [119.9722864],
       [119.9724132],
       [119.9721024],
       [119.9716162],
       [119.9713566],
       [119.9715268],
       [119.9715282],
       [119.9717084],
       [119.9713696],
       [119.9713696],
       [119.9716558],
       [119.9717682],
       [119.971802 ],
       [119.9723026],
       [119.9728972],
       [119.9733384],
       [119.9732312],
       [11

In [70]:
y_test-pre_y_list[0]

array([[ 1.2360e-04],
       [-1.1000e-04],
       [-4.0600e-04],
       [-6.5960e-04],
       [-6.4340e-04],
       [-6.2420e-04],
       [-8.7960e-04],
       [-1.5846e-03],
       [-1.1874e-03],
       [-1.1030e-03],
       [-1.0586e-03],
       [-1.2320e-03],
       [-1.0376e-03],
       [-1.3294e-03],
       [-1.1984e-03],
       [-8.6240e-04],
       [-8.6240e-04],
       [-5.4140e-04],
       [-2.6260e-04],
       [ 3.4400e-05],
       [ 3.4140e-04],
       [ 8.1000e-05],
       [-6.6100e-04],
       [-9.6300e-04],
       [-1.1704e-03],
       [ 1.8408e-03],
       [ 1.8408e-03],
       [ 9.8960e-04],
       [ 8.1860e-04],
       [ 6.9180e-04],
       [ 1.0026e-03],
       [ 1.4888e-03],
       [ 1.7484e-03],
       [ 1.5782e-03],
       [ 1.5768e-03],
       [ 1.1966e-03],
       [ 1.0314e-03],
       [ 3.5640e-04],
       [-6.1280e-04],
       [-5.2020e-04],
       [-6.6000e-05],
       [-1.1160e-04],
       [-5.4420e-04],
       [-8.4040e-04],
       [-6.5520e-04],
       [-2

In [71]:
# Absolute test error of the first model in pre_y_list.
test = abs(y_test - pre_y_list[0])

In [72]:
len(test[test>0.01])

40

In [73]:
len(test[test>0.1])

36

In [74]:
# Row indices of test points whose absolute error is below 0.1, and the matching
# true / predicted values.
# NOTE(review): selecting points by their own prediction error before scoring
# lowers the reported error by construction -- confirm this is intended.
y_no_noise_loc = np.where(test < 0.1)[0]
y_test_filtered, y_pred_filtered = (
    y_test[y_no_noise_loc].reshape(-1, 1),
    pre_y_list[0][y_no_noise_loc],
)

In [75]:
# Keep the one-step longitude truth and first-model prediction for the export below.
long_1_y, long_1_y_pred = y_test, pre_y_list[0]

In [62]:
mean_absolute_error(y_test_filtered,y_pred_filtered)

0.0007738788364471433

## one-step for lat

In [76]:
# One-step-ahead setup for latitude.
# X: the first (min_len - prediction_step) rows of re_Lat.
# y: the last column of the last (min_len - prediction_step) rows, i.e. the
#    last column shifted prediction_step rows ahead of X, as a column vector.
prediction_step = 1
X_Lat = np.array(re_Lat.head(min_len - prediction_step))
y_Lat = np.array(
    re_Lat.iloc[:, -1].tail(min_len - prediction_step)
).reshape([min_len - prediction_step, 1])

In [77]:
# Fit and evaluate all models on the one-step latitude data.
pre_y_list, y_test = prediction_process(X_Lat, y_Lat)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


samples: 96261 	 features: 11
----------------------------------------------------------------------
regression metrics:
              ev       mae       mse        r2
KNN    -0.457128  0.023061  0.307067 -0.457474
KF     -0.202250  0.020960  0.253306 -0.202301
ANN     0.000000  0.027374  0.210719 -0.000164
LS-SVM  0.000740  0.068772  0.213618 -0.013922
GBRT   -0.069771  0.023201  0.225401 -0.069851
----------------------------------------------------------------------
short name 	 full name
ev 	 explained_variance
mae 	 mean_absolute_error
mse 	 mean_squared_error
r2 	 r2
----------------------------------------------------------------------


In [78]:
np.set_printoptions(edgeitems=100)
y_test

array([[31.785443],
       [31.785275],
       [31.785091],
       [31.784968],
       [31.784968],
       [31.784931],
       [31.784673],
       [31.784148],
       [31.784039],
       [31.783698],
       [31.783698],
       [31.783615],
       [31.783503],
       [31.783188],
       [31.783083],
       [31.783083],
       [31.783083],
       [31.783083],
       [31.783083],
       [31.783083],
       [31.783   ],
       [31.782546],
       [31.781975],
       [31.781321],
       [31.780689],
       [31.780573],
       [31.780573],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780398],
       [31.780031],
       [31.779636],
       [31.779298],
       [31.778806],
       [31.778085],
       [31.777403],
       [31.777141],
       [31.776921],
       [31.776838],
       [31.776778],
       [31.776474],
       [31.776293],
       [31.776293],
       [31.776126],


In [79]:
pre_y_list[0]

array([[31.7858122],
       [31.7858122],
       [31.7857832],
       [31.7857252],
       [31.7856962],
       [31.7855984],
       [31.785451 ],
       [31.7852218],
       [31.7847496],
       [31.7847496],
       [31.7849712],
       [31.784812 ],
       [31.7846236],
       [31.7839484],
       [31.7837888],
       [31.7837888],
       [31.783603 ],
       [31.7834814],
       [31.7833478],
       [31.7832496],
       [31.7831026],
       [31.7831026],
       [31.7829658],
       [31.782779 ],
       [31.782497 ],
       [31.7820822],
       [31.7815538],
       [31.7803922],
       [31.779897 ],
       [31.779897 ],
       [31.7797094],
       [31.7797094],
       [31.7797094],
       [31.7797094],
       [31.7795158],
       [31.7792108],
       [31.7787554],
       [31.7782446],
       [31.7782446],
       [31.777981 ],
       [31.777054 ],
       [31.7767362],
       [31.7765318],
       [31.776312 ],
       [31.7759188],
       [31.7757584],
       [31.7756094],
       [31.77

In [80]:
y_test-pre_y_list[0]

array([[-3.6920e-04],
       [-5.3720e-04],
       [-6.9220e-04],
       [-7.5720e-04],
       [-7.2820e-04],
       [-6.6740e-04],
       [-7.7800e-04],
       [-1.0738e-03],
       [-7.1060e-04],
       [-1.0516e-03],
       [-1.2732e-03],
       [-1.1970e-03],
       [-1.1206e-03],
       [-7.6040e-04],
       [-7.0580e-04],
       [-7.0580e-04],
       [-5.2000e-04],
       [-3.9840e-04],
       [-2.6480e-04],
       [-1.6660e-04],
       [-1.0260e-04],
       [-5.5660e-04],
       [-9.9080e-04],
       [-1.4580e-03],
       [-1.8080e-03],
       [-1.5092e-03],
       [-9.8080e-04],
       [ 1.4580e-04],
       [ 6.4100e-04],
       [ 6.4100e-04],
       [ 8.2860e-04],
       [ 8.2860e-04],
       [ 8.2860e-04],
       [ 8.2860e-04],
       [ 1.0222e-03],
       [ 1.1872e-03],
       [ 1.2756e-03],
       [ 1.3914e-03],
       [ 1.0534e-03],
       [ 8.2500e-04],
       [ 1.0310e-03],
       [ 6.6680e-04],
       [ 6.0920e-04],
       [ 6.0900e-04],
       [ 9.1920e-04],
       [ 1

In [81]:
# Absolute test error of the first model in pre_y_list.
test = abs(y_test - pre_y_list[0])

In [82]:
len(test[test>0.01])

150

In [83]:
len(test[test>0.1])

50

In [84]:
# Row indices of test points whose absolute error is below 0.1, and the matching
# true / predicted values.
# NOTE(review): selecting points by their own prediction error before scoring
# lowers the reported error by construction -- confirm this is intended.
y_no_noise_loc = np.where(test < 0.1)[0]
y_test_filtered, y_pred_filtered = (
    y_test[y_no_noise_loc].reshape(-1, 1),
    pre_y_list[0][y_no_noise_loc],
)

In [85]:
# Keep the one-step latitude truth and first-model prediction for the export below.
lat_1_y, lat_1_y_pred = y_test, pre_y_list[0]

In [63]:
mean_absolute_error(y_test_filtered,y_pred_filtered)

0.001251029422486083

## Multi-step

### for long

In [86]:
# Multi-step (60 rows ahead) setup for longitude.
# X: the first (min_len - prediction_step) rows of re_Long.
# y: the last column of the last (min_len - prediction_step) rows, i.e. the
#    last column shifted prediction_step rows ahead of X, as a column vector.
prediction_step = 60
X_Long = np.array(re_Long.head(min_len - prediction_step))
y_Long = np.array(
    re_Long.iloc[:, -1].tail(min_len - prediction_step)
).reshape([min_len - prediction_step, 1])

In [87]:
# Fit and evaluate all models on the multi-step longitude data.
multi_pre_y_list, multi_y_test = prediction_process(X_Long, y_Long)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


samples: 96202 	 features: 10
----------------------------------------------------------------------
regression metrics:
                  ev       mae       mse        r2
KNN    -3.902724e-01  0.060809  4.159173 -0.390297
KF     -1.035319e-03  0.072452  2.995179 -0.001206
ANN    -7.449431e-02  0.054074  3.214505 -0.074520
LS-SVM -6.300341e-07  0.107289  2.994873 -0.001104
GBRT   -7.605674e-02  0.169445  3.219126 -0.076065
----------------------------------------------------------------------
short name 	 full name
ev 	 explained_variance
mae 	 mean_absolute_error
mse 	 mean_squared_error
r2 	 r2
----------------------------------------------------------------------


In [88]:
np.set_printoptions(edgeitems=100)
multi_y_test

array([[119.977055],
       [119.976691],
       [119.976548],
       [119.976548],
       [119.976548],
       [119.976548],
       [119.976548],
       [119.976548],
       [119.976433],
       [119.975818],
       [119.975076],
       [119.974245],
       [119.973343],
       [119.973158],
       [119.973158],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.973105],
       [119.972905],
       [119.972401],
       [119.971726],
       [119.971043],
       [119.971248],
       [119.971736],
       [119.972191],
       [119.972353],
       [119.972498],
       [119.972576],
       [119.972635],
       [119.97289 ],
       [119.973029],
       [119.973029],
       [119.97316 ],
       [119.97341 ],
       [119.97341 ],
       [119.97341 ],
       [119.97341 ],
       [119.97341 ],
       [119.973428],
       [119.973239],
       [119.972671],
       [119.972219],
       [119.9

In [89]:
multi_pre_y_list[0]

array([[119.9774518],
       [119.9774268],
       [119.9774088],
       [119.9774104],
       [119.9774104],
       [119.9772694],
       [119.9770894],
       [119.9759206],
       [119.9757292],
       [119.975587 ],
       [119.9765136],
       [119.9762286],
       [119.975874 ],
       [119.975737 ],
       [119.975208 ],
       [119.9737772],
       [119.9727144],
       [119.9717484],
       [119.9715112],
       [119.9715546],
       [119.9715546],
       [119.9716938],
       [119.9716938],
       [119.9716938],
       [119.9716938],
       [119.9716938],
       [119.9719734],
       [119.9722306],
       [119.9726744],
       [119.9730318],
       [119.9731894],
       [119.973351 ],
       [119.972881 ],
       [119.972881 ],
       [119.972627 ],
       [119.972805 ],
       [119.9731314],
       [119.972629 ],
       [119.9721206],
       [119.9721206],
       [119.9718674],
       [119.9716016],
       [119.971394 ],
       [119.9711452],
       [119.970841 ],
       [11

In [90]:
multi_y_test-multi_pre_y_list[0]

array([[-3.9680e-04],
       [-7.3580e-04],
       [-8.6080e-04],
       [-8.6240e-04],
       [-8.6240e-04],
       [-7.2140e-04],
       [-5.4140e-04],
       [ 6.2740e-04],
       [ 7.0380e-04],
       [ 2.3100e-04],
       [-1.4376e-03],
       [-1.9836e-03],
       [-2.5310e-03],
       [-2.5790e-03],
       [-2.0500e-03],
       [-6.7220e-04],
       [ 3.9060e-04],
       [ 1.3566e-03],
       [ 1.5938e-03],
       [ 1.5504e-03],
       [ 1.5504e-03],
       [ 1.4112e-03],
       [ 1.4112e-03],
       [ 1.2112e-03],
       [ 7.0720e-04],
       [ 3.2200e-05],
       [-9.3040e-04],
       [-9.8260e-04],
       [-9.3840e-04],
       [-8.4080e-04],
       [-8.3640e-04],
       [-8.5300e-04],
       [-3.0500e-04],
       [-2.4600e-04],
       [ 2.6300e-04],
       [ 2.2400e-04],
       [-1.0240e-04],
       [ 5.3100e-04],
       [ 1.2894e-03],
       [ 1.2894e-03],
       [ 1.5426e-03],
       [ 1.8084e-03],
       [ 2.0160e-03],
       [ 2.2828e-03],
       [ 2.3980e-03],
       [ 2

In [91]:
# Absolute multi-step test error of the first model in multi_pre_y_list.
test = abs(multi_y_test - multi_pre_y_list[0])

In [92]:
len(test[test>1])

25

In [93]:
len(test[test>0.1])

25

In [94]:
len(test[test>0.01])

377

In [95]:
# Row indices of test points whose absolute error is below 0.1, and the matching
# true / predicted values.
# NOTE(review): selecting points by their own prediction error before scoring
# lowers the reported error by construction -- confirm this is intended.
y_no_noise_loc = np.where(test < 0.1)[0]
y_test_filtered, y_pred_filtered = (
    multi_y_test[y_no_noise_loc].reshape(-1, 1),
    multi_pre_y_list[0][y_no_noise_loc],
)

In [96]:
# Keep the multi-step longitude truth and first-model prediction for the export below.
long_multi_y, long_multi_y_pred = multi_y_test, multi_pre_y_list[0]

In [64]:
mean_absolute_error(y_test_filtered,y_pred_filtered)

0.002200370441298934

### prediction test for re_Lat

In [97]:
# Multi-step (60 rows ahead) setup for latitude.
# X: the first (min_len - prediction_step) rows of re_Lat.
# y: the last column of the last (min_len - prediction_step) rows, i.e. the
#    last column shifted prediction_step rows ahead of X, as a column vector.
prediction_step = 60
X_Lat = np.array(re_Lat.head(min_len - prediction_step))
y_Lat = np.array(
    re_Lat.iloc[:, -1].tail(min_len - prediction_step)
).reshape([min_len - prediction_step, 1])

In [98]:
# Fit and evaluate all models on the multi-step latitude data.
multi_pre_y_list_Lat, multi_y_test_Lat = prediction_process(X_Lat, y_Lat)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


samples: 96202 	 features: 11
----------------------------------------------------------------------
regression metrics:
              ev       mae       mse        r2
KNN    -2.445225  0.045175  0.727148 -2.449213
KF      0.000009  0.027335  0.210849 -0.000160
ANN     0.000000  0.027393  0.210851 -0.000166
LS-SVM  0.000660  0.067082  0.213580 -0.013112
GBRT   -0.080723  0.035296  0.227909 -0.081081
----------------------------------------------------------------------
short name 	 full name
ev 	 explained_variance
mae 	 mean_absolute_error
mse 	 mean_squared_error
r2 	 r2
----------------------------------------------------------------------


In [99]:
multi_pre_y_list_Lat[0]

array([[31.7838838],
       [31.7838838],
       [31.7837936],
       [31.7837936],
       [31.7837946],
       [31.7836416],
       [31.7834558],
       [31.7833342],
       [31.7833108],
       [31.7832126],
       [31.7831026],
       [31.7829658],
       [31.7829658],
       [31.782779 ],
       [31.7815638],
       [31.7801642],
       [31.7801642],
       [31.779897 ],
       [31.779897 ],
       [31.779897 ],
       [31.7797094],
       [31.7797094],
       [31.7795158],
       [31.7787554],
       [31.7783956],
       [31.7778476],
       [31.7778476],
       [31.7773526],
       [31.777054 ],
       [31.7768994],
       [31.7765318],
       [31.7761208],
       [31.7759188],
       [31.7757584],
       [31.7757584],
       [31.7756094],
       [31.7756094],
       [31.7753972],
       [31.7753972],
       [31.775142 ],
       [31.775142 ],
       [31.774862 ],
       [31.774862 ],
       [31.774597 ],
       [31.7736066],
       [31.7732976],
       [31.7729752],
       [31.77

In [100]:
multi_y_test_Lat

array([[31.783503],
       [31.783188],
       [31.783083],
       [31.783083],
       [31.783083],
       [31.783083],
       [31.783083],
       [31.783083],
       [31.783   ],
       [31.782546],
       [31.781975],
       [31.781321],
       [31.780689],
       [31.780573],
       [31.780573],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780538],
       [31.780398],
       [31.780031],
       [31.779636],
       [31.779298],
       [31.778806],
       [31.778085],
       [31.777403],
       [31.777141],
       [31.776921],
       [31.776838],
       [31.776778],
       [31.776474],
       [31.776293],
       [31.776293],
       [31.776126],
       [31.775826],
       [31.775826],
       [31.775826],
       [31.775826],
       [31.775826],
       [31.775798],
       [31.775473],
       [31.775024],
       [31.774563],
       [31.774443],
       [31.774173],
       [31.774101],


In [101]:
abs(multi_pre_y_list_Lat[0]-multi_y_test_Lat)

array([[3.80800000e-04],
       [6.95800000e-04],
       [7.10600000e-04],
       [7.10600000e-04],
       [7.11600000e-04],
       [5.58600000e-04],
       [3.72800000e-04],
       [2.51200000e-04],
       [3.10800000e-04],
       [6.66600000e-04],
       [1.12760000e-03],
       [1.64480000e-03],
       [2.27680000e-03],
       [2.20600000e-03],
       [9.90800000e-04],
       [3.73800000e-04],
       [3.73800000e-04],
       [6.41000000e-04],
       [6.41000000e-04],
       [6.41000000e-04],
       [8.28600000e-04],
       [8.28600000e-04],
       [1.02220000e-03],
       [1.64260000e-03],
       [1.63540000e-03],
       [1.78840000e-03],
       [1.45040000e-03],
       [1.45340000e-03],
       [1.03100000e-03],
       [5.03600000e-04],
       [6.09200000e-04],
       [8.00200000e-04],
       [9.19200000e-04],
       [1.01960000e-03],
       [7.15600000e-04],
       [6.83600000e-04],
       [6.83600000e-04],
       [7.28800000e-04],
       [4.28800000e-04],
       [6.84000000e-04],


In [102]:
# Absolute multi-step test error of the first model in multi_pre_y_list_Lat.
test_Lat = abs(multi_pre_y_list_Lat[0] - multi_y_test_Lat)

In [103]:
len(test_Lat[test_Lat>0.1])

68

In [104]:
len(test_Lat[test_Lat>0.01])

667

In [105]:
# Row indices of test points whose absolute error is below 0.1, and the matching
# true / predicted values.
# NOTE(review): selecting points by their own prediction error before scoring
# lowers the reported error by construction -- confirm this is intended.
y_no_noise_loc = np.where(test_Lat < 0.1)[0]
y_test_filtered, y_pred_filtered = (
    multi_y_test_Lat[y_no_noise_loc].reshape(-1, 1),
    multi_pre_y_list_Lat[0][y_no_noise_loc],
)

In [106]:
# Keep the multi-step latitude truth and first-model prediction for the export below.
lat_multi_y, lat_multi_y_pred = multi_y_test_Lat, multi_pre_y_list_Lat[0]

In [65]:
mean_absolute_error(y_test_filtered,y_pred_filtered)

0.002883960559119626

对比发现，Delay coordinate embedding 之后的效果极好。造成上面各个指标（例如 mae）处于 0.01 这个数量级的原因在于：有些 noise 导致预测值与实际值之差大于 1°，如上所示。经度差一度大概是 100 KM，所以 0.01 数量级的差别则是 1 KM，误差（如 mae）应当控制在 0.001 的数量级。

## visualization

In [121]:
# Write the one-step results side by side, in column order:
# longitude truth, longitude prediction, latitude truth, latitude prediction.
pd.DataFrame(
    np.c_[long_1_y, long_1_y_pred, lat_1_y, lat_1_y_pred]
).to_csv("DCE_prediction_output_1step.csv", index=False)

In [122]:
# Write the multi-step results side by side, in column order:
# longitude truth, longitude prediction, latitude truth, latitude prediction.
pd.DataFrame(
    np.c_[long_multi_y, long_multi_y_pred, lat_multi_y, lat_multi_y_pred]
).to_csv("DCE_prediction_output_multi_step.csv", index=False)

In [3]:
import folium
import pandas as pd
import numpy as np

In [121]:
# One-step results produced without reconstruction, as a NumPy array.
# Only rows whose fourth column is greater than 31 are kept
# (presumably the predicted latitude -- TODO confirm).
# .values replaces DataFrame.as_matrix(), which is deprecated and was removed in pandas 1.0.
df_1 = pd.read_csv('unrecontrstructed_prediction_output_1step.csv', encoding='utf-8')
df_1 = df_1[df_1.iloc[:, 3] > 31].values

  


In [122]:
# Multi-step results produced without reconstruction, as a NumPy array.
# Only rows whose fourth column is greater than 31 are kept
# (presumably the predicted latitude -- TODO confirm).
# .values replaces DataFrame.as_matrix(), which is deprecated and was removed in pandas 1.0.
df_2 = pd.read_csv('unrecontrstructed_prediction_output_multi_step.csv', encoding='utf-8')
df_2 = df_2[df_2.iloc[:, 3] > 31].values

  """Entry point for launching an IPython kernel.


In [123]:
# One-step DCE results as a NumPy array.
# .values replaces DataFrame.as_matrix(), which is deprecated and was removed in pandas 1.0.
df_3 = pd.read_csv('DCE_prediction_output_1step.csv', encoding='utf-8').values

  """Entry point for launching an IPython kernel.


In [124]:
# Multi-step DCE results as a NumPy array.
# .values replaces DataFrame.as_matrix(), which is deprecated and was removed in pandas 1.0.
df_4 = pd.read_csv('DCE_prediction_output_multi_step.csv', encoding='utf-8').values

  """Entry point for launching an IPython kernel.


In [125]:
# Raise pandas' row-display limit to 300, then print df_1.
# NOTE(review): df_1 was converted to a NumPy array when it was loaded, so this
# pandas display option probably has no effect on how it prints -- confirm.
pd.set_option('display.max_rows', 300)
print(df_1)

[[119.97971   119.9797684  31.785443   31.7857406]
 [119.979496  119.9797282  31.785275   31.7856668]
 [119.9792    119.9794778  31.785091   31.7851404]
 ...
 [119.957603  119.9573222  31.803846   31.8030238]
 [119.957743  119.957604   31.804348   31.803953 ]
 [119.957805  119.957922   31.80449    31.804655 ]]


In [126]:
# Base map for the one-step comparison, centred on [31.774801, 119.972503].
m = folium.Map(
    location=[31.774801, 119.972503],
    zoom_start=12,
)

In [127]:
# Pick column pairs for plotting (one-step data).
# The DCE file was written as [lon truth, lon prediction, lat truth, lat prediction],
# so [2, 0] selects the truth pair and [3, 1] the prediction pair, latitude first.
# NOTE(review): df_1 is assumed to use the same column layout -- confirm.
point_origin = df_1[:, [2, 0]]
point_SVR = df_1[:, [3, 1]]
point_DCE_SVR = df_3[:, [3, 1]]

In [128]:
# Draw rows 100-499 of the true positions as crimson circles.
# The popup previously read 'The Waterfront', a leftover tutorial placeholder;
# it now names the series the marker belongs to.
for each_one in point_origin[100:500]:
    folium.Circle(
        radius=10,
        location=list(each_one),
        popup='Observed position',
        color='crimson',
        fill=True,
        opacity=1
    ).add_to(m)

In [129]:
# Draw rows 100-499 of the predictions loaded from the "unreconstructed" file as blue circles.
# The popup previously read 'The Waterfront', a leftover tutorial placeholder;
# it now names the series the marker belongs to.
for each_one in point_SVR[100:500]:
    folium.Circle(
        radius=10,
        location=list(each_one),
        popup='Prediction without reconstruction',
        color='blue',
        fill=True,
        opacity=1
    ).add_to(m)

In [130]:
# Draw rows 100-499 of the predictions loaded from the DCE file as green circles.
# The popup previously read 'The Waterfront', a leftover tutorial placeholder;
# it now names the series the marker belongs to.
for each_one in point_DCE_SVR[100:500]:
    folium.Circle(
        radius=10,
        location=list(each_one),
        popup='Prediction with DCE reconstruction',
        color='green',
        fill=True,
        opacity=1
    ).add_to(m)

In [131]:
# Connect rows 100-499 of the true positions with a crimson line and add it to map m.
route_origin = folium.PolyLine(
    point_origin[100:500],  # coordinates joined in order
    weight=3,               # line width
    color='crimson',        # line colour
    opacity=0.8,            # line opacity
).add_to(m)

In [132]:
# Connect rows 100-499 of the "unreconstructed" predictions with a blue line and add it to map m.
route_SVR = folium.PolyLine(
    point_SVR[100:500],  # coordinates joined in order
    weight=3,            # line width
    color='blue',        # line colour
    opacity=0.8,         # line opacity
).add_to(m)

In [133]:
# Connect rows 100-499 of the DCE predictions with a green line and add it to map m.
route_DCE_SVR = folium.PolyLine(
    point_DCE_SVR[100:500],  # coordinates joined in order
    weight=3,                # line width
    color='green',           # line colour
    opacity=0.8,             # line opacity
).add_to(m)

In [134]:
m

In [135]:
m.save('one_step_ahead.html')

In [136]:
# Fresh base map for the multi-step comparison, centred on [31.774801, 119.972503].
m = folium.Map(
    location=[31.774801, 119.972503],
    zoom_start=12,
)

In [137]:
# Pick column pairs for plotting (multi-step data).
# The DCE file was written as [lon truth, lon prediction, lat truth, lat prediction],
# so [2, 0] selects the truth pair and [3, 1] the prediction pair, latitude first.
# NOTE(review): df_2 is assumed to use the same column layout -- confirm.
point_origin = df_2[:, [2, 0]]
point_SVR = df_2[:, [3, 1]]
point_DCE_SVR = df_4[:, [3, 1]]

In [138]:
# Draw rows 100-499 of the true positions as crimson circles.
# The popup previously read 'The Waterfront', a leftover tutorial placeholder;
# it now names the series the marker belongs to.
for each_one in point_origin[100:500]:
    folium.Circle(
        radius=10,
        location=list(each_one),
        popup='Observed position',
        color='crimson',
        fill=True,
        opacity=1
    ).add_to(m)

In [139]:
# Draw rows 100-499 of the predictions loaded from the "unreconstructed" file as blue circles.
# The popup previously read 'The Waterfront', a leftover tutorial placeholder;
# it now names the series the marker belongs to.
for each_one in point_SVR[100:500]:
    folium.Circle(
        radius=10,
        location=list(each_one),
        popup='Prediction without reconstruction',
        color='blue',
        fill=True,
        opacity=1
    ).add_to(m)

In [140]:
# Draw rows 100-499 of the predictions loaded from the DCE file as green circles.
# The popup previously read 'The Waterfront', a leftover tutorial placeholder;
# it now names the series the marker belongs to.
for each_one in point_DCE_SVR[100:500]:
    folium.Circle(
        radius=10,
        location=list(each_one),
        popup='Prediction with DCE reconstruction',
        color='green',
        fill=True,
        opacity=1
    ).add_to(m)

In [141]:
# Connect rows 100-499 of the true positions with a crimson line and add it to map m.
route_origin = folium.PolyLine(
    point_origin[100:500],  # coordinates joined in order
    weight=3,               # line width
    color='crimson',        # line colour
    opacity=0.8,            # line opacity
).add_to(m)

In [142]:
# Connect rows 100-499 of the "unreconstructed" predictions with a blue line and add it to map m.
route_SVR = folium.PolyLine(
    point_SVR[100:500],  # coordinates joined in order
    weight=3,            # line width
    color='blue',        # line colour
    opacity=0.8,         # line opacity
).add_to(m)

In [143]:
# Connect rows 100-499 of the DCE predictions with a green line and add it to map m.
route_DCE_SVR = folium.PolyLine(
    point_DCE_SVR[100:500],  # coordinates joined in order
    weight=3,                # line width
    color='green',           # line colour
    opacity=0.8,             # line opacity
).add_to(m)

In [144]:
m

In [145]:
m.save('multi_step_ahead.html')