In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential, Model
from sklearn.model_selection import train_test_split
from keras.layers import LSTM
from keras.layers import Dense, Input,Flatten, Dropout, TimeDistributed, RepeatVector
from keras.optimizers import Adam
from keras import backend as K
import keras
import matplotlib.pyplot as plt
import utm
from sklearn.preprocessing import minmax_scale, MinMaxScaler
from keras.utils import to_categorical

class LossHistory(keras.callbacks.Callback):
    """Keras callback that records loss and accuracy per batch and per epoch.

    After ``on_train_begin`` has run, ``losses``, ``accuracy``, ``val_loss`` and
    ``val_acc`` are dicts with the keys ``'batch'`` and ``'epoch'``, each mapping
    to the list of values seen at that granularity. An entry is ``None``
    whenever the corresponding value was absent from ``logs``.
    """

    @staticmethod
    def _first_present(logs, *keys):
        """Return the value of the first of ``keys`` found in ``logs``, else None."""
        for key in keys:
            if key in logs:
                return logs[key]
        return None

    @staticmethod
    def _plot_series(iters, values, fmt, label):
        """Plot one curve, skipping a series that never received a value."""
        if any(value is not None for value in values):
            plt.plot(iters, values, fmt, label=label)

    def _record(self, scope, logs):
        """Append the metrics found in ``logs`` to the ``scope`` ('batch'/'epoch') lists."""
        logs = logs or {}
        self.losses[scope].append(logs.get('loss'))
        # Older Keras releases report accuracy under 'acc', newer ones under
        # 'accuracy'; accept either so the history is not silently all-None.
        self.accuracy[scope].append(self._first_present(logs, 'acc', 'accuracy'))
        self.val_loss[scope].append(logs.get('val_loss'))
        self.val_acc[scope].append(self._first_present(logs, 'val_acc', 'val_accuracy'))

    def on_train_begin(self, logs=None):
        """Reset all histories at the start of a training run."""
        self.losses = {'batch': [], 'epoch': []}
        self.accuracy = {'batch': [], 'epoch': []}
        self.val_loss = {'batch': [], 'epoch': []}
        self.val_acc = {'batch': [], 'epoch': []}

    def on_batch_end(self, batch, logs=None):
        """Record the metrics reported at the end of a batch."""
        self._record('batch', logs)

    def on_epoch_end(self, batch, logs=None):
        """Record the metrics reported at the end of an epoch.

        The second parameter keeps its original name ``batch`` for interface
        compatibility; its value is not used.
        """
        self._record('epoch', logs)

    def loss_plot(self, loss_type):
        """Plot the recorded curves for ``loss_type`` (``'batch'`` or ``'epoch'``).

        Training accuracy and loss are always drawn; validation curves are
        drawn only for ``'epoch'``. Series containing no values are skipped.
        """
        iters = range(len(self.losses[loss_type]))
        plt.figure()
        # acc
        self._plot_series(iters, self.accuracy[loss_type], 'r', 'train acc')
        # loss
        self._plot_series(iters, self.losses[loss_type], 'g', 'train loss')
        if loss_type == 'epoch':
            # val_acc
            self._plot_series(iters, self.val_acc[loss_type], 'b', 'val acc')
            # val_loss
            self._plot_series(iters, self.val_loss[loss_type], 'k', 'val loss')
        plt.grid(True)
        plt.xlabel(loss_type)
        plt.ylabel('acc-loss')
        plt.legend(loc="upper right")
        plt.show()


def distance_loss(y_pred, y_true):
    """Root of the mean squared Euclidean distance between two tensors.

    Squared differences are summed along the last axis, the sums are averaged
    over all remaining elements, and the square root of that mean is returned.
    The expression is symmetric in its two arguments, so their order does not
    affect the result.
    """
    squared_diff = K.square(y_pred - y_true)
    squared_distance = K.sum(squared_diff, axis=-1)
    return K.sqrt(K.mean(squared_distance))

def median_absolute_deviation(y_pred, y_true):
    """Median of the absolute prediction error, computed per column.

    Parameters
    ----------
    y_pred, y_true : array-like
        Predictions and targets of identical (or broadcastable) shape.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Median of ``|y_pred - y_true|`` taken along axis 0.
    """
    deviation = np.abs(np.asarray(y_pred) - np.asarray(y_true))
    # The previous implementation returned the mean here, contradicting the
    # function name; the median is far less sensitive to outliers.
    return np.median(deviation, axis=0)


# Load the training measurements and the label column.
# `label` is not read again in the cells visible here.
data = pd.read_csv('trainfix.csv')
label = pd.read_csv('label1.csv')['label']

# Columns 4..33 of each row (six adjacent groups of five values) make up the
# 30-value feature vector of one measurement.
X_temp = np.array([row[4:34] for row in data.values])
y_temp = data[['Latitude','Longitude']].values

# Cut every trajectory (TrajID 0..71) into overlapping windows of six
# consecutive rows, stride 1. Trajectories with fewer than six rows yield none.
X, y = [], []
for t in range(72):
    in_traj = data['TrajID'] == t
    temp, temp_y = X_temp[in_traj], y_temp[in_traj]
    for start in range(len(temp) - 5):
        stop = start + 6
        X.append(temp[start:stop])
        y.append(temp_y[start:stop])
X = np.array(X)
y = np.array(y)
# Disabled hold-out split kept for reference:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=33)

Using TensorFlow backend.


In [2]:
# Running this cell overwrites X_test so that the final results can be produced.
data_test = pd.read_csv('testfix.csv')
TrajID = data_test['TrajID'].values
print(TrajID.shape)

# Same 30-value feature vector (columns 4..33) as used for the training data.
X_test = np.array([row[4:34] for row in data_test.values])

# Slide a six-row window over each trajectory (TrajID 0..71) and remember
# which trajectory every window came from.
X_final = []
traj_id = []
for t in range(72):
    temp = X_test[data_test['TrajID'] == t]
    for start in range(len(temp) - 5):
        X_final.append(temp[start:start + 6])
        traj_id.append(t)
X_test = np.array(X_final)
history = LossHistory()

(2470,)


In [4]:
def build_model_ae(timesteps=6, n_features=30, learning_rate=0.001):
    """Build and compile the sequence autoencoder.

    Architecture: LSTM(64) -> LSTM(32) -> TimeDistributed(Dense(n_features)),
    every layer returning the full sequence, trained with MSE so that the
    output reconstructs the input window.

    Parameters
    ----------
    timesteps : int
        Number of steps per input window (default 6).
    n_features : int
        Features per step; also the width of the reconstruction (default 30).
    learning_rate : float
        Adam learning rate (default 0.001).

    Returns
    -------
    A compiled ``Sequential`` model.
    """
    m = Sequential()
    m.add(LSTM(64, return_sequences=True, input_shape=(timesteps, n_features)))
    m.add(LSTM(32, return_sequences=True))
    m.add(TimeDistributed(Dense(n_features)))
    # The `lr` keyword is kept as in the original call; newer Keras releases
    # name this argument `learning_rate`.
    r = Adam(lr=learning_rate)
    m.compile(optimizer=r, loss='mse', metrics=['mse'])
    return m

def build_model_lstm(timesteps=6, n_features=64, n_classes=891, learning_rate=0.001):
    """Build and compile the per-timestep classifier.

    Architecture: LSTM(32) -> LSTM(64) -> two TimeDistributed Dense(1024, relu)
    layers -> TimeDistributed Dense(n_classes, softmax), compiled with
    categorical cross-entropy and an accuracy metric.

    Parameters
    ----------
    timesteps : int
        Number of steps per input window (default 6).
    n_features : int
        Features per step of the input (default 64).
    n_classes : int
        Number of output classes per step (default 891).
    learning_rate : float
        Adam learning rate (default 0.001).

    Returns
    -------
    A compiled ``Sequential`` model emitting ``(timesteps, n_classes)``
    probabilities per sample.
    """
    m = Sequential()
    m.add(LSTM(32, return_sequences=True, input_shape=(timesteps, n_features)))
    m.add(LSTM(64, return_sequences=True))
    m.add(TimeDistributed(Dense(1024, activation='relu')))
    m.add(TimeDistributed(Dense(1024, activation='relu')))
    m.add(TimeDistributed(Dense(n_classes, activation='softmax')))
    # The `lr` keyword is kept as in the original call; newer Keras releases
    # name this argument `learning_rate`.
    r = Adam(lr=learning_rate)
    m.compile(optimizer=r, loss='categorical_crossentropy', metrics=['accuracy'])
    return m

# Train the autoencoder to reproduce its own input windows (input == target).
model1 = build_model_ae()
model1.fit(X, X, epochs=80, batch_size=32)
# Disabled variant that also evaluates on a held-out split:
# model1.fit(X_train, X_train, validation_data=(X_test, X_test), epochs=80, batch_size=32)
# Rebind model1 to an encoder that stops at the output of the first layer
# (the 64-unit LSTM in build_model_ae). The trained autoencoder is no longer
# reachable under this name afterwards.
model1 = Model(inputs=model1.inputs, outputs=model1.layers[0].output)
model1.summary()
# Encode the training and test windows with that first-layer encoder.
X_train_vec = model1.predict(X)
X_test_vec = model1.predict(X_test)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80


Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3_input (InputLayer)    (None, 6, 30)             0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 6, 64)             24320     
Total params: 24,320
Trainable params: 24,320
Non-trainable params: 0
_________________________________________________________________


In [5]:
from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X_train_vec, y, test_size=0.3, random_state=33)

# Grid coordinates (columns 'x' and 'y'), one row per class.
grid = pd.read_csv('grid1.csv')[['x','y']].values


def _nearest_grid_index(lat, lon):
    """Return the row index of the grid point closest to (lat, lon).

    The position is converted with utm.from_latlon and compared against every
    grid row by squared Euclidean distance.
    """
    easting, northing, _, _ = utm.from_latlon(lat, lon)
    offsets = np.array([easting, northing]) - grid
    return np.argmin(np.sum(np.square(offsets), axis=1))


# One class index per step of every window, then one-hot encoded to 891 classes.
Y_train = [[_nearest_grid_index(mr[0], mr[1]) for mr in seq] for seq in y]

Y_train = to_categorical(np.array(Y_train), 891)
# Y_test=[]
# y_test_utm=[]
# for seq in y_test:
#     poses=[]
#     utms=[]
#     for mr in seq:
#         u1, u2, _, _ = utm.from_latlon(mr[0], mr[1])
#         test = np.array([u1, u2])
#         utms.append([u1, u2])
#         poses.append(np.argmin(np.sum(np.square(test - grid), axis=1)))
#     y_test_utm.append(utms)
#     Y_test.append(poses)

# Y_test=to_categorical(np.array(Y_test),891)

In [6]:
# Train the per-timestep classifier on the encoded training windows.
model2 = build_model_lstm()
model2.fit(X_train_vec, Y_train,epochs=100, batch_size=64)
model2.summary()
# Zero-filled accumulators with one slot per test row.
# NOTE(review): neither list is read in the cells visible here - possibly leftovers; confirm.
y_result = [0]*len(data_test)
y_result_count = [0]*len(data_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 6, 32)             12416     
_________________________________________________________________
lstm_6 (LSTM)                (None, 6, 64)             24832     
_________________________________________________________________
time_distributed_3 (TimeDist (None, 6, 1024)           66560     
_________________________________________________________________
time_distributed_4 (TimeDist (None, 6, 1024)           1049600   
_________________________________________________________________
time_distributed_5 (TimeDist (None, 6, 891)            913275    
Total params: 2,066,683
Trainable param

In [None]:
import math
def Dist_Error(pred,real):
    """Euclidean distance between matching rows of ``pred`` and ``real``.

    Only the first two columns of each row are used.

    Parameters
    ----------
    pred, real : array-like of shape (n, >=2)
        Predicted and reference coordinates.

    Returns
    -------
    numpy.ndarray of shape (n,)
        Per-row distance; an empty array for empty input.
    """
    diff = np.asarray(pred, dtype=float) - np.asarray(real, dtype=float)
    if diff.size == 0:
        return np.array([])
    # hypot computes sqrt(dx**2 + dy**2) for all rows at once, replacing the
    # per-row Python loop over math.pow / math.sqrt.
    return np.hypot(diff[:, 0], diff[:, 1])
# NOTE(review): this evaluation cell looks stale. build_model_lstm declares
# input_shape=(6, 64), while X_test as built from testfix.csv holds 30 features
# per step - presumably X_test was expected to come from the commented-out
# train_test_split of X_train_vec; confirm before running.
testRe=model2.predict_classes(X_test)
# UTM zone constants; not read in this cell.
zone_number = 51
zone_letter = 'R'
# Flatten the per-window class indices into one long vector.
testRe=testRe.reshape(-1)
# NOTE(review): y_test_utm is only assigned in commented-out code in this
# notebook, so this line fails on a fresh kernel unless that code is restored.
y_test_utm=np.array(y_test_utm).reshape(-1,2)

# Look up the first two grid columns for every predicted class index.
testFi=[[grid[i][0], grid[i][1]] for i in testRe]

In [None]:
# Distance between each predicted grid point and its reference position.
errors = np.array(Dist_Error(np.array(testFi), np.array(y_test_utm)))
print(np.median(errors))
print(np.mean(errors))
# Value found 90% of the way through the sorted errors.
p90_index = int(len(errors) * 0.9)
print(np.sort(errors)[p90_index])

In [None]:
# Fraction of errors strictly below each integer threshold (empirical CDF).
x = np.arange(0, np.max(errors))
n_errors = len(errors)
error_y = [np.count_nonzero(errors < threshold) / n_errors for threshold in x]
plt.plot(x, error_y)

In [7]:
# Sequential.predict_classes is not available in recent Keras/TensorFlow
# releases. Taking the argmax over the softmax axis of predict() gives the
# same class indices and works on both old and new versions.
y_pred = np.argmax(model2.predict(X_test_vec), axis=-1)

In [8]:
import math
def traj_merge(traj_id,trajs,code=0):
    """Stitch overlapping sliding windows back into one flat sequence.

    Windows are processed in order. The first window of each run of equal
    trajectory ids contributes all of its elements; every following window of
    the same run contributes only its last element (the one new point added
    by a stride-1 window).

    Parameters
    ----------
    traj_id : sequence
        Trajectory id of each window, aligned with ``trajs``.
    trajs : sequence of list
        The windows themselves.
    code : int
        Unused; kept for backward compatibility.

    Returns
    -------
    list
        The merged elements; ``[]`` when there are no windows.
    """
    result = []
    # A fresh object never equals a real id, so an id of -1 is handled like
    # any other (the previous -1 sentinel dropped most of such a first window).
    last = object()
    for tid, window in zip(traj_id, trajs):
        if tid != last:
            result.extend(window)
            last = tid
        else:
            result.append(window[-1])
    return result

In [9]:
# Collapse the per-window class predictions into one flat list of class
# indices, using the trajectory id recorded for each window.
y_p = traj_merge(traj_id,y_pred.tolist())

In [10]:
# Re-read the grid with all of its columns. Columns 1 and 2 are passed to
# utm.to_latlon as easting and northing.
# NOTE(review): presumably these are the 'x' and 'y' columns - confirm against grid1.csv.
grid = pd.read_csv('grid1.csv').values
# NOTE(review): the UTM zone is hard-coded; assumes every grid point lies in zone 51R.
zone_number = 51
zone_letter = 'R'

# Convert each predicted grid point once (previously the conversion ran twice
# per point) and store it as [longitude, latitude].
y_final = []
for i in y_p:
    lat, lon = utm.to_latlon(grid[i][1], grid[i][2],
                             zone_letter=zone_letter, zone_number=zone_number)
    y_final.append([lon, lat])
# reshape keeps the array two-dimensional even when there are no predictions,
# so the column slicing below cannot fail on empty input.
y_final = np.array(y_final).reshape(-1, 2)
df_pred = pd.DataFrame(data={'Longitude':y_final[:,0], 'Latitude':y_final[:,1]})
df_pred.to_csv('pred6.csv', index=False)
print('success')

success
