# 下一跳预测

## 1. 准备工作

需安装如下依赖：

In [1]:
# !pip install -q torch
# !pip install -q numpy
# !pip install -q pandas
# !pip install -q scikit-learn
# !pip install -q tqdm

## 2. 模型探索

In [2]:
# 导入所有依赖
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim

from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# 固定随机种子，设置device
seed = 3407

np.random.seed(seed)
torch.manual_seed(seed)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'mps'  # for Apple
torch.device(device)

device(type='cuda')

### 2.1 模型定义

In [4]:
class LSTMPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMPredictor, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [5]:
class BiLSTMPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BiLSTMPredictor, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        # Bi-LSTM 需要两个隐藏状态
        h0 = torch.zeros(2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(2, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [6]:
class GRUPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUPredictor, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

### 2.2 DataLoader

In [7]:
def createSequence_(df, window_size):
    size = len(df)

    # f = df.columns.values[1:]

    # 按照滑动窗口进行划分
    seq = []
    label = []
    for i in range(0, size - window_size):
        seq.append(df[i:i + window_size - 1])
        # print(df.iloc[i + window_size - 1][f])
        label.append(df.iloc[i + window_size - 1])
    seq = np.array(seq).astype(float)
    label = np.array(label).astype(float)
    seq = torch.tensor(seq, dtype=torch.float32).to(device)
    label = torch.tensor(label, dtype=torch.float32).to(device)
    return seq, label

### 2.3 训练 & 评估定义

In [8]:
# 计算 RMSE（只计算点坐标和距离，不包括时间、速度等）
def calc_rmse(predictions, targets):
    mse = torch.mean((predictions[:, :3] - targets[:, :3]) ** 2)
    rmse = torch.sqrt(mse)
    return rmse

In [9]:
def trainModel(train_X, train_Y, val_X, val_Y, model,
               lr=1e-2, epoch_num=20, logging_steps=5):
    # torch.cuda()
    # 使用 DataLoader 和 TensorDataset 批量加载数据

    b_size = 16

    train_set = TensorDataset(train_X, train_Y)
    train_loader = DataLoader(train_set, batch_size=b_size, shuffle=True, generator=torch.Generator(device=device))
    test_set = TensorDataset(val_X, val_Y)
    test_loader = DataLoader(test_set, batch_size=b_size, shuffle=False, generator=torch.Generator(device=device))

    # 初始化损失函数和优化器
    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr)

    # 模型训练
    for epoch in tqdm(range(epoch_num)):
        loss = 0.0
        for data in train_loader:
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            loss += loss.item()

        # log
        if epoch % logging_steps == (logging_steps - 1):
            print(f"Epoch {epoch + 1}, loss: {loss / len(train_loader):.6f}")

    # 用验证集评估
    preds = []
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            preds.append(outputs)
    
    preds = torch.cat(preds, dim=0)
    loss = criterion(preds, val_Y)
    rmse = calc_rmse(preds, val_Y)

    return model, loss.item(), rmse.item()

### 2.4 启动训练

In [10]:
torch.set_default_tensor_type(torch.cuda.FloatTensor)
t = torch.tensor(1)
print(t.device)

cuda:0


In [11]:
df = pd.read_csv('data/METR-LA_p.csv')

In [12]:
# 全部可选模型
all_model_types = [
    LSTMPredictor,
    BiLSTMPredictor,
    GRUPredictor
]

In [13]:
# 划分训练集：验证集：测试集 == 6:2:2
def split_dataset_(df, window_size):
    seq, label = createSequence_(df, window_size)

    # train
    train_seq, test_seq = train_test_split(seq, test_size=0.4, random_state=seed)
    train_label, test_label = train_test_split(label, test_size=0.4, random_state=seed)

    # val & test
    val_seq, test_seq = train_test_split(test_seq, test_size=0.5, random_state=seed)
    val_label, test_label = train_test_split(test_label, test_size=0.5, random_state=seed)

    return train_seq, train_label, val_seq, val_label, test_seq, test_label

In [14]:
# 部分参数
window_size = 10
lr = 1e-4
epoch_num = 200
logging_steps = 10

In [15]:
print(torch.__version__)
print(torch.cuda.is_available())

1.13.0+cu116
True


In [16]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [17]:
best_model = None
min_rmse = 1e+5

train_seq, train_label, val_seq, val_label, test_seq, test_label = split_dataset_(df, window_size)
    # 遍历模型
for i in range(3):
    if i == 0:
        model = LSTMPredictor(train_seq.shape[2], 108, train_label.shape[1]).to(device)
        torch.save(model.state_dict(), 'model/LSTMPredictor.pt')
    elif i == 1:
        model = BiLSTMPredictor(train_seq.shape[2], 108, train_label.shape[1]).to(device)
        torch.save(model.state_dict(), 'model/BiLSTMPredictor.pt')
    else:
        model = GRUPredictor(train_seq.shape[2], 108, train_label.shape[1]).to(device)
        torch.save(model.state_dict(), 'model/GRU.pt')
            
    print(f'current model_type: {type(model)}', flush=True)
        
        # 训练
    
    model, loss, rmse = trainModel(train_seq, train_label, val_seq, val_label, model,
                                           lr, epoch_num, logging_steps)
        # 在（划分的）测试集上测试
    predictions = []
    test_set = TensorDataset(test_seq, test_label)
    test_loader = DataLoader(test_set, batch_size=32, shuffle=False, generator=torch.Generator(device=device))

    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            predictions.append(outputs)

    predictions = torch.cat(predictions, dim=0)
    test_rmse = calc_rmse(predictions, test_label)
    print(f'test rmse: {test_rmse:.5f}')
    print(f'=' * 80, end='\n\n', flush=True)

    # 记录最好的模型
    if test_rmse < min_rmse:
        min_rmse = test_rmse
        best_model = model

current model_type: <class '__main__.LSTMPredictor'>


  5%|▌         | 10/200 [00:42<12:32,  3.96s/it]

Epoch 10, loss: 0.257271


 10%|█         | 20/200 [01:24<12:56,  4.31s/it]

Epoch 20, loss: 0.208733


 15%|█▌        | 30/200 [02:07<12:09,  4.29s/it]

Epoch 30, loss: 0.179831


 20%|██        | 40/200 [02:51<11:30,  4.32s/it]

Epoch 40, loss: 0.177920


 25%|██▌       | 50/200 [03:37<11:32,  4.62s/it]

Epoch 50, loss: 0.096312


 30%|███       | 60/200 [04:22<10:18,  4.42s/it]

Epoch 60, loss: 0.099876


 35%|███▌      | 70/200 [05:07<09:41,  4.47s/it]

Epoch 70, loss: 0.087590


 40%|████      | 80/200 [05:51<08:56,  4.47s/it]

Epoch 80, loss: 0.164100


 45%|████▌     | 90/200 [06:38<08:39,  4.72s/it]

Epoch 90, loss: 0.076729


 50%|█████     | 100/200 [07:24<07:35,  4.55s/it]

Epoch 100, loss: 0.171698


 55%|█████▌    | 110/200 [08:09<06:40,  4.45s/it]

Epoch 110, loss: 0.052824


 60%|██████    | 120/200 [08:55<06:06,  4.58s/it]

Epoch 120, loss: 0.114022


 65%|██████▌   | 130/200 [09:41<05:14,  4.49s/it]

Epoch 130, loss: 0.064867


 70%|███████   | 140/200 [10:25<04:29,  4.48s/it]

Epoch 140, loss: 0.072450


 75%|███████▌  | 150/200 [11:13<03:57,  4.76s/it]

Epoch 150, loss: 0.105743


 80%|████████  | 160/200 [11:59<03:01,  4.54s/it]

Epoch 160, loss: 0.056122


 85%|████████▌ | 170/200 [12:45<02:18,  4.62s/it]

Epoch 170, loss: 0.073981


 90%|█████████ | 180/200 [13:27<01:19,  4.00s/it]

Epoch 180, loss: 0.060683


 95%|█████████▌| 190/200 [14:09<00:42,  4.23s/it]

Epoch 190, loss: 0.084422


100%|██████████| 200/200 [14:55<00:00,  4.48s/it]

Epoch 200, loss: 0.042333





test rmse: 5.71907

current model_type: <class '__main__.BiLSTMPredictor'>


  5%|▌         | 10/200 [00:57<18:05,  5.72s/it]

Epoch 10, loss: 0.199114


 10%|█         | 20/200 [01:55<17:27,  5.82s/it]

Epoch 20, loss: 0.173930


 15%|█▌        | 30/200 [02:52<16:03,  5.67s/it]

Epoch 30, loss: 0.157771


 20%|██        | 40/200 [03:47<14:25,  5.41s/it]

Epoch 40, loss: 0.134743


 25%|██▌       | 50/200 [04:41<13:29,  5.39s/it]

Epoch 50, loss: 0.072924


 30%|███       | 60/200 [05:37<12:50,  5.51s/it]

Epoch 60, loss: 0.079769


 35%|███▌      | 70/200 [06:32<12:42,  5.86s/it]

Epoch 70, loss: 0.054989


 40%|████      | 80/200 [07:33<11:41,  5.85s/it]

Epoch 80, loss: 0.113203


 45%|████▌     | 90/200 [08:30<10:31,  5.74s/it]

Epoch 90, loss: 0.054994


 50%|█████     | 100/200 [09:23<08:18,  4.98s/it]

Epoch 100, loss: 0.120506


 55%|█████▌    | 110/200 [10:08<06:37,  4.42s/it]

Epoch 110, loss: 0.038092


 60%|██████    | 120/200 [10:52<05:53,  4.42s/it]

Epoch 120, loss: 0.070803


 65%|██████▌   | 130/200 [11:37<05:10,  4.43s/it]

Epoch 130, loss: 0.040596


 70%|███████   | 140/200 [12:21<04:22,  4.37s/it]

Epoch 140, loss: 0.046534


 75%|███████▌  | 150/200 [13:06<03:41,  4.44s/it]

Epoch 150, loss: 0.065665


 80%|████████  | 160/200 [13:51<02:56,  4.42s/it]

Epoch 160, loss: 0.037773


 85%|████████▌ | 170/200 [14:36<02:13,  4.45s/it]

Epoch 170, loss: 0.036996


 90%|█████████ | 180/200 [15:20<01:28,  4.41s/it]

Epoch 180, loss: 0.025229


 95%|█████████▌| 190/200 [16:05<00:44,  4.40s/it]

Epoch 190, loss: 0.061037


100%|██████████| 200/200 [16:49<00:00,  5.05s/it]

Epoch 200, loss: 0.028545





test rmse: 4.75137

current model_type: <class '__main__.GRUPredictor'>


  5%|▌         | 10/200 [00:34<10:58,  3.46s/it]

Epoch 10, loss: 0.229876


 10%|█         | 20/200 [01:09<10:34,  3.53s/it]

Epoch 20, loss: 0.192013


 15%|█▌        | 30/200 [01:45<09:58,  3.52s/it]

Epoch 30, loss: 0.175683


 20%|██        | 40/200 [02:20<09:20,  3.50s/it]

Epoch 40, loss: 0.170545


 25%|██▌       | 50/200 [02:55<08:46,  3.51s/it]

Epoch 50, loss: 0.092568


 30%|███       | 60/200 [03:31<08:11,  3.51s/it]

Epoch 60, loss: 0.104318


 35%|███▌      | 70/200 [04:06<07:36,  3.51s/it]

Epoch 70, loss: 0.082700


 40%|████      | 80/200 [04:41<07:00,  3.50s/it]

Epoch 80, loss: 0.171681


 45%|████▌     | 90/200 [05:16<06:25,  3.51s/it]

Epoch 90, loss: 0.079906


 50%|█████     | 100/200 [05:52<05:49,  3.49s/it]

Epoch 100, loss: 0.176380


 55%|█████▌    | 110/200 [06:27<05:14,  3.50s/it]

Epoch 110, loss: 0.057245


 60%|██████    | 120/200 [07:02<04:40,  3.51s/it]

Epoch 120, loss: 0.121277


 65%|██████▌   | 130/200 [07:37<04:05,  3.51s/it]

Epoch 130, loss: 0.078781


 70%|███████   | 140/200 [08:13<03:30,  3.51s/it]

Epoch 140, loss: 0.088922


 75%|███████▌  | 150/200 [08:48<02:55,  3.51s/it]

Epoch 150, loss: 0.110351


 80%|████████  | 160/200 [09:23<02:20,  3.51s/it]

Epoch 160, loss: 0.066474


 85%|████████▌ | 170/200 [09:59<01:48,  3.62s/it]

Epoch 170, loss: 0.095524


 90%|█████████ | 180/200 [10:35<01:10,  3.52s/it]

Epoch 180, loss: 0.065313


 95%|█████████▌| 190/200 [11:10<00:35,  3.50s/it]

Epoch 190, loss: 0.099775


100%|██████████| 200/200 [11:46<00:00,  3.53s/it]

Epoch 200, loss: 0.052344





test rmse: 6.29479



In [18]:
print(f'best model: {best_model}')
print(f'min_rmse: {min_rmse:.6f}')

best model: BiLSTMPredictor(
  (lstm): LSTM(207, 108, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=216, out_features=207, bias=True)
)
min_rmse: 4.751367


### 2.5 在真实测试集上预测

In [19]:
df_real = pd.read_csv('data/jump_task.csv')

# 填充空值（便于统一读取，且采用forward fill，不会影响minmax）
df_real['coordinates'] = df_real['coordinates'].ffill()
df_real['current_dis'] = df_real['current_dis'].ffill()

df_real[['longitude', 'latitude']] = pd.DataFrame(
    df_real['coordinates'].apply(lambda x: eval(x)).tolist(), index=df_real.index
)
df_real.head()

Unnamed: 0,id,time,entity_id,traj_id,coordinates,current_dis,speeds,holidays,longitude,latitude
0,355,2013-10-08T08:30:00Z,256,25,"[116.324127,39.897049]",0.0,21.6075,0,116.324127,39.897049
1,356,2013-10-08T08:30:55Z,256,25,"[116.327652,39.897018]",0.300751,21.15,0,116.327652,39.897018
2,357,2013-10-08T08:32:44Z,256,25,"[116.330978,39.897041]",0.584521,20.4825,0,116.330978,39.897041
3,358,2013-10-08T08:34:32Z,256,25,"[116.336624,39.897305]",1.067123,20.6575,0,116.336624,39.897305
4,359,2013-10-08T08:35:25Z,256,25,"[116.341118,39.897537]",1.451388,24.07,0,116.341118,39.897537


In [20]:
# 划分轨迹
traj_index_list = df_real.groupby(['traj_id']).size()      # Series

traj_list = []
for index, length in traj_index_list.items():
    traj_id = index
    trajectory = df_real[(df_real['traj_id'] == traj_id)][best_features].values.tolist()
    traj_list.append(trajectory[:14])

traj_seq = torch.tensor(np.array(traj_list), dtype=torch.float32)
traj_seq.shape

NameError: name 'best_features' is not defined

In [None]:
# 预测
predictions = best_model(traj_seq)
predictions.shape

torch.Size([1959, 4])

In [None]:
# 写入新文件
df_pred = pd.read_csv('data/jump_task.csv')
windows_size = 15

for i, pred in enumerate(predictions):
    coordinates = f"[{pred[0].item():.6f},{pred[1].item():.6f}]"
    current_dis = pred[2].item()
    cur_line = (i + 1) * window_size - 1

    df_pred.loc[cur_line, 'coordinates'] = coordinates
    df_pred.loc[cur_line, 'current_dis'] = current_dis

df_pred.to_csv('data/jump_task-pred.csv', index=False)

## 3. 超参数调优

在我们的实验中，Optuna 简洁、高效，故仅展示 Optuna 自动调优

基于上面的实验结果，我们这里选取的模型特征组合是：**Bi-LSTM + holidays**

In [None]:
# !pip install -q optuna

import optuna

ModuleNotFoundError: No module named 'optuna'

In [None]:
# 读取数据
df = pd.read_csv('data/traj.csv')

# 1. 将时间time转为基于最早时间的偏移time_offset
df['time'] = pd.to_datetime(df['time'])
base_time = df['time'].min()
df['time_offset'] = (df['time'] - base_time).dt.total_seconds()

# 2. 将coordinates列转换为经度和纬度两列
df[['longitude', 'latitude']] = pd.DataFrame(df['coordinates'].apply(lambda x: eval(x)).tolist(), index=df.index)

# 3. 将holidays转为float32
df['holidays'] = df['holidays'].astype('float32')

In [None]:
# 划分训练集和验证集 = 8:2
features = ['longitude', 'latitude', 'current_dis', 'holidays']
X, y = createSequence_(df, features, window_size)
X_train, X_val = train_test_split(X, test_size=0.2, random_state=seed)
y_train, y_val = train_test_split(y, test_size=0.2, random_state=seed)

In [None]:
X_train.shape

torch.Size([163064, 14, 4])

In [None]:
def objective(trial):
    # 定义超参数搜索空间
    hidden_size = trial.suggest_categorical('hidden_size', [32, 48, 64, 96, 108, 128])
    lr = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    num_epochs = trial.suggest_int('num_epochs', 20, 400)

    model = BiLSTMPredictor(X_train.shape[2], hidden_size, y_train.shape[1]).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # 训练模型
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

    # 验证模型
    model.eval()
    preds = []
    with torch.no_grad():
        outputs = model(X_val)
        preds.append(outputs)
        preds = torch.cat(preds, dim=0)
        rmse = calc_rmse(preds, y_val)

    # 以 rmse 作为目标值
    return rmse.item()

In [None]:
study = optuna.create_study(direction='minimize')   # 目标最小化
study.optimize(objective, n_trials=400)

# 打印最佳参数和目标值
print('Best Parameters:', study.best_params)
print('Best Objective Value:', study.best_value)

In [None]:
# 部分参数
window_size = 15
lr = 1e-2
epoch_num = 200
logging_steps = 40

features = ['longitude', 'latitude', 'current_dis', 'holidays']
X, y = createSequence_(df, window_size)
model = BiLSTMPredictor(X.shape[2], 108, y.shape[1]).to(device)

train_set = TensorDataset(X, y)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, generator=torch.Generator(device=device))

# 初始化损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=lr)

# 模型训练
for epoch in tqdm(range(epoch_num)):
    loss = 0.0
    for data in train_loader:
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        loss += loss.item()

    # log
    if epoch % logging_steps == (logging_steps - 1):
        print(f"Epoch {epoch + 1}, loss: {loss / len(train_loader):.6f}")

 20%|██        | 40/200 [02:43<10:53,  4.08s/it]

Epoch 40, loss: 0.000146


 40%|████      | 80/200 [05:27<08:09,  4.08s/it]

Epoch 80, loss: 0.000948


 60%|██████    | 120/200 [08:11<05:27,  4.10s/it]

Epoch 120, loss: 0.000337


 80%|████████  | 160/200 [10:55<02:36,  3.90s/it]

Epoch 160, loss: 0.000203


100%|██████████| 200/200 [13:37<00:00,  4.09s/it]

Epoch 200, loss: 0.000248



