In [1]:
import pandas as pd

# Input CSV locations, relative to the notebook's working directory.
DATASET_PATH = '../train.csv'  # full dataset (~740k rows)
MINI_DATASET_PATH = './mini_train.csv'  # first 10,000 rows of the ~740k-row dataset

In [2]:
# Load the CSV at DATASET_PATH into a DataFrame and show it as the cell output.
data = pd.read_csv(DATASET_PATH)
data

Unnamed: 0,holidays,time_period,cpath,WKT,cost,x1,y1,x2,y2,speed,road_cost_time
0,0,17,81232,"LINESTRING (116.3177242 39.9994668, 116.316748...",100.406686,116.317724,39.999467,116.316748,39.998962,240.080413,0.418221
1,0,17,12987,"LINESTRING (116.3167484 39.9989623, 116.317724...",100.406686,116.316748,39.998962,116.317724,39.999467,240.080413,0.418221
2,0,17,81235,"LINESTRING (116.3157202 39.9987527, 116.312787...",250.976954,116.315720,39.998753,116.312787,39.998608,265.487340,0.945344
3,0,18,67757,"LINESTRING (116.3076567 39.9572086, 116.307654...",59.536597,116.307657,39.957209,116.307655,39.956672,197.626817,0.301258
4,0,18,67743,"LINESTRING (116.3076546 39.9566724, 116.310289...",225.228398,116.307655,39.956672,116.310290,39.956718,197.626817,1.139665
...,...,...,...,...,...,...,...,...,...,...,...
741238,0,23,46394,"LINESTRING (116.3274908 39.800067, 116.3276872...",19.357983,116.327491,39.800067,116.327687,39.800153,287.408962,0.067353
741239,0,23,46411,"LINESTRING (116.3276272 39.8002128, 116.327490...",19.962984,116.327627,39.800213,116.327491,39.800067,751.141649,0.026577
741240,0,23,46394,"LINESTRING (116.3274908 39.800067, 116.3276872...",19.357983,116.327491,39.800067,116.327687,39.800153,751.141649,0.025771
741241,0,23,46411,"LINESTRING (116.3276272 39.8002128, 116.327490...",19.962984,116.327627,39.800213,116.327491,39.800067,488.938405,0.040829


In [3]:
def transfer_data(data):
    """Split a DataFrame into a feature matrix and a target array.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the nine feature columns selected below plus
        ``road_cost_time``. Any other columns are ignored.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` has one column per feature, in the order
        listed below, and ``y`` is 2-D with a single ``road_cost_time``
        column.
    """
    feature_columns = ['holidays', 'time_period', 'cpath', 'cost', 'x1', 'y1', 'x2', 'y2', 'speed']
    target_columns = ['road_cost_time']  # list selection keeps the target 2-D

    features = data[feature_columns].to_numpy()
    target = data[target_columns].to_numpy()
    return features, target

# Build the feature matrix X and target array y from the loaded DataFrame.
X, y = transfer_data(data=data)

In [4]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

# Randomly hold out 20% of the rows as the test set; the fixed random_state
# keeps the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Convert each NumPy split into a float32 PyTorch tensor.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_test, y_test)
)

In [5]:
# Model hyperparameters
input_size = X.shape[1]  # number of input features (columns of X)
hidden_sizes = [10, 20, 10]  # widths of the hidden layers, in order
output_size = 1  # output dimension
lr = 0.005  # learning rate

# Feed-forward neural network model
class FeedForwardNN(nn.Module):
    """Fully connected network built from a list of hidden-layer widths.

    Every hidden stage is ``Linear -> BatchNorm1d -> ReLU``; a final
    ``Linear`` layer maps the last stage to ``output_size`` values.

    Args:
        input_size: Number of features per input sample.
        hidden_sizes: Iterable with the width of each hidden layer, in order.
            May be empty, in which case the network reduces to a single
            ``Linear(input_size, output_size)``.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        layers = []
        input_dim = input_size
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(input_dim, hidden_size))
            layers.append(nn.BatchNorm1d(hidden_size))  # batch normalisation
            layers.append(nn.ReLU())  # or any other activation
            input_dim = hidden_size
        # Output layer. Feed it from the running width instead of
        # hidden_sizes[-1] so an empty hidden_sizes no longer raises IndexError.
        layers.append(nn.Linear(input_dim, output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` (batch of feature rows) through the layer stack.

        In training mode BatchNorm1d needs more than one sample per batch;
        call ``eval()`` first when predicting a single row.
        """
        return self.model(x)

In [6]:
import torch.optim as optim

# Seed PyTorch's global RNG before the model is built so the random weight
# initialisation (and therefore the reported losses) can be reproduced on the
# same setup, in line with the fixed random_state used for the train/test split.
torch.manual_seed(42)

# Instantiate the model
model = FeedForwardNN(input_size, hidden_sizes, output_size)

# Loss function and optimizer
criterion = nn.MSELoss()  # mean squared error loss
optimizer = optim.Adam(model.parameters(), lr=lr)  # Adam optimizer

In [7]:
# Train the model. Each epoch is a single forward/backward pass over the whole
# training tensor followed by one optimizer step.
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Report the training loss after every tenth epoch (1-based count).
    epochs_done = epoch + 1
    if epochs_done % 10:
        continue
    print(f'Epoch [{epochs_done}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.3482
Epoch [20/100], Loss: 0.2886
Epoch [30/100], Loss: 0.2188
Epoch [40/100], Loss: 0.1281
Epoch [50/100], Loss: 0.1004
Epoch [60/100], Loss: 0.0907
Epoch [70/100], Loss: 0.0806
Epoch [80/100], Loss: 0.0718
Epoch [90/100], Loss: 0.0629
Epoch [100/100], Loss: 0.0620


In [8]:
# Persist the trained model object to disk (done before switching to eval mode).
# NOTE(review): this pickles the whole module rather than its state_dict, so
# loading the file presumably requires the FeedForwardNN class to be importable
# under the same name — confirm against whatever consumes this file.
torch.save(model, 'time_predict_model.pth')

# Evaluate the model on the held-out test tensors
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_tensor)
    print(f'Test Loss: {test_loss.item():.4f}')

Test Loss: 0.0863


  "type " + obj.__name__ + ". It won't be checked "


In [9]:
# Re-read the CSV at DATASET_PATH (the same file loaded earlier in the
# notebook) into `data` and show it as the cell output.
data = pd.read_csv(DATASET_PATH)
data

Unnamed: 0,holidays,time_period,cpath,WKT,cost,x1,y1,x2,y2,speed,road_cost_time
0,0,17,81232,"LINESTRING (116.3177242 39.9994668, 116.316748...",100.406686,116.317724,39.999467,116.316748,39.998962,240.080413,0.418221
1,0,17,12987,"LINESTRING (116.3167484 39.9989623, 116.317724...",100.406686,116.316748,39.998962,116.317724,39.999467,240.080413,0.418221
2,0,17,81235,"LINESTRING (116.3157202 39.9987527, 116.312787...",250.976954,116.315720,39.998753,116.312787,39.998608,265.487340,0.945344
3,0,18,67757,"LINESTRING (116.3076567 39.9572086, 116.307654...",59.536597,116.307657,39.957209,116.307655,39.956672,197.626817,0.301258
4,0,18,67743,"LINESTRING (116.3076546 39.9566724, 116.310289...",225.228398,116.307655,39.956672,116.310290,39.956718,197.626817,1.139665
...,...,...,...,...,...,...,...,...,...,...,...
741238,0,23,46394,"LINESTRING (116.3274908 39.800067, 116.3276872...",19.357983,116.327491,39.800067,116.327687,39.800153,287.408962,0.067353
741239,0,23,46411,"LINESTRING (116.3276272 39.8002128, 116.327490...",19.962984,116.327627,39.800213,116.327491,39.800067,751.141649,0.026577
741240,0,23,46394,"LINESTRING (116.3274908 39.800067, 116.3276872...",19.357983,116.327491,39.800067,116.327687,39.800153,751.141649,0.025771
741241,0,23,46411,"LINESTRING (116.3276272 39.8002128, 116.327490...",19.962984,116.327627,39.800213,116.327491,39.800067,488.938405,0.040829


In [10]:
# Spot-check the trained model on a single row of the dataset.
# NOTE(review): the row comes from the full dataset, so it may belong to the
# training split rather than the held-out test split.
SAMPLE_ROW_INDEX = 626682  # positional index of the row to inspect
row = data.iloc[[SAMPLE_ROW_INDEX]]  # list indexer keeps a one-row DataFrame

# Use dedicated names instead of rebinding the notebook-wide `X` / `y`:
# overwriting them with a one-row array would break a later re-run of the
# train/test split cell, which expects the full arrays.
row_X, row_y = transfer_data(data=row)
X_tensor = torch.tensor(row_X, dtype=torch.float32)
y_tensor = torch.tensor(row_y, dtype=torch.float32)

# eval() puts the BatchNorm layers in inference mode, which a one-row batch needs;
# no_grad() disables gradient tracking to save memory and time.
model.eval()
with torch.no_grad():
    # Run the model on the selected row
    prediction = model(X_tensor)

print("model prediction:" + str(prediction[0][0]))
print("ground truth:" + str(y_tensor[0]))

model prediction:tensor(0.0137)
ground truth:tensor([0.0442])
