In [11]:
# speed_matrix = r'E:\硕士毕业论文update250120\硕士毕业论文代码\车辆速度估计方法\2.速度转移概率矩阵\速度转移矩阵结果\2.无1133A的19辆车训练集'

import warnings
# 关闭警告提醒
warnings.filterwarnings("ignore")

import os
import time
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib import cm
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# matplotlib font configuration
from matplotlib.font_manager import FontProperties  # loads a font from an explicit file path
plt.rcParams['font.sans-serif'] = ['Times New Roman']  # font list used for Latin text
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
plt.rcParams['font.size'] = 15  # global font size
# Raw string so the backslashes of the Windows path are literal characters rather than
# (invalid) escape sequences; the resulting path is unchanged.
chinese_font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc', size=15)  # SimSun, for Chinese labels

# 调用函数

In [22]:
# 结合速度转移矩阵和规则约束优化的速度序列估计算法 调用函数

def simulate_next_speed_with_escape(isfinal, current_speed, road_type, transition_matrix, max_speed, low_speed_threshold=5, escape_speed=20):
    """
    Sample the next-step speed from a speed transition matrix, with a low-speed escape rule.

    :param isfinal: whether the current sub-trip is the last one; the escape rule is only
                    applied when this compares equal to False
    :param current_speed: speed at the current step (km/h)
    :param road_type: road type label of the current sub-trip
    :param transition_matrix: square array whose row i holds the weights of moving from state i
    :param max_speed: speed mapped to the highest state (km/h)
    :param low_speed_threshold: below this speed (exclusive, and above 0) a non-expressway
                                vehicle "escapes" instead of sampling
    :param escape_speed: increment added to the current speed when escaping (km/h)
    :return: next-step speed (km/h)
    """
    expressway_types = ("城市快速路", "城市快速路匝道")
    controlled_access_types = ("高速公路", "城市快速路", "高速公路匝道", "城市快速路匝道")

    # Map the physical speed onto a state index, clamped to the last state
    last_state = transition_matrix.shape[0] - 1
    state = min(int(current_speed / max_speed * last_state), last_state)

    # Low-speed escape (skipped on the final sub-trip)
    if isfinal == False:
        on_expressway = road_type in expressway_types
        if not on_expressway and 0 < current_speed < low_speed_threshold:
            return current_speed + escape_speed
        if on_expressway and 0 < current_speed < 40:
            # Expressways escape from a wider speed band and to a higher speed
            return min(current_speed * 0.8 + 40, max_speed)

    # Regular transition: turn the matrix row into a valid probability vector
    weights = transition_matrix[state]
    if np.isnan(weights).any() or weights.sum() == 0:
        # Unusable row -> fall back to a uniform distribution
        weights = np.full(len(weights), 1 / len(weights))
    else:
        weights = weights / weights.sum()

    def draw_speed():
        """Sample one state index and convert it back to a speed."""
        sampled_state = np.random.choice(len(weights), p=weights)
        return sampled_state / last_state * max_speed

    candidate = draw_speed()

    # Only motorways/expressways (and their ramps) get the "no sudden drop" rule
    if road_type not in controlled_access_types:
        return candidate

    # Above 80 km/h, reject drops of more than 25 km/h and resample
    retries = 0
    while current_speed > 80 and (candidate - current_speed) < -25:
        candidate = draw_speed()
        retries += 1
        # Give up after too many rejections to avoid a long loop
        if retries > 100:
            return min(current_speed + 5, max_speed)
    return candidate
    
    
def smooth_speed_sequence(speed_sequence, alpha=0.7):
    """
    Exponentially smooth a speed sequence.

    Each output value is ``alpha * previous_smoothed + (1 - alpha) * current_raw``, so
    ``alpha`` is the weight of the history (a larger alpha means stronger smoothing).

    :param speed_sequence: sequence of speeds (list or 1-D array)
    :param alpha: weight given to the previous smoothed value
    :return: list with the same length as the input; an empty input returns an empty list
    """
    # An empty input has no first value to seed the recursion with
    if len(speed_sequence) == 0:
        return []

    smoothed_sequence = [speed_sequence[0]]
    for raw_speed in speed_sequence[1:]:
        smoothed_sequence.append(alpha * smoothed_sequence[-1] + (1 - alpha) * raw_speed)
    return smoothed_sequence


def generate_speed_sequence(road_distance, road_type, time_steps, transition_matrix, start_speed, isfirst, isfinal, time_interval=2, max_speed=150):
    """
    Generate the speed sequence of one sub-trip by sampling the transition matrix step by step
    until the travelled distance reaches the sub-trip length, then smooth the result.

    :param road_distance: length of the sub-trip (m)
    :param road_type: road type of the sub-trip
    :param time_steps: number of time steps allotted to the sub-trip (accepted for interface
                       compatibility; not used by the generation loop)
    :param transition_matrix: speed transition matrix for this road type
    :param start_speed: speed at the start of the sub-trip (km/h)
    :param isfirst: whether this is the first sub-trip (the start speed is then emitted as the first value)
    :param isfinal: whether this is the last sub-trip (a forced deceleration is applied near its end)
    :param time_interval: length of one time step (s)
    :param max_speed: maximum speed (km/h)
    :return: smoothed speed sequence (list, km/h)
    """
    current_speed = start_speed
    speed_sequence = []
    # Running sum of the emitted speeds, so the travelled distance does not need a full
    # re-summation of the sequence at every step.
    speed_sum = 0
    segment_total_distance = 0

    # The first sub-trip starts with its start speed as an explicit sample
    if isfirst:
        speed_sequence.append(start_speed)
        speed_sum += start_speed

    # Length of the tail zone of the last sub-trip in which the vehicle is forced to slow down
    tail_length = min(road_distance * 0.1, 50)

    # NOTE(review): the loop ends only once distance accumulates. If the sampled speed stays at 0
    # outside the tail zone the distance never grows and the loop does not terminate.
    while segment_total_distance < road_distance:

        # Markov-chain Monte Carlo draw of the next-step speed (always drawn, even in the tail zone)
        next_speed = simulate_next_speed_with_escape(isfinal, current_speed, road_type, transition_matrix, max_speed)

        # Forced deceleration at the end of the last sub-trip
        reached_stop = False
        if isfinal and (road_distance - segment_total_distance) < tail_length:
            deceleration = 40 if current_speed > 80 else 20
            next_speed = max(0, current_speed - deceleration)
            # Once the speed hits 0 the sub-trip ends, otherwise the loop could not progress
            reached_stop = next_speed == 0

        speed_sequence.append(next_speed)
        speed_sum += next_speed
        segment_total_distance = speed_sum * time_interval / 3.6  # km/h * s -> m
        if reached_stop:
            break
        current_speed = next_speed

    # Exponential smoothing of the raw sequence
    speed_sequence = smooth_speed_sequence(speed_sequence)

    return speed_sequence


def dynamic_speed_adjustment(speed_sequence, target_distance, start_distance, time_interval):
    """
    Shift a speed sequence by a constant so its travelled distance approaches the target.

    :param speed_sequence: speeds of one sub-trip (km/h)
    :param target_distance: distance the sub-trip should cover (m)
    :param start_distance: cumulative distance at the start of the sub-trip (m)
    :param time_interval: length of one time step (s)
    :return: (adjusted speeds as a numpy array, cumulative distance after every step as a list,
              distance covered by the adjusted speeds, absolute distance error)
    """
    n_steps = len(speed_sequence)

    # Distance covered by the unadjusted sequence and the gap to the target
    raw_distance = sum(speed_sequence) * time_interval / 3.6
    shortfall = target_distance - raw_distance

    # Spread the gap evenly over all steps: (km) / (h) -> km/h per step
    shift = (shortfall / 1000) / (n_steps * time_interval / 3600)

    # Apply the shift, never letting a speed go negative
    shifted = []
    for speed in speed_sequence:
        shifted.append(max(0, speed + shift))
    adjusted_speed_sequence = np.array(shifted)

    # Cumulative distance after each step, continuing from start_distance
    adjusted_cumulative_distance = []
    odometer = start_distance
    for speed in adjusted_speed_sequence:
        odometer = speed / 3.6 * time_interval + odometer
        adjusted_cumulative_distance.append(odometer)

    # Distance actually covered after the adjustment and the remaining absolute error
    adjusted_distance = sum(adjusted_speed_sequence) * time_interval / 3.6
    adjusted_error = abs(target_distance - adjusted_distance)

    return adjusted_speed_sequence, adjusted_cumulative_distance, adjusted_distance, adjusted_error


def _load_adjusted_transition_matrix(matrix_path, time_period, isurban, road_type):
    """
    Read one scenario's speed transition matrix from CSV and apply the scenario re-weighting.

    The file is "<time_period>+<Urban|Non-Urban>+<road_type>.csv" inside ``matrix_path``; its first
    column is dropped (presumably a row-label column - confirm against the matrix export).
    Rows are not re-normalised here; normalisation happens when a row is sampled.
    """
    matrix_name = f"{time_period}+{'Urban' if isurban else 'Non-Urban'}+{road_type}.csv"
    # Cast to float so the in-place scaling below also works if the CSV was parsed as integers
    matrix = pd.read_csv(matrix_path + "/" + matrix_name).iloc[:, 1:].values.astype(float)

    for n, row in enumerate(matrix):  # `row` is a view, edits write through to `matrix`
        n_states = len(row)

        # All-zero row: give the neighbouring states equal probability
        if row.sum() == 0:
            matrix[n, max(0, n - 1): min(matrix.shape[1], n + 2)] = 1
            matrix[n] /= matrix[n].sum()

        # Time period preference
        if time_period == "low_peak":
            if n > n_states * 0.7:
                row[n:] *= 1.3   # favour the high-speed range
        elif time_period == "high_peak":
            if n < n_states * 0.5:
                row[:n] *= 1.2   # favour the low-speed range

        # Spatial preference (kept as an equality test so only an explicit False-like flag matches)
        if isurban == False:
            if n > n_states * 0.7:
                row[n:] *= 1.15  # favour the high-speed range

        # Road type preference
        if road_type == "高速公路" or road_type == "高速公路匝道":
            if n > n_states * 0.7:
                row[n:] *= 1.3
        elif road_type == "城市快速路" or road_type == "城市快速路匝道":
            if n > n_states * 0.5:
                row[n:] *= 1.5  # favour the high-speed range
            elif n < n_states * 0.2:
                row[:n] *= 0.8  # damp the low-speed range
        elif road_type == "主干道" and n > n_states * 0.5:
            row[n:] *= 1.1

    return matrix


def predict_entire_trip(total_time, road_infos, matrix_path, time_interval=2, max_speed=150):
    """
    Estimate the speed sequence of a whole trip from its sequence of road segments.

    :param total_time: observed total travel time (s)
    :param road_infos: [[road types], [segment distances (m)], [time periods], [urban flags]]
    :param matrix_path: directory that contains the speed transition matrix CSV files
    :param time_interval: length of one time step (s)
    :param max_speed: maximum speed (km/h)
    :return: (speed sequence, cumulative distance per step, estimated total distance,
              summed absolute distance error, road type per used segment, error per used segment);
              all sequences are empty when no segment has a positive distance
    """
    road_types, distances, times, isurbans = road_infos
    num_steps = total_time // time_interval  # total number of time steps
    trip_length = sum(distances)

    total_sequence = []
    cumulative_distance = [0]
    total_error = 0
    total_distance = 0

    segment_roadtype = []
    segment_error = []

    # Drop zero/negative-length segments first, so "first" and "last" refer to segments that are
    # actually simulated (a skipped leading/trailing segment previously disabled the start-speed
    # and final-deceleration handling).
    segments = [seg for seg in zip(road_types, distances, times, isurbans) if not (seg[1] <= 0)]
    last_position = len(segments) - 1

    # Adjusted matrices depend only on (time period, urban flag, road type); reuse within one trip
    matrix_cache = {}

    start_speed = 0  # the trip starts from standstill
    for position, (road_type, road_distance, time_period, isurban) in enumerate(segments):

        # Share of the time steps proportional to the segment length
        segment_time_steps = int((road_distance / trip_length) * num_steps) if trip_length > 0 else 0

        # Transition matrix for this time period / area / road type
        cache_key = (time_period, bool(isurban), isurban == False, road_type)
        if cache_key not in matrix_cache:
            matrix_cache[cache_key] = _load_adjusted_transition_matrix(matrix_path, time_period, isurban, road_type)
        segment_transition_matrix = matrix_cache[cache_key]

        isfirst = position == 0
        isfinal = position == last_position

        # Generate the raw speed sequence of this segment
        speed_sequence = generate_speed_sequence(road_distance=road_distance,
                                                 road_type=road_type,
                                                 time_steps=segment_time_steps,
                                                 transition_matrix=segment_transition_matrix,
                                                 start_speed=start_speed,
                                                 isfirst=isfirst,
                                                 isfinal=isfinal,
                                                 time_interval=time_interval,
                                                 max_speed=max_speed
                                                )

        # Compensate the distance error of the segment
        adjusted_speed_sequence, adjusted_cumulative_distance, predicted_distance, predicted_error = dynamic_speed_adjustment(
            speed_sequence, road_distance, cumulative_distance[-1], time_interval)

        # Accumulate trip-level results
        total_sequence.extend(adjusted_speed_sequence)
        cumulative_distance.extend(adjusted_cumulative_distance)
        total_error += predicted_error
        total_distance += predicted_distance

        segment_roadtype.append(road_type)
        segment_error.append(predicted_error)

        # The next segment starts slightly below the speed this one ended with
        start_speed = max(0, adjusted_speed_sequence[-1] - 5)

    # Drop the artificial leading 0 of the cumulative distance
    cumulative_distance = cumulative_distance[1:]
    # The trip ends at standstill (nothing to set when no segment was simulated)
    if total_sequence:
        total_sequence[-1] = 0
    return total_sequence, cumulative_distance, total_distance, total_error, segment_roadtype, segment_error

In [21]:
# 按照时间顺序统计不同道路类型的行驶距离(m)、时间（高峰/平峰/低峰）、空间（主城区/非主城区）

def input_data(df):
    """
    Build the model input: split a trip into consecutive runs of the same road type.

    :param df: one trip's records in travel order, with columns 'road_type', 'time_period',
               'in_urban_area', 'distance' and 'c_distance'
    :return: [road types, segment lengths, time periods, urban flags], one entry per segment.
             For a trip with a single road type the length is the last row's 'c_distance';
             otherwise it is the sum of 'distance' over the segment's rows. Time period and
             urban flag are taken from the last row of each segment. An empty frame yields
             four empty lists.
    """
    if len(df) == 0:
        return [[], [], [], []]

    # Trip on a single road type: one segment covering the whole trip
    if len(df['road_type'].drop_duplicates()) <= 1:
        first_row = df.iloc[0]
        # NOTE(review): uses the cumulative distance of the last row, i.e. assumes 'c_distance'
        # starts at 0 for the trip - confirm against the data preparation.
        return [[first_row['road_type']], [df.iloc[-1]['c_distance']],
                [first_row['time_period']], [first_row['in_urban_area']]]

    road_types = df['road_type'].tolist()
    time_periods = df['time_period'].tolist()
    urban_flags = df['in_urban_area'].tolist()
    step_distances = df['distance'].tolist()

    type_list = []
    time_list = []
    space_list = []
    length_list = []

    def close_segment(first_row, last_row):
        """Append the segment made of rows first_row..last_row (inclusive)."""
        type_list.append(road_types[last_row])
        time_list.append(time_periods[last_row])
        space_list.append(urban_flags[last_row])
        length_list.append(sum(step_distances[first_row:last_row + 1]))

    # Walk the trip; every change of road type closes the segment that just ended
    segment_start = 0
    for row in range(1, len(df)):
        if road_types[row] != road_types[row - 1]:
            close_segment(segment_start, row - 1)
            segment_start = row

    # Always close the trailing segment. When the road type changes on the very last row this
    # is a one-row segment, and the segment before it has already been closed by the loop.
    close_segment(segment_start, len(df) - 1)

    return [type_list, length_list, time_list, space_list]

In [23]:
# 将速度估计结果匹配至原始数据

def match_predspeed_truedf(pred_df, true_df):
    """
    Attach an estimated speed to every row of the original trip data.

    Both frames are sorted by 'c_distance'; every original row receives the 'speed_kmh' of the
    estimate point whose 'c_distance' is closest (the first such point on ties).

    :param pred_df: estimate points with columns 'c_distance' and 'speed_kmh'
    :param true_df: original trip rows with a 'c_distance' column
    :return: sorted copy of true_df with an added 'predict_speed' column
    """
    pred_sorted = pred_df.sort_values(by="c_distance").reset_index(drop=True)
    true_sorted = true_df.sort_values(by="c_distance").reset_index(drop=True)

    pred_positions = pred_sorted['c_distance']

    # For each original row: index of the nearest estimate point, then that point's speed
    nearest_speeds = [
        pred_sorted.loc[(pred_positions - node_distance).abs().idxmin(), 'speed_kmh']
        for node_distance in true_sorted["c_distance"]
    ]

    true_sorted['predict_speed'] = nearest_speeds
    return true_sorted


def categorize_time_period(time):
    """
    Label a timestamp by its hour of day.

    Hours 7-8 and 17-18 are 'high_peak', hours 22-23 and 0-6 are 'low_peak',
    every other hour is 'off_peak'.
    """
    hour = time.hour
    if hour in (7, 8, 17, 18):
        return 'high_peak'
    if hour in (22, 23, 0, 1, 2, 3, 4, 5, 6):
        return 'low_peak'
    return 'off_peak'


# 将原始数据的路网节点及其信息匹配至速度估计列表
def match_predspeed_preddf(pred_df, truedf_with_predspeed):
    """
    Enrich the estimated speed points with the road-network node information of the original trip.

    Each original row is assigned to the estimate point nearest in 'c_distance' (first point on
    ties, first original row when several rows share a point), and that point receives the row's
    node columns. Timestamps are spread over the points in proportion to the distance covered,
    in whole seconds. Edge index and road type are carried forward from the last matched point.
    Points that end up on the same timestamp are de-duplicated, keeping the first.

    :param pred_df: estimate points with columns 'c_distance' and 'speed_kmh'
    :param truedf_with_predspeed: original trip rows with columns 'c_distance', 'time',
                                  'device_id', 'travel_id', 'date', 'posname', 'longitude',
                                  'latitude', 'edge_index' and 'road_type'
    :return: DataFrame with one row per distinct timestamp
    """
    # Work on sorted copies with a clean 0..n-1 index
    pred_df = pred_df.sort_values(by="c_distance").reset_index(drop=True)
    true_df = truedf_with_predspeed.sort_values(by="c_distance").reset_index(drop=True)

    start_time = true_df.at[0, 'time']
    end_time = true_df.at[len(true_df) - 1, 'time']
    total_dist = true_df.iloc[-1]['c_distance'] - true_df.iloc[0]['c_distance']
    total_travel = (end_time - start_time).total_seconds()
    current_time = start_time
    current_edge = true_df.at[0, 'edge_index']
    current_road_type = true_df.at[0, 'road_type']

    # Trip-level constants
    pred_df['device_id'] = true_df.at[0, 'device_id']
    pred_df['travel_id'] = true_df.at[0, 'travel_id']
    pred_df['date'] = true_df.at[0, 'date']
    pred_df = pred_df.rename(columns={'speed_kmh': 'predict_speed'})

    # Attach node information by the row index of the nearest estimate point. Joining on the
    # float speed value instead would give the same node to every point that happens to share a
    # speed (for example all points clipped to 0 km/h) and would duplicate rows.
    node_columns = ['posname', 'longitude', 'latitude', 'edge_index', 'road_type']
    pred_positions = pred_df['c_distance']
    node_info = true_df[node_columns].copy()
    node_info['_pred_row'] = [(pred_positions - node_distance).abs().idxmin()
                              for node_distance in true_df['c_distance']]
    node_info = node_info.drop_duplicates(subset='_pred_row', keep='first').set_index('_pred_row')
    pred_df = pred_df.join(node_info)

    time_array = []
    distance_array = []
    edge_index_array = []
    road_type_array = []

    previous_c_distance = None
    for c_distance, matched_edge, matched_road_type in zip(pred_df['c_distance'],
                                                           pred_df['edge_index'],
                                                           pred_df['road_type']):
        # Distance covered since the previous point (m); the first point covers nothing
        distance = 0 if previous_c_distance is None else c_distance - previous_c_distance
        previous_c_distance = c_distance

        # Time advances in proportion to the share of the trip distance, rounded to whole seconds
        rate = distance / total_dist if total_dist != 0 else 0
        current_time = current_time + datetime.timedelta(seconds=round(total_travel * rate))

        # Points without a matched node inherit the edge/road type of the last matched point
        if not pd.isnull(matched_edge):
            current_edge = matched_edge
            current_road_type = matched_road_type

        time_array.append(current_time)
        distance_array.append(distance)
        edge_index_array.append(current_edge)
        road_type_array.append(current_road_type)

    pred_df['time'] = time_array
    pred_df['distance'] = distance_array
    pred_df['edge_index'] = edge_index_array
    pred_df['road_type'] = road_type_array
    pred_df['time_period'] = pred_df['time'].apply(categorize_time_period)
    # NOTE(review): the urban flag is hard-coded to True for every point - confirm this is intended
    pred_df['in_urban_area'] = True

    # One row per timestamp; reset_index returns a fresh frame, so the assignment below is safe
    pred_unique = pred_df.drop_duplicates(subset='time', keep='first').reset_index(drop=True)
    pred_unique['edge_index'] = pred_unique['edge_index'].astype(int)

    pred_unique = pred_unique[['device_id', 'travel_id', 'date', 'time', 'predict_speed', 'distance', 'c_distance',
                               'edge_index', 'road_type', 'time_period', 'in_urban_area',
                               'posname', 'longitude', 'latitude']]

    return pred_unique

In [12]:
# Directory that holds the speed transition matrix CSV files
speed_matrix_paths = r'C:\01 毕业论文\7.论文代码和结果\硕士毕业论文代码\车辆速度估计方法\2.速度转移概率矩阵\速度转移矩阵结果\2.无1133A的19辆车训练集'

# Test data: read the GBK-encoded CSV, parse 'time' as datetime, and list the distinct trip ids
# in order of first appearance
test_data = pd.read_csv(r'C:\01 毕业论文\7.论文代码和结果\硕士毕业论文代码\应用案例分析\车辆速度估计\study_data.csv', encoding='gbk')
test_data['time'] = pd.to_datetime(test_data['time'])
total_travel = test_data['travel_id'].drop_duplicates().tolist()

# Number of trips, number of records, and a preview of the first rows
print('出行次数:', len(total_travel))
print('出行数据记录:', len(test_data))
test_data.head(3)

# 速度估计

In [13]:
# Per-trip summary accumulators (one entry per processed trip)
travel_ids = []
device_ids = []
real_total_times = []
real_record_numbers = []
pred_speed_lengths = []
real_total_distances = []
pred_total_distances = []
distance_errors = []
real_mean_speeds = []
pred_mean_speeds = []
speed_errors = []

# Column accumulators for the speed estimation results of all trips
# Suffix _1: one row per road-network node of the original data
device_id_1 = []
travel_id_1 = []
date_1 = []
time_1 = []
bayonetname_1 = []
posname_1 = []
longitude_1 = []
latitude_1 = []
distance_1 = []
c_distance_1 = []
edge_length_1 = []
edge_index_1 = []
time_period_1 = []
road_type_1 = []
in_urban_area_1 = []
predict_speed_1 = []

# Suffix _2: one row per estimated speed point, de-duplicated by time
device_id_2 = []
travel_id_2 = []
date_2 = []
time_2 = []
posname_2 = []
longitude_2 = []
latitude_2 = []
distance_2 = []
c_distance_2 = []
edge_index_2 = []
time_period_2 = []
road_type_2 = []
in_urban_area_2 = []
predict_speed_2 = []

# test_data_speed = pd.DataFrame(columns=['device_id', 'travel_id', 'date', 'time', 'bayonetname', 'posname', 'longitude', 'latitude', 
#                                         'distance', 'c_distance', 'edge_length', 'edge_index', 'time_period', 'road_type', 
#                                         'in_urban_area', 'predict_speed'])
# predict_speed_result = pd.DataFrame(columns=['device_id', 'travel_id', 'date', 'time', 'predict_speed', 'distance', 'c_distance', 
#                                              'edge_index', 'road_type', 'time_period', 'in_urban_area', 
#                                              'posname', 'longitude', 'latitude'])

# save_file = r'E:\硕士毕业论文update250120\案例分析\数据\速度估计结果' 
# test_data_speed = pd.read_csv(save_file + '/'+ 'test_data_speed.csv', encoding='gbk')
# predict_speed_result= pd.read_csv(save_file + '/'+ 'predict_speed_result.csv', encoding='gbk')

In [14]:
# Batch run: estimate a speed profile for every trip and append the results to the accumulator lists
ind = len(total_travel)
start = time.perf_counter()

# Split the table by trip once; boolean-filtering the whole table inside the loop would rescan
# every record for every trip. Rows keep their original order within each group.
trips_by_id = test_data.groupby('travel_id', sort=False)

# (accumulator list, source column) pairs - node-level result, one row per road-network node
node_level_targets = [
    (device_id_1, 'device_id'), (travel_id_1, 'travel_id'), (date_1, 'date'), (time_1, 'time'),
    (bayonetname_1, 'bayonetname'), (posname_1, 'posname'), (longitude_1, 'longitude'), (latitude_1, 'latitude'),
    (distance_1, 'distance'), (c_distance_1, 'c_distance'), (edge_length_1, 'edge_length'), (edge_index_1, 'edge_index'),
    (time_period_1, 'time_period'), (road_type_1, 'road_type'), (in_urban_area_1, 'in_urban_area'),
    (predict_speed_1, 'predict_speed'),
]
# (accumulator list, source column) pairs - point-level result, de-duplicated by time
point_level_targets = [
    (device_id_2, 'device_id'), (travel_id_2, 'travel_id'), (date_2, 'date'), (time_2, 'time'),
    (posname_2, 'posname'), (longitude_2, 'longitude'), (latitude_2, 'latitude'),
    (distance_2, 'distance'), (c_distance_2, 'c_distance'), (edge_index_2, 'edge_index'),
    (time_period_2, 'time_period'), (road_type_2, 'road_type'), (in_urban_area_2, 'in_urban_area'),
    (predict_speed_2, 'predict_speed'),
]

for i in range(0, ind):

    # Trip id
    idx = total_travel[i]

    # Records of this trip
    test_path = trips_by_id.get_group(idx).reset_index(drop=True)

    # Observed trip figures. total_seconds() is used because the `.seconds` attribute drops
    # whole days and would under-report trips longer than 24 h.
    real_total_time = int((test_path.iloc[-1]['time'] - test_path.iloc[0]['time']).total_seconds())  # s
    real_total_distance = round((test_path.iloc[-1]['c_distance'] - test_path.iloc[0]['c_distance']), 2)  # m
    real_record_number = len(test_path)
    real_mean_speed = round(real_total_distance / real_total_time * 3.6, 2)  # km/h

    # Skip trips that cover no distance
    if real_total_distance == 0:
        continue

    # Build the model input (segments by road type)
    road_infos = input_data(test_path)

    # Skip trips whose segment lengths sum to zero
    if sum(road_infos[1]) == 0:
        continue

    # Run the speed estimation model
    speed_sequence, cumulative_distance, total_distance, total_error, segment_roadtype, segment_error = predict_entire_trip(
        real_total_time, road_infos, speed_matrix_paths, time_interval=2, max_speed=150)

    # Cumulative distance and speed at every 2 s step
    pre_speed_data = pd.DataFrame({'c_distance': cumulative_distance, 'speed_kmh': speed_sequence})

    # Original rows + estimated speed (one row per road-network node)
    speed_result = match_predspeed_truedf(pre_speed_data, test_path)
    # Estimated points + node information (one row per estimated point)
    speed_result_2s = match_predspeed_preddf(pre_speed_data, speed_result)

    # Errors
    pred_speed_sequence_length = len(speed_result_2s)
    pred_total_distance = round(total_distance, 2)
    pred_mean_speed = round(np.mean(speed_sequence), 2)
    distance_error = round(total_error, 2)
    mean_speed_error = round(pred_mean_speed - real_mean_speed, 2)

    # Store the trip summary
    travel_ids.append(idx)
    device_ids.append(test_path.loc[0, 'device_id'])
    real_total_times.append(real_total_time)
    real_record_numbers.append(real_record_number)
    pred_speed_lengths.append(pred_speed_sequence_length)
    real_total_distances.append(real_total_distance)
    pred_total_distances.append(pred_total_distance)
    distance_errors.append(distance_error)
    real_mean_speeds.append(real_mean_speed)
    pred_mean_speeds.append(pred_mean_speed)
    speed_errors.append(mean_speed_error)

    # Store the matched results column by column
    for target, column in node_level_targets:
        target.extend(speed_result[column].tolist())
    for target, column in point_level_targets:
        target.extend(speed_result_2s[column].tolist())

    # Progress report every 5000 trips
    if i % 5000 == 0:
        end = time.perf_counter()
        opt = end - start
        print(f'第{i}条路径 travel_id = {idx} 速度估计完成  运行时间(h): {round(opt/3600, 2)}')
        print('路网节点:', real_record_number,
              ' *总行程时间(s):', real_total_time,
              ' *总行程距离(m):', real_total_distance,
              ' *平均速度(km/h):', real_mean_speed)
        print("速度序列:", pred_speed_sequence_length,
              f" *估计行驶距离(m): {pred_total_distance}", "相对误差: {:.2%}".format(distance_error / real_total_distance),
              f" *估计平均速度(km/h): {pred_mean_speed}", "相对误差: {:.2%}".format(mean_speed_error / real_mean_speed))
        print(f"停车时刻:  pred={speed_result_2s.loc[len(speed_result_2s)-1, 'time']}  true={test_path.loc[len(test_path)-1, 'time']}")

print(f"已运行至: {i}")
end = time.perf_counter()
opt = end - start
print("CPU Time:", round(opt / 3600, 2), "h")

In [15]:
# Build the result DataFrames from the accumulator lists
# Per-trip summary: one row per processed trip
test_data_result = {'travel_id': travel_ids, 'device_id': device_ids, 'real_total_time': real_total_times, 
                    'real_record_number': real_record_numbers, 'pred_speed_length': pred_speed_lengths, 
                    'real_total_distance': real_total_distances, 'pred_total_distance': pred_total_distances, 'distance_error': distance_errors, 
                    'real_mean_speed': real_mean_speeds, 'pred_mean_speed': pred_mean_speeds, 'speed_error': speed_errors
                   }
test_data_result = pd.DataFrame(test_data_result)

# Node-level result: original records with the matched estimated speed
test_data_speed = {'device_id': device_id_1, 'travel_id': travel_id_1, 'date': date_1, 'time': time_1, 
                   'bayonetname': bayonetname_1, 'posname': posname_1, 'longitude': longitude_1, 'latitude': latitude_1, 
                   'distance': distance_1, 'c_distance': c_distance_1, 'edge_length': edge_length_1, 'edge_index': edge_index_1, 
                   'time_period': time_period_1, 'road_type': road_type_1, 'in_urban_area': in_urban_area_1, 
                   'predict_speed': predict_speed_1}
test_data_speed = pd.DataFrame(test_data_speed)

# Point-level result: estimated speed points with the matched node information
predict_speed_result = {'device_id': device_id_2, 'travel_id': travel_id_2, 'date': date_2, 'time': time_2, 
                        'predict_speed': predict_speed_2, 'distance': distance_2, 'c_distance': c_distance_2, 
                        'edge_index': edge_index_2, 'road_type': road_type_2, 'time_period': time_period_2, 
                        'in_urban_area': in_urban_area_2, 'posname': posname_2, 'longitude': longitude_2, 'latitude': latitude_2}
predict_speed_result = pd.DataFrame(predict_speed_result)

# Row counts of the three frames
print('单次出行:', len(test_data_result))
print('单次出行数据:', len(test_data_speed))
print('速度估计点数据:', len(predict_speed_result))

In [16]:
# Remove trips whose observed mean speed exceeds 60 km/h from all three result frames
# (an infinite mean speed also satisfies this condition)

abnormal_id = test_data_result.loc[test_data_result['real_mean_speed'] > 60, 'travel_id'].values.tolist()
print('平均速度>60km/h的出行数:', len(abnormal_id), '/', len(test_data_result), 
      '占比:{:.2%}'.format(len(abnormal_id) / len(test_data_result)))

# Keep only the rows whose trip id is not flagged, then renumber the rows from 0
test_data_result = test_data_result[~test_data_result['travel_id'].isin(abnormal_id)].reset_index(drop=True)
test_data_speed = test_data_speed[~test_data_speed['travel_id'].isin(abnormal_id)].reset_index(drop=True)
predict_speed_result = predict_speed_result[~predict_speed_result['travel_id'].isin(abnormal_id)].reset_index(drop=True)

# Share of trips / records that remain
print(f"速度估计成功的单次出行: {len(test_data_result)} / {len(total_travel)}", 
      "占比: {:.2%}".format(len(test_data_result) / len(total_travel)))
print(f"速度估计成功的单次出行数据: {len(test_data_speed)} / {len(test_data)}", 
      "占比: {:.2%}".format(len(test_data_speed) / len(test_data)))
print('速度估计点数据:', len(predict_speed_result))

In [17]:
# Summary statistics over the trips that remain after filtering: mean, then min - max
print('出行车辆:', len(test_data_result['device_id'].drop_duplicates()))
print('单次出现经过的平均路网节点数量:', round(np.mean(test_data_result['real_record_number'])), 
      round(np.min(test_data_result['real_record_number'])), '-', round(np.max(test_data_result['real_record_number'])))
print('平均实际行程时间(min):', round(np.mean(test_data_result['real_total_time']) / 60), 
      round(np.min(test_data_result['real_total_time']) / 60), '-', round(np.max(test_data_result['real_total_time']) / 60))
print('平均实际行驶距离(m):', round(np.mean(test_data_result['real_total_distance']), 2),
      round(np.min(test_data_result['real_total_distance']), 2), '-', round(np.max(test_data_result['real_total_distance']), 2), 
      '绝对误差:', round(np.mean(test_data_result['distance_error']), 2))
# 'speed_error' is signed (predicted - observed), so the absolute error reported here must
# average the magnitudes; a plain mean would let over- and under-estimates cancel out.
print('平均速度(km/h):', round(np.mean(test_data_result['real_mean_speed']), 2), 
      round(np.min(test_data_result['real_mean_speed']), 2), '-', round(np.max(test_data_result['real_mean_speed']), 2), 
      '绝对误差:', round(np.mean(np.abs(test_data_result['speed_error'])), 2))

In [19]:
# Save the three result frames as GBK-encoded CSV files (without the index column)
save_file = r'C:\01 毕业论文\7.论文代码和结果\案例分析\数据\速度估计结果' 
test_data_result.to_csv(save_file + '/'+ 'path_error.csv', index=False, encoding='gbk')
test_data_speed.to_csv(save_file + '/'+ 'test_data_speed.csv', index=False, encoding='gbk')
predict_speed_result.to_csv(save_file + '/'+ 'predict_speed_result.csv', index=False, encoding='gbk')

In [20]:
# Preview the first rows of the point-level result
predict_speed_result.head(3)

# 手动单次运算

In [24]:
# Pick one trip for a manual run
k = 1340
idx = total_travel[k]
test_path = test_data[test_data['travel_id'] == idx].reset_index(drop=True)

# Observed trip figures. total_seconds() is used because the `.seconds` attribute drops whole
# days and would under-report trips longer than 24 h.
real_total_time = int((test_path.iloc[-1]['time'] - test_path.iloc[0]['time']).total_seconds())  # s
real_total_distance = round((test_path.iloc[-1]['c_distance'] - test_path.iloc[0]['c_distance']), 2)  # m
real_record_number = len(test_path)
real_mean_speed = round(real_total_distance / real_total_time * 3.6, 2)  # km/h

# Build the model input
road_infos = input_data(test_path)

print(f"travel_id = {idx}")
print('路网节点:', real_record_number, 
      ' *总行程时间(s):', real_total_time, 
      ' *总行程距离(m):', real_total_distance, 'vs.', round(sum(road_infos[1]),2), 
      ' *平均速度(km/h):', real_mean_speed)


# Run the speed estimation algorithm
speed_sequence, cumulative_distance, total_distance, total_error, segment_roadtype, segment_error = predict_entire_trip(real_total_time, road_infos, speed_matrix_paths, time_interval=2, max_speed=150)

# Cumulative distance and speed at every time step
pre_speed_data = {'c_distance': cumulative_distance, 'speed_kmh': speed_sequence}
pre_speed_data = pd.DataFrame(pre_speed_data)

# Original rows + estimated speed (one row per road-network node)
speed_result = match_predspeed_truedf(pre_speed_data, test_path)
# Estimated points + node information (one row per estimated point)
speed_result_2s = match_predspeed_preddf(pre_speed_data, speed_result)

# Errors
pred_speed_sequence_length = len(speed_result_2s)
pred_total_distance = round(total_distance, 2)
pred_mean_speed = round(np.mean(speed_sequence), 2)
distance_error = round(total_error, 2)
mean_speed_error = round(pred_mean_speed - real_mean_speed, 2)

print("速度序列:", pred_speed_sequence_length, 
      f" *估计行驶距离(m): {pred_total_distance}", "相对误差: {:.2%}".format(distance_error / real_total_distance),
      f" *估计平均速度(km/h): {pred_mean_speed}", "相对误差: {:.2%}".format(mean_speed_error / real_mean_speed))
print(f"end_time:  pred={speed_result_2s.loc[len(speed_result_2s)-1, 'time']}  true={test_path.loc[len(test_path)-1, 'time']}")

# Plot the estimated speed over distance, with the observed and estimated mean speeds as dashed lines
plt.figure(figsize=(5, 2))
plt.plot(speed_result['c_distance'], speed_result['predict_speed'])
plt.plot(speed_result['c_distance'], [real_mean_speed]*len(speed_result), '--', alpha=0.8, label='real meanspeed')
plt.plot(speed_result['c_distance'], [pred_mean_speed]*len(speed_result), '--', alpha=0.8, label='pred meanspeed')
plt.xlabel('distance (m)')
plt.ylabel('speed (km/h)')
plt.yticks(np.arange(0, 121, 40))
plt.legend(loc='upper right', frameon=False)
plt.show()

# Preview the point-level result
speed_result_2s.head(3)