# Preprocess data

In [9]:
import json

def load_and_convert_data(input_file):
    """Load per-vehicle trajectories from a JSON-lines file and convert units.

    Each non-blank line of ``input_file`` must be one JSON object with the keys
    ``vehicle_id``, ``frame_id``, ``timestamp``, ``local_x``, ``local_y``,
    ``lane_id``, ``preceding``, ``following`` and ``v_length``.

    Conversions applied:
      * ``timestamp`` values are divided by 1000 (milliseconds -> seconds).
      * ``local_x``, ``local_y`` and ``v_length`` are multiplied by 0.3048
        (feet -> metres).

    Args:
        input_file: Path of the JSON-lines trajectory file.

    Returns:
        dict mapping ``vehicle_id`` to a dict holding the converted series and
        the untouched ``frame_id``, ``lane_id``, ``preceding`` and
        ``following`` values.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the expected keys.
    """
    feet_to_metres = 0.3048
    vehicles_data = {}

    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank or whitespace-only lines (e.g. a trailing empty line)
                # carry no record and would make json.loads raise.
                continue

            obj = json.loads(line)

            # If the same vehicle_id appears on more than one line, the later
            # record replaces the earlier one.
            vehicles_data[obj['vehicle_id']] = {
                'frame_id': obj['frame_id'],
                'timestamp': [ts / 1000 for ts in obj['timestamp']],  # ms -> s
                'local_x': [x * feet_to_metres for x in obj['local_x']],
                'local_y': [y * feet_to_metres for y in obj['local_y']],
                'lane_id': obj['lane_id'],
                'preceding': obj['preceding'],
                'following': obj['following'],
                'v_length': obj['v_length'] * feet_to_metres
            }

    return vehicles_data

# Path of the raw trajectory file handed to load_and_convert_data.
input_file = '../../trajectories-0400-0415.json'
# Converted trajectories are kept in memory; the later cells read `vehicles_data`.
vehicles_data = load_and_convert_data(input_file)
print("Load complete!")

Load complete!


# Find lane-change events

In [10]:
def detect_lane_changes(vehicles_data, output_file):
    """Record every sample-to-sample lane switch as one JSON line.

    For each vehicle, consecutive entries of its ``lane_id`` series are
    compared; whenever they differ, one record is produced that captures the
    lanes, timestamps and neighbouring-vehicle ids on either side of the
    switch. All records are then written to ``output_file``, one JSON object
    per line. Nothing is returned.

    Args:
        vehicles_data: dict mapping vehicle id to a dict with the keys
            ``lane_id``, ``timestamp``, ``preceding``, ``following`` and
            ``v_length``.
        output_file: Path of the JSON-lines file to (over)write.
    """
    events = []

    for vid, track in vehicles_data.items():
        lanes = track['lane_id']
        times = track['timestamp']
        leaders = track['preceding']
        followers = track['following']
        length = track['v_length']

        # Walk over adjacent (previous, current) lane pairs.
        for prev, (lane_before, lane_after) in enumerate(zip(lanes, lanes[1:])):
            if lane_after != lane_before:
                cur = prev + 1
                events.append({
                    'vehicle_id': vid,
                    'v_length': length,
                    'before_lane_id': lane_before,
                    'after_lane_id': lane_after,
                    'before_change_timestamp': times[prev],
                    'after_change_timestamp': times[cur],
                    'before_preceding_id': leaders[prev],
                    'after_preceding_id': leaders[cur],
                    'after_following_id': followers[cur]
                })

    # The file is only opened once detection has finished for all vehicles.
    with open(output_file, 'w') as sink:
        sink.writelines(json.dumps(event) + '\n' for event in events)

# Lane-change records are written here; the two later cells read this file back by name.
output_file = 'trajectories-0400-0415-lane-change-for-IDM.json'
detect_lane_changes(vehicles_data, output_file)

print('Record complete!')

Record complete!


# Build the IDM modeling file

In [11]:
def _idm_window_complete(vehicles_data, other_id, first_time, last_time):
    """Tell whether a neighbouring vehicle has samples at both window ends.

    A falsy id, or an id that is not a key of ``vehicles_data``, means there is
    nothing to check and is reported as complete.
    """
    if not other_id or other_id not in vehicles_data:
        return True
    other_timestamps = vehicles_data[other_id]['timestamp']
    # Exact float membership, matching how the window bounds are derived.
    return first_time in other_timestamps and last_time in other_timestamps


def _idm_window_series(timestamps, local_y, first_time, last_time):
    """Collect positions and forward-difference speeds inside a time window.

    Speeds are ``(y[i+1] - y[i]) / (t[i+1] - t[i]) * 2.237`` (the factor
    converts m/s to mph). The very last sample of a trajectory has no
    successor, so its speed repeats the previous one to keep both lists the
    same length.
    """
    positions, velocities = [], []
    final_index = len(timestamps) - 1

    for i, ts in enumerate(timestamps):
        if not first_time <= ts <= last_time:
            continue
        positions.append(local_y[i])
        if i < final_index:
            velocities.append(
                (local_y[i + 1] - local_y[i]) / (timestamps[i + 1] - ts) * 2.237  # m/s to mph
            )
        elif velocities:
            # Trajectory ends inside the window: pad with the last known speed.
            velocities.append(velocities[-1])
        else:
            # Only sample in the window and no successor to difference against.
            velocities.append(0.0)

    return positions, velocities


def create_idm_modeling_file(vehicles_data, lane_changes_file, idm_output_file, delta_t=40):
    """Build the IDM modelling inputs for every usable lane-change record.

    For each record in ``lane_changes_file`` a window
    ``[before_change - delta_t / 2, after_change + delta_t / 2]`` is formed.
    The record is skipped unless the lane-changing vehicle has samples at both
    window ends, and every referenced neighbour that exists in
    ``vehicles_data`` has samples at both ends of the sub-window it is needed
    for (leader before the change, leader and follower after the change).

    Each kept entry stores the vehicle's timestamps in the window, its speed
    and position before and after the change, and the position/speed series of
    the leader before and after the change. Speeds are forward differences of
    ``local_y`` multiplied by 2.237 (m/s to mph).

    Args:
        vehicles_data: dict mapping vehicle id to a dict with at least the
            ``timestamp`` and ``local_y`` series.
        lane_changes_file: JSON-lines file of lane-change records.
        idm_output_file: JSON-lines file to (over)write with the entries.
        delta_t: Total padding added around the change, split evenly before
            and after, in the same unit as the timestamps.

    Returns:
        None. Entries are written to ``idm_output_file`` and the total count
        is printed.
    """
    idm_data = []

    with open(lane_changes_file, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines instead of failing in json.loads
            change = json.loads(line)
            vehicle_id = change['vehicle_id']
            v_length = change['v_length']
            before_change = change['before_change_timestamp']
            after_change = change['after_change_timestamp']

            vehicle_data = vehicles_data[vehicle_id]
            timestamps = vehicle_data['timestamp']
            local_y = vehicle_data['local_y']

            start_time = before_change - delta_t / 2
            end_time = after_change + delta_t / 2

            # The lane-changing vehicle must cover the whole window.
            if start_time not in timestamps or end_time not in timestamps:
                continue

            before_preceding_id = change['before_preceding_id']
            after_preceding_id = change['after_preceding_id']
            after_following_id = change['after_following_id']

            # Leader before the change must cover [start_time, before_change];
            # leader and follower after the change must cover
            # [after_change, end_time].
            if not _idm_window_complete(vehicles_data, before_preceding_id, start_time, before_change):
                continue
            if not _idm_window_complete(vehicles_data, after_preceding_id, after_change, end_time):
                continue
            if not _idm_window_complete(vehicles_data, after_following_id, after_change, end_time):
                continue

            idm_entry = {
                'vehicle_id': vehicle_id,
                'v_length': v_length,
                'before_lane_id': change['before_lane_id'],
                'after_lane_id': change['after_lane_id'],
                'before_change_timestamp': before_change,
                'after_change_timestamp': after_change,
                'before_preceding_id': before_preceding_id,
                'after_preceding_id': after_preceding_id,
                'after_following_id': after_following_id,
                'vehicle_initial_velocity': None,
                'vehicle_initial_position': None,
                'vehicle_post_change_velocity': None,
                'vehicle_post_change_position': None,
                'timestamps': [],  # time frames inside the window
                'before_preceding_positions': [],
                'before_preceding_velocities': [],
                'after_preceding_positions': [],
                'after_preceding_velocities': []
            }

            # Record the time frames and the vehicle's state before/after the change.
            for i, ts in enumerate(timestamps):
                if not start_time <= ts <= end_time:
                    continue
                idm_entry['timestamps'].append(ts)

                if ts <= before_change:
                    # Overwritten on every pre-change sample, so the stored
                    # values are those of the last sample at or before
                    # `before_change`.
                    # NOTE(review): if "initial" is meant to be the state at
                    # `start_time`, this should stop after the first sample - confirm.
                    idm_entry['vehicle_initial_velocity'] = (local_y[i + 1] - local_y[i]) / (timestamps[i + 1] - ts) * 2.237  # m/s to mph
                    idm_entry['vehicle_initial_position'] = local_y[i]

                if ts == after_change:
                    idm_entry['vehicle_post_change_velocity'] = (local_y[i + 1] - local_y[i]) / (timestamps[i + 1] - ts) * 2.237  # m/s to mph
                    idm_entry['vehicle_post_change_position'] = local_y[i]

            if before_preceding_id and before_preceding_id in vehicles_data:
                preceding_data = vehicles_data[before_preceding_id]
                positions, velocities = _idm_window_series(
                    preceding_data['timestamp'], preceding_data['local_y'],
                    start_time, before_change,
                )
                idm_entry['before_preceding_positions'] = positions
                idm_entry['before_preceding_velocities'] = velocities

            if after_preceding_id and after_preceding_id in vehicles_data:
                preceding_data = vehicles_data[after_preceding_id]
                positions, velocities = _idm_window_series(
                    preceding_data['timestamp'], preceding_data['local_y'],
                    after_change, end_time,
                )
                idm_entry['after_preceding_positions'] = positions
                idm_entry['after_preceding_velocities'] = velocities

            idm_data.append(idm_entry)

            if len(idm_data) % 400 == 0:
                print(f"Processed {len(idm_data)} IDM entries")

    with open(idm_output_file, 'w') as f:
        for entry in idm_data:
            json.dump(entry, f)
            f.write('\n')

    print(f"Total IDM entries: {len(idm_data)}")

# Reads the lane-change records written by detect_lane_changes (same file name as `output_file`).
create_idm_modeling_file(vehicles_data, 'trajectories-0400-0415-lane-change-for-IDM.json', 'trajectories-0400-0415-idm_modeling_data.json', delta_t=40)

print('IDM modeling data complete!')

Total IDM entries: 313
IDM modeling data complete!


# Build the empirical (observed values) file

In [12]:
def _real_window_complete(vehicles_data, other_id, first_time, last_time):
    """Tell whether a neighbouring vehicle has samples at both window ends.

    A falsy id, or an id that is not a key of ``vehicles_data``, means there is
    nothing to check and is reported as complete.
    """
    if not other_id or other_id not in vehicles_data:
        return True
    other_timestamps = vehicles_data[other_id]['timestamp']
    # Exact float membership, matching how the window bounds are derived.
    return first_time in other_timestamps and last_time in other_timestamps


def _real_window_series(timestamps, local_y, first_time, last_time):
    """Collect positions and forward-difference speeds inside a time window.

    Speeds are ``(y[i+1] - y[i]) / (t[i+1] - t[i]) * 2.237`` (the factor
    converts m/s to mph). The very last sample of a trajectory has no
    successor, so its speed repeats the previous one to keep both lists the
    same length.
    """
    positions, velocities = [], []
    final_index = len(timestamps) - 1

    for i, ts in enumerate(timestamps):
        if not first_time <= ts <= last_time:
            continue
        positions.append(local_y[i])
        if i < final_index:
            velocities.append(
                (local_y[i + 1] - local_y[i]) / (timestamps[i + 1] - ts) * 2.237  # m/s to mph
            )
        elif velocities:
            # Trajectory ends inside the window: pad with the last known speed.
            velocities.append(velocities[-1])
        else:
            # Only sample in the window and no successor to difference against.
            velocities.append(0.0)

    return positions, velocities


def create_real_values_file(vehicles_data, lane_changes_file, real_output_file, delta_t=40):
    """Extract the observed speed, acceleration and gap series per lane change.

    For each record in ``lane_changes_file`` a window
    ``[before_change - delta_t / 2, after_change + delta_t / 2]`` is formed.
    The record is skipped unless the lane-changing vehicle has samples at both
    window ends, and every referenced neighbour that exists in
    ``vehicles_data`` has samples at both ends of the sub-window it is needed
    for.

    For every sample of the vehicle inside the window the entry stores:
      * the timestamp;
      * the forward-difference speed of ``local_y`` times 2.237 (m/s to mph);
      * the acceleration, i.e. the change between two consecutive
        forward-difference speeds divided by the time separating the midpoints
        of their intervals, times 2.237;
      * the difference between the leader's ``local_y`` and the vehicle's
        ``local_y`` at the same timestamp (``0`` when there is no leader or
        the leader is not in ``vehicles_data``). The leader before the change
        is used for samples at or before ``before_change``, the leader after
        the change otherwise.
    The leader position/speed series before and after the change are stored
    as well.

    Args:
        vehicles_data: dict mapping vehicle id to a dict with at least the
            ``timestamp`` and ``local_y`` series.
        lane_changes_file: JSON-lines file of lane-change records.
        real_output_file: JSON-lines file to (over)write with the entries.
        delta_t: Total padding added around the change, split evenly before
            and after, in the same unit as the timestamps.

    Returns:
        None. Entries are written to ``real_output_file`` and the total count
        is printed.
    """
    real_data = []

    with open(lane_changes_file, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines instead of failing in json.loads
            change = json.loads(line)
            vehicle_id = change['vehicle_id']
            v_length = change['v_length']
            before_change = change['before_change_timestamp']
            after_change = change['after_change_timestamp']

            vehicle_data = vehicles_data[vehicle_id]
            timestamps = vehicle_data['timestamp']
            local_y = vehicle_data['local_y']

            start_time = before_change - delta_t / 2
            end_time = after_change + delta_t / 2

            # The lane-changing vehicle must cover the whole window.
            if start_time not in timestamps or end_time not in timestamps:
                continue

            before_preceding_id = change['before_preceding_id']
            after_preceding_id = change['after_preceding_id']
            after_following_id = change['after_following_id']

            # Leader before the change must cover [start_time, before_change];
            # leader and follower after the change must cover
            # [after_change, end_time].
            if not _real_window_complete(vehicles_data, before_preceding_id, start_time, before_change):
                continue
            if not _real_window_complete(vehicles_data, after_preceding_id, after_change, end_time):
                continue
            if not _real_window_complete(vehicles_data, after_following_id, after_change, end_time):
                continue

            real_entry = {
                'vehicle_id': vehicle_id,
                'v_length': v_length,
                'before_lane_id': change['before_lane_id'],
                'after_lane_id': change['after_lane_id'],
                'before_change_timestamp': before_change,
                'after_change_timestamp': after_change,
                'before_preceding_id': before_preceding_id,
                'after_preceding_id': after_preceding_id,
                'after_following_id': after_following_id,
                'timestamps': [],  # time frames inside the window
                'vehicle_speeds': [],
                'vehicle_accelerations': [],
                'gap_before_change': [],
                'gap_after_change': [],
                'before_preceding_positions': [],
                'before_preceding_velocities': [],
                'after_preceding_positions': [],
                'after_preceding_velocities': []
            }

            sample_count = len(timestamps)

            # Speed, acceleration and gap for every sample inside the window.
            for i, ts in enumerate(timestamps):
                if not start_time <= ts <= end_time:
                    continue
                real_entry['timestamps'].append(ts)

                if i < sample_count - 1:
                    speed = (local_y[i + 1] - local_y[i]) / (timestamps[i + 1] - ts) * 2.237  # m/s to mph
                    real_entry['vehicle_speeds'].append(speed)
                else:
                    # No successor sample: repeat the last speed.
                    real_entry['vehicle_speeds'].append(real_entry['vehicle_speeds'][-1])

                if i < sample_count - 2:
                    speed_now = (local_y[i + 1] - local_y[i]) / (timestamps[i + 1] - ts)
                    speed_next = (local_y[i + 2] - local_y[i + 1]) / (timestamps[i + 2] - timestamps[i + 1])
                    # The two speeds belong to the intervals [t_i, t_i+1] and
                    # [t_i+1, t_i+2]; their midpoints are (t_i+2 - t_i) / 2
                    # apart. Dividing by the full span would halve the result.
                    midpoint_spacing = (timestamps[i + 2] - ts) / 2
                    acceleration = (speed_next - speed_now) / midpoint_spacing * 2.237
                    real_entry['vehicle_accelerations'].append(acceleration)
                else:
                    # Fewer than two successor samples: repeat the last value.
                    real_entry['vehicle_accelerations'].append(real_entry['vehicle_accelerations'][-1])

                is_before = ts <= before_change
                gap_key = 'gap_before_change' if is_before else 'gap_after_change'
                preceding_id = before_preceding_id if is_before else after_preceding_id
                if preceding_id and preceding_id in vehicles_data:
                    preceding_data = vehicles_data[preceding_id]
                    # A gap is only recorded when the leader has a sample at
                    # exactly this timestamp; otherwise nothing is appended.
                    for j, leader_ts in enumerate(preceding_data['timestamp']):
                        if ts == leader_ts:
                            real_entry[gap_key].append(preceding_data['local_y'][j] - local_y[i])
                else:
                    real_entry[gap_key].append(0)

            if before_preceding_id and before_preceding_id in vehicles_data:
                preceding_data = vehicles_data[before_preceding_id]
                positions, velocities = _real_window_series(
                    preceding_data['timestamp'], preceding_data['local_y'],
                    start_time, before_change,
                )
                real_entry['before_preceding_positions'] = positions
                real_entry['before_preceding_velocities'] = velocities

            if after_preceding_id and after_preceding_id in vehicles_data:
                preceding_data = vehicles_data[after_preceding_id]
                positions, velocities = _real_window_series(
                    preceding_data['timestamp'], preceding_data['local_y'],
                    after_change, end_time,
                )
                real_entry['after_preceding_positions'] = positions
                real_entry['after_preceding_velocities'] = velocities

            real_data.append(real_entry)

            if len(real_data) % 400 == 0:
                print(f"Processed {len(real_data)} real value entries")

    with open(real_output_file, 'w') as f:
        for entry in real_data:
            json.dump(entry, f)
            f.write('\n')

    print(f"Total real value entries: {len(real_data)}")

# Uses the same lane-change records and the same delta_t as the IDM modeling cell.
create_real_values_file(vehicles_data, 'trajectories-0400-0415-lane-change-for-IDM.json', 'trajectories-0400-0415-real_values_data.json', delta_t=40)

print('Real values data complete!')

Total real value entries: 313
Real values data complete!
