In [1]:
import os
import pandas
import numpy as np
import HighD_Columns as HC 
import NGSIM_Columns as NC 


In [10]:
def transform_track_features(ngsim_data, NC_dict, logging = False):
    """
    This method iterate on all rows of ngsim dataset sorted by vehicle ID and:
        1. Correct vehicle ID duplication problem.
        2. TODO:Re-calc X-/Y- Velocity/Acceleration using X and Y (all using NGSIM coordinate) after applying a filtering algorithm. 
    """
    current_veh_id = ngsim_data[0, NC_dict[NC.ID]] 
    correct_veh_id = ngsim_data[0,NC_dict[NC.ID]] 
    cur_t = ngsim_data[0,NC_dict[NC.GLOBAL_TIME]]
    empty_veh_id = max(list(set(ngsim_data[:,NC_dict[NC.ID]])))+1
    num_rows = ngsim_data.shape[0]
    #augmented_features = np.zeros((num_rows, 4))
    fr = 0
    for row_itr in range(num_rows):
        if current_veh_id != ngsim_data[row_itr, NC_dict[NC.ID]]:
            current_veh_id = ngsim_data[row_itr, NC_dict[NC.ID]]
            correct_veh_id = ngsim_data[row_itr, NC_dict[NC.ID]]
            cur_t = ngsim_data[row_itr, NC_dict[NC.GLOBAL_TIME]] + 100
            fr = 0 + 1
            continue
        if cur_t != ngsim_data[row_itr, NC_dict[NC.GLOBAL_TIME]]:
            correct_veh_id = empty_veh_id
            if logging:
                print("Duplicatation found in id: ", current_veh_id, ". The id is changed to: ", correct_veh_id)
            empty_veh_id += 1
            ngsim_data[row_itr, NC_dict[NC.ID]] = correct_veh_id
            cur_t = ngsim_data[row_itr, NC_dict[NC.GLOBAL_TIME]] + 100
            fr = 0 + 1
            continue
        
        ngsim_data[row_itr, NC_dict[NC.ID]] = correct_veh_id
        
        cur_t += 100
        fr += 1
    return ngsim_data

In [21]:
def transform_frame_features(ngsim_data, NC_dict, logging = True):
    """
    1. Extract vehicle IDs of surrounding vehicles.
    2. Divide Global Time by 100
    3. transform from feet to meter. 
    4. Reverse the order of Lane IDs
    """
    SVC_dict = {
        HC.PRECEDING_ID:0,
        HC.FOLLOWING_ID:1,
        HC.LEFT_PRECEDING_ID:2,
        HC.LEFT_ALONGSIDE_ID:3,
        HC.LEFT_FOLLOWING_ID:4,
        HC.RIGHT_PRECEDING_ID:5,
        HC.RIGHT_ALONGSIDE_ID:6,
        HC.RIGHT_FOLLOWING_ID:7
    }

    sorted_ind = np.argsort(ngsim_data[:,NC_dict[NC.GLOBAL_TIME]])
    ngsim_data = ngsim_data[sorted_ind]
    augmented_features = np.zeros((ngsim_data.shape[0], 8))
    all_times = sorted(list(set(ngsim_data[:,NC_dict[NC.GLOBAL_TIME]])))
    max_itr = len(all_times)
    for itr, g_time in enumerate(all_times):
        if logging and itr%100 == 0:
            print('Processing: ', itr, 'out_of: ', max_itr)
        selected_ind = ngsim_data[:,NC_dict[NC.GLOBAL_TIME]] == g_time
        cur_data = ngsim_data[selected_ind]
        cur_aug_features = augmented_features[selected_ind]
        num_rows = cur_data.shape[0]
        for rows_itr in range(num_rows):
            cur_lane = cur_data[rows_itr, NC_dict[NC.LANE_ID]]
            cur_y = cur_data[rows_itr, NC_dict[NC.Y]]
            cur_length = cur_data[rows_itr, NC_dict[NC.LENGTH]]
            cur_lane_sv_ind = (cur_data[:,NC_dict[NC.LANE_ID]] == cur_lane)
            left_lane_sv_ind = (cur_data[:,NC_dict[NC.LANE_ID]] == (cur_lane-1))
            right_lane_sv_ind = (cur_data[:,NC_dict[NC.LANE_ID]] == (cur_lane+1))
            preceding_sv_ind = (cur_data[:,NC_dict[NC.Y]]- cur_data[:,NC_dict[NC.LENGTH]] > cur_y)
            following_sv_ind = (cur_data[:,NC_dict[NC.Y]] < cur_y-cur_length)
            alongside_sv_ind = np.logical_and((cur_data[:,NC_dict[NC.Y]] >= (cur_y-cur_length)), ((cur_data[:,NC_dict[NC.Y]]-cur_data[:,NC_dict[NC.LENGTH]]) <= cur_y))

            #pv_id
            cur_aug_features[rows_itr,SVC_dict[HC.PRECEDING_ID]] = cur_data[np.argmin(cur_data[np.logical_and(preceding_sv_ind, cur_lane_sv_ind),NC_dict[NC.Y]]),0] if np.any(np.logical_and(preceding_sv_ind, cur_lane_sv_ind)) == True else 0
            #fv_id
            cur_aug_features[rows_itr,SVC_dict[HC.FOLLOWING_ID]] = cur_data[np.argmax(cur_data[np.logical_and(following_sv_ind, cur_lane_sv_ind),NC_dict[NC.Y]]),0] if np.any(np.logical_and(following_sv_ind, cur_lane_sv_ind)) == True else 0

            #rpv_id
            cur_aug_features[rows_itr,SVC_dict[HC.RIGHT_PRECEDING_ID]] = cur_data[np.argmin(cur_data[np.logical_and(preceding_sv_ind, right_lane_sv_ind),NC_dict[NC.Y]]),0] if np.any(np.logical_and(preceding_sv_ind, right_lane_sv_ind)) == True else 0
            #rfv_id
            cur_aug_features[rows_itr,SVC_dict[HC.RIGHT_FOLLOWING_ID]] = cur_data[np.argmax(cur_data[np.logical_and(following_sv_ind, right_lane_sv_ind),NC_dict[NC.Y]]),0] if np.any(np.logical_and(following_sv_ind, right_lane_sv_ind)) == True else 0

            #lpv_id
            cur_aug_features[rows_itr,SVC_dict[HC.LEFT_PRECEDING_ID]] = cur_data[np.argmin(cur_data[np.logical_and(preceding_sv_ind, left_lane_sv_ind),NC_dict[NC.Y]]),0] if np.any(np.logical_and(preceding_sv_ind, left_lane_sv_ind)) == True else 0
            #lfv_id
            cur_aug_features[rows_itr,SVC_dict[HC.LEFT_FOLLOWING_ID]] = cur_data[np.argmax(cur_data[np.logical_and(following_sv_ind, left_lane_sv_ind),NC_dict[NC.Y]]),0] if np.any(np.logical_and(following_sv_ind, left_lane_sv_ind)) == True else 0
            
            #rav_id
            cur_aug_features[rows_itr,SVC_dict[HC.RIGHT_ALONGSIDE_ID]] = cur_data[np.argmax(cur_data[np.logical_and(alongside_sv_ind, right_lane_sv_ind),NC_dict[NC.Y]]),0] if np.any(np.logical_and(alongside_sv_ind, right_lane_sv_ind)) == True else 0
            #lav_id
            cur_aug_features[rows_itr,SVC_dict[HC.LEFT_ALONGSIDE_ID]] = cur_data[np.argmax(cur_data[np.logical_and(alongside_sv_ind, left_lane_sv_ind),NC_dict[NC.Y]]),0] if np.any(np.logical_and(alongside_sv_ind, left_lane_sv_ind)) == True else 0

        augmented_features[selected_ind] = cur_aug_features

    ngsim_data = np.concatenate((ngsim_data, augmented_features), axis = 1)
    
    ngsim_data[:,NC_dict[NC.GLOBAL_TIME]] = ngsim_data[:,NC_dict[NC.GLOBAL_TIME]]/100
    
    ngsim_data[:,NC_dict[NC.X]] = 0.3048 * ngsim_data[:,NC_dict[NC.X]]
    ngsim_data[:,NC_dict[NC.Y]] = 0.3048 * ngsim_data[:,NC_dict[NC.Y]]
    ngsim_data[:,NC_dict[NC.LENGTH]] = 0.3048 * ngsim_data[:,NC_dict[NC.LENGTH]]
    ngsim_data[:,NC_dict[NC.WIDTH]] = 0.3048 * ngsim_data[:,NC_dict[NC.WIDTH]]
    ngsim_data[:,NC_dict[NC.VELOCITY]] = 0.3048 * ngsim_data[:,NC_dict[NC.VELOCITY]]
    ngsim_data[:,NC_dict[NC.ACCELERATION]] = 0.3048 * ngsim_data[:,NC_dict[NC.ACCELERATION]]
    ngsim_data[:,NC_dict[NC.DHW]] = 0.3048 * ngsim_data[:,NC_dict[NC.DHW]]
    
    ngsim_data[:,NC_dict[NC.LANE_ID]] = max(ngsim_data[:,NC_dict[NC.LANE_ID]])+1- ngsim_data[:,NC_dict[NC.LANE_ID]]
    return ngsim_data, SVC_dict

In [4]:
ngsim_dataset = "./ngsim/Next_Generation_Simulation__NGSIM__Vehicle_Trajectories_and_Supporting_Data.csv"
# Load and seperate by location
df = pandas.read_csv(ngsim_dataset)
ngsim = []
locations = ['us-101', 'i-80']
for location in locations:
    ngsim.append(df[df[NC.LOCATION]==location])

for i, location in enumerate(locations):
    
    # Drop undesired columns
    ngsim[i] = ngsim[i].drop(
                    columns = [
                        NC.O_ZONE, 
                        NC.D_ZONE, 
                        NC.INT_ID, 
                        NC.SECTION_ID, 
                        NC.DIRECTION, 
                        NC.MOVEMENT, 
                        NC.GLOBAL_X, 
                        NC.GLOBAL_Y, 
                        NC.FRAME,
                        NC.LOCATION, 
                        NC.TOTAL_FRAME,
                        NC.PRECEDING_ID,
                        NC.FOLLOWING_ID,
                        ])
    
    # NGSIM has some duplicate rows need to be dropped
    ngsim[i] = ngsim[i].drop_duplicates()

    

In [22]:
for i, location in enumerate(locations):
    # To numpy
    ngsim[i] = ngsim[i].sort_values([NC.ID, NC.GLOBAL_TIME], ascending=[1,1])
    ngsim_columns = ngsim[i].columns
    ngsim_array = ngsim[i].to_numpy()
    NC_dict = {}
    for i,c in enumerate(ngsim_columns):
        NC_dict[c] = i
    assert(len(ngsim_columns)==12)
    
    # Transformations
    #ngsim_array = transform_track_features(ngsim_array, NC_dict)
    ngsim_array, SVC_dict = transform_frame_features(ngsim_array, NC_dict)
    
    highD_columns = [None]* (len(ngsim_columns) + len(SVC_dict))
    # Untransformed Columns
    highD_columns[NC_dict[NC.CLASS]] = NC.CLASS
    highD_columns[NC_dict[NC.VELOCITY]] = NC.VELOCITY # Note: Velocity is changed from feet/s to m/s
    highD_columns[NC_dict[NC.ACCELERATION]] = NC.ACCELERATION # Note: Acceleration is changed from feet/s^2 to m/s^2 
    
    # Transformed Columns
    highD_columns[NC_dict[NC.ID]] = HC.TRACK_ID
    highD_columns[NC_dict[NC.GLOBAL_TIME]] = HC.FRAME
    highD_columns[NC_dict[NC.X]] = HC.Y # NC.X = HC.Y
    highD_columns[NC_dict[NC.Y]] = HC.X # NC.Y = HC.X
    highD_columns[NC_dict[NC.LENGTH]] = HC.WIDTH # NC.LENGTH = HC.WIDTH
    highD_columns[NC_dict[NC.WIDTH]] = HC.HEIGHT # NC.WIDTH = HC.HEIGHT
    highD_columns[NC_dict[NC.DHW]] = HC.DHW
    highD_columns[NC_dict[NC.THW]] = HC.THW
    highD_columns[NC_dict[NC.LANE_ID]] = HC.LANE_ID
    
    # Added Columns
    highD_columns[SVC_dict[HC.PRECEDING_ID]] = HC.PRECEDING_ID
    highD_columns[SVC_dict[HC.FOLLOWING_ID]] = HC.FOLLOWING_ID
    highD_columns[SVC_dict[HC.LEFT_PRECEDING_ID]] = HC.LEFT_PRECEDING_ID
    highD_columns[SVC_dict[HC.LEFT_ALONGSIDE_ID]] = HC.LEFT_ALONGSIDE_ID
    highD_columns[SVC_dict[HC.LEFT_FOLLOWING_ID]] = HC.LEFT_FOLLOWING_ID
    highD_columns[SVC_dict[HC.RIGHT_PRECEDING_ID]] = HC.RIGHT_PRECEDING_ID
    highD_columns[SVC_dict[HC.RIGHT_ALONGSIDE_ID]] = HC.RIGHT_ALONGSIDE_ID
    highD_columns[SVC_dict[HC.RIGHT_FOLLOWING_ID]] = HC.RIGHT_FOLLOWING_ID
    
    # To dataframe
    transformed_ngsim = pandas.DataFrame(data = ngsim_array, columns = highD_columns)
    transformed_ngsim = transformed_ngsim.sort_values([HC.ID, HC.FRANE], ascending=[1,1])
    tranfromed_ngsim.to_csv(location+'.csv', index=False)


Processing:  0 out_of:  27726
Processing:  100 out_of:  27726
Processing:  200 out_of:  27726
Processing:  300 out_of:  27726
Processing:  400 out_of:  27726
Processing:  500 out_of:  27726
Processing:  600 out_of:  27726
Processing:  700 out_of:  27726
Processing:  800 out_of:  27726
Processing:  900 out_of:  27726
Processing:  1000 out_of:  27726
Processing:  1100 out_of:  27726
Processing:  1200 out_of:  27726
Processing:  1300 out_of:  27726
Processing:  1400 out_of:  27726
Processing:  1500 out_of:  27726
Processing:  1600 out_of:  27726
Processing:  1700 out_of:  27726
Processing:  1800 out_of:  27726
Processing:  1900 out_of:  27726
Processing:  2000 out_of:  27726
Processing:  2100 out_of:  27726
Processing:  2200 out_of:  27726
Processing:  2300 out_of:  27726
Processing:  2400 out_of:  27726
Processing:  2500 out_of:  27726
Processing:  2600 out_of:  27726
Processing:  2700 out_of:  27726
Processing:  2800 out_of:  27726
Processing:  2900 out_of:  27726
Processing:  3000 out_

KeyError: 'v_VEL'