In [None]:
import os
import ijson
import pandas as pd

def get_traj_stats(input_filename):
    '''
    Compute travel distance, travel time and travel speed for each westbound
    trajectory stored in an INCEPTION dataset JSON file.

    The file is streamed with ijson (top-level JSON array, one document per
    trajectory), so it is never loaded into memory as a whole. Only documents
    with ``direction == -1`` (Westbound) and ``int(length) > 0`` are kept.

    For each kept trajectory:

    * ``travel_distance = starting_x - ending_x``
    * ``travel_time = last_timestamp - first_timestamp``
    * ``travel_speed = travel_distance / travel_time``; NaN when
      ``travel_time`` is zero, so one degenerate trajectory does not abort
      the whole file.

    Units are whatever the input file uses; this function does not convert them.

    :param input_filename: the input file name
    :type input_filename: str
    :return: the dataframe containing the trajectory statistics with columns
        traj_id, travel_distance, travel_time, travel_speed; an empty dataframe
        with those columns when no trajectory passes the filter
    :rtype: pandas.DataFrame

    example: get_traj_stats('/.../I24M_INCEPTION_v1/11-22-2022/637c399add50d54aa5af0cf4__post2.json')
    '''
    columns = ['traj_id', 'travel_distance', 'travel_time', 'travel_speed']
    # Collect plain dicts and build the dataframe once at the end; creating a
    # one-row DataFrame per trajectory is far slower on large files.
    records = []
    # Binary mode: ijson decodes the bytes itself, so parsing does not depend
    # on the platform's default text encoding.
    with open(input_filename, 'rb') as input_file:
        for doc in ijson.items(input_file, 'item'):
            # keep only trajectories with direction = -1 (Westbound) and length > 0
            if int(doc['direction']) != -1 or int(doc['length']) <= 0:
                continue
            # westbound x decreases along the trip, so start - end is positive
            travel_distance = float(doc['starting_x']) - float(doc['ending_x'])
            travel_time = float(doc['last_timestamp']) - float(doc['first_timestamp'])
            # speed is undefined for a zero-duration trajectory
            if travel_time != 0:
                travel_speed = travel_distance / travel_time
            else:
                travel_speed = float('nan')
            records.append({'traj_id': doc['_id']['$oid'],
                            'travel_distance': travel_distance,
                            'travel_time': travel_time,
                            'travel_speed': travel_speed})
    # Passing `columns` keeps the schema stable even when `records` is empty
    # (pd.concat on an empty list would raise ValueError).
    return pd.DataFrame(records, columns=columns)

In [None]:
# Location of the INCEPTION file to analyse; fill in all three parts before running.
inception_root = ''  # root directory of the INCEPTION dataset
date_dir = ''  # date sub-directory, e.g. '11-22-2022'
file_name = ''  # file inside date_dir, e.g. '637c399add50d54aa5af0cf4__post2.json' (found under '11-22-2022')
input_filename = os.path.join(
    inception_root,
    date_dir,
    file_name,
)

In [None]:
# Build the per-trajectory statistics table for the file selected above.
traj_stats_table = get_traj_stats(input_filename)
# Optional export: replace '' with the desired CSV output path before running.
traj_stats_table.to_csv(path_or_buf='', index=False)