In [98]:
# the purpose of this script is to deserialize all of the contents of the player folders given in the same directory.
# we will first need to find the parts of the csv files that are serialized, and then reverse the process in which they 
# were serialized in order to fix it.

In [99]:
# import everything we need
import os
import base64
import struct
import pandas as pd

In [100]:
def decode_position_log(base64_string):
    # This function decodes the Base64 string to a byte array
    # you pass in the base 64 string that you want to deserialize

    # decodes base 64 string into byte array
    byte_array = base64.b64decode(base64_string)
    
    # Unpack the byte array into floats
    x, y, z = struct.unpack('fff', byte_array)
    
    return x, y, z

# Example usage
example_pos = 'HQ0Lwphn1j9MWVhC'
position = decode_position_log(example_pos)
print(f"Position: x={position[0]}, y={position[1]}, z={position[2]}")

Position: x=-34.7628059387207, y=1.6750364303588867, z=54.08720397949219


In [101]:
def deserialize_csv(path):
    # This function reads the csv, and then creates a new one with the same contents but while deserializing all of the data for future
    # use in graphing.
    
    df = pd.read_csv(path, delimiter=';') # create a dataframe out of the csv's contents
    #print(df.head()) # check if we read it correctly

    columns_to_skip = ['A accel', 'A steering', 'A velocity', 'A indicators', 'A Horn Button'] # These are not serialized

    for column in df.columns[5:]: # loop through each key. Starts at the 6th key since the previous 5 are not base 64
        if column in columns_to_skip: # if the column isn't serialized, skip it
            continue
        #print(column)
        df[column] = df[column].apply(decode_position_log) # replaces the serialized value with the deserialized one
        #print(df[column])
    
    # write the new df to a new csv
    if 'deserialized' in path:
         serialized_csv = path
    else:
        serialized_csv = path[:-4] + '_deserialized.csv' # slice off the '.csv' from the last file and add _deserialized
        
    df.to_csv(serialized_csv, index=False) # create a new csv with the deserialized data in the pandas dataframe
    
#deserialize_csv('CSV_Scenario-Ped-101_Session-temp_2024-02-22-13-58-23.csv') # test on one file

In [102]:
def deserialize_all(path):
    # this deserializes all of the data. Goes throughout the folders to each csv file in the study
    path += 'p'

    for i in range(1, 33):
        new_path = path
        
        new_path += str(i) + '/csv/' # we go through p1-p32
        
        # Get the list of files and directories in the specified directory
        files_and_dirs = os.listdir(new_path)
    
        # Get an array of all the files in a directory (csv files)
        files = [f for f in files_and_dirs if os.path.isfile(os.path.join(new_path, f))]

        print(f'deserializing {(path + str(i))[14:]}...', end = ' ')
        for csv_file in files:
            deserialize_csv(new_path + csv_file)            
        print('done.')

path = 'xcped_decoded/'
deserialize_all(path)

deserializing p1... done.
deserializing p2... done.
deserializing p3... done.
deserializing p4... done.
deserializing p5... 

Error: Incorrect padding