In [1]:
# The purpose of this notebook is to deserialize the contents of the player folders located in the same directory.
# We first need to find the parts of the CSV files that are serialized, and then reverse the process by which they
# were serialized so that the values become readable again.

In [2]:
# import everything we need
import os # for file manipulation and creation
import base64 # for deserializing
import struct # for deserializing
import pandas as pd

In [12]:
def decode_position_log(base64_string):
    """Decode one Base64-encoded cell into a rounded ``(x, y, z)`` tuple.

    Parameters
    ----------
    base64_string : str, bytes or NaN
        Base64 text whose decoded payload is exactly 12 bytes, i.e. three
        consecutive 32-bit floats. Missing values (anything ``pd.isna``
        reports as missing) are accepted and passed through as ``None``.

    Returns
    -------
    tuple of float, or None
        ``(x, y, z)`` with each component rounded to 4 decimal places, or
        ``None`` when the input is missing.

    Raises
    ------
    struct.error
        If the decoded payload is not exactly 12 bytes long.
    binascii.Error, TypeError
        If the input is not valid Base64 text.
    """
    # ignore NaN values that would throw an error
    if pd.isna(base64_string):
        return None

    # decodes base 64 string into byte array
    byte_array = base64.b64decode(base64_string)

    # '<' pins the layout to little-endian with no padding, so the result does
    # not depend on the CPU the notebook runs on (a bare 'fff' uses the host's
    # native byte order). The sample payloads in this notebook only decode to
    # plausible coordinates when read as little-endian.
    x, y, z = struct.unpack('<fff', byte_array)

    # Round each coordinate to 4 decimal places
    return round(x, 4), round(y, 4), round(z, 4)

# Example usage for testing
# The block below is disabled by wrapping it in a string literal, so none of it runs.
# NOTE: before re-enabling it, replace the empty example_pos placeholder with a real
# Base64 value: an empty string decodes to zero bytes, and decode_position_log needs
# exactly 12 bytes (three floats), so the call would raise as written.
'''
example_pos = ''
position = decode_position_log(example_pos)
print(f"Position: x={position[0]}, y={position[1]}, z={position[2]}")
'''

'\nexample_pos = \'\'\nposition = decode_position_log(example_pos)\nprint(f"Position: x={position[0]}, y={position[1]}, z={position[2]}")\n'

In [18]:
def deserialize_csv(path):
    """Write a deserialized copy of one semicolon-delimited CSV file.

    The file at ``path`` is read with ``;`` as the delimiter. Every column from
    the 6th onwards, except the ones listed in ``columns_to_skip``, is passed
    value by value through ``decode_position_log``. The result is written next
    to the input as ``<name>_deserialized.csv`` (without the index),
    overwriting any existing file of that name.

    Parameters
    ----------
    path : str
        Path to the serialized CSV file.

    Returns
    -------
    None
        If any column fails to decode, an error line is printed and the
        function returns without writing an output file.
    """
    df = pd.read_csv(path, delimiter=';')  # create a dataframe out of the csv's contents

    columns_to_skip = ['A accel', 'A steering', 'A velocity', 'A indicators', 'A Horn Button'] # These are not serialized

    for column in df.columns[5:]: # Starts at the 6th column since the previous 5 are not base 64
        if column in columns_to_skip: # if the column isn't serialized, skip it
            continue

        # replaces an entire column with the deserialized equivalents
        try:
            df[column] = df[column].apply(decode_position_log)
        except Exception as e:
            # Report the bare file name via basename rather than slicing off a
            # fixed number of characters, which breaks as soon as the folder
            # prefix has a different length (e.g. p10 vs p6). Include the
            # underlying error so the failure can be diagnosed.
            print(f'ERROR: value in {column} column in file {os.path.basename(path)}: {e}')
            return

    # strip the extension (whatever its length) and add _deserialized
    root, _ext = os.path.splitext(path)
    deserialized_csv = root + '_deserialized.csv'

    # write the deserialized df to a new csv (or overwrite an already existing deserialized csv)
    df.to_csv(deserialized_csv, index=False)

#deserialize_csv('xcped_decoded/p6/csv/test.csv') # test on one file

In [35]:
def deserialize_all(path, n_pairs=32, failed_pairs=(14,)):
    """Deserialize every CSV file of every pair folder under ``path``.

    For each pair number ``i`` from 1 to ``n_pairs`` (skipping those in
    ``failed_pairs``), the folder ``<path>/p<i>/csv`` is listed and every
    regular ``.csv`` file whose name does not contain ``'deserialized'`` is
    passed to ``deserialize_csv``. One progress line is printed per pair.

    Parameters
    ----------
    path : str
        Root folder containing the ``p1``, ``p2``, ... sub-folders. A trailing
        slash is accepted but not required.
    n_pairs : int, optional
        Number of pairs in the study (default 32).
    failed_pairs : iterable of int, optional
        Pair numbers to ignore (default ``(14,)``: p14 was a failed study).

    Raises
    ------
    FileNotFoundError
        If an expected ``<path>/p<i>/csv`` folder does not exist.
    """
    for i in range(1, n_pairs + 1):

        if i in failed_pairs:
            continue

        pair_name = f'p{i}'  # p1, p2, etc.
        csv_dir = os.path.join(path, pair_name, 'csv')

        # Only real, not-yet-deserialized CSV files; anything else in the folder
        # (hidden files, earlier outputs) is left alone. Sorted so the processing
        # order is the same on every run.
        files = sorted(
            f for f in os.listdir(csv_dir)
            if os.path.isfile(os.path.join(csv_dir, f))
            and f.lower().endswith('.csv')
            and 'deserialized' not in f
        )

        # prints that we are deserializing a pair's files
        print(f'deserializing {pair_name}...', end=' ')

        # deserialize each file
        for csv_file in files:
            deserialize_csv(os.path.join(csv_dir, csv_file))
        print('done.')
            

# Root folder that deserialize_all walks; it holds the per-pair folders (p1, p2, ...)
path = 'xcped_decoded/'
# Run the deserialization over every pair folder under that root
deserialize_all(path)

deserializing p1... done.
deserializing p2... done.
deserializing p3... done.
deserializing p4... done.
deserializing p5... done.
deserializing p6... done.
deserializing p7... done.
deserializing p8... done.
deserializing p9... done.
deserializing p10... done.
deserializing p11... done.
deserializing p12... done.
deserializing p13... done.
deserializing p15... done.
deserializing p16... done.
deserializing p17... done.
deserializing p18... done.
deserializing p19... done.
deserializing p20... done.
deserializing p21... done.
deserializing p22... done.
deserializing p23... done.
deserializing p24... done.
deserializing p25... done.
deserializing p26... done.
deserializing p27... done.
deserializing p28... done.
deserializing p29... done.
deserializing p30... done.
deserializing p31... done.
deserializing p32... done.


In [34]:
def remove_NaN_Values(file_path, delimiter=';'):
    """Drop every row containing a NaN from a CSV file and rewrite it in place.

    The file is read and written with the same ``delimiter``, so the file keeps
    its format and running the function again on an already-cleaned file leaves
    it unchanged. NaN counts before and after, and the row counts, are printed.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to clean. The file is overwritten.
    delimiter : str, optional
        Field separator used for both reading and writing (default ``';'``).

    Returns
    -------
    pandas.DataFrame
        The cleaned data (rows with at least one NaN removed).
    """
    # Load the CSV file into a dataframe
    data = pd.read_csv(file_path, delimiter=delimiter)

    # Calculate and print the number of NaN values per column before cleaning
    initial_nan_counts = data.isna().sum()
    print("Number of NaN values per column before cleaning:")
    print(initial_nan_counts)

    # Remove rows with NaN values
    cleaned_data = data.dropna(how='any')

    # Calculate and print the number of NaN values per column after cleaning
    final_nan_counts = cleaned_data.isna().sum()
    print("\nNumber of NaN values per column after cleaning:")
    print(final_nan_counts)

    # Print the reduction in dataset size
    print("\nOriginal number of rows:", len(data), " | Number of rows after cleaning:", len(cleaned_data))

    # Write back with the delimiter the file was read with. Writing with the
    # default ',' would silently change the file's format, so any later
    # ';'-delimited read would see a single column.
    cleaned_data.to_csv(file_path, sep=delimiter, index=False)
    return cleaned_data

# Specify the path to your CSV file
# The block below is disabled by wrapping it in a string literal, so none of it runs.
# NOTE(review): the calls use the name check_nan_values_in_csv, which is not defined in the
# visible cells of this notebook; the function defined in this cell is remove_NaN_Values.
# Confirm and update the name before re-enabling any of these lines.
'''
file_path = 'xcped_decoded/p5/csv/CSV_Scenario-Ped-105_Session-temp_2024-02-27-11-34-12.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p8/csv/CSV_Scenario-Ped-21_Session-temp_2024-03-04-11-03-59.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p10/csv/CSV_Scenario-Ped-12_Session-temp_2024-03-06-11-26-46.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p10/csv/CSV_Scenario-Ped-21_Session-temp_2024-03-06-11-46-57.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p16/csv/CSV_Scenario-Ped-7_Session-temp_2024-03-14-17-04-32.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p17/csv/CSV_Scenario-Ped-12_Session-temp_2024-03-18-15-15-07.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p23/csv/CSV_Scenario-Ped-105_Session-temp_2024-03-28-13-41-28.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p24/csv/CSV_Scenario-Ped-7_Session-temp_2024-03-29-15-16-13.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p28/csv/CSV_Scenario-Ped-103_Session-temp_2024-04-10-14-40-11.csv'
check_nan_values_in_csv(file_path)
file_path = 'xcped_decoded/p32/csv/CSV_Scenario-Ped-101_Session-temp_2024-05-01-14-52-13.csv'
check_nan_values_in_csv(file_path)
'''

Number of NaN values per column before cleaning:
]GameTime                           559
ScenarioTime                        559
FrameRate                           559
FrameRate-XRDevice                  559
Frame Number                        559
                                   ... 
A Hand Bone Rot ThumbDistal Left    559
A Hand Bone Pos ThumbTip Left       559
A Hand Bone Rot ThumbTip Left       559
A Hand Bone Pos Palm Left           559
A Hand Bone Rot Palm Left           559
Length: 342, dtype: int64

Number of NaN values per column after cleaning:
]GameTime                           0
ScenarioTime                        0
FrameRate                           0
FrameRate-XRDevice                  0
Frame Number                        0
                                   ..
A Hand Bone Rot ThumbDistal Left    0
A Hand Bone Pos ThumbTip Left       0
A Hand Bone Rot ThumbTip Left       0
A Hand Bone Pos Palm Left           0
A Hand Bone Rot Palm Left           0
Length: 342, dtype

Unnamed: 0,]GameTime,ScenarioTime,FrameRate,FrameRate-XRDevice,Frame Number,A VR Pos,A VR Rot,A XR Origin Pos,A XR Origin Rot,B VR Pos,...,B Hand Bone Pos ThumbMetacarpal Left,B Hand Bone Rot ThumbMetacarpal Left,B Hand Bone Pos ThumbProximal Left,B Hand Bone Rot ThumbProximal Left,B Hand Bone Pos ThumbDistal Left,B Hand Bone Rot ThumbDistal Left,B Hand Bone Pos ThumbTip Left,B Hand Bone Rot ThumbTip Left,B Hand Bone Pos Palm Left,B Hand Bone Rot Palm Left
1,1392.087,52.047607,0.014829,0.0,116972.0,Zx0bQL4qmT/GUJ5C,ZVCwQ0ZFskNi1bBD,iAWmPjn0nT+zqp5C,7L9ROqNHsUM+Yaq4,RHwPQYR36D+7nERC,...,2tsMQd6Hij84CEFC,so2aQdkshUMwG6JD,b14MQU4niT83BkFC,XWIzQh3oakNiNJ5D,Jg4MQWIahj8b+EBC,Tz1kQuBmYUNh55VD,4OILQY+Jgz897kBC,Tz1kQuBmYUNh55VD,AyUNQe3diT9I6UBC,kdeuQZPuWkPiltlB
3,1557.328,0.000000,0.013889,0.0,128824.0,ATejwkojmD/pOjlC,opSwQ3VHZT//UbFD,hHKnwgP1nT+GszlC,XjyfOhQUsUMF+gG5,rrkJwupI7D9/vFdC,...,VtkIwmFPmj9yYldC,7vUrQY3SHkMQtatD,hM0IwpCImT/yQ1dC,QZk7QeJtFkOLrK9D,y7wIwn2nmD96JldC,p9CyQ94jIkM4PKtD,xbQIwnbvmD+vDldC,p9CyQ94jIkM4PKtD,ssQIwuR6mD+EfFdC,oYwBQoXh6EIb3yZC
5,1557.370,0.041870,0.016137,0.0,128826.0,/jajwrUZmD/OOjlC,89ewQ220jT/YR7FD,hHKnwtn0nT+FszlC,ykR7OgAUsUMmh864,EboJwt5H7D+Yu1dC,...,UdkIwj9Nmj/GZ1dC,hHMxQcwPHUPp+qtD,lcwIwiqAmT+wSVdC,gs05QVu+FEOk8a9D,/roIwjahmD+2LFdC,zbWyQ7FSIEOGe6tD,N7IIwgjsmD8yFVdC,zbWyQ7FSIEOGe6tD,nsUIwux0mD9+gldC,61QEQref5UIBMCdC
7,1557.412,0.083496,0.015040,0.0,128829.0,ljajwprvlz/POjlC,UBqxQ18NpT/KO7FD,hHKnwq70nT+KszlC,6616OhsUsUPHpMu4,TLoJwj9H7D8Pu1dC,...,UNkIwtBLmj/waldC,A9Q0QUAFHENBI6xD,C8wIwuJ6mT8cTVdC,Stg4Qfq9E0NNGbBD,8rkIwhCdmD9xMFdC,WauyQzg8H0N4oKtD,t7AIwvjomD8ZGVdC,WauyQzg8H0N4oKtD,LcYIwm1xmD8HhldC,uvoFQgew40KwVCdC
9,1557.453,0.125122,0.014479,0.0,128832.0,ATajwga0lz/fOjlC,X46xQ/l30D8qKrFD,hHKnwo70nT+PszlC,sUJ0OjIUsUNyzsa4,MboJwtlG7D8iuldC,...,NNkIwrxpmj+mb1dC,pJU5QVpWGkOdTqxD,FMsIwmKTmT9BUldC,PkU5QYgYEkPBQrBD,KrgIwhC1mD8eNldC,9KqyQ6Z/HUNlyatD,Oq4IwvMAmT8SH1dC,9KqyQ6Z/HUNlyatD,6MYIwn6LmD9Ai1dC,XOYHQm7Z4EJG5CdC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2309,1605.371,48.042969,0.013889,0.0,132282.0,1737wNLYlj/mAGtC,X/aiP7/LsUJSBLFD,pc70wDX0nT/EeHNC,95tsOisHq0LEEMG4,UhodwukO6T9Adm1C,...,wZgcwi70lz80wG5C,d8cGQg+KS0PjhrJD,0aMcwgCllT/Ppm5C,hupdQkcxQkPH/bND,n6gcwp0Ukj/Nk25C,zexdQupmTkP2NRc/,ua4cwpmPjz/whW5C,zexdQupmTkP2NRc/,ZH8cwkpElT/Yzm5C,ILpAQtH+HEOwT2RC
2311,1605.413,48.084595,0.013889,0.0,132285.0,rrz7wHbYlj/ZAGtC,7hmiP3jYsULkArFD,9870wDX0nT/DeHNC,XN1qOiIHq0JstcC4,ZBodwp0S6T94dW1C,...,wZgcwi70lz80wG5C,d8cGQg+KS0PjhrJD,0aMcwgCllT/Ppm5C,hupdQkcxQkPH/bND,n6gcwp0Ukj/Nk25C,zexdQupmTkP2NRc/,ua4cwpmPjz/whW5C,zexdQupmTkP2NRc/,ZH8cwkpElT/Yzm5C,ILpAQtH+HEOwT2RC
2313,1605.480,48.151978,0.019016,0.0,132288.0,uLv7wB3Ylj/NAGtC,qXmUP9QCskLwCLFD,L8/0wDP0nT/CeHNC,6EhzOowHq0IHZrm4,BBkdwjoW6T8zdW1C,...,w5gcwi70lz81wG5C,d8cGQg+KS0PjhrJD,0qMcwgCllT/Qpm5C,iupdQkcxQkPG/bND,oKgcwp4Ukj/Ok25C,z+xdQupmTkNFNhc/,u64cwpqPjz/yhW5C,z+xdQupmTkNFNhc/,ZX8cwktElT/Zzm5C,ILpAQtH+HEOvT2RC
2315,1605.522,48.193604,0.016515,0.0,132291.0,Urv7wAPYlj/KAGtC,0dCGP04jskJ9C7FD,MM/0wDf0nT/CeHNC,WvRXOpIHq0Kq8KO4,3hcdwkoX6T+4dW1C,...,w5gcwi70lz81wG5C,d8cGQg+KS0PjhrJD,0qMcwgCllT/Qpm5C,iupdQkcxQkPG/bND,oKgcwp4Ukj/Ok25C,z+xdQupmTkNFNhc/,u64cwpqPjz/yhW5C,z+xdQupmTkNFNhc/,ZX8cwktElT/Zzm5C,ILpAQtH+HEOvT2RC
