# Raw_Data_Preprocessing_HA
- written by Jasmin L. Walter (jawalter@uos.de)
- reads in nested json files and returns flattened csv files
- does not change anything in the data, only extracts all variables from all 9 layers of the nested json files and saves them in data frames / csv files unchanged

## Important: this script is optimized to run with the raw json recordings from the HumanA Westbrook builds version!!!
For example: it also handles the collidertype variable

Purpose: Flatten nested Unity VR eye-tracking JSON logs into per-file CSVs without altering values.
Extracts all variables (incl. up to two ray-cast hits per sample) from the nested structure
and writes a flattened data frame per recording, plus a trial info summary.
Usage:

Set DATA_PATH and PROCESSED_DATA_PATH to your raw JSON folder and output folder.
Optionally set subIDs to restrict processing; otherwise participants are inferred from DATA_PATH.
Run in Python.

Inputs (expected JSON naming and structure):
Files named like <ParticipantID>_Expl_S_<Session>_ET_<ETSession>_<UnixTS>.json (OnQuit.json files are ignored)
JSON structure must contain trials[0]['dataPoints'] with 'rayCastHitsCombinedEyes'

Outputs (to PROCESSED_DATA_PATH):
<prefix>_infoSummary.csv: trial-level metadata (no dataPoints), adds FileInfo
<prefix>_flattened.csv: flattened dataPoints with columns for first/second ray-cast hits; adds 'DataRow' indexing the original row

Notes:
Ray-cast hits: rows with 0 hits are filled with NaNs (correct column schema); 1 hit fills “_1”, 2 hits fill “_1” and “_2” columns.
Files are discovered and sorted per participant and session; OnQuit.json files are excluded.

Dependencies:
Python >= 3.9
pandas, numpy (json, re, os, time, warnings from the standard library)

License: GNU General Public License v3.0 (GPL-3.0) (see LICENSE)


Note: not all of the imported modules and functions are actually required.

Required by the current code:

os (paths, listing)
json (loading JSON)
numpy as np (np.nan, unique)
re (extracting numeric tokens from filenames)
pandas as pd (json_normalize, DataFrame ops, CSV I/O)
time (time.ctime for logging)
warnings (suppress FutureWarning)


Potentially safe to remove (but double-check that the script still runs afterwards):

importlib, check_package, StopExecution (not invoked)
cv2
matplotlib.pyplot as plt
glob
scipy.cluster.vq as clusters
pandas.plotting.autocorrelation_plot as AC_plot
statsmodels.graphics.tsaplots
statsmodels.tsa.stattools.acf
mpl_toolkits.mplot3d.Axes3D
matplotlib.colors.LinearSegmentedColormap
from timeit import default_timer as timer
numpy imports/aliases already covered; networkx, skimage, sklearn are commented out (fine)

In [2]:
# General configuration
import os

# install_missing_packages (bool):
#     When True, packages that are found to be missing are installed
#     automatically.
install_missing_packages = True

# use_conda (bool):
#     Selects the installer: conda when True, pip when False. By default
#     conda is chosen whenever the CONDA_EXE environment variable is set,
#     i.e. when jupyter is run in a conda environment.
use_conda = os.environ.get('CONDA_EXE') is not None

In [3]:
import importlib

def check_package(package, pip_pkg: str = None, conda_pkg: str = None):
    """Make sure that an importable package is available, installing it if needed.

    Parameters
    ----------
    package
        Import name of the package to look for (e.g. ``'cv2'``).
    pip_pkg
        Name to pass to ``pip install`` when it differs from the import name.
    conda_pkg
        Name to pass to ``conda install`` when it differs from the import name.

    Raises
    ------
    RuntimeError
        If the package is missing and the global flag
        `install_missing_packages` is False.
    subprocess.CalledProcessError
        If the pip installation command fails.

    The installer is chosen by the global flag `use_conda`.
    """
    # `import importlib` alone does not guarantee that the `importlib.util`
    # submodule is loaded, so it is imported explicitly here.
    import importlib.util

    try:
        spec = importlib.util.find_spec(package)
    except ModuleNotFoundError:
        # raised for dotted names whose parent package is not installed
        spec = None

    if spec is not None:
        return  # ok, package is already installed

    if not install_missing_packages:
        raise RuntimeError(f"{package} is not installed!")

    if use_conda:
        import conda.cli
        conda.cli.main('conda', 'install',  '-y', conda_pkg or package)
    else:
        import subprocess
        import sys
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pip_pkg or package])

    # let the running interpreter's import system notice the new package
    importlib.invalidate_caches()
        
# This is to exit cells without error tracebacks (cosmetic purpose)
class StopExecution(Exception):
    """Exception used to leave a notebook cell early.

    The `_render_traceback_` method returns nothing, so no traceback text
    is produced for this exception (cosmetic purpose).
    """

    def _render_traceback_(self):
        return None

In [4]:
import os
import cv2
import json
import numpy as np
import re
import matplotlib.pyplot as plt
import pandas as pd
#import networkx as nx
import glob
import scipy.cluster.vq as clusters
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

#from sklearn.preprocessing import normalize
from pandas.plotting import autocorrelation_plot as AC_plot 
from statsmodels.graphics import tsaplots
from statsmodels.tsa.stattools import acf
#from skimage.filters import gaussian
from mpl_toolkits.mplot3d import Axes3D 
from matplotlib.colors import LinearSegmentedColormap

from timeit import default_timer as timer
import time

# Customize to run scripts - paths, subject ids to run etc.

In [11]:
# Folder holding the raw JSON recordings (one sub-folder per participant is
# read from here by the processing loop).
DATA_PATH = 'E:/WestbrookProject/Human_A_Data/Experiment1/Exploration_short/'

# Folder the flattened csv files are written to.
PROCESSED_DATA_PATH = 'E:/WestbrookProject/Human_A_Data/Experiment1/pre-processing/'

# Contents of both folders: hidden entries (leading '.') are left out and the
# remaining names are ordered case-insensitively in ascending order.
DATA_FOLDER = sorted(
    (entry for entry in os.listdir(DATA_PATH) if not entry.startswith('.')),
    key=str.lower,
)
PROCESSED_DATA_FOLDER = sorted(
    (entry for entry in os.listdir(PROCESSED_DATA_PATH) if not entry.startswith('.')),
    key=str.lower,
)


In [14]:
# Infer the participant IDs from the entries of DATA_FOLDER: the first (up to)
# four characters of an entry are used as ID when they consist of digits only;
# all other entries are ignored. np.unique removes duplicates and sorts.
_id_prefixes = [entry[:4] for entry in DATA_FOLDER]
subIDs = np.unique([int(prefix) for prefix in _id_prefixes if prefix.isdigit()])

print(subIDs)
print('number data folders:', len(subIDs))

[ 365  479 1754 2258 2361 2693 3246 3310 3572 3976 4176 4597 4796 4917
 5238 5531 5741 6642 7093 7264 7412 7842 8007 8469 8673 8695 9472 9502
 9586 9601]
number data folders: 30


In [26]:
# Custom subIDs - use this if only a subset of the participants should be
# processed. Running this cell replaces any previously assigned subIDs.
subIDs = [
    1754, 2258, 2361, 2693, 3310, 4176, 4597, 4796, 4917, 5741, 6642,
    7093, 7264, 7412, 7842, 8007, 8469, 8673, 9472, 9502, 9586, 9601,
]
#   365,  479,
print(len(subIDs))

22


# Main part - flatten all nested data structures and save as csv

In [34]:
# Column schema of the flattened ray cast information.
# Every ray cast hit contributes the fields listed below; the suffix _1 marks
# the first hit of a sample and the suffix _2 the second hit.
_hit_fields = [
    'hitObjectColliderName',
    'ordinalOfHit',
    'hitColliderType',
    'hitPointOnObject.x',
    'hitPointOnObject.y',
    'hitPointOnObject.z',
    'hitObjectColliderBoundsCenter.x',
    'hitObjectColliderBoundsCenter.y',
    'hitObjectColliderBoundsCenter.z',
]

columns1 = [f'{field}_1' for field in _hit_fields]
columns2 = [f'{field}_2' for field in _hit_fields]

# complete ray cast schema: first hit, second hit, index of the original data row
columnsRCall = columns1 + columns2 + ['DataRow']

# one-row data frames filled with nan values and the correct variable names;
# they stand in for a hit when no ray cast information is available
emptyDF1 = pd.DataFrame(np.nan, index=[0], columns=columns1)
emptyDF2 = pd.DataFrame(np.nan, index=[0], columns=columns2)



#########################################################################################################
# data loop through all subjects and sessions
#
# Raw recordings are expected to be named
#   'subjectID'_Expl_S_'SessionNumber'_ET_'EyeTrackingSessionNumber'_'UnixTimestamp'.json
# The three leading numbers are parsed from the name itself instead of being
# cut out at fixed character positions, so subject IDs and session numbers
# with any number of digits are handled.
_RECORDING_NAME = re.compile(r'^(\d+)_Expl_S_(\d+)_ET_(\d+)_')


def _flatten_nested(record, prefix=''):
    """Return a flat copy of the dict `record`.

    Keys of nested dicts are joined to their parent key with '.', e.g.
    {'hitPointOnObject': {'x': 1}} becomes {'hitPointOnObject.x': 1}.
    Values that are not dicts are copied unchanged.
    """
    flat = {}
    for key, value in record.items():
        name = prefix + str(key)
        if isinstance(value, dict):
            flat.update(_flatten_nested(value, name + '.'))
        else:
            flat[name] = value
    return flat


def _flatten_ray_cast_hits(hits, data_row):
    """Turn the ray cast hits of one data point into one flat row (a dict).

    The variables of the first hit get the suffix '_1', those of the second
    hit the suffix '_2'; 'DataRow' stores the index of the original data row.
    Hits that are not present simply add no keys, so their columns stay nan
    once the rows are combined into a data frame.

    More than two hits are not covered by the column schema: a warning is
    printed and the row only carries its 'DataRow' index (all ray cast
    columns stay nan) instead of repeating the values of an earlier row.
    """
    row = {'DataRow': data_row}

    if len(hits) > 2:
        print('!!!an exception occured in the ray cast data flattening in trial ', data_row)
        return row

    for ordinal, hit in enumerate(hits, start=1):
        for key, value in _flatten_nested(hit).items():
            row[key + '_' + str(ordinal)] = value
    return row


for subcount, subject in enumerate(subIDs, start=1):

    print('Subject '
          + str(subject)
          + ' started - '
          + str(subcount)
          + '/'
          + str(len(subIDs))
          + ' subjects')

    # list the subject folder (hidden files excluded, ascending order)
    subject_dir = DATA_PATH + str(subject)
    if not os.path.isdir(subject_dir):
        print('\tSubject '
              + str(subject)
              + ' has no data folder and is skipped: '
              + subject_dir)
        continue

    CURRENT_SUBJECT_FOLDER = sorted([f for f in os.listdir(subject_dir) if not f.startswith('.')], key=str.lower)

    # get the data files according to the subject, ignoring OnQuit files
    # (the order of CURRENT_SUBJECT_FOLDER is kept, so the files stay sorted)
    subject_files = [f for f in CURRENT_SUBJECT_FOLDER
                     if f.startswith(str(subject) + '_Expl_S_') and not f.endswith("OnQuit.json")]

    # parse SubID, SessionNumber and ET_SessionNumber from every file name
    name_matches = {f: _RECORDING_NAME.match(f) for f in subject_files}

    # skip the subject when no file was found or a name does not follow the
    # format - continuing would reuse the session numbers of the previous subject
    if not subject_files or not all(name_matches.values()):
        print('\tSubject '
              + str(subject)
              + ' Filename is not valid!')
        continue

    # the numbers are compared as integers (as text, '9' would rank above '10')
    session_number = max(int(m.group(2)) for m in name_matches.values()) # the maximum session number of the particular subject
    ET_session_number = max(int(m.group(3)) for m in name_matches.values()) # the maximum ET session number of the particular subject

    # print info of how many files were found
    print(len(subject_files), ' files were found for participant ', name_matches[subject_files[0]].group(1))
    print('A maximum of ', session_number, 'sessions were found and will be processed')
    # NOTE: this is the product of both maxima, i.e. an upper bound and not a file count
    print('A total of ', session_number*ET_session_number, 'files were found and will be processed')

# --------- second layer - exploration session loop ---------

    # loop over exploration sessions (starting at 1)
    for EXP_session in range(1, session_number + 1):

        # extract the exploration data files for each session (OnQuit files are
        # already excluded); matching the parsed number works for 'S_1', 'S_01' and 'S_10'
        subject_data = [f for f in subject_files if int(name_matches[f].group(2)) == EXP_session]

        print("\tTotal Sessionfiles: "
              + str(len(subject_data))
              + " - Exploration Session "
              + str(EXP_session))

# --------- third layer - eye tracking session loop ---------

        # loop over separate eye tracking sessions
        for fileName in subject_data:

            print('load data of file ', fileName)

            file_path = DATA_PATH + str(subject) + '/' + fileName
            print('Path: ', file_path)

            # read the raw text of the JSON file; a file that cannot be read is skipped
            try:
                with open(file_path) as datafile:
                    print("read file")
                    dataR = datafile.read()
            except (OSError, UnicodeDecodeError):
                print("reading did not work")
                continue

            # the file content is wrapped in [ ] and parsed as a list
            try:
                subject_session = json.loads('[' + dataR + ']')
            except json.JSONDecodeError as err:
                print('\tFile ' + fileName + ' is not valid JSON and is skipped: ' + str(err))
                continue

            print("data loaded")
            print('time is: ', time.ctime())

##################################################################################################################

            # Data flattening part:

            name_match = name_matches[fileName]
            # prefix of the csv files: <SubID>_Expl_S_<Session without leading zeros>_ET_<ETSession>
            file_prefix = (name_match.group(1)
                           + '_Expl_S_' + str(int(name_match.group(2)))
                           + '_ET_' + name_match.group(3))
            # FileInfo: the unchanged start of the raw file name up to and including
            # the ET session number
            file_info = fileName[:name_match.end(3)]

            trial = subject_session[0]['trials'][0]
            data_points = trial['dataPoints']

            # first save the overall trial information
            infoDF = pd.json_normalize(trial)
            infoDF = infoDF.drop(columns=['dataPoints'])
            infoDF.insert(0, 'FileInfo', file_info)
            infoDF.to_csv(PROCESSED_DATA_PATH + file_prefix + '_infoSummary.csv', index = False)

            print('trial info saved')

            # flatten the majority of the variables into currentDF data frame
            currentDF_raw = pd.json_normalize(data_points)

            # remove the 'rayCastHitsCombinedEyes' column as it still contains a nested data structure
            dataDF = currentDF_raw.drop(columns=['rayCastHitsCombinedEyes'])

            # flatten the ray cast hits of all data points and build the data frame in
            # one step; dtype=object stores strings, integers and floats as they are,
            # missing values are nan and keys outside of columnsRCall are left out
            rayCastData_df = pd.DataFrame(
                [_flatten_ray_cast_hits(point['rayCastHitsCombinedEyes'], index)
                 for index, point in enumerate(data_points)],
                columns=columnsRCall,
                dtype=object)

            flatData_df = pd.concat([dataDF, rayCastData_df], axis=1)

            print('saving data')
            flatData_df.to_csv(PROCESSED_DATA_PATH + file_prefix + '_flattened.csv', index = False)

            print('data saved')
            print('time is: ', time.ctime())


Subject 1754 started - 1/22 subjects
15  files were found for participant  1754
A maximum of  5 sessions were found and will be processed
A total of  15 files were found and will be processed
	Total Sessionfiles: 3 - Exploration Session 1
load data of file  1754_Expl_S_01_ET_1_1646740718.03586.json
Path:  E:/WestbrookProject/Human_A_Data/Experiment1/Exploration_short/1754/1754_Expl_S_01_ET_1_1646740718.03586.json
read file
data loaded
time is:  Wed Jul 31 20:04:33 2024
trial info saved


KeyboardInterrupt: 

In [32]:
# Show the output prefix obtained from the most recently processed file name:
# characters 0-11 followed by characters 13-18 (the character at index 12 is dropped).
print(''.join((fileName[:12], fileName[13:19])))

1754_Expl_S_1_ET_1


# ----------------- end of script ------------------------------ (old version below)

In [None]:
# ####################################### old Version ##############################################
# # if no ray cast information is available, the data frame will be filled with nan values
# # create empty data frames with nan values and correct variable names
# columns1 = ['hitObjectColliderName_1','ordinalOfHit_1','hitPointOnObject.x_1','hitPointOnObject.y_1','hitPointOnObject.z_1',
#             'hitObjectColliderBoundsCenter.x_1','hitObjectColliderBoundsCenter.y_1','hitObjectColliderBoundsCenter.z_1']

# columns2 = ['hitObjectColliderName_2','ordinalOfHit_2','hitPointOnObject.x_2','hitPointOnObject.y_2','hitPointOnObject.z_2',
#             'hitObjectColliderBoundsCenter.x_2','hitObjectColliderBoundsCenter.y_2','hitObjectColliderBoundsCenter.z_2']

# columnsRCall = ['hitObjectColliderName_1','ordinalOfHit_1','hitPointOnObject.x_1','hitPointOnObject.y_1','hitPointOnObject.z_1',
#                 'hitObjectColliderBoundsCenter.x_1','hitObjectColliderBoundsCenter.y_1','hitObjectColliderBoundsCenter.z_1',
#                 'hitObjectColliderName_2','ordinalOfHit_2','hitPointOnObject.x_2','hitPointOnObject.y_2','hitPointOnObject.z_2',
#                 'hitObjectColliderBoundsCenter.x_2','hitObjectColliderBoundsCenter.y_2','hitObjectColliderBoundsCenter.z_2','dataRow']

# emptyDF1 = pd.DataFrame(np.nan,index=[0], columns= columns1)
# emptyDF2 = pd.DataFrame(np.nan,index=[0], columns= columns2)


# # create empty data overview data frame

# flatData_df = pd.DataFrame(columns = allColumnNames)

# # create empty subject information data frame

# subjectData_df = pd.DataFrame(columns = infoVarNames)

# #########################################################################################################
# # data loop through all subjects and sessions

# subcount = 0


# for subject in subIDs:
    
#     subcount +=1
#     print('Subject ' 
#           + str(subject) 
#           + ' started - ' 
#           + str(subcount) 
#           + '/' 
#           + str(len(subIDs)) 
#           + ' subjects')
    
# #     # Create empty dataframe for later concatenation
# #     complete_exploration_df = pd.DataFrame(columns = col_names)
# #     complete_exploration_df.head()
    
    
#     # change dir into the subject folder 
#     CURRENT_SUBJECT_FOLDER = sorted([f for f in os.listdir(DATA_PATH+str(subject)) if not f.startswith('.')], key=str.lower)
#     # get the data files according to the subject, ignoring OnQuit files
#     subject_files = sorted([f for f in CURRENT_SUBJECT_FOLDER 
#                              if f.startswith(str(subject)+'_Expl_S_') and f.endswith("OnQuit.json") == False], 
#                             key=str.lower) 
    
#     # the following works as long as the data name format is as follows:
#     # 'subjectID'_Expl_S_'SessionNumber'_ET_'EyeTrackingSessionNumber'_'UnixTimestamp'.json
#     folder_files = list()
    
#     # loop through the subject folder and save all numbers
#     for file in subject_files:
#         folder_files.append(re.findall(r'\d+', file))
    
#     # Extract all SubIDs (only one), SessionNumbers, ET_SessionNumbers (and Timestamps)
#     try:
#         SubID, SessionNumbers, ET_SessionNumbers, UnixTimestamp1, UnixTimeStamp2 = map(list, zip(*folder_files))
#     except:
#         print('\tSubject ' 
#               + str(subject)
#               + ' Filename is not valid!')
        
# #     print(SubID)
# #     print(SessionNumbers)
# #     print(ET_SessionNumbers)
# #     print(UnixTimestamp1)
# #     print(UnixTimeStamp2)
    
#     session_number = int(max(SessionNumbers)) # the maximum session number of the particular subject
#     ET_session_number = int(max(ET_SessionNumbers)) # the maximum ET session number of the particular subject
    
    
#     # print info of how many files were found 
    
#     print(len(SubID), ' files were found for participant ', SubID[0])
#     print('A maximum of ', session_number, 'sessions were found and will be processed')
        
# # --------- second layer - exploration session loop ---------

#     # loop over exploration sessions
#     for EXP_session in range(session_number):
#         # to avoid start at 0
#         EXP_session +=1 

#         # extract the exploration data files for each session - but exclude OnQuit files
#         subject_data = sorted([f for f in CURRENT_SUBJECT_FOLDER if f.startswith(str(subject) + '_Expl_S_' + str(EXP_session)) 
#                                and f.endswith("OnQuit.json") == False], key=str.lower)


#         print("\tTotal Sessionfiles: "
#               + str(len(subject_data))
#               + " - Exploration Session "
#               + str(EXP_session))

#         ET_session_count = 0 # session count

# # --------- third layer - eye tracking session loop ---------

#         # loop over separate eye tracking sessions
#         for fileName in subject_data:
#             ET_session_count+=1

#             print('load data of file ', fileName)

#             print('Path: ', DATA_PATH + str(subject) + '/' + fileName)
#             # open the JSON file as dictionary
#             with open(DATA_PATH + str(subject) + '/' + fileName) as datafile:
#                 try:
#                     print("read file")
#                     dataR = '['+ datafile.read()
#                     dataR = dataR[:len(dataR)] + "]"
#                 except:
#                     print("reading did not work")

#                 subject_session = json.loads(dataR)
#                 print(" data loaded")



# ##################################################################################################################

# #                 # Data flattening part: 
# #                 # first save trial information

# #                 currentTrialInfo = pd.json_normalize(subject_session[0]['trials'][0])
# #                 currentTrialInfo = infoDF.drop(columns=['dataPoints'])
# #                 currentTrialInfo.insert(0,'Participant',subject)
# #                 currentTrialInfo.insert(1,'Session',ET_session)

# #                 subjectData_df = pd.concat[subjectData_df, currentTrialInfo]


#             # create empty data overview data frame
#             flatData_df = pd.DataFrame(columns = allColumnNames)
        
#             # flatten the majority of the variables into currentDF data frame
#             #currentDF_raw = pd.json_normalize(subject_session[0]['trials'][0]['dataPoints'])

#             # remove the 'rayCastHitsCombinedEyes' column as it still contains a nested data structure
#             #currentDF = currentDF_raw.drop(columns=['rayCastHitsCombinedEyes'])
            
#             # create an empty data frame of the required size
#             #rayCastData_df = pd.DataFrame(np.nan,index=len(subject_session[0]['trials'][0]['dataPoints'], columns= columnsRCall)

            
#             start = timer()
#             # now loop through the individual trials and flatten the data
#             for index in range(20):#range(len(subject_session[0]['trials'][0]['dataPoints'])):
                
#                 # flatten the majority of the variables into currentDF data frame
#                 currentDF_raw = pd.json_normalize(subject_session[0]['trials'][0]['dataPoints'][index])

#                 # remove the 'rayCastHitsCombinedEyes' column as it still contains a nested data structure
#                 currentDF = currentDF_raw.drop(columns=['rayCastHitsCombinedEyes'])
            
                
#                 # depending on the size of the ray cast data - flatten data and appand it to currentDF data frame
#                 # the variables are renamed to make the differentiation of first and second order collider hits more intuitive
#                 #lengthRCData = len(subject_session[0]['trials'][0]['dataPoints'][index]['rayCastHitsCombinedEyes'][0])
#                 lengthRCData = len(currentDF_raw['rayCastHitsCombinedEyes'][0])
                
#                 if lengthRCData ==0: #case: no ray cast data is available = no collider was hit

#                     combineDF = pd.concat([currentDF, emptyDF1, emptyDF2], axis=1)
#                     combineDF.insert(len(combineDF.columns), 'dataRow',index)


#                 elif lengthRCData == 1: # case: only one collider was hit, there is no secondary hit

#                     pdRC1= pd.json_normalize(currentDF_raw['rayCastHitsCombinedEyes'][0][0]).rename(
#                         columns = {'hitObjectColliderName':'hitObjectColliderName_1',
#                                    'ordinalOfHit':'ordinalOfHit_1',
#                                    'hitPointOnObject.x':'hitPointOnObject.x_1',
#                                    'hitPointOnObject.y':'hitPointOnObject.y_1',
#                                    'hitPointOnObject.z':'hitPointOnObject.z_1',
#                                    'hitObjectColliderBoundsCenter.x':'hitObjectColliderBoundsCenter.x_1',
#                                    'hitObjectColliderBoundsCenter.y':'hitObjectColliderBoundsCenter.y_1',
#                                    'hitObjectColliderBoundsCenter.z':'hitObjectColliderBoundsCenter.z_1'})
#                     combineDF = pd.concat([currentDF, pdRC1, emptyDF2], axis=1)
#                     combineDF.insert(len(combineDF.columns), 'dataRow',index)

#                 elif lengthRCData == 2: # case: two collider were hit 

#                     pdRC1= pd.json_normalize(currentDF_raw['rayCastHitsCombinedEyes'][0][0]).rename(
#                         columns = {'hitObjectColliderName':'hitObjectColliderName_1',
#                                    'ordinalOfHit':'ordinalOfHit_1',
#                                    'hitPointOnObject.x':'hitPointOnObject.x_1',
#                                    'hitPointOnObject.y':'hitPointOnObject.y_1',
#                                    'hitPointOnObject.z':'hitPointOnObject.z_1',
#                                    'hitObjectColliderBoundsCenter.x':'hitObjectColliderBoundsCenter.x_1',
#                                    'hitObjectColliderBoundsCenter.y':'hitObjectColliderBoundsCenter.y_1',
#                                    'hitObjectColliderBoundsCenter.z':'hitObjectColliderBoundsCenter.z_1'})

#                     pdRC2 = pd.json_normalize(currentDF_raw['rayCastHitsCombinedEyes'][0][1]).rename(
#                         columns = {'hitObjectColliderName':'hitObjectColliderName_2',
#                                    'ordinalOfHit':'ordinalOfHit_2',
#                                    'hitPointOnObject.x':'hitPointOnObject.x_2',
#                                    'hitPointOnObject.y':'hitPointOnObject.y_2',
#                                    'hitPointOnObject.z':'hitPointOnObject.z_2',
#                                    'hitObjectColliderBoundsCenter.x':'hitObjectColliderBoundsCenter.x_2',
#                                    'hitObjectColliderBoundsCenter.y':'hitObjectColliderBoundsCenter.y_2',
#                                    'hitObjectColliderBoundsCenter.z':'hitObjectColliderBoundsCenter.z_2'})
#                     combineDF = pd.concat([currentDF, pdRC1, pdRC2], axis=1)
#                     combineDF.insert(len(combineDF.columns), 'dataRow',index)


#                 else:
#                     print('!!!an exception occured in the ray cast data flattening in trial ', index)

#                 # now add the new data row to the data overview
#                 # rayCastData_df = [rayCastData_df]

            
#                 flatData_df = pd.concat([flatData_df,combineDF],ignore_index=True)

#             print('saving data')
#             flatData_df.to_csv(PROCESSED_DATA_PATH + fileName[0:18] + '_flattened.csv', index = False)
#             print('data saved')
#             end= timer () 
#             print('timer:', end-start)
