# Handles a single (exploration) file


**Changes (to Jasmin's code):**

- uses a single file
- ignores warnings
- double-checks that columns are labeled correctly, to prevent the duplication error encountered earlier

In [None]:
import os
import json
import numpy as np
import pandas as pd
import warnings
import time

# Silence FutureWarning messages for the remainder of the run
warnings.simplefilter('ignore', FutureWarning)

# Base names of the per-hit ray cast columns. Every name is used twice:
# once with a '_1' suffix (first hit) and once with a '_2' suffix (second hit).
_HIT_FIELDS = (
    'hitObjectColliderName',
    'ordinalOfHit',
    'hitPointOnObject.x',
    'hitPointOnObject.y',
    'hitPointOnObject.z',
    'hitObjectColliderBoundsCenter.x',
    'hitObjectColliderBoundsCenter.y',
    'hitObjectColliderBoundsCenter.z',
)

columns1 = [f'{field}_1' for field in _HIT_FIELDS]
columns2 = [f'{field}_2' for field in _HIT_FIELDS]

# Full ray cast column layout: both hits followed by the originating row number
columnsRCall = [*columns1, *columns2, 'DataRow']

# One-row, all-NaN placeholders used when a hit slot has no ray cast data
emptyDF1 = pd.DataFrame(data=np.nan, index=[0], columns=columns1)
emptyDF2 = pd.DataFrame(data=np.nan, index=[0], columns=columns2)

# Output directory for the generated CSV files and the raw input recording
PROCESSED_DATA_PATH = '/content/'
DATA_PATH = '/content/1005_Expl_S_1_ET_1_1618817882.14612.json'

# Label for the single processed file: written to the 'FileInfo' column and
# used as the prefix of every output file name
fileName = "data"

# Create the output directory up front; a no-op when it already exists
os.makedirs(name=PROCESSED_DATA_PATH, exist_ok=True)

# Read the raw recording and wrap it in brackets so that it parses as a JSON
# array; the rest of the script addresses the session as subject_session[0].
try:
    with open(DATA_PATH, 'r') as datafile:
        print("read file")
        dataR = '[' + datafile.read() + ']'
except (OSError, UnicodeDecodeError):
    # Report and re-raise: swallowing the error here would only surface later
    # as an unrelated NameError on dataR.
    print("reading did not work")
    raise

# Parse the JSON text exactly once (the file can be large)
subject_session = json.loads(dataR)
print("Data loaded at:", time.ctime())

##################################################################################################################
# Data flattening

# Trial-level summary: flatten the first trial of the first session entry,
# leave out the per-sample 'dataPoints' payload, and tag the row with the file label.
firstTrial = subject_session[0]['trials'][0]
infoDF = pd.json_normalize(firstTrial).drop(columns=['dataPoints'])
infoDF.insert(0, 'FileInfo', fileName)

infoSummaryFile = fileName + '_infoSummaryExplo.csv'
infoDF.to_csv(os.path.join(PROCESSED_DATA_PATH, infoSummaryFile), index=False)
print('Trial info saved.')

# Flatten majority of the variables into dataDF
dataPoints = subject_session[0]['trials'][0]['dataPoints']
currentDF_raw = pd.json_normalize(dataPoints)
dataDF = currentDF_raw.drop(columns=['rayCastHitsCombinedEyes'])  # Nested column is flattened separately below

# Pre-allocate one all-NaN row per data point for the ray cast columns
rayCastData_df = pd.DataFrame(np.nan, index=range(len(dataPoints)), columns=columnsRCall)


def _suffixed_hit_frame(hit, suffix):
    """Return a one-row DataFrame for a single ray cast hit, with `suffix` appended to every column name."""
    frame = pd.json_normalize(hit)
    frame.columns = [col + suffix for col in frame.columns]
    return frame


# Flatten the ray cast data and fill DataFrame
for index, hits in enumerate(currentDF_raw['rayCastHitsCombinedEyes']):
    lengthRCData = len(hits)

    if lengthRCData == 0:  # No collider hit
        combineDF = pd.concat([emptyDF1, emptyDF2], axis=1)

    elif lengthRCData == 1:  # Only one collider hit
        pdRC1 = _suffixed_hit_frame(hits[0], '_1')
        combineDF = pd.concat([pdRC1, emptyDF2], axis=1)

    elif lengthRCData == 2:  # Two colliders hit
        pdRC1 = _suffixed_hit_frame(hits[0], '_1')
        pdRC2 = _suffixed_hit_frame(hits[1], '_2')
        combineDF = pd.concat([pdRC1, pdRC2], axis=1)

    else:
        # More than two hits do not fit the *_1 / *_2 layout. Use an all-NaN
        # row instead of falling through with the previous iteration's
        # combineDF, which would silently duplicate that row's hit data (or
        # raise NameError on the very first data point).
        print(f"!!! An exception occurred in ray cast data flattening in trial {index}")
        print(f"    {lengthRCData} hits found; ray cast columns of this row are left empty")
        combineDF = pd.concat([emptyDF1, emptyDF2], axis=1)

    # Record which data point this row came from (appended as the last column)
    combineDF.insert(len(combineDF.columns), 'DataRow', index)

    # Verify column labeling
    if not all(col.endswith(('_1', '_2')) or col == 'DataRow' for col in combineDF.columns):
        print(f"Column labeling issue detected in trial {index}")
        print(combineDF.columns)

    # Write the row into the pre-allocated frame; values are matched to
    # rayCastData_df's columns by label
    rayCastData_df.loc[index] = combineDF.loc[0]

# Put the flat per-sample columns and the ray cast columns side by side
flatData_df = pd.concat([dataDF, rayCastData_df], axis='columns')

# Write the combined table to the output directory (row index omitted)
flattenedFile = os.path.join(PROCESSED_DATA_PATH, fileName + '_flattened_Explo.csv')
print("Saving flattened data...")
flatData_df.to_csv(flattenedFile, index=False)
print("Data saved at:", time.ctime())
