# Create the dataset

In [1]:
import os
import json
import numpy as np

In [57]:
def clean_list(lst):
    """Return a new list where every ``None`` entry of ``lst`` becomes ``np.nan``.

    All other entries are kept unchanged and in their original order.
    The input list itself is not modified.
    """
    return [np.nan if item is None else item for item in lst]

def apply_averaging_window(lst):
    """Smooth ``lst`` with a sliding mean over 3 consecutive elements.

    One value is produced per full window, so the result has
    ``len(lst) - 2`` entries (empty when ``lst`` has fewer than 3 elements).
    A window that contains ``None`` contributes ``0.0`` instead of a mean.

    Note: only ``None`` is treated as missing. A NaN inside a window is not
    caught by that check and makes the mean of that window NaN.
    """
    window_size = 3
    smoothed = []
    for start in range(len(lst) - window_size + 1):
        chunk = lst[start:start + window_size]
        # Windows with a missing (None) sample are scored as 0.0.
        smoothed.append(0.0 if None in chunk else sum(chunk) / len(chunk))
    return smoothed

In [None]:
def _collect_image_paths(folder_dir, folder_name, n_scores, low_side, high_side):
    """Return one image path (or NaN when the file is missing) per score index.

    Indices below ``n_scores // 2`` map to ``low_side`` images numbered in
    descending order, the middle index maps to the center image, and the
    remaining indices map to ``high_side`` images numbered in ascending order.
    The numbering offsets are derived from ``n_scores`` instead of being
    hard-coded for a 15/1/15 layout.
    """
    half = n_scores // 2
    paths = []
    for i in range(n_scores):
        if i < half:
            file_name = f'{folder_name}_{low_side}_{half - 1 - i}.jpg'
        elif i == half:
            file_name = f'{folder_name}_center_0.jpg'
        else:
            file_name = f'{folder_name}_{high_side}_{i - half - 1}.jpg'

        img_path = os.path.join(folder_dir, file_name)
        # Keep the list aligned with the scores: missing images become NaN.
        paths.append(img_path if os.path.exists(img_path) else np.nan)
    return paths


simulation_data_dir = '/Path/To/Base/Simulation/Data/'

# Iterate through folders (each simulation)
for folder_name in os.listdir(simulation_data_dir):
    folder_dir = os.path.join(simulation_data_dir, folder_name)

    # os.listdir also returns plain files; only directories are simulations.
    if not os.path.isdir(folder_dir):
        continue

    # Each simulation folder holds a JSON file named after the folder
    json_file_dir = os.path.join(folder_dir, f'{folder_name}.json')

    # Read JSON file
    with open(json_file_dir, 'r', encoding='utf-8') as json_file:
        json_data = json.load(json_file)

    # SET UP VERTICAL DATA
    # Construct scores list: reversed "down" scores, then center, then "up"
    vertical_scores = (
        json_data['down_verification_confidence'][::-1]
        + json_data['center_verification_confidence']
        + json_data['up_verification_confidence']
    )

    # Clean, average and derivative
    vertical_scores     = clean_list(vertical_scores)
    vertical_avg_scores = apply_averaging_window(vertical_scores)
    vertical_dev_scores = np.gradient(vertical_avg_scores)
    # The averaging window drops two samples, so pad the derivative with a NaN
    # at the beginning and at the end to match the length of the scores list.
    vertical_dev_scores = np.insert(vertical_dev_scores, 0, np.nan)
    vertical_dev_scores = np.append(vertical_dev_scores, np.nan)

    # List of images (NaN where the image file does not exist)
    vertical_images = _collect_image_paths(
        folder_dir, folder_name, len(vertical_scores), 'down', 'up'
    )

    # SET UP HORIZONTAL DATA
    # Construct scores list: reversed "left" scores, then center, then "right"
    horizontal_scores = (
        json_data['left_verification_confidence'][::-1]
        + json_data['center_verification_confidence']
        + json_data['right_verification_confidence']
    )

    # Clean, average and derivative
    horizontal_scores     = clean_list(horizontal_scores)
    horizontal_avg_scores = apply_averaging_window(horizontal_scores)
    horizontal_dev_scores = np.gradient(horizontal_avg_scores)
    # Same NaN padding as for the vertical derivative, to keep the same length.
    horizontal_dev_scores = np.insert(horizontal_dev_scores, 0, np.nan)
    horizontal_dev_scores = np.append(horizontal_dev_scores, np.nan)

    # List of images (NaN where the image file does not exist)
    horizontal_images = _collect_image_paths(
        folder_dir, folder_name, len(horizontal_scores), 'left', 'right'
    )