In [1]:
import json
import os
from utils import utils
import pandas as pd
from collections import defaultdict

## Import Example Data File

In [2]:
# Load the single example recording used by the exploration cells below.
example_data_path = 'data_example.json'
# Decode explicitly as UTF-8 so that parsing does not depend on the platform's
# default locale encoding.
with open(example_data_path, 'r', encoding='utf-8') as example_f:
    example_data = json.load(example_f)

# Root folder that contains one sub-folder per user.
dataset_root_path = 'dataset'

## Data Structure Exploration

In [3]:
# List the top-level keys of the loaded example file.
example_data.keys()

dict_keys(['$id', '$schema', 'ados', 'condition', 'eye_gaze', 'frame_rate', 'head_gaze', 'participant', 'skeleton', 'task', 'time'])

In [4]:
# Inspect the 'ados' entry; in this example it holds a 'preTest' dict of scores.
example_data['ados']

{'preTest': {'communication': 2,
  'interaction': 5,
  'module': 1.0,
  'play': 1,
  'protocol': 'ADOS-G',
  'socialCommunicationQuestionnaire': 23,
  'stereotype': 0,
  'total': 7}}

In [5]:
# Inspect the 'condition' entry (a short string code; 'RET' in this example).
example_data['condition']

'RET'

In [6]:
# Inspect the participant metadata (age in months, gender, id).
example_data['participant']

{'ageInMonths': 47, 'gender': 'male', 'id': 37}

In [7]:
# Inspect the task metadata (ability, difficulty level, index, start and end).
example_data['task']

{'ability': 'TT', 'difficultyLevel': 1, 'end': 10279, 'index': 18, 'start': 0}

In [8]:
# Build one metadata row per diagnosis session across all users.
#
# For every user folder under `dataset_root_path`, each diagnosis-session JSON
# file is loaded, stripped of the keys listed in `excluded_keys`, flattened
# with `utils.flatten_nested_dict`, and tagged with the user folder name.
# The rows are then assembled into `df`.

# NOTE(review): never populated in this cell and rebound to a plain dict by the
# later unique-values cell; kept only so this cell still defines the name.
unique_values = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))

# Keys left out of the per-session metadata table.
excluded_keys = {'$id', '$schema', 'eye_gaze', 'frame_rate', 'head_gaze', 'skeleton', 'time'}
# Case-insensitive substrings that identify a diagnosis session file.
session_markers = ('initial diagnosis', 'diagnosis abilities')

# Only directories are user folders; this also skips stray files such as
# '.DS_Store' that would otherwise make the inner os.listdir call fail.
users = [user for user in sorted(os.listdir(dataset_root_path))
         if os.path.isdir(os.path.join(dataset_root_path, user))]
user_data_list = []

for user in users:
    sessions_path = os.path.join(dataset_root_path, user)

    # The extension check must apply to BOTH session kinds. The previous
    # `A or B and C` form parsed as `A or (B and C)`, so any entry containing
    # 'initial diagnosis' was accepted even when it was not a .json file.
    sessions = [session for session in sorted(os.listdir(sessions_path))
                if os.path.splitext(session)[1] == '.json'
                and any(marker in session.lower() for marker in session_markers)]

    for session in sessions:
        session_path = os.path.join(sessions_path, session)

        # Explicit UTF-8 keeps decoding independent of the platform locale.
        with open(session_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # The file is already closed here; only in-memory processing remains.
        filtered_data = {k: v for k, v in data.items() if k not in excluded_keys}

        flattened_data = utils.flatten_nested_dict(filtered_data)
        flattened_data['user'] = user  # Add user as an identifier
        user_data_list.append(flattened_data)

df = pd.DataFrame(user_data_list)
df

Unnamed: 0,ados_preTest_communication,ados_preTest_interaction,ados_preTest_module,ados_preTest_play,ados_preTest_protocol,ados_preTest_socialCommunicationQuestionnaire,ados_preTest_stereotype,ados_preTest_total,condition,participant_ageInMonths,participant_gender,participant_id,task_ability,task_difficultyLevel,task_end,task_index,task_start,user
0,6,8,1.0,3,ADOS-G,13.0,2,14,RET,66,male,10,TT,1,21336,0.0,0,User 10
1,6,8,1.0,3,ADOS-G,13.0,2,14,RET,66,male,10,IM,1,2232,15.0,0,User 10
2,6,8,1.0,3,ADOS-G,13.0,2,14,RET,66,male,10,TT,1,5240,1.0,0,User 10
3,6,8,1.0,3,ADOS-G,13.0,2,14,RET,66,male,10,JA,1,9940,2.0,0,User 10
4,6,8,1.0,3,ADOS-G,13.0,2,14,RET,66,male,10,JA,1,1293,3.0,0,User 10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
340,7,10,1.0,1,ADOS-G,21.0,2,17,RET,60,male,9,TT,1,7939,2.0,60,User 9
341,7,10,1.0,1,ADOS-G,21.0,2,17,RET,60,male,9,TT,1,17164,4.0,39,User 9
342,7,10,1.0,1,ADOS-G,21.0,2,17,RET,60,male,9,TT,1,6732,5.0,62,User 9
343,7,10,1.0,1,ADOS-G,21.0,2,17,RET,60,male,9,JA,2,3197,7.0,77,User 9


In [21]:
# Map every column name to the list of distinct values found in that column,
# then print the mapping as a single JSON string.
unique_values = dict(
    (column, df[column].unique().tolist())
    for column in df.columns
)
print(json.dumps(unique_values))

{"ados_preTest_communication": [6, 7, 4, 8, 5, 2, 3], "ados_preTest_interaction": [8, 13, 11, 12, 10, 5, 3, 9, 7, 6], "ados_preTest_module": [1.0, 2.0], "ados_preTest_play": [3, 4, 2, 0, 1], "ados_preTest_protocol": ["ADOS-G"], "ados_preTest_socialCommunicationQuestionnaire": [13.0, 20.0, 17.0, 12.0, NaN, 22.0, 9.0, 4.0, 21.0, 15.0, 10.0, 6.0, 23.0, 8.0, 11.0, 14.0, 25.0, 18.0, 26.0, 3.0, 19.0], "ados_preTest_stereotype": [2, 4, 3, 5, 0, 1, 6], "ados_preTest_total": [14, 20, 15, 19, 13, 18, 12, 8, 7, 9, 11, 17, 16, 10], "condition": ["RET", "SHT"], "participant_ageInMonths": [66, 53, 57, 50, 45, 47, 69, 58, 60, 42, 35, 62, 56, 43, 46, 44, 71, 49, 36, 39, 63, 52, 51, 67, 40, 38, 72, 74, 76, 59], "participant_gender": ["male", "female"], "participant_id": [10, 11, 12, 13, 14, 15, 19, 20, 21, 23, 26, 29, 3, 30, 31, 32, 33, 34, 35, 4, 40, 46, 5, 50, 52, 54, 55, 56, 58, 59, 6, 60, 61, 62, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 8, 80, 81, 9], "task_ability": ["TT", "IM", "JA

In [26]:
# Show the rows whose ADOS pre-test module equals 2.
is_module_2 = df['ados_preTest_module'] == 2
df[is_module_2]

Unnamed: 0,ados_preTest_communication,ados_preTest_interaction,ados_preTest_module,ados_preTest_play,ados_preTest_protocol,ados_preTest_socialCommunicationQuestionnaire,ados_preTest_stereotype,ados_preTest_total,condition,participant_ageInMonths,participant_gender,participant_id,task_ability,task_difficultyLevel,task_end,task_index,task_start,user
217,3,8,2.0,0,ADOS-G,22.0,2,11,RET,63,female,62,TT,1,1625,0.0,0,User 62
218,3,8,2.0,0,ADOS-G,22.0,2,11,RET,63,female,62,TT,1,12800,1.0,0,User 62
219,3,8,2.0,0,ADOS-G,22.0,2,11,RET,63,female,62,TT,1,6060,2.0,0,User 62
220,3,8,2.0,0,ADOS-G,22.0,2,11,RET,63,female,62,JA,2,3300,3.0,0,User 62
221,3,8,2.0,0,ADOS-G,22.0,2,11,RET,63,female,62,TT,1,15389,4.0,0,User 62
222,3,8,2.0,0,ADOS-G,22.0,2,11,RET,63,female,62,IM,2,7802,5.0,0,User 62
223,4,5,2.0,0,ADOS-G,8.0,1,9,SHT,42,male,65,TT,1,6873,1.0,0,User 65
224,4,5,2.0,0,ADOS-G,8.0,1,9,SHT,42,male,65,TT,1,5320,1.0,0,User 65
225,4,5,2.0,0,ADOS-G,8.0,1,9,SHT,42,male,65,TT,1,7842,2.0,0,User 65
226,4,5,2.0,0,ADOS-G,8.0,1,9,SHT,42,male,65,TT,1,6111,2.0,0,User 65


In [27]:
# Print the distinct users that have at least one row with module 2.
module_2_rows = df[df['ados_preTest_module'] == 2]
print(module_2_rows['user'].unique())

['User 62' 'User 65' 'User 66' 'User 73']


In [24]:
# Print the distinct users whose recorded age is 72 months or more.
age_72_plus_rows = df[df['participant_ageInMonths'] >= 72]
print(age_72_plus_rows['user'].unique())

['User 66' 'User 69' 'User 72' 'User 75']


In [28]:
# Show the rows of participants aged 72 months or more.
is_72_months_or_older = df['participant_ageInMonths'] >= 72
df[is_72_months_or_older]

Unnamed: 0,ados_preTest_communication,ados_preTest_interaction,ados_preTest_module,ados_preTest_play,ados_preTest_protocol,ados_preTest_socialCommunicationQuestionnaire,ados_preTest_stereotype,ados_preTest_total,condition,participant_ageInMonths,participant_gender,participant_id,task_ability,task_difficultyLevel,task_end,task_index,task_start,user
230,7,10,2.0,0,ADOS-G,25.0,2,17,SHT,72,female,66,TT,1,9768,0.0,0,User 66
231,7,10,2.0,0,ADOS-G,25.0,2,17,SHT,72,female,66,TT,1,3658,1.0,0,User 66
232,7,10,2.0,0,ADOS-G,25.0,2,17,SHT,72,female,66,JA,2,2070,2.0,0,User 66
233,7,10,2.0,0,ADOS-G,25.0,2,17,SHT,72,female,66,TT,1,10298,3.0,0,User 66
234,7,10,2.0,0,ADOS-G,25.0,2,17,SHT,72,female,66,IM,2,2876,4.0,0,User 66
241,7,10,1.0,3,ADOS-G,18.0,4,17,SHT,74,male,69,TT,1,7772,2.0,0,User 69
242,7,10,1.0,3,ADOS-G,18.0,4,17,SHT,74,male,69,TT,1,5543,2.0,0,User 69
243,7,10,1.0,3,ADOS-G,18.0,4,17,SHT,74,male,69,TT,1,7196,4.0,0,User 69
244,7,10,1.0,3,ADOS-G,18.0,4,17,SHT,74,male,69,TT,1,5742,4.0,0,User 69
245,7,10,1.0,3,ADOS-G,18.0,4,17,SHT,74,male,69,IM,1,8471,5.0,0,User 69


In [41]:
# Count rows per task difficulty level.
by_difficulty = df.groupby(['task_difficultyLevel'])
by_difficulty['user'].count()

task_difficultyLevel
0     37
1    251
2     56
3      1
Name: user, dtype: int64

In [42]:
# Count rows per task ability code.
by_ability = df.groupby(['task_ability'])
by_ability['user'].count()

task_ability
       37
IM     46
JA     54
TT    208
Name: user, dtype: int64

In [9]:
# Names of the joints stored under the 'skeleton' entry, in stored order.
skeleton = example_data['skeleton']
skeleton_keys = list(skeleton)
skeleton_keys

['elbow_left',
 'elbow_right',
 'hand_left',
 'hand_right',
 'head',
 'sholder_center',
 'sholder_left',
 'sholder_right',
 'wrist_left',
 'wrist_right']

In [10]:
# Fields recorded for a single joint (using 'elbow_left' as the sample).
example_data['skeleton']['elbow_left'].keys()

dict_keys(['confidence', 'x', 'y', 'z'])

In [11]:
# Number of samples in the 'x' series of one joint.
len(example_data['skeleton']['elbow_left']['x'])

10279

In [12]:
# Fields available under the 'eye_gaze' entry.
example_data['eye_gaze'].keys()

dict_keys(['rx', 'ry', 'rz'])

In [13]:
# Number of samples in the 'rx' series of the eye-gaze entry.
len(example_data['eye_gaze']['rx'])

10279

In [14]:
# Fields available under the 'head_gaze' entry.
example_data['head_gaze'].keys()

dict_keys(['rx', 'ry', 'rz'])

In [15]:
# Number of samples in the 'rx' series of the head-gaze entry.
len(example_data['head_gaze']['rx'])

10279

In [16]:
# Shape of the array returned by utils.extract_joints for the example file.
# NOTE(review): presumably (joints, samples, fields per joint), given the 10
# joints and 4 per-joint fields listed above — confirm against utils.
utils.extract_joints(example_data).shape

(10, 10279, 4)

In [17]:
# Shape of the first item returned by utils.extract_gaze.
# NOTE(review): which of eye/head gaze comes first is not visible here —
# confirm against utils.
utils.extract_gaze(example_data)[0].shape

(10279, 3)

In [18]:
# Shape of the second item returned by utils.extract_gaze.
# NOTE(review): which of eye/head gaze comes second is not visible here —
# confirm against utils.
utils.extract_gaze(example_data)[1].shape

(10279, 3)