In [1]:
import pandas as pd # For data manipulation
import numpy as np # For numerical operations
import glob # to read all the files at once
import os # Get the current working directory
import dill # For saving and loading Python objects
import matplotlib.pyplot as plt # For plotting
import seaborn as sns
import scipy.stats as stats # For statistical tests
# from sklearn.linear_model import LinearRegression # For linear regression
import statsmodels.formula.api as smf # For statistical models
from scipy.optimize import minimize # For optimization

# Load Data

In [7]:
# Read Data
# Load every JSON file in the data folder and stack them into one DataFrame.

# Path to your Data folder (relative to the notebook's working directory)
data_folder = 'Data'

# Get all JSON files in the folder. glob returns paths in an arbitrary,
# filesystem-dependent order, so sort them to keep the row order of
# `data_set` reproducible across machines and re-runs.
files = sorted(glob.glob(os.path.join(data_folder, '*.json')))
# Optional filter (disabled): keep only files whose filename (without path)
# splits into 3 parts on "_"
#files = [f for f in files if len(os.path.basename(f).split('_')) == 3]

# Fail early with an actionable message; otherwise pd.concat raises the
# opaque "No objects to concatenate" when the folder is missing or empty.
if not files:
    raise FileNotFoundError(
        f"No .json files found in {os.path.abspath(data_folder)!r}; "
        "check data_folder and the notebook's working directory."
    )

# Read and concatenate all files; ignore_index gives one continuous RangeIndex
data_frames = [pd.read_json(file) for file in files]
data_set = pd.concat(data_frames, ignore_index=True)
data_set

Unnamed: 0,width,height,webaudio,browser,browser_version,mobile,os,fullscreen,vsync_rate,webcam,...,broad_category,image_png,image_id,category_name,category,distinct_rating,distinct_type,correct_response,item_type,cor_ans
0,562.0,835.0,1.0,chrome,142.0.0,0.0,Mac OS,1.0,60.34,1.0,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,,,,,,,,,,,...,,,,,,,,,,
610,,,,,,,,,,,...,M,M_Quartzite_13.png,285.0,Quartzite,18.0,2.27,Bottom,a,Foil,0.0
611,,,,,,,,,,,...,,,,,,,,,,
612,,,,,,,,,,,...,I,I_Gabbro_02.png,50.0,Gabbro,4.0,2.73,Bottom,l,Target,0.0


# Filter Data

In [8]:
# Memory phase data
# Keep only the rows whose task is 'memory_phase', restrict them to the
# columns listed below, and set the column dtypes in one chained expression.
filtered_data_memory = (
    data_set
    .loc[
        data_set['task'].eq('memory_phase'),
        [
            'trial_index', 'subject_id', 'group', 'task', 'item_type',
            'category_name', 'category', 'broad_category',
            'image_png', 'image_id', 'distinct_rating',
        ],
    ]
    .copy()
    .astype({
        # Label-like columns become pandas categoricals
        "subject_id": "category",
        "category_name": "category",
        "broad_category": "category",
        # Numeric id columns become integers
        "category": "int",
        "image_id": "int",
    })
)
filtered_data_memory.head(10)

Unnamed: 0,trial_index,subject_id,group,task,item_type,category_name,category,broad_category,image_png,image_id,distinct_rating
6,6,tgoptzp7,Group2,memory_phase,,Gabbro,4,I,I_Gabbro_14.png,62,2.95
8,8,tgoptzp7,Group2,memory_phase,,Diorite,3,I,I_Diorite_01.png,33,2.97
10,10,tgoptzp7,Group2,memory_phase,,Pumice,9,I,I_Pumice_01.png,129,3.25
12,12,tgoptzp7,Group2,memory_phase,,Peridotite,8,I,I_Peridotite_13.png,125,7.28
14,14,tgoptzp7,Group2,memory_phase,,Shale,30,S,S_Shale_03.png,467,1.95
16,16,tgoptzp7,Group2,memory_phase,,Dolomite,25,S,S_Dolomite_16.png,400,1.71
18,18,tgoptzp7,Group2,memory_phase,,Gneiss,13,M,M_Gneiss_14.png,206,3.41
20,20,tgoptzp7,Group2,memory_phase,,Diorite,3,I,I_Diorite_10.png,42,2.84
22,22,tgoptzp7,Group2,memory_phase,,Gneiss,13,M,M_Gneiss_10.png,202,3.09
24,24,tgoptzp7,Group2,memory_phase,,Basalt,2,I,I_Basalt_13.png,29,2.68


In [9]:
# Test phase data
# Keep only the rows whose task is 'test_phase', restricted to the response,
# accuracy, timing and stimulus columns listed below.
filtered_data_test = data_set.loc[
    data_set['task'] == 'test_phase',
    ['trial_index','subject_id', 'group','task','item_type',
     'response','correct_response', 'cor_ans','rt', 
     'category_name', 'category','broad_category',
     'image_png', 'image_id', 'distinct_rating']
    ].copy()

# Guard the boolean cast below: astype("bool") converts NaN to True, so a
# test trial with a missing `cor_ans` would silently be scored as correct.
# The integer casts already raise on missing values; make this column
# equally strict instead of corrupting the accuracy data.
if filtered_data_test['cor_ans'].isna().any():
    raise ValueError(
        f"{int(filtered_data_test['cor_ans'].isna().sum())} test_phase row(s) "
        "have a missing 'cor_ans'; drop or score them before casting to bool."
    )

filtered_data_test = filtered_data_test.astype({
    "subject_id": "category",
    "group": "category",
    'item_type': "category",
    "broad_category": "category",
    "image_id": "int",
    "category_name": "category",
    "category": "int",
    "cor_ans": "bool"
    })

filtered_data_test.head(10)

Unnamed: 0,trial_index,subject_id,group,task,item_type,response,correct_response,cor_ans,rt,category_name,category,broad_category,image_png,image_id,distinct_rating
107,107,tgoptzp7,Group2,test_phase,Foil,l,a,False,3018.0,Shale,30,S,S_Shale_08.png,472,2.55
109,109,tgoptzp7,Group2,test_phase,Target,a,l,False,846.0,Pumice,9,I,I_Pumice_14.png,142,3.22
111,111,tgoptzp7,Group2,test_phase,Foil,l,a,False,356.0,Diorite,3,I,I_Diorite_04.png,36,2.98
113,113,tgoptzp7,Group2,test_phase,Foil,a,a,True,323.0,Basalt,2,I,I_Basalt_02.png,18,1.84
115,115,tgoptzp7,Group2,test_phase,Target,l,l,True,337.0,Marble,15,M,M_Marble_02.png,226,3.2
117,117,tgoptzp7,Group2,test_phase,Target,a,l,False,346.0,Gneiss,13,M,M_Gneiss_15.png,207,3.11
119,119,tgoptzp7,Group2,test_phase,Target,a,l,False,367.0,Dolomite,25,S,S_Dolomite_07.png,391,1.73
121,121,tgoptzp7,Group2,test_phase,Target,l,l,True,322.0,Gneiss,13,M,M_Gneiss_14.png,206,3.41
123,123,tgoptzp7,Group2,test_phase,Foil,l,a,False,281.0,Marble,15,M,M_Marble_06.png,230,3.4
125,125,tgoptzp7,Group2,test_phase,Target,l,l,True,306.0,Gneiss,13,M,M_Gneiss_10.png,202,3.09
