# Analyzing the collected experiment data

In [1]:
import pandas as pd
import os

## Download data from Google Cloud Storage

In [2]:
!gsutil -m cp -R gs://urm-pr-experiment.appspot.com/experiment_results/* ./data

Copying gs://urm-pr-experiment.appspot.com/experiment_results/music_1_stroop_31762cb9-3f06-46f8-a6aa-0cb7e725cba9_1673029476671.csv...
Copying gs://urm-pr-experiment.appspot.com/experiment_results/music_1_stroop_7f0450b7-99f7-47e9-8895-9011a537396a_1673024034386.csv...
Copying gs://urm-pr-experiment.appspot.com/experiment_results/music_1_stroop_8184eb4a-6f2a-435f-99e0-de643d884b97_1673016760844.csv...
Copying gs://urm-pr-experiment.appspot.com/experiment_results/music_1_stroop_88b5c856-49c7-4113-873d-671c29bff218_1673092484960.csv...
Copying gs://urm-pr-experiment.appspot.com/experiment_results/music_1_stroop_9547b8d4-5448-4b85-b0da-2c8461c00bcb_1673014857531.csv...
Copying gs://urm-pr-experiment.appspot.com/experiment_results/music_1_stroop_a4948d9f-faa2-415f-a376-4c3dea9296ce_1673083909712.csv...
Copying gs://urm-pr-experiment.appspot.com/experiment_results/music_2_digit_span_8184eb4a-6f2a-435f-99e0-de643d884b97_1673017027433.csv...
Copying gs://urm-pr-experiment.appspot.com/experime

In [3]:
# Directory the result CSVs were copied into.
data_dir = "data"
# Task identifiers exactly as they appear inside the result filenames.
valid_tasks = ["1_stroop", "2_digit_span", "3_symmetry_span"]

# Keep only CSV files that belong to one of the two groups and name a known task,
# in alphabetical order.
filenames = sorted(
    name
    for name in os.listdir(data_dir)
    if name.startswith(("music", "silence"))
    and name.endswith(".csv")
    and any(task in name for task in valid_tasks)
)
filenames


['music_1_stroop_31762cb9-3f06-46f8-a6aa-0cb7e725cba9_1673029476671.csv',
 'music_1_stroop_7f0450b7-99f7-47e9-8895-9011a537396a_1673024034386.csv',
 'music_1_stroop_8184eb4a-6f2a-435f-99e0-de643d884b97_1673016760844.csv',
 'music_1_stroop_88b5c856-49c7-4113-873d-671c29bff218_1673092484960.csv',
 'music_1_stroop_9547b8d4-5448-4b85-b0da-2c8461c00bcb_1673014857531.csv',
 'music_1_stroop_a4948d9f-faa2-415f-a376-4c3dea9296ce_1673083909712.csv',
 'music_1_stroop_a700e9ac-d53c-4dcd-8ea4-eb6bf58ad454_1673018927021.csv',
 'music_1_stroop_b8143791-ab78-4c4b-bc14-ab3f16549bb5_1672997616638.csv',
 'music_2_digit_span_31762cb9-3f06-46f8-a6aa-0cb7e725cba9_1673029701436.csv',
 'music_2_digit_span_7f0450b7-99f7-47e9-8895-9011a537396a_1673024409957.csv',
 'music_2_digit_span_8184eb4a-6f2a-435f-99e0-de643d884b97_1673017027433.csv',
 'music_2_digit_span_88b5c856-49c7-4113-873d-671c29bff218_1673092725123.csv',
 'music_2_digit_span_9547b8d4-5448-4b85-b0da-2c8461c00bcb_1673015063300.csv',
 'music_2_digit_sp

In [4]:
def get_task(filename):
    """Return the first entry of ``valid_tasks`` that occurs in ``filename``.

    Raises:
        ValueError: if no known task identifier is a substring of ``filename``.
    """
    matching_task = next((task for task in valid_tasks if task in filename), None)
    if matching_task is None:
        raise ValueError(f"Task not found in filename {filename}")
    return matching_task

In [5]:
def extract_data_from_filename(filename):
    """Split a result filename into its metadata fields.

    The task identifier (looked up via ``get_task``) is removed from the name,
    and the remaining stem is split on underscores into group, participant id
    and timestamp.

    Returns:
        dict with the keys 'group', 'task' (the task identifier without its
        leading number), 'participant_id', 'timestamp' (kept as a string) and
        'filepath' (the file's path below ``data_dir``).
    """
    task = get_task(filename)
    stem = filename.replace(f'_{task}', '').split('.')[0]
    fields = stem.split('_')
    group, participant_id, timestamp = fields[0], fields[1], fields[2]
    # Everything after the first underscore, e.g. "2_digit_span" -> "digit_span".
    task_name = task.partition('_')[2]
    return {
        'group': group,
        'task': task_name,
        'participant_id': participant_id,
        'timestamp': timestamp,
        'filepath': f'{data_dir}/{filename}',
    }

data = [extract_data_from_filename(f) for f in filenames]
data[:3]

[{'group': 'music',
  'task': 'stroop',
  'participant_id': '31762cb9-3f06-46f8-a6aa-0cb7e725cba9',
  'timestamp': '1673029476671',
  'filepath': 'data/music_1_stroop_31762cb9-3f06-46f8-a6aa-0cb7e725cba9_1673029476671.csv'},
 {'group': 'music',
  'task': 'stroop',
  'participant_id': '7f0450b7-99f7-47e9-8895-9011a537396a',
  'timestamp': '1673024034386',
  'filepath': 'data/music_1_stroop_7f0450b7-99f7-47e9-8895-9011a537396a_1673024034386.csv'},
 {'group': 'music',
  'task': 'stroop',
  'participant_id': '8184eb4a-6f2a-435f-99e0-de643d884b97',
  'timestamp': '1673016760844',
  'filepath': 'data/music_1_stroop_8184eb4a-6f2a-435f-99e0-de643d884b97_1673016760844.csv'}]

In [6]:
from collections import defaultdict

def group_by_participant_id(data):
    """Group file-metadata records by their participant id.

    Args:
        data: iterable of dicts, each containing a 'participant_id' key.

    Returns:
        A ``defaultdict(list)`` mapping each participant id to the list of its
        records, in input order. Because it is a defaultdict, looking up an
        unknown id creates an empty entry as a side effect.
    """
    result = defaultdict(list)
    for record in data:
        # defaultdict supplies the empty list on first access, so no explicit
        # membership check is needed.
        result[record['participant_id']].append(record)
    return result

data_by_participant_id = group_by_participant_id(data)

In [7]:
valid_task_names = ['_'.join(t.split('_')[1:]) for t in valid_tasks]
valid_task_names

['stroop', 'digit_span', 'symmetry_span']

In [8]:
# quick test for function below -> should output three missing tasks
data_by_participant_id['1'] = []

In [9]:
def verify_completeness(data_by_participant_id):
    """Report every task in ``valid_task_names`` that a participant has no record for.

    Prints one line per missing (participant, task) combination, or a single
    confirmation line when nothing is missing.

    Args:
        data_by_participant_id: mapping of participant id to a list of record
            dicts, each containing a 'task' key.
    """
    anything_missing = False
    for participant_id, records in data_by_participant_id.items():
        done = {record["task"] for record in records}
        for task in (name for name in valid_task_names if name not in done):
            print(f"Participant {participant_id} did not complete task {task}")
            anything_missing = True
    if anything_missing:
        return
    print("All data is complete")


verify_completeness(data_by_participant_id)


Participant 1 did not complete task stroop
Participant 1 did not complete task digit_span
Participant 1 did not complete task symmetry_span


In [10]:
del data_by_participant_id['1']
verify_completeness(data_by_participant_id)

All data is complete


In [11]:
def get_task_result_dfs(data_by_participant_id):
    """Load every result CSV and build one DataFrame per task.

    Each file is read from its record's 'filepath' and tagged with the
    participant id and the record's 'group' before the frames of the same task
    are concatenated.

    Args:
        data_by_participant_id: mapping of participant id to a list of record
            dicts with the keys 'filepath', 'group' and 'task'.

    Returns:
        dict mapping task name to the concatenated DataFrame. The original row
        index of every file is kept, so index labels repeat across participants.
    """
    frames_by_task = defaultdict(list)
    for participant_id, records in data_by_participant_id.items():
        for record in records:
            tagged = pd.read_csv(record["filepath"]).assign(
                participant_id=participant_id, group=record["group"]
            )
            frames_by_task[record["task"]].append(tagged)
    return {task: pd.concat(frames) for task, frames in frames_by_task.items()}


task_result_dfs = get_task_result_dfs(data_by_participant_id)

Let's also quickly verify that the DataFrames have the same number of rows/trials for all participants:

In [74]:
for df in task_result_dfs.values():
    print(df.groupby("participant_id").size().value_counts()) # if this returns only a single value, everything is ok

80    16
dtype: int64
17    16
dtype: int64
66    16
dtype: int64


Everything fine!

## Analyze Stroop task results

In [12]:
stroop_results = task_result_dfs["stroop"].copy()
stroop_results.head()

Unnamed: 0,rt,stimulus,key_press,Congruency,letter,type,trial_type,trial_index,time_elapsed,internal_node_id,correct,participant_id,group
0,1146.0,"<table border=""0"" width=""600""><tr height=""300""...",78,Incon,n,test trial,html-keyboard-response,53,81007,0.0-6.0-1.0,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
1,631.0,"<table border=""0"" width=""600""><tr height=""300""...",78,Incon,n,test trial,html-keyboard-response,55,82652,0.0-6.0-1.1,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
2,661.0,"<table border=""0"" width=""600""><tr height=""300""...",78,Con,n,test trial,html-keyboard-response,57,83821,0.0-6.0-1.2,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
3,783.0,"<table border=""0"" width=""600""><tr height=""300""...",66,Incon,b,test trial,html-keyboard-response,59,85117,0.0-6.0-1.3,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
4,725.0,"<table border=""0"" width=""600""><tr height=""300""...",66,Incon,b,test trial,html-keyboard-response,61,86605,0.0-6.0-1.4,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music


In [13]:
stroop_results.type.value_counts()

test trial    1280
Name: type, dtype: int64

In [14]:
stroop_results.trial_type.value_counts()

html-keyboard-response    1280
Name: trial_type, dtype: int64

Both `type` and `trial_type` contain only a single value, so it's safe to remove them. The `internal_node_id` column is also irrelevant.

In [15]:
stroop_results = stroop_results.drop(columns=["type", "trial_type", "internal_node_id"])


### Making sense of stimulus and letter columns

In [16]:
def extract_rgb(stimulus):
    """Return the channel values of the first ``rgb(...)`` expression in ``stimulus``.

    Args:
        stimulus: string containing an ``rgb(r, g, b)`` expression.

    Returns:
        list of ints, one per comma-separated value inside the parentheses.
    """
    after_prefix = stimulus.split("rgb(")[1]
    channel_text = after_prefix.split(")")[0]
    return list(map(int, channel_text.split(",")))

stroop_results.stimulus.apply(extract_rgb).value_counts()

[0, 255, 0]      320
[255, 255, 0]    320
[0, 0, 255]      320
[255, 0, 0]      320
Name: stimulus, dtype: int64

In [17]:
def get_color(rgb_vals):
    """Translate an ``[r, g, b]`` triple into one of the four known color names.

    Returns:
        "red", "yellow", "green" or "blue" for the matching triple, otherwise
        "unknown".
    """
    known_colors = (
        ((255, 0, 0), "red"),
        ((255, 255, 0), "yellow"),
        ((0, 255, 0), "green"),
        ((0, 0, 255), "blue"),
    )
    for (red, green, blue), name in known_colors:
        if rgb_vals[0] == red and rgb_vals[1] == green and rgb_vals[2] == blue:
            return name
    return "unknown"

stroop_results.stimulus.apply(extract_rgb).apply(get_color).value_counts()

green     320
yellow    320
blue      320
red       320
Name: stimulus, dtype: int64

In [18]:
def letter_to_color(key):
    """Translate a response letter into its color name.

    "v", "b", "n" and "m" stand for red, yellow, green and blue respectively;
    anything else yields "unknown".
    """
    for letter, color in zip("vbnm", ("red", "yellow", "green", "blue")):
        if key == letter:
            return color
    return "unknown"
        
stroop_results.letter.apply(letter_to_color).value_counts()

green     320
yellow    320
blue      320
red       320
Name: letter, dtype: int64

The `key_press` column contains the JavaScript key codes of the keys that the participants pressed (for a demo, check out [this key code tool](https://www.toptal.com/developers/keycode)).

In [19]:
def key_to_color(key):
    """Translate a pressed key code into the color name it stands for.

    Codes 86, 66, 78 and 77 map to red, yellow, green and blue respectively;
    any other value yields "unknown".
    """
    code_to_color = ((86, "red"), (66, "yellow"), (78, "green"), (77, "blue"))
    for code, color in code_to_color:
        if key == code:
            return color
    return "unknown"

stroop_results.key_press.apply(key_to_color).value_counts()

yellow    322
red       321
blue      319
green     318
Name: key_press, dtype: int64

In [21]:
def extract_true_and_response(row):
    """Decode the displayed color and the answered color for one trial.

    Args:
        row: trial row exposing ``stimulus`` (text containing an ``rgb(...)``
            expression) and ``key_press`` (key code of the response).

    Returns:
        pd.Series with two entries: the stimulus color name, then the color
        name belonging to the pressed key.
    """
    true_color = get_color(extract_rgb(row.stimulus))
    response = key_to_color(row.key_press)
    return pd.Series([true_color, response])


# Decode, per trial, the displayed color and the color the participant answered.
color_columns = stroop_results.apply(
    extract_true_and_response, axis=1, result_type="expand"
).rename(columns={0: "true_color", 1: "response"})
stroop_results[["true_color", "response"]] = color_columns
# The raw columns are no longer needed once the colors are decoded.
stroop_results = stroop_results.drop(columns=["stimulus", "letter"])
stroop_results

Unnamed: 0,rt,key_press,Congruency,trial_index,time_elapsed,correct,participant_id,group,true_color,response
0,1146.0,78,Incon,53,81007,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music,green,green
1,631.0,78,Incon,55,82652,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music,green,green
2,661.0,78,Con,57,83821,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music,green,green
3,783.0,66,Incon,59,85117,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music,yellow,yellow
4,725.0,66,Incon,61,86605,True,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music,yellow,yellow
...,...,...,...,...,...,...,...,...,...,...
75,938.0,66,Con,203,249473,True,f7cee892-7665-4c02-8b0e-005e0c255378,silence,yellow,yellow
76,651.0,78,Incon,205,250379,True,f7cee892-7665-4c02-8b0e-005e0c255378,silence,green,green
77,549.0,77,Con,207,252434,True,f7cee892-7665-4c02-8b0e-005e0c255378,silence,blue,blue
78,525.0,86,Con,209,254715,True,f7cee892-7665-4c02-8b0e-005e0c255378,silence,red,red


In [22]:
stroop_results.loc[stroop_results.true_color != stroop_results.response, "correct"].sum()

0

In [23]:
stroop_results.loc[stroop_results.true_color == stroop_results.response, "correct"].sum()

1245

In [24]:
stroop_results.correct.sum()

1245

In [25]:
stroop_results.groupby(["group", "participant_id"])[["rt", "correct"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,rt,correct
group,participant_id,Unnamed: 2_level_1,Unnamed: 3_level_1
music,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,829.75,0.9875
music,7f0450b7-99f7-47e9-8895-9011a537396a,1414.62625,0.95
music,8184eb4a-6f2a-435f-99e0-de643d884b97,687.65,0.975
music,88b5c856-49c7-4113-873d-671c29bff218,873.21375,0.8875
music,9547b8d4-5448-4b85-b0da-2c8461c00bcb,716.91,1.0
music,a4948d9f-faa2-415f-a376-4c3dea9296ce,688.3775,0.9625
music,a700e9ac-d53c-4dcd-8ea4-eb6bf58ad454,898.625,0.9875
music,b8143791-ab78-4c4b-bc14-ab3f16549bb5,792.51,0.9625
silence,21f08148-6cc6-4223-8364-24224a1f1945,1381.95375,1.0
silence,3cceca25-8b8b-49ad-b24d-03ed68601439,800.75,0.95


In [26]:
stroop_results.groupby(["group"])[["rt", "correct"]].describe()

Unnamed: 0_level_0,rt,rt,rt,rt,rt,rt,rt,rt
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
music,640.0,862.707812,337.317003,347.7,658.425,772.75,1007.0,3659.5
silence,640.0,890.924688,422.263237,309.0,636.925,766.85,996.275,4018.0


## Analyzing digit span task results

In [47]:
digit_span_results = task_result_dfs["digit_span"].copy()
digit_span_results

Unnamed: 0,rt,recall,stimuli,accuracy,trial_type,trial_index,time_elapsed,internal_node_id,participant_id,group
0,4606,073,073,1,digit-span-recall,5,21230,0.0-2.0-1.0,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
1,2173,3290,3290,1,digit-span-recall,11,28462,0.0-2.0-1.1,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
2,3313,20983,20938,0,digit-span-recall,18,37854,0.0-2.0-1.2,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
3,3887,230,230,1,digit-span-recall,24,53637,0.0-4.0-1.0,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
4,3200,3497,3497,1,digit-span-recall,30,61917,0.0-4.0-1.1,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
...,...,...,...,...,...,...,...,...,...,...
12,11484,6528903,6528903,1,digit-span-recall,103,232548,0.0-4.0-1.9,f7cee892-7665-4c02-8b0e-005e0c255378,silence
13,16100,6539482,59326480,0,digit-span-recall,113,257675,0.0-4.0-1.10,f7cee892-7665-4c02-8b0e-005e0c255378,silence
14,9110,07469581,07469518,0,digit-span-recall,123,275813,0.0-4.0-1.11,f7cee892-7665-4c02-8b0e-005e0c255378,silence
15,6025,5864319,5864319,1,digit-span-recall,132,289864,0.0-4.0-1.12,f7cee892-7665-4c02-8b0e-005e0c255378,silence


In [48]:
digit_span_results.accuracy.value_counts()

1    161
0    111
Name: accuracy, dtype: int64

In [49]:
digit_span_results["correct"] = digit_span_results.accuracy == 1
digit_span_results.drop(columns=["accuracy", "internal_node_id"], inplace=True)

In [52]:
digit_span_results.correct.sum() / len(digit_span_results)

0.5919117647058824

In [53]:
digit_span_results.groupby(["group", "participant_id"])[["rt", "correct"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,rt,correct
group,participant_id,Unnamed: 2_level_1,Unnamed: 3_level_1
music,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,4292.0,0.647059
music,7f0450b7-99f7-47e9-8895-9011a537396a,12258.058824,0.529412
music,8184eb4a-6f2a-435f-99e0-de643d884b97,5197.764706,0.647059
music,88b5c856-49c7-4113-873d-671c29bff218,5627.117647,0.470588
music,9547b8d4-5448-4b85-b0da-2c8461c00bcb,4353.529412,0.647059
music,a4948d9f-faa2-415f-a376-4c3dea9296ce,8219.588235,0.705882
music,a700e9ac-d53c-4dcd-8ea4-eb6bf58ad454,7235.058824,0.529412
music,b8143791-ab78-4c4b-bc14-ab3f16549bb5,5286.294118,0.647059
silence,21f08148-6cc6-4223-8364-24224a1f1945,7106.352941,0.647059
silence,3cceca25-8b8b-49ad-b24d-03ed68601439,9103.941176,0.588235


In [55]:
digit_span_results.groupby(["group"]).rt.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
music,136.0,6558.676471,7868.300954,696.0,3891.5,5511.5,7695.25,91447.0
silence,136.0,6965.955882,3013.944259,2094.0,4598.25,6361.0,8664.75,16100.0


In [57]:
digit_span_results.groupby(["group"]).correct.sum() / digit_span_results.groupby(["group"]).correct.count()

group
music      0.602941
silence    0.580882
Name: correct, dtype: float64

### Analyze results by sequence length

TODO

## Analyzing symmetry span results

In [154]:
symmetry_span_results = task_result_dfs["symmetry_span"].copy()
symmetry_span_results.set_index("participant_id")

Unnamed: 0_level_0,rt,recall,stimuli,accuracy,set_size,trial_type,trial_index,time_elapsed,internal_node_id,group
participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
31762cb9-3f06-46f8-a6aa-0cb7e725cba9,4778.0,1036,220312,3.0,3.0,spatial-span-recall,8,35011,0.0-2.0-1.0,music
31762cb9-3f06-46f8-a6aa-0cb7e725cba9,2157.0,07145,00133211,4.0,4.0,spatial-span-recall,18,42787,0.0-2.0-1.1,music
31762cb9-3f06-46f8-a6aa-0cb7e725cba9,2197.0,30811,03002023,4.0,4.0,spatial-span-recall,28,50211,0.0-2.0-1.2,music
31762cb9-3f06-46f8-a6aa-0cb7e725cba9,2526.0,,,1.0,,symmetry-judgement-task,31,66667,0.0-4.0-0.0,music
31762cb9-3f06-46f8-a6aa-0cb7e725cba9,949.0,,,1.0,,symmetry-judgement-task,33,68931,0.0-4.0-0.1,music
...,...,...,...,...,...,...,...,...,...,...
f7cee892-7665-4c02-8b0e-005e0c255378,1221.0,,,1.0,,symmetry-judgement-task,192,350272,0.0-8.0-0.8-1.32,silence
f7cee892-7665-4c02-8b0e-005e0c255378,807.0,,,1.0,,symmetry-judgement-task,195,352248,0.0-8.0-0.8-1.33,silence
f7cee892-7665-4c02-8b0e-005e0c255378,806.0,,,1.0,,symmetry-judgement-task,198,354214,0.0-8.0-0.8-1.34,silence
f7cee892-7665-4c02-8b0e-005e0c255378,1073.0,,,1.0,,symmetry-judgement-task,201,356441,0.0-8.0-0.8-1.35,silence


### Identifying and removing demo trial results

Unfortunately, there was a mistake in the symmetry span task setup: the CSV file doesn't clearly indicate which trials are only demo trials and which ones are part of the "real task" (after the demo). So, we need to compare the results for one arbitrary participant with the results of a local demo run that we did without the `test_procedure` part of the symmetry span task.

In [155]:
pd.set_option('display.max_rows', 500)
symmetry_span_results[symmetry_span_results.participant_id == "31762cb9-3f06-46f8-a6aa-0cb7e725cba9"]

Unnamed: 0,rt,recall,stimuli,accuracy,set_size,trial_type,trial_index,time_elapsed,internal_node_id,participant_id,group
0,4778.0,1036.0,220312.0,3.0,3.0,spatial-span-recall,8,35011,0.0-2.0-1.0,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
1,2157.0,7145.0,133211.0,4.0,4.0,spatial-span-recall,18,42787,0.0-2.0-1.1,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
2,2197.0,30811.0,3002023.0,4.0,4.0,spatial-span-recall,28,50211,0.0-2.0-1.2,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
3,2526.0,,,1.0,,symmetry-judgement-task,31,66667,0.0-4.0-0.0,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
4,949.0,,,1.0,,symmetry-judgement-task,33,68931,0.0-4.0-0.1,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
5,769.0,,,1.0,,symmetry-judgement-task,35,71051,0.0-4.0-0.2,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
6,1050.0,,,1.0,,symmetry-judgement-task,37,73419,0.0-4.0-0.3,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
7,1082.0,,,1.0,,symmetry-judgement-task,39,75827,0.0-4.0-0.4,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
8,2360.0,,,1.0,,symmetry-judgement-task,43,85507,0.0-6.0-0.0-1.0,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
9,1456.0,,,1.0,,symmetry-judgement-task,46,88395,0.0-6.0-0.0-1.1,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music


In [156]:
pd.reset_option('display.max_rows')

In [157]:
demo_run = pd.read_csv("symmetry_span_before_test_procedure.csv")
demo_run

Unnamed: 0,rt,recall,stimuli,accuracy,set_size,trial_type,trial_index,time_elapsed,internal_node_id
0,2722,760.0,131200.0,3,3.0,spatial-span-recall,8,13685,0.0-2.0-1.0
1,2495,12012.0,1020030.0,4,4.0,spatial-span-recall,18,21048,0.0-2.0-1.1
2,3119,3141213.0,3323031.0,4,4.0,spatial-span-recall,28,29162,0.0-2.0-1.2
3,1333,,,1,,symmetry-judgement-task,31,32508,0.0-4.0-0.0
4,864,,,1,,symmetry-judgement-task,33,34511,0.0-4.0-0.1
5,667,,,1,,symmetry-judgement-task,35,36340,0.0-4.0-0.2
6,670,,,0,,symmetry-judgement-task,37,38168,0.0-4.0-0.3
7,775,,,1,,symmetry-judgement-task,39,40086,0.0-4.0-0.4
8,2081,,,1,,symmetry-judgement-task,43,47437,0.0-6.0-0.0-1.0
9,836,,,1,,symmetry-judgement-task,46,49445,0.0-6.0-0.0-1.1


We see that the last "demo trial index" is 76, so we should remove all the entries in the results where the value of `trial_index` is <= 76. Again, the `internal_node_id` column is not relevant.

In [158]:
# Rows with a trial_index up to and including this value belong to the demo run.
LAST_DEMO_TRIAL_INDEX = 76

symmetry_span_results = (
    symmetry_span_results[symmetry_span_results.trial_index > LAST_DEMO_TRIAL_INDEX]
    .reset_index(drop=True)
    # errors="ignore" keeps this cell re-runnable once the column is already gone.
    .drop(columns=["internal_node_id"], errors="ignore")
)
symmetry_span_results

Unnamed: 0,rt,recall,stimuli,accuracy,set_size,trial_type,trial_index,time_elapsed,participant_id,group
0,1578.0,,,1.0,,symmetry-judgement-task,80,138347,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
1,1603.0,,,1.0,,symmetry-judgement-task,83,141267,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
2,874.0,,,1.0,,symmetry-judgement-task,86,143515,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
3,3108.0,0102,002202,3.0,3.0,spatial-span-recall,88,146634,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
4,1308.0,,,1.0,,symmetry-judgement-task,91,150108,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
...,...,...,...,...,...,...,...,...,...,...
715,1221.0,,,1.0,,symmetry-judgement-task,192,350272,f7cee892-7665-4c02-8b0e-005e0c255378,silence
716,807.0,,,1.0,,symmetry-judgement-task,195,352248,f7cee892-7665-4c02-8b0e-005e0c255378,silence
717,806.0,,,1.0,,symmetry-judgement-task,198,354214,f7cee892-7665-4c02-8b0e-005e0c255378,silence
718,1073.0,,,1.0,,symmetry-judgement-task,201,356441,f7cee892-7665-4c02-8b0e-005e0c255378,silence


We notice that the `recall` and `stimuli` columns don't match in terms of format. The stimuli were always presented in a 4 x 4 grid. The `recall` column contains the numbers of the grid cells the participant selected, counting the cells from left to right, top to bottom, while the `stimuli` column contains pairs of `(row_index, column_index)` (both zero-based).

Also, it looks like the `stimuli` column only contains values for the `spatial-span-recall` task type.

In [164]:
symmetry_span_results.stimuli.notna().sum()

144

In [165]:
symmetry_span_results[symmetry_span_results.trial_type == "spatial-span-recall"].shape[0]

144

In [166]:
def reformat_recall_and_stimuli(row, grid_width=4):
    """Convert one trial's raw `stimuli` and `recall` strings into lists of cell numbers.

    `stimuli` is a comma-separated string of alternating row and column indices
    ("r0,c0,r1,c1,..."); every pair becomes the cell number
    ``grid_width * row + col``. `recall` is a comma-separated string of cell
    numbers and is only parsed to ints.

    The two fields are handled independently, so a trial with a missing recall
    still keeps its stimuli (and vice versa).

    Args:
        row: object exposing `recall` and `stimuli` attributes, e.g. a
            DataFrame row passed by ``DataFrame.apply(..., axis=1)``.
        grid_width: number of columns of the stimulus grid (4 for the 4 x 4 grid).

    Returns:
        Tuple ``(stimuli, recall)``. Each element is a list of ints, or NaN when
        the corresponding field is not a string (i.e. missing).
    """
    missing = float("nan")
    raw_recall = row.recall
    raw_stimuli = row.stimuli

    if isinstance(raw_recall, str):
        recall = [int(cell) for cell in raw_recall.split(",")]
    else:
        recall = missing

    if isinstance(raw_stimuli, str):
        coords = [int(idx) for idx in raw_stimuli.split(",")]
        # Even positions hold row indices, odd positions the matching column indices.
        stimuli = [
            grid_width * row_idx + col_idx
            for row_idx, col_idx in zip(coords[::2], coords[1::2])
        ]
    else:
        stimuli = missing

    return stimuli, recall

symmetry_span_results[["stimuli", "recall"]] = symmetry_span_results.apply(reformat_recall_and_stimuli, axis=1, result_type="expand")
symmetry_span_results

Unnamed: 0,rt,recall,stimuli,accuracy,set_size,trial_type,trial_index,time_elapsed,participant_id,group
0,1578.0,,,1.0,,symmetry-judgement-task,80,138347,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
1,1603.0,,,1.0,,symmetry-judgement-task,83,141267,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
2,874.0,,,1.0,,symmetry-judgement-task,86,143515,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
3,3108.0,"[0, 10, 2]","[0, 10, 2]",3.0,3.0,spatial-span-recall,88,146634,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
4,1308.0,,,1.0,,symmetry-judgement-task,91,150108,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
...,...,...,...,...,...,...,...,...,...,...
715,1221.0,,,1.0,,symmetry-judgement-task,192,350272,f7cee892-7665-4c02-8b0e-005e0c255378,silence
716,807.0,,,1.0,,symmetry-judgement-task,195,352248,f7cee892-7665-4c02-8b0e-005e0c255378,silence
717,806.0,,,1.0,,symmetry-judgement-task,198,354214,f7cee892-7665-4c02-8b0e-005e0c255378,silence
718,1073.0,,,1.0,,symmetry-judgement-task,201,356441,f7cee892-7665-4c02-8b0e-005e0c255378,silence


The DataFrame contents don't reflect the fact that a symmetry judgement task was shown after each cell of the spatial span recall sequence. So, the symmetry judgement tasks that precede each recall task were actually "intertwined" with it. Therefore, we can fill the NaN values in `set_size` for all symmetry-judgement-tasks with the value of the recall task that follows them. This can be achieved by backfilling the column (`fillna(method="backfill")`, or equivalently `bfill()`).

In [173]:
# Backfill within each participant so that a judgement row can never pick up the
# set size of another participant's recall trial. `bfill()` is used instead of
# `fillna(method="backfill")` because the `method` argument is deprecated in
# current pandas versions.
symmetry_span_results["set_size"] = (
    symmetry_span_results.groupby("participant_id")["set_size"].bfill()
)
symmetry_span_results

Unnamed: 0,rt,recall,stimuli,accuracy,set_size,trial_type,trial_index,time_elapsed,participant_id,group
0,1578.0,,,1.0,3.0,symmetry-judgement-task,80,138347,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
1,1603.0,,,1.0,3.0,symmetry-judgement-task,83,141267,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
2,874.0,,,1.0,3.0,symmetry-judgement-task,86,143515,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
3,3108.0,"[0, 10, 2]","[0, 10, 2]",3.0,3.0,spatial-span-recall,88,146634,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
4,1308.0,,,1.0,5.0,symmetry-judgement-task,91,150108,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
...,...,...,...,...,...,...,...,...,...,...
715,1221.0,,,1.0,4.0,symmetry-judgement-task,192,350272,f7cee892-7665-4c02-8b0e-005e0c255378,silence
716,807.0,,,1.0,4.0,symmetry-judgement-task,195,352248,f7cee892-7665-4c02-8b0e-005e0c255378,silence
717,806.0,,,1.0,4.0,symmetry-judgement-task,198,354214,f7cee892-7665-4c02-8b0e-005e0c255378,silence
718,1073.0,,,1.0,4.0,symmetry-judgement-task,201,356441,f7cee892-7665-4c02-8b0e-005e0c255378,silence


It probably makes sense to split the data for the two different types of trials into separate DataFrames.

In [175]:
# Rows of the symmetry judgement trials only; the recall-specific columns are
# dropped for them.
is_judgment = symmetry_span_results.trial_type == "symmetry-judgement-task"
symmetry_judgments = (
    symmetry_span_results.loc[is_judgment]
    .rename(columns={"accuracy": "correct"})
    .drop(columns=["trial_type", "stimuli", "recall"])
)
symmetry_judgments


Unnamed: 0,rt,correct,set_size,trial_index,time_elapsed,participant_id,group
0,1578.0,1.0,3.0,80,138347,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
1,1603.0,1.0,3.0,83,141267,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
2,874.0,1.0,3.0,86,143515,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
4,1308.0,1.0,5.0,91,150108,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
5,805.0,1.0,5.0,94,152234,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
...,...,...,...,...,...,...,...
713,1428.0,1.0,5.0,187,340074,f7cee892-7665-4c02-8b0e-005e0c255378,silence
715,1221.0,1.0,4.0,192,350272,f7cee892-7665-4c02-8b0e-005e0c255378,silence
716,807.0,1.0,4.0,195,352248,f7cee892-7665-4c02-8b0e-005e0c255378,silence
717,806.0,1.0,4.0,198,354214,f7cee892-7665-4c02-8b0e-005e0c255378,silence


In [191]:
symmetry_judgments[symmetry_judgments.rt.isna()]

Unnamed: 0,rt,correct,set_size,trial_index,time_elapsed,participant_id,group
565,,,3.0,150,349059,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence


One row's Response Time is NaN, but why?

In [190]:
symmetry_span_results.loc[symmetry_span_results.participant_id == "97b70c25-77c5-47b7-8a2c-eb59bd2842cc"]

Unnamed: 0,rt,recall,stimuli,accuracy,set_size,trial_type,trial_index,time_elapsed,participant_id,group
540,3833.0,,,1.0,5.0,symmetry-judgement-task,80,218635,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
541,1432.0,,,1.0,5.0,symmetry-judgement-task,83,221166,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
542,1444.0,,,1.0,5.0,symmetry-judgement-task,86,223711,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
543,2003.0,,,1.0,5.0,symmetry-judgement-task,89,226815,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
544,3287.0,,,1.0,5.0,symmetry-judgement-task,92,231204,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
545,8106.0,"[15, 0, 6, 8]","[14, 0, 12, 8, 1]",2.0,5.0,spatial-span-recall,94,239315,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
546,1776.0,,,1.0,4.0,symmetry-judgement-task,97,244191,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
547,2257.0,,,1.0,4.0,symmetry-judgement-task,100,247549,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
548,1279.0,,,1.0,4.0,symmetry-judgement-task,103,249928,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence
549,2293.0,,,1.0,4.0,symmetry-judgement-task,106,253325,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence


In [186]:
symmetry_judgments.loc[symmetry_judgments.rt.isna()]

Unnamed: 0,rt,correct,set_size,trial_index,time_elapsed,participant_id,group
565,,,3.0,150,349059,97b70c25-77c5-47b7-8a2c-eb59bd2842cc,silence


In [180]:
symmetry_judgments.correct.sum() / len(symmetry_judgments)

0.9826388888888888

In [181]:
symmetry_judgments.groupby(["group", "participant_id"])[["rt", "correct"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,rt,correct
group,participant_id,Unnamed: 2_level_1,Unnamed: 3_level_1
music,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,1133.777778,1.0
music,7f0450b7-99f7-47e9-8895-9011a537396a,2453.277778,0.916667
music,8184eb4a-6f2a-435f-99e0-de643d884b97,1093.472222,1.0
music,88b5c856-49c7-4113-873d-671c29bff218,1717.722222,1.0
music,9547b8d4-5448-4b85-b0da-2c8461c00bcb,1545.027778,1.0
music,a4948d9f-faa2-415f-a376-4c3dea9296ce,1196.361111,1.0
music,a700e9ac-d53c-4dcd-8ea4-eb6bf58ad454,2108.083333,1.0
music,b8143791-ab78-4c4b-bc14-ab3f16549bb5,1873.555556,0.972222
silence,21f08148-6cc6-4223-8364-24224a1f1945,3618.861111,0.944444
silence,3cceca25-8b8b-49ad-b24d-03ed68601439,1991.111111,0.944444


In [182]:
symmetry_judgments.groupby(["group"])[["rt", "correct"]].mean()

Unnamed: 0_level_0,rt,correct
group,Unnamed: 1_level_1,Unnamed: 2_level_1
music,1640.159722,0.986111
silence,1840.344948,0.982578


In [184]:
symmetry_judgments.groupby(["group"])[["rt"]].describe()

Unnamed: 0_level_0,rt,rt,rt,rt,rt,rt,rt,rt
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
music,288.0,1640.159722,725.796044,738.0,1081.75,1447.0,1956.25,4615.0
silence,287.0,1840.344948,968.369672,720.0,1248.0,1510.0,2124.5,5742.0


In [179]:
# Rows of the spatial recall trials only.
is_recall = symmetry_span_results.trial_type == "spatial-span-recall"
spatial_recall_results = (
    symmetry_span_results.loc[is_recall]
    .rename(columns={"accuracy": "correct_cells"})
    .drop(columns=["trial_type"])
)
# A trial is flagged as fully correct when `correct_cells` equals the set size;
# the flag is placed directly after `correct_cells`.
all_correct = spatial_recall_results.correct_cells == spatial_recall_results.set_size
spatial_recall_results.insert(4, "all_correct", all_correct)
spatial_recall_results


Unnamed: 0,rt,recall,stimuli,correct_cells,all_correct,set_size,trial_index,time_elapsed,participant_id,group
3,3108.0,"[0, 10, 2]","[0, 10, 2]",3.0,True,3.0,88,146634,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
9,2620.0,"[13, 4, 9, 5, 7]","[13, 4, 9, 5, 7]",5.0,True,5.0,105,162138,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
14,3180.0,"[1, 12, 15, 10]","[2, 12, 15, 10]",3.0,False,4.0,119,176482,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
20,3963.0,"[13, 8, 3, 12]","[14, 4, 6, 3, 12]",0.0,False,5.0,136,193866,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
26,2202.0,"[1, 0, 8, 5, 4]","[1, 0, 8, 5, 4]",5.0,True,5.0,153,209922,31762cb9-3f06-46f8-a6aa-0cb7e725cba9,music
...,...,...,...,...,...,...,...,...,...,...
696,3172.0,"[9, 10, 14]","[9, 10, 14]",3.0,True,3.0,138,277884,f7cee892-7665-4c02-8b0e-005e0c255378,silence
702,6803.0,"[6, 2, 1, 9, 3]","[6, 2, 1, 13, 3]",4.0,False,5.0,155,298821,f7cee892-7665-4c02-8b0e-005e0c255378,silence
708,12374.0,"[11, 9, 6, 14, 7]","[11, 9, 6, 14, 7]",5.0,True,5.0,172,326710,f7cee892-7665-4c02-8b0e-005e0c255378,silence
714,6130.0,"[2, 1, 3, 12, 11]","[2, 1, 3, 12, 11]",5.0,True,5.0,189,346219,f7cee892-7665-4c02-8b0e-005e0c255378,silence
