In [1]:
%pip install davos
import davos

davos.config.suppress_stdout = True

Note: you may need to restart the kernel to use updated packages.


In [2]:
smuggle numpy as np               # pip: numpy==1.24.2
smuggle pandas as pd              # pip: pandas==1.5.3
from tqdm smuggle tqdm            # pip: tqdm==4.64.1

smuggle requests                  # pip: requests==2.28.2
smuggle future                    # pip: future==0.18.3

from glob import glob as lsdir
import os
import pickle

from helpers import download_data, datadir

Download the dataset and load in pre- and post-experimental questionnaire responses

In [3]:
download_data()

preexperimental_fnames = lsdir(os.path.join(datadir, '*', '*', 'pre_questionnaire_*.pkl'))
postexperimental_fnames = lsdir(os.path.join(datadir, '*', '*', 'post_questionnaire_*.pkl'))

preexperimental_questions = ['Age', 'Gender', 
                             'Hispanic or Latino', 
                             'White', 'Black or African American', 'Native Hawaiian or other Pacific Islander', 'Asian', 'American Indian or Alaska Native', 'Other race', 'Decline to specify race',
                             'Highest degree',
                             'Reading impairments',
                             'Normal color vision',
                             'Medications or injuries',
                             'Meds or injuries details',
                             'Hours of sleep',
                             'Cups of coffee',
                             'Alertness']

postexperimental_questions = ['Engagement', 'Tiredness', 'Easier to remember (category)', 'Easier to remember (side)', 'Strategies']

In [4]:
def load_pre(fname):
    with open(fname, 'rb') as f:
        pre = pickle.load(f)
    
    return pd.Series({k: v for k, v in zip(preexperimental_questions, pre)}).to_frame().T

def load_post(fname):
    with open(fname, 'rb') as f:
        post = pickle.load(f)
    
    return pd.Series({k: v for k, v in zip(postexperimental_questions, post)}).to_frame().T

In [5]:
pre = pd.concat([load_pre(fname) for fname in preexperimental_fnames])
post = pd.concat([load_post(fname) for fname in postexperimental_fnames])

In [6]:
pre.head()

Unnamed: 0,Age,Gender,Hispanic or Latino,White,Black or African American,Native Hawaiian or other Pacific Islander,Asian,American Indian or Alaska Native,Other race,Decline to specify race,Highest degree,Reading impairments,Normal color vision,Medications or injuries,Meds or injuries details,Hours of sleep,Cups of coffee,Alertness
0,21,Female,No,False,False,False,True,False,False,False,some college,No,Yes,No,,5.0,0,A little slugglish
0,18,Female,No,True,False,False,False,False,False,False,some college,No,Yes,No,,8.5,0,A little slugglish
0,21,Male,Yes,True,False,False,False,False,False,False,some college,No,No,No,,6.0,2,Very alert
0,19,Female,No,True,False,False,False,False,False,False,some college,No,Yes,No,,9.0,0,A little alert
0,19,Female,No,False,False,False,True,False,False,False,some college,No,Yes,No,,5.0,0,Neutral


In [7]:
post.head()

Unnamed: 0,Engagement,Tiredness,Easier to remember (category),Easier to remember (side),Strategies
0,Neutral,Neutral,,,
0,Neutral,Very tired,the scene because it had more distinct charact...,I don't think so,I would try to pick out one distinct thing I s...
0,Neutral,A little tired,landscapes werre much easier to recall. I thin...,no,tried whispering to myaelf a key feature
0,A little boring,Neutral,"The scenes, because the faces tended to blur t...",Maybe L was a little easier. But I don't think...,I would repeat characteristics about the scene...
0,--,--,,,


# Age

In [8]:
pre[['Age']].astype('float').describe()

Unnamed: 0,Age
count,53.0
mean,18.735849
std,0.812189
min,18.0
25%,18.0
50%,19.0
75%,19.0
max,21.0


# Gender

In [9]:
pd.DataFrame(pre[['Gender']].value_counts())

Unnamed: 0_level_0,0
Gender,Unnamed: 1_level_1
Female,34
Male,18
--,1


# Ethnicity and race

In [10]:
ethnicity_cols = ['Hispanic or Latino']
race_cols = ['White', 'Black or African American', 'Native Hawaiian or other Pacific Islander', 'Asian', 'American Indian or Alaska Native', 'Other race', 'Decline to specify race']

pd.DataFrame(pre[ethnicity_cols].value_counts())

Unnamed: 0_level_0,0
Hispanic or Latino,Unnamed: 1_level_1
No,44
Yes,7
--,2


In [11]:
pd.DataFrame(pre[race_cols].sum(axis=0))

Unnamed: 0,0
White,37
Black or African American,2
Native Hawaiian or other Pacific Islander,0
Asian,13
American Indian or Alaska Native,4
Other race,1
Decline to specify race,0


# Education

In [12]:
pd.DataFrame(pre[['Highest degree']].value_counts())

Unnamed: 0_level_0,0
Highest degree,Unnamed: 1_level_1
some college,32
high school graduate,21


# Medical information

In [13]:
pd.DataFrame(pre['Reading impairments'].value_counts())

Unnamed: 0,Reading impairments
No,49
Yes,4


In [14]:
pd.DataFrame(pre['Normal color vision'].value_counts())

Unnamed: 0,Normal color vision
Yes,50
No,3


In [15]:
pd.DataFrame(pre['Medications or injuries'].value_counts())

Unnamed: 0,Medications or injuries
No,50
Yes,3


In [16]:
pd.DataFrame(pre['Meds or injuries details'].value_counts())

Unnamed: 0,Meds or injuries details
,49
recently hit my head very hard,1
,1
I am take concerta but am not on it right now,1
Amphetamine (though not at time of experiment),1


# Sleep, alertness, and coffee consumption

In [17]:
pd.DataFrame(pre['Hours of sleep'].astype('float').describe())

Unnamed: 0,Hours of sleep
count,53.0
mean,6.867925
std,1.305134
min,4.0
25%,6.0
50%,7.0
75%,8.0
max,9.0


In [18]:
pd.DataFrame(pre['Cups of coffee'].astype('float').describe())

Unnamed: 0,Cups of coffee
count,53.0
mean,0.330189
std,0.545466
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,2.0


In [19]:
alertness_dict = {'Very alert': 5, 'A little alert': 4, 'Neutral': 3, 'A little sluggish': 2, 'Very sluggish': 1}
pd.DataFrame(pre['Alertness'].map(alertness_dict).describe())

Unnamed: 0,Alertness
count,43.0
mean,3.44186
std,0.958727
min,1.0
25%,3.0
50%,3.0
75%,4.0
max,5.0


In [20]:
pd.DataFrame(pre['Alertness'].value_counts())

Unnamed: 0,Alertness
Neutral,20
A little alert,15
A little slugglish,10
Very alert,5
Very sluggish,3
