In [1]:
import numpy as np
import pandas as pd
import statsrat as sr
from statsrat import rw
from statsrat.expr.predef.cat import fast
import glob
import read_fast_surveys as rfs

In [2]:
# Define learning model.
CompAct_fast = rw.model(name = 'CompAct_fast',
                        fbase = rw.fbase.elem,
                        fweight = rw.fweight.from_aux_norm,
                        lrate = rw.lrate.from_aux_norm,
                        aux = rw.aux.gradcomp_fast)

In [3]:
# Import data collected in the lab (using Psychopy).

path = 'fast_data_inlab'
#path = 'fast_data_debug'

# Import trial by trial task data.
(ds_fall2020_inlab, summary_fall2020_inlab) = fast.read_csv(path = path,
                                                            x_col = ['cue1', 'cue2'],
                                                            resp_col = ['trial_resp.keys', 'test_resp.keys'],
                                                            resp_map = {'h' : 'cati', 'g' : 'catii', 'c' : 'cat1', 'm' : 'cat2', 's' : 'cat3', 'r' : 'cat4'},
                                                            ident_col = 'participant',
                                                            other_info = {'age': ('questionText', 'What is your age?', 'slider.response'),
                                                                          'sex': ('questionText', 'What was your biological sex assigned at birth?', 'slider.response'),
                                                                          'latinx': ('questionText', 'Do you consider yourself to be of Hispanic, Latino, or Spanish origin?', 'slider.response'),
                                                                          'race': ('questionText', 'With which racial group do you most closely identify?', 'slider.response'),
                                                                          'employment': ('questionText', 'What is your current employment status?', 'slider.response')},
                                                           n_final = 8)

# Import survey data.
foo = rfs.read_surveys(path = path,
                       fun = rfs.process_psychopy,
                       ident_col = 'participant')

# Only keep people whose surveys can be read.
index = summary_fall2020_inlab.index.isin(foo.index.values)
ds_fall2020_inlab = ds_fall2020_inlab[{'ident': index}]

# Fit model.
bar = sr.fit_em(model = CompAct_fast,
                ds = ds_fall2020_inlab)

# Concatenate data frames.
summary_fall2020_inlab = pd.concat([summary_fall2020_inlab.loc[index], foo, bar], axis = 1)
print(summary_fall2020_inlab)

b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 

['cue1', 'cue2']


b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


['cue1', 'cue2']


b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


['cue1', 'cue2']


b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


['cue1', 'cue2']


b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


0


b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


The following files could not be read by Pandas:
fast_data_inlab/_fall_faces_2020_Oct_12_1324.csv
fast_data_inlab/NDARGR305EJL_fall_faces_2020_Sep_17_1210.csv
fast_data_inlab/_fall_faces_2020_Oct_16_1602.csv
fast_data_inlab/_fall_faces_2020_Sep_21_1338.csv
Participant ID (ident) could not be read from the following files:
fast_data_inlab/_fall_faces_2020_Oct_22_1217.csv


b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 

"None of [Index(['anx_slider1.response', 'anx_slider2.response', 'anx_slider3.response',\n       'anx_slider4.response', 'anx_slider5.response', 'anx_slider6.response',\n       'anx_slider7.response', 'anx_slider8.response'],\n      dtype='object')] are in the [columns]"


b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


"None of [Index(['anx_slider1.response', 'anx_slider2.response', 'anx_slider3.response',\n       'anx_slider4.response', 'anx_slider5.response', 'anx_slider6.response',\n       'anx_slider7.response', 'anx_slider8.response'],\n      dtype='object')] are in the [columns]"
"None of [Index(['anx_slider1.response', 'anx_slider2.response', 'anx_slider3.response',\n       'anx_slider4.response', 'anx_slider5.response', 'anx_slider6.response',\n       'anx_slider7.response', 'anx_slider8.response'],\n      dtype='object')] are in the [columns]"


b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


"None of [Index(['anx_slider1.response', 'anx_slider2.response', 'anx_slider3.response',\n       'anx_slider4.response', 'anx_slider5.response', 'anx_slider6.response',\n       'anx_slider7.response', 'anx_slider8.response'],\n      dtype='object')] are in the [columns]"


b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


"None of [Index(['anx_slider1.response', 'anx_slider2.response', 'anx_slider3.response',\n       'anx_slider4.response', 'anx_slider5.response', 'anx_slider6.response',\n       'anx_slider7.response', 'anx_slider8.response'],\n      dtype='object')] are in the [columns]"


b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


0


b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 733 fields, saw 737\n'
b'Skipping line 229: expected 737 fields, saw 741\n'
b'Skipping line 229: expected 733 fields, saw 737\n'


initial estimation with uniform prior
EM iteration 1
relative change: 0.051670212
EM iteration 2
relative change: 0.048479229
EM iteration 3
relative change: 0.021352033
EM iteration 4
relative change: 0.027519482
EM iteration 5
relative change: 0.064390395
             schedule    age     sex latinx                        race  \
ident                                                                     
NDARKM468VR6   design  18-24    Male     No                       White   
NDARTX594HBJ   design  18-24  Female     No                       White   
NDARRR883WV4   design  18-24  Female     No                       White   
NDARMA408TN4   design  18-24    Male     No                       White   
NDARBP237PTC   design  18-24  Female     No                       White   
...               ...    ...     ...    ...                         ...   
NDARAD554ZD6   design  18-24  Female     No                       Asian   
NDARAY643KPU   design  18-24  Female     No                       W

In [4]:
# Import online data (collected using Testable).

path = 'fast_data_online'
# Import trial by trial task data.
(ds_fall2020_online, summary_fall2020_online) = fast.read_csv(path = path,
                                                              x_col = ['abstract_stim1', 'abstract_stim2'],
                                                              resp_col = ['response'],
                                                              resp_map = {'h' : 'cati', 'g' : 'catii', 'l' : 'cat1', 'o' : 'cat2', 's' : 'cat3', 'r' : 'cat4'},
                                                              ident_col = None,
                                                              other_info = {'age': ('head', 'What is your age?', 'response'),
                                                                            'sex': ('head', 'What was your biological sex assigned at birth?', 'response'),
                                                                            'latinx': ('head', 'Do you consider yourself to be Hispanic, Latino, or of Spanish origin?', 'response'),
                                                                            'race': ('head', 'With which racial group do you identify (select the one with which you MOST CLOSELY identify)?', 'response'),
                                                                            'employment': ('head', 'What is your current employment status?', 'response')},
                                                              header = 2,
                                                              n_final = 8)
# Import survey data.
foo = rfs.read_surveys(path = path,
                       fun = rfs.process_testable,
                       ident_col = None,
                       header = 2)

# Only keep people whose surveys can be read.
index = summary_fall2020_online.index.isin(foo.index.values)
ds_fall2020_online = ds_fall2020_online[{'ident': index}]

# Fit model.
bar = sr.fit_em(model = CompAct_fast,
                ds = ds_fall2020_online)

# Concatenate data frames.
summary_fall2020_online = pd.concat([summary_fall2020_online.loc[index], foo, bar], axis = 1)
print(summary_fall2020_online)

# MAKE SURE THAT THE SURVEY AND SUMMARY DFS HAVE THE SAME ORDER OF PARTICIPANTS.

['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
['abstract_stim1', 'abstract_stim2']
The following files could not be read by Pandas:
fast_data_online/666579_201125_124958.csv
fast_data_online/666579_201204_055950.csv
fast_data_online/666579_201204_054131.csv
fast_data_online/666579_201201_013309.csv
fast_data_online/666579_201202_092221.csv
fast_data_online/666579_201203_214645.csv
fast_data_online/666579_201129_225628.csv
fast_data_online/666579_2011

In [5]:
# Combine online and in lab summary data.
foo = summary_fall2020_inlab
foo['source'] = 'inlab'
bar = summary_fall2020_online
bar['source'] = 'online'
summary_fall2020 = pd.concat([foo, bar], axis = 0)
summary_fall2020['avg_last8_pct_correct'] = (summary_fall2020['tutorial_0c_last8_pct_correct'] + summary_fall2020['training_last8_pct_correct'] + summary_fall2020['transfer_last8_pct_correct'])/3

# Add performance criterion.
# >= 75% on all stages (lumping all parts of stage 0 together as one stage) is the criterion for good performance that we used to analyze the Spring 2020 data. 
summary_fall2020['good_perf'] = (summary_fall2020['tutorial_0c_last8_pct_correct'] >= 75)&(summary_fall2020['training_last8_pct_correct'] >= 75)&(summary_fall2020['transfer_last8_pct_correct'] >= 75)

In [6]:
# Export processed data.
ds_fall2020.to_netcdf("ds_fall2020.nc")
summary_fall2020.to_csv('summary_fall2020.csv')


NameError: name 'ds_fall2020' is not defined