In [1]:
# import sys
# !conda install --yes --prefix {sys.prefix} pingouin

In [53]:
import datetime
import numpy as np
import math
import os
import pandas as pd
import pingouin as pg
import re
import seaborn as sns
import json
import statsmodels.api as sm 
import statsmodels.formula.api as smf
import statsmodels.graphics.api as smg
import sys

if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO
    
from IPython.display import HTML
def View(df):
    css = """<style>
    table { border-collapse: collapse; border: 3px solid #eee; }
    table tr th:first-child { background-color: #eeeeee; color: #333; font-weight: bold }
    table thead th { background-color: #eee; color: #000; }
    tr, th, td { border: 1px solid #ccc; border-width: 1px 0 0 1px; border-collapse: collapse;
    padding: 3px; font-family: monospace; font-size: 10px }</style>
    """
    s  = '<script type="text/Javascript">'
    s += 'var win = window.open("", "Title", "toolbar=no, location=no, directories=no, status=no, menubar=no, scrollbars=yes, resizable=yes, width=780, height=200, top="+(screen.height-400)+", left="+(screen.width-840));'
    s += 'win.document.body.innerHTML = \'' + (df.to_html() + css).replace("\n",'\\') + '\';'
    s += '</script>'
    return(HTML(s+css))    

os.chdir(r'C:\Users\User\GitHub\WebET_Analysis')
print("Current Working directory " , os.getcwd())

Current Working directory  C:\Users\User\GitHub\WebET_Analysis


In [3]:
# from IPython.display import HTML

# HTML('''<script>
# code_show=true; 
# function code_toggle() {
#  if (code_show){
#  $('div.input').hide();
#  } else {
#  $('div.input').show();
#  }
#  code_show = !code_show
# } 
# $( document ).ready(code_toggle);
# </script>
# <form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

# Raw Data

## Read CSV from String

In [4]:
def cleanhtml(raw_html):
    # https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
    cleanr = re.compile('<.*?>')
    cleanText = re.sub(cleanr, '', raw_html)
    return cleanText

def cleanETText(text):
    textWithinBrackets = re.findall(re.compile('\[.*?\]'), text)
    output = text
    for i in range(0,len(textWithinBrackets)):
        old = textWithinBrackets[i]
        new = re.sub(",", "$", old)
        output = output.replace(old, new)
    return output

def cleanSurveyText(text):
    output = text
    textWithinBrackets = re.findall(re.compile('\{.*?\}'), text)
    for i in range(0,len(textWithinBrackets)):
        old = textWithinBrackets[i]
        new = old.replace(',', '§')
        output = output.replace(old, new)
    return output

def compileData(path):
    subject_files = os.listdir(path)
    all_subjects = []
    for i in range(0, len(subject_files)):
        csv_thisSubject = open(path + "/" + subject_files[i]).read()
        csv_thisSubject = cleanhtml(csv_thisSubject)
        csv_thisSubject = cleanETText(csv_thisSubject)
        csv_thisSubject = cleanSurveyText(csv_thisSubject)
        all_subjects.append(pd.read_csv(StringIO(csv_thisSubject)))
    output = pd.concat(all_subjects).reset_index(drop=True)
    return output

#data_yang = compileData("data_yang2020WG")
# Custom modifications
#data_yang.loc[0:515, 'run_id'] = 0
#data_yang = data_yang.loc[~data_yang['run_id'].isin([4, 18]), :]
#data_yang['run_id'] = data_yang['run_id'] * 1000
#data_yang = data_yang.rename(columns={'eyeData': 'et_data'})
#data_cognition = compileData("data_cognition")

#data_raw = data_yang.append(data_cognition)
#data_raw['run_id'].unique()

data_raw = compileData("data_cognition")
# Custom modifications
data_raw['run_id'].unique()

array([1, 11, 12, 13, 14, 19, 24, 25, 28, 3, 30, 32, 36, 37, 38, 4, 41,
       42, 43, 5, 6, 7, 9], dtype=object)

### Exclude empty studies

In [5]:
data_raw = data_raw.loc[data_raw['trial_index']>0, :]

### survey data

In [6]:
def cleanOptionalNote(text):
    optionalNoteText = re.findall(re.compile('optionalNote":.*?\}'), text)
    if len(optionalNoteText) < 1:
        output = text
    else:
        old = optionalNoteText[0]
        new = old.replace('§', ' ')
        output = text.replace(old, new)
    return output


def surveyStringToFrame(subject, string):
    string = cleanOptionalNote(string)
    string = re.sub("""{""", '', string)
    string = re.sub("""}""", '', string)
    string = re.sub('"', '', string)
    string = re.sub('§', '$', string)
    output = pd.read_csv(StringIO(string),
                         sep=":",
                         lineterminator="$",
                         header=None,
                         index_col=0
                         ).transpose()
    return output


def surveyData_thisSubject(data):
    df_thisSubject = data.loc[
        (pd.notna(data["responses"])) &
        (data["responses"] != '"'), :].reset_index()
    subject = df_thisSubject['run_id'].unique()[0]
    output = []
    for i in range(0, len(df_thisSubject)):
        output.append(
            surveyStringToFrame(subject,
                                df_thisSubject.loc[i, 'responses'])
        )

    output = pd.concat(output, axis=1)
    output['run_id'] = subject
    return output

def addSurveyData(data):
    surveyData_allSubjects = pd.DataFrame(columns=[
            'prolificID', 'age', 'gender', 'ethnic', 'sight', 
            'glasses', 'degree', 'eyeshadow', 'masquara', 'eyeliner', 
            'browliner', 'vertPosition', 'triedChin', 'keptHead', 
            'optionalNote', 'run_id'])

    for subject in data['run_id'].unique():
        surveyData_allSubjects = \
            surveyData_allSubjects.append(
                surveyData_thisSubject(
                    data.loc[data['run_id']==subject, ['run_id', 'responses']]
                )
            )
    # print(surveyData_allSubjects)
    
    data = data.merge(surveyData_allSubjects, on='run_id', how='left')
    data = data.drop(columns='responses')
    return(data)
    
data_raw = addSurveyData(data_raw)
print(data_raw['run_id'].unique())
print(data_raw.columns)

[1 11 12 13 14 19 24 25 28 30 32 36 37 38 4 41 42 43 5 6 7 9]
Index(['run_id', 'condition', 'rt', 'stimulus', 'key_press', 'trial_type',
       'trial_index', 'time_elapsed', 'internal_node_id', 'subject',
       'chinFirst', 'choiceTask_amountLeftFirst', 'webcam_label', 'webcam_fps',
       'webcam_height', 'webcam_width', 'button_pressed', 'window_width',
       'window_height', 'chin', 'success', 'x_pos', 'y_pos', 'task_nr',
       'et_data', 'trial_duration', 'option_topLeft', 'option_bottomLeft',
       'option_topRight', 'option_bottomRight', 'recorded_at', 'ip',
       'user_agent', 'device', 'browser', 'browser_version', 'platform',
       'platform_version', 'Unnamed: 2', 'chosenAmount', 'chosenDelay',
       'prolificID', 'age', 'gender', 'ethnic', 'sight', 'glasses', 'degree',
       'eyeshadow', 'masquara', 'eyeliner', 'browliner', 'vertPosition',
       'triedChin', 'keptHead', 'optionalNote'],
      dtype='object')


In [7]:
def convertToNumeric(data, columns):
    data[columns] = data[columns].apply(pd.to_numeric, errors='coerce')
    return data


data_raw = convertToNumeric(data_raw, ['age'])
data_raw['degree'] = data_raw['degree'].replace(
        [
            'College / Undergraduate / Bachelor',
            'High School',
            'Graduate / PhD / Master',
            'Middle School'
        ],
    [
            'college',
            'highSchool',
            'grad',
            'middle'
        ]
)
data_raw['run_id'].unique()

array([1, 11, 12, 13, 14, 19, 24, 25, 28, 30, 32, 36, 37, 38, 4, 41, 42,
       43, 5, 6, 7, 9], dtype=object)

## Clean raw data

### Failed Webgazer Setup

In [8]:
subjects_failedSetups = []
for subject in data_raw['run_id'].unique():
    maxTrialIndex = data_raw.loc[data_raw['run_id']==subject, ['trial_index']].max()
    if int(maxTrialIndex) < 15:
        subjects_failedSetups.append(subject)

failedSetups = data_raw.loc[
    data_raw['run_id'].isin(subjects_failedSetups), 
    [
        'run_id', 'prolificID', 'chinFirst', 'choiceTask_amountLeftFirst',
        'webcam_label', 'webcam_fps',    
        'user_agent', 'device', 'browser', 'browser_version', 
        'platform', 'platform_version', 
        # 'stimulus', 'trial_type', 'trial_index', 'trial_duration', 
        # 'et_data'
    ]
].drop_duplicates()
    
data_raw = data_raw.loc[~data_raw['run_id'].isin(subjects_failedSetups), :]

print(data_raw['run_id'].unique())
failedSetups

[1 11 12 13 19 24 25 28 30 32 36 37 38 4 41 42 43 5 6 7 9]


Unnamed: 0,run_id,prolificID,chinFirst,choiceTask_amountLeftFirst,webcam_label,webcam_fps,user_agent,device,browser,browser_version,platform,platform_version
2074,14,5ec2cdfd1a17930ddf6f1443,1.0,0.0,HD WebCam (04f2:b526),30.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,Chrome,87.0.4280.141,Windows,10


### No EyeTracking Data

In [9]:
subjects_noet_data = []
for subject in data_raw['run_id'].unique():
    if len(data_raw.loc[data_raw['run_id']==subject, 'et_data'].unique()) < 4:
        subjects_noet_data.append(subject)

noet_data = data_raw.loc[
    data_raw['run_id'].isin(subjects_failedSetups), 
    [
        'run_id', 'prolificID', 'chinFirst', 'choiceTask_amountLeftFirst',
        'webcam_label', 'webcam_fps',
        'user_agent', 'device', 'browser', 'browser_version', 
        'platform', 'platform_version', 
        # 'stimulus', 'trial_type', 'trial_index', 'trial_duration', 
        # 'et_data'
    ]
].drop_duplicates()
    
data_raw = data_raw.loc[~data_raw['run_id'].isin(subjects_noet_data), :]

noet_data

Unnamed: 0,run_id,prolificID,chinFirst,choiceTask_amountLeftFirst,webcam_label,webcam_fps,user_agent,device,browser,browser_version,platform,platform_version


In [10]:
data_raw.run_id.unique()

array([1, 11, 12, 13, 19, 24, 25, 28, 30, 32, 36, 37, 4, 41, 42, 43, 5, 6,
       7, 9], dtype=object)

### Empty ET trials

In [11]:
print('Values that probably represent empty values')
for cell in data_raw["et_data"].unique():
    if len(cell) < 50:
        print(cell)

data_raw["et_data"] = data_raw["et_data"].apply(str)

if np.invert('emptyETData' in globals()):
    emptyETData = data_raw.loc[data_raw['et_data'].str.contains("""\[]}"""), :] \
        .groupby(['run_id', 'chinFirst', 'task_nr', 'chin', 'trial_type']) \
            ['et_data'].count()

data_raw = data_raw.loc[~data_raw['et_data'].str.contains("""\[]}"""), :]

emptyETData

Values that probably represent empty values
"
[]


Series([], Name: et_data, dtype: int64)

### Duplicate Prolific ID's

In [12]:
data_raw.loc[data_raw['run_id']==2, 'prolificID'] = 'Tim2'


duplicates = data_raw.loc[:, ['prolificID', 'trial_index']].duplicated()
duplicateSubjects = data_raw.loc[duplicates, 'run_id'].unique()

if len(duplicateSubjects) > 0:
    print('! Attention: Duplicate subjects: Check out the following: \n')
    print(duplicateSubjects)
else:
    print('Success: No duplicate subjects found')

Success: No duplicate subjects found


## Convert values

In [13]:
# Cleaning
data_raw.loc[: , 
       [
           'option_topLeft',
           'option_bottomLeft', 
           'option_topRight', 
           'option_bottomRight', 
           'chosenAmount', 
           'chosenDelay'
       ] 
    ] = data_raw.loc[:, 
       [
           'option_topLeft',
           'option_bottomLeft', 
           'option_topRight', 
           'option_bottomRight', 
           'chosenAmount', 
           'chosenDelay'
       ] 
    ].replace(['Today', 'Tomorrow', '7 days', '15 days', '30 days', '90 days', '180 days'], 
             [0, 1, 7, 15, 30, 90, 180]) \
    .replace({'\$':''}, regex = True) \
    .replace('50 cent', 0.5) 

In [14]:
data_raw = convertToNumeric(data_raw, 
    [
        'run_id', 'subject', 'chinFirst', 'chin', 'task_nr', 'trial_index', # Int
        'key_press', 
        'x_pos', 'y_pos', 'time_elapsed', 'trial_duration',
        'rt',
        'window_width', 'window_height', 
        'option_topLeft',
        'option_bottomLeft', 
        'option_topRight', 
        'option_bottomRight', 
        'chosenAmount', 
        'chosenDelay'
    ]
)

## Add variables

In [15]:
def addWindowSize(data):
    output = data
    if "window_width_max" in data.columns:
        print('window width_max already added')
    else:
        grouped = data.groupby(["run_id", "subject"])["window_width", "window_height"].max().reset_index()
        grouped.columns = ["run_id", "subject", "window_width_max", "window_height_max"]
        grouped['window_diagonal_max'] = np.sqrt(grouped['window_width_max']**2 + grouped['window_height_max']**2)
        output = data.merge(grouped, 
                            on=['run_id', "subject"],
                            how='left')
    return output

data_raw = addWindowSize(data_raw)
data_raw['window_diagonal'] = np.sqrt(data_raw['window_width']**2 + 
                                      data_raw['window_height']**2)

  


## Take a look

In [16]:
data_raw.loc[:, ['prolificID', 'run_id']].drop_duplicates()

Unnamed: 0,prolificID,run_id
0,5fccc8ac636416a4288a9f3d,1
518,5c5684ef9d244c0001b29f1e,11
1037,5b8969006651ea000118e42e,12
1555,5b4e65de09f5af0001080f45,13
2074,5f860a189bc695131f22ec9c,19
2593,5f77a902fc647a327b77004a,24
3111,5cf0ccf488abf10019fb775d,25
3629,5f561a95aa1c4ea13672f138,28
4147,5eeaa0ffaa6af11cf32ce057,30
4665,5ea94df3a879f002f3e9a812,32


# data_et

## Compile

In [17]:
def reformatYang(text):
    text = text.replace('$', ',')
    text = text.replace("relative-x", "x")
    text = text.replace("relative-y", 'y')
    text = text.replace('elapse-time', 't')
    text = (text[11:len(text)-1])
    return(text)

for subject in [0, 11000, 21000, 3000, 8000]:
    for i in data_raw.loc[(data_raw['run_id']==subject) &
                      (data_raw['et_data']!='"'), :].index:
        data_raw.loc[i, 'et_data'] = reformatYang(data_raw.loc[i, 'et_data'])

In [18]:
def textToDataframe(text):
    text = text.replace('$', ',')
    dataframe = pd.read_json(text, orient='records')
    return(dataframe)


def extractEyetrackingData(data):
    data_eyetracking = pd.DataFrame(columns=['x', 'y', 't'])
    data["et_data"] = data['et_data'].apply(str)

    for i in data.loc[~(data['et_data'].isin(['"', '[]'])), :].index:
        df = textToDataframe(data.loc[i, 'et_data'])        

        df["t_task"] = (df.loc[:, "t"] - df.loc[0, "t"])
        
        df['run_id'] = data.loc[i, 'run_id']
        df['trial_index'] = data.loc[i, 'trial_index']

        data_eyetracking = data_eyetracking.append(pd.DataFrame(data=df), ignore_index=True)
        print('Index ' + str(i) + ' extracted')
    return(data_eyetracking)

data_et = extractEyetrackingData(data_raw)

columnsToAdd = data_raw.columns.drop('et_data')
data_et = data_et.merge(data_raw.loc[:, columnsToAdd], on=['run_id', 'trial_index'], how='left')

data_et.groupby(['run_id', 'chinFirst', 'task_nr',
                 'chin', 'trial_type'])['x'].count()

Index 17 extracted
Index 19 extracted
Index 21 extracted
Index 23 extracted
Index 25 extracted
Index 27 extracted
Index 29 extracted
Index 31 extracted
Index 33 extracted
Index 35 extracted
Index 37 extracted
Index 39 extracted
Index 41 extracted
Index 43 extracted
Index 45 extracted
Index 47 extracted
Index 49 extracted
Index 51 extracted
Index 53 extracted
Index 55 extracted
Index 57 extracted
Index 59 extracted
Index 61 extracted
Index 63 extracted
Index 65 extracted
Index 67 extracted
Index 69 extracted
Index 71 extracted
Index 73 extracted
Index 75 extracted
Index 77 extracted
Index 79 extracted
Index 81 extracted
Index 83 extracted
Index 85 extracted
Index 87 extracted
Index 89 extracted
Index 91 extracted
Index 93 extracted
Index 104 extracted
Index 105 extracted
Index 107 extracted
Index 108 extracted
Index 110 extracted
Index 111 extracted
Index 113 extracted
Index 114 extracted
Index 116 extracted
Index 117 extracted
Index 119 extracted
Index 120 extracted
Index 122 extracted

Index 846 extracted
Index 847 extracted
Index 849 extracted
Index 850 extracted
Index 852 extracted
Index 853 extracted
Index 855 extracted
Index 856 extracted
Index 858 extracted
Index 859 extracted
Index 861 extracted
Index 862 extracted
Index 864 extracted
Index 865 extracted
Index 867 extracted
Index 868 extracted
Index 870 extracted
Index 871 extracted
Index 873 extracted
Index 874 extracted
Index 876 extracted
Index 877 extracted
Index 879 extracted
Index 880 extracted
Index 882 extracted
Index 883 extracted
Index 885 extracted
Index 886 extracted
Index 888 extracted
Index 889 extracted
Index 891 extracted
Index 892 extracted
Index 894 extracted
Index 895 extracted
Index 897 extracted
Index 898 extracted
Index 900 extracted
Index 901 extracted
Index 903 extracted
Index 904 extracted
Index 907 extracted
Index 908 extracted
Index 910 extracted
Index 911 extracted
Index 913 extracted
Index 914 extracted
Index 916 extracted
Index 917 extracted
Index 919 extracted
Index 920 extracted


Index 1590 extracted
Index 1592 extracted
Index 1594 extracted
Index 1596 extracted
Index 1598 extracted
Index 1600 extracted
Index 1602 extracted
Index 1604 extracted
Index 1606 extracted
Index 1608 extracted
Index 1610 extracted
Index 1612 extracted
Index 1614 extracted
Index 1616 extracted
Index 1618 extracted
Index 1620 extracted
Index 1622 extracted
Index 1624 extracted
Index 1626 extracted
Index 1628 extracted
Index 1630 extracted
Index 1632 extracted
Index 1634 extracted
Index 1636 extracted
Index 1638 extracted
Index 1640 extracted
Index 1642 extracted
Index 1644 extracted
Index 1646 extracted
Index 1648 extracted
Index 1659 extracted
Index 1660 extracted
Index 1662 extracted
Index 1663 extracted
Index 1665 extracted
Index 1666 extracted
Index 1668 extracted
Index 1669 extracted
Index 1671 extracted
Index 1672 extracted
Index 1674 extracted
Index 1675 extracted
Index 1677 extracted
Index 1678 extracted
Index 1680 extracted
Index 1681 extracted
Index 1683 extracted
Index 1684 ex

Index 2375 extracted
Index 2376 extracted
Index 2378 extracted
Index 2379 extracted
Index 2381 extracted
Index 2382 extracted
Index 2384 extracted
Index 2385 extracted
Index 2387 extracted
Index 2388 extracted
Index 2390 extracted
Index 2391 extracted
Index 2393 extracted
Index 2394 extracted
Index 2396 extracted
Index 2397 extracted
Index 2399 extracted
Index 2400 extracted
Index 2402 extracted
Index 2403 extracted
Index 2405 extracted
Index 2406 extracted
Index 2408 extracted
Index 2409 extracted
Index 2411 extracted
Index 2412 extracted
Index 2414 extracted
Index 2415 extracted
Index 2417 extracted
Index 2418 extracted
Index 2420 extracted
Index 2421 extracted
Index 2423 extracted
Index 2424 extracted
Index 2426 extracted
Index 2427 extracted
Index 2429 extracted
Index 2430 extracted
Index 2432 extracted
Index 2433 extracted
Index 2435 extracted
Index 2436 extracted
Index 2438 extracted
Index 2439 extracted
Index 2441 extracted
Index 2442 extracted
Index 2444 extracted
Index 2445 ex

Index 3085 extracted
Index 3087 extracted
Index 3088 extracted
Index 3090 extracted
Index 3091 extracted
Index 3093 extracted
Index 3094 extracted
Index 3096 extracted
Index 3097 extracted
Index 3099 extracted
Index 3100 extracted
Index 3128 extracted
Index 3130 extracted
Index 3132 extracted
Index 3134 extracted
Index 3136 extracted
Index 3138 extracted
Index 3140 extracted
Index 3142 extracted
Index 3144 extracted
Index 3146 extracted
Index 3148 extracted
Index 3150 extracted
Index 3152 extracted
Index 3154 extracted
Index 3156 extracted
Index 3158 extracted
Index 3160 extracted
Index 3162 extracted
Index 3164 extracted
Index 3166 extracted
Index 3168 extracted
Index 3170 extracted
Index 3172 extracted
Index 3174 extracted
Index 3176 extracted
Index 3178 extracted
Index 3180 extracted
Index 3182 extracted
Index 3184 extracted
Index 3186 extracted
Index 3188 extracted
Index 3190 extracted
Index 3192 extracted
Index 3194 extracted
Index 3196 extracted
Index 3198 extracted
Index 3200 ex

Index 3854 extracted
Index 3856 extracted
Index 3857 extracted
Index 3859 extracted
Index 3860 extracted
Index 3862 extracted
Index 3863 extracted
Index 3865 extracted
Index 3866 extracted
Index 3868 extracted
Index 3869 extracted
Index 3871 extracted
Index 3872 extracted
Index 3874 extracted
Index 3875 extracted
Index 3877 extracted
Index 3878 extracted
Index 3880 extracted
Index 3881 extracted
Index 3883 extracted
Index 3884 extracted
Index 3886 extracted
Index 3887 extracted
Index 3889 extracted
Index 3890 extracted
Index 3893 extracted
Index 3894 extracted
Index 3896 extracted
Index 3897 extracted
Index 3899 extracted
Index 3900 extracted
Index 3902 extracted
Index 3903 extracted
Index 3905 extracted
Index 3906 extracted
Index 3908 extracted
Index 3909 extracted
Index 3911 extracted
Index 3912 extracted
Index 3914 extracted
Index 3915 extracted
Index 3917 extracted
Index 3918 extracted
Index 3920 extracted
Index 3921 extracted
Index 3923 extracted
Index 3924 extracted
Index 3926 ex

Index 4589 extracted
Index 4591 extracted
Index 4593 extracted
Index 4595 extracted
Index 4597 extracted
Index 4599 extracted
Index 4601 extracted
Index 4603 extracted
Index 4605 extracted
Index 4607 extracted
Index 4609 extracted
Index 4611 extracted
Index 4613 extracted
Index 4615 extracted
Index 4617 extracted
Index 4629 extracted
Index 4630 extracted
Index 4632 extracted
Index 4633 extracted
Index 4635 extracted
Index 4636 extracted
Index 4638 extracted
Index 4639 extracted
Index 4641 extracted
Index 4642 extracted
Index 4644 extracted
Index 4645 extracted
Index 4647 extracted
Index 4648 extracted
Index 4650 extracted
Index 4651 extracted
Index 4653 extracted
Index 4654 extracted
Index 4682 extracted
Index 4684 extracted
Index 4686 extracted
Index 4688 extracted
Index 4690 extracted
Index 4692 extracted
Index 4694 extracted
Index 4696 extracted
Index 4698 extracted
Index 4700 extracted
Index 4702 extracted
Index 4704 extracted
Index 4706 extracted
Index 4708 extracted
Index 4710 ex

Index 5375 extracted
Index 5377 extracted
Index 5378 extracted
Index 5380 extracted
Index 5381 extracted
Index 5383 extracted
Index 5384 extracted
Index 5386 extracted
Index 5387 extracted
Index 5389 extracted
Index 5390 extracted
Index 5392 extracted
Index 5393 extracted
Index 5395 extracted
Index 5396 extracted
Index 5398 extracted
Index 5399 extracted
Index 5401 extracted
Index 5402 extracted
Index 5404 extracted
Index 5405 extracted
Index 5407 extracted
Index 5408 extracted
Index 5410 extracted
Index 5411 extracted
Index 5413 extracted
Index 5414 extracted
Index 5416 extracted
Index 5417 extracted
Index 5419 extracted
Index 5420 extracted
Index 5422 extracted
Index 5423 extracted
Index 5425 extracted
Index 5426 extracted
Index 5428 extracted
Index 5429 extracted
Index 5431 extracted
Index 5432 extracted
Index 5434 extracted
Index 5435 extracted
Index 5437 extracted
Index 5438 extracted
Index 5440 extracted
Index 5441 extracted
Index 5443 extracted
Index 5444 extracted
Index 5447 ex

Index 6074 extracted
Index 6076 extracted
Index 6077 extracted
Index 6079 extracted
Index 6080 extracted
Index 6082 extracted
Index 6083 extracted
Index 6095 extracted
Index 6097 extracted
Index 6099 extracted
Index 6101 extracted
Index 6103 extracted
Index 6105 extracted
Index 6107 extracted
Index 6109 extracted
Index 6111 extracted
Index 6113 extracted
Index 6115 extracted
Index 6117 extracted
Index 6119 extracted
Index 6121 extracted
Index 6123 extracted
Index 6125 extracted
Index 6127 extracted
Index 6129 extracted
Index 6131 extracted
Index 6133 extracted
Index 6135 extracted
Index 6137 extracted
Index 6139 extracted
Index 6141 extracted
Index 6143 extracted
Index 6145 extracted
Index 6147 extracted
Index 6149 extracted
Index 6151 extracted
Index 6153 extracted
Index 6155 extracted
Index 6157 extracted
Index 6159 extracted
Index 6161 extracted
Index 6163 extracted
Index 6165 extracted
Index 6167 extracted
Index 6169 extracted
Index 6171 extracted
Index 6183 extracted
Index 6184 ex

Index 6882 extracted
Index 6884 extracted
Index 6885 extracted
Index 6887 extracted
Index 6888 extracted
Index 6890 extracted
Index 6891 extracted
Index 6893 extracted
Index 6894 extracted
Index 6896 extracted
Index 6897 extracted
Index 6899 extracted
Index 6900 extracted
Index 6902 extracted
Index 6903 extracted
Index 6905 extracted
Index 6906 extracted
Index 6908 extracted
Index 6909 extracted
Index 6911 extracted
Index 6912 extracted
Index 6914 extracted
Index 6915 extracted
Index 6917 extracted
Index 6918 extracted
Index 6920 extracted
Index 6921 extracted
Index 6923 extracted
Index 6924 extracted
Index 6926 extracted
Index 6927 extracted
Index 6929 extracted
Index 6930 extracted
Index 6932 extracted
Index 6933 extracted
Index 6935 extracted
Index 6936 extracted
Index 6938 extracted
Index 6939 extracted
Index 6941 extracted
Index 6942 extracted
Index 6944 extracted
Index 6945 extracted
Index 6947 extracted
Index 6948 extracted
Index 6950 extracted
Index 6951 extracted
Index 6953 ex

Index 7581 extracted
Index 7583 extracted
Index 7584 extracted
Index 7586 extracted
Index 7587 extracted
Index 7589 extracted
Index 7590 extracted
Index 7592 extracted
Index 7593 extracted
Index 7595 extracted
Index 7596 extracted
Index 7598 extracted
Index 7599 extracted
Index 7601 extracted
Index 7602 extracted
Index 7604 extracted
Index 7605 extracted
Index 7607 extracted
Index 7608 extracted
Index 7610 extracted
Index 7611 extracted
Index 7613 extracted
Index 7614 extracted
Index 7616 extracted
Index 7617 extracted
Index 7619 extracted
Index 7620 extracted
Index 7622 extracted
Index 7623 extracted
Index 7625 extracted
Index 7626 extracted
Index 7628 extracted
Index 7629 extracted
Index 7631 extracted
Index 7632 extracted
Index 7634 extracted
Index 7635 extracted
Index 7637 extracted
Index 7638 extracted
Index 7650 extracted
Index 7652 extracted
Index 7654 extracted
Index 7656 extracted
Index 7658 extracted
Index 7660 extracted
Index 7662 extracted
Index 7664 extracted
Index 7666 ex

Index 8357 extracted
Index 8359 extracted
Index 8361 extracted
Index 8363 extracted
Index 8365 extracted
Index 8367 extracted
Index 8369 extracted
Index 8371 extracted
Index 8373 extracted
Index 8375 extracted
Index 8377 extracted
Index 8379 extracted
Index 8381 extracted
Index 8383 extracted
Index 8385 extracted
Index 8396 extracted
Index 8397 extracted
Index 8399 extracted
Index 8400 extracted
Index 8402 extracted
Index 8403 extracted
Index 8405 extracted
Index 8406 extracted
Index 8408 extracted
Index 8409 extracted
Index 8411 extracted
Index 8412 extracted
Index 8414 extracted
Index 8415 extracted
Index 8417 extracted
Index 8418 extracted
Index 8420 extracted
Index 8421 extracted
Index 8435 extracted
Index 8436 extracted
Index 8438 extracted
Index 8439 extracted
Index 8441 extracted
Index 8442 extracted
Index 8444 extracted
Index 8445 extracted
Index 8447 extracted
Index 8448 extracted
Index 8450 extracted
Index 8451 extracted
Index 8453 extracted
Index 8454 extracted
Index 8456 ex

Index 9124 extracted
Index 9125 extracted
Index 9127 extracted
Index 9128 extracted
Index 9130 extracted
Index 9131 extracted
Index 9133 extracted
Index 9134 extracted
Index 9136 extracted
Index 9137 extracted
Index 9139 extracted
Index 9140 extracted
Index 9142 extracted
Index 9143 extracted
Index 9145 extracted
Index 9146 extracted
Index 9148 extracted
Index 9149 extracted
Index 9151 extracted
Index 9152 extracted
Index 9154 extracted
Index 9155 extracted
Index 9157 extracted
Index 9158 extracted
Index 9160 extracted
Index 9161 extracted
Index 9163 extracted
Index 9164 extracted
Index 9166 extracted
Index 9167 extracted
Index 9169 extracted
Index 9170 extracted
Index 9172 extracted
Index 9173 extracted
Index 9175 extracted
Index 9176 extracted
Index 9178 extracted
Index 9179 extracted
Index 9181 extracted
Index 9182 extracted
Index 9184 extracted
Index 9185 extracted
Index 9187 extracted
Index 9188 extracted
Index 9190 extracted
Index 9191 extracted
Index 9193 extracted
Index 9194 ex

Index 9828 extracted
Index 9830 extracted
Index 9831 extracted
Index 9833 extracted
Index 9834 extracted
Index 9836 extracted
Index 9837 extracted
Index 9866 extracted
Index 9868 extracted
Index 9870 extracted
Index 9872 extracted
Index 9874 extracted
Index 9876 extracted
Index 9878 extracted
Index 9880 extracted
Index 9882 extracted
Index 9884 extracted
Index 9886 extracted
Index 9888 extracted
Index 9890 extracted
Index 9892 extracted
Index 9894 extracted
Index 9896 extracted
Index 9898 extracted
Index 9900 extracted
Index 9902 extracted
Index 9904 extracted
Index 9906 extracted
Index 9908 extracted
Index 9910 extracted
Index 9912 extracted
Index 9914 extracted
Index 9916 extracted
Index 9918 extracted
Index 9920 extracted
Index 9922 extracted
Index 9924 extracted
Index 9926 extracted
Index 9928 extracted
Index 9930 extracted
Index 9932 extracted
Index 9934 extracted
Index 9936 extracted
Index 9938 extracted
Index 9940 extracted
Index 9942 extracted
Index 9953 extracted
Index 9954 ex

run_id  chinFirst  task_nr  chin  trial_type             
1.0     1.0        0.0      1.0   eyetracking-calibration      440
                   1.0      1.0   eyetracking-fix-object       162
                   2.0      0.0   eyetracking-calibration      441
                            1.0   eyetracking-choice           628
                                  eyetracking-fix-object       376
                                                             ...  
43.0    0.0        1.0      0.0   eyetracking-fix-object      1909
                            1.0   eyetracking-calibration     4602
                   2.0      1.0   eyetracking-fix-object      1920
                   3.0      1.0   eyetracking-choice         10922
                                  eyetracking-fix-object      3057
Name: x, Length: 120, dtype: int64

In [19]:
data_et = convertToNumeric(data_et,
                        ['x', 'y', 't', 't_task', # Float 
                         'x_pos', 'y_pos', 
                         'chinFirst', 'chin', 'key_press'])

In [20]:
def convertToFactor(data, columns):
    stacked = data[columns].stack()
    data[columns] = pd.Series(stacked.factorize()[0], index=stacked.index).unstack()
    return data

## Add variables

In [21]:
def addXCount(data):
    if 'count' in data.columns:
        print('Count already added!')
    else: 
        grouped = pd.DataFrame(
                data.groupby(["run_id", "trial_index"])["x"].count()
            ) \
            .reset_index() \
            .rename(columns={'x': 'count'})
        data = data.merge(grouped, 
                          on=["run_id", "trial_index"], 
                          how='left')
    return(data)

data_et = addXCount(data_et)

In [22]:
def addMeans(data):
    if ('x_mean' in data.columns):
        print('X_mean already added!')
        data_output = data
    elif ('y_mean' in data.columns):
        print('Y_mean already added!')
        data_output = data
    else: 
        grouped = data.loc[:, ['run_id', 'subject', 'trial_index', 'x', 'y']] \
                             .groupby(['run_id', 'subject', 'trial_index']).mean() \
                             .rename(columns={'x': 'x_mean', 
                                              'y': 'y_mean'})
        data_output = data.merge(grouped,
                                 on=['run_id', 'subject', 'trial_index'],
                                 how='left')
    return data_output

data_et = addMeans(data_et)

In [23]:
def euclideanDistance(x, x_target, y, y_target):
    x_diff = x - x_target
    y_diff = y - y_target
    euclideanDistance = np.sqrt(x_diff**2 + y_diff**2)
    return(euclideanDistance)

data_et["offset"] = euclideanDistance(data_et["x"], data_et['x_pos'], 
                                      data_et["y"], data_et['y_pos'])

In [24]:
def addPrecision(data):
    if 'precision' in data.columns: 
        print('precision already added!')
    else:
        data['deviationFromAVG'] = euclideanDistance(
                data['x'], data['x_mean'], data['y'], data['y_mean']
            )
        data['deviationFromAVG_square'] = np.power(data['deviationFromAVG'], 2)
        grouped = data.groupby(['run_id', 'trial_index']).mean() \
            .reset_index()
        grouped['precision'] = np.sqrt(grouped['deviationFromAVG_square'])
        data = data.merge(
                grouped.loc[:, ['run_id', 'trial_index', 'precision']], 
                on=['run_id', 'trial_index'],
                how='left'
            )
                                
    return data

data_et = addPrecision(data_et)

In [25]:
def withinTaskIndex(data): 
    allTrialIndices = []
    for subject in data["run_id"].unique():
        df_subj = data.loc[data['run_id']==subject, :]
        
        for trial_type in df_subj['trial_type'].unique():
            df_trial = df_subj.loc[df_subj['trial_type']==trial_type, :]
                
            for task_nr in df_trial["task_nr"].unique():
                df_thisTask = df_trial.loc[df_trial['task_nr']==task_nr, 
                         [
                             'run_id',
                             'trial_index'
                         ]
                    ] \
                    .drop_duplicates() \
                    .reset_index(drop=True)
        
                df_thisTask['withinTaskIndex'] = df_thisTask.index + 1
                allTrialIndices.append(df_thisTask)
    allTrialIndices = pd.concat(allTrialIndices).reset_index(drop=True)
    return allTrialIndices

def addWithinTaskIndex(data):
    if 'withinTaskIndex' in data.columns: 
        print('withinTaskIndex already added')
    else:
        newIndices = withinTaskIndex(data_et) \
            .reset_index(drop=True)
        data = data.merge(newIndices, 
                          on = ['run_id', 'trial_index'], 
                          how = 'left')
    return data

data_et = addWithinTaskIndex(data_et)

In [26]:
def multiply(x):
    return x*10

def positionIndex(data):
    allPositionIndices = []
    for subject in data["run_id"].unique():
        df_subj = data.loc[
            (
                (data['run_id']==subject) &
                (data['trial_type'].isin(
                        [
                            'eyetracking-calibration', 
                            'eyetracking-fix-object'
                        ]
                    )
                )
            ), :]
        
        for trial_type in df_subj['trial_type'].unique():
            df_trial = df_subj.loc[df_subj['trial_type']==trial_type, :]
                
            for task_nr in df_trial["task_nr"].unique():
                df_thisTask = df_trial.loc[
                        df_trial['task_nr']==task_nr, 
                        ['run_id', 'trial_index', 'x_pos', 'y_pos']
                    ] \
                    .drop_duplicates() \
                    .reset_index(drop=True)

                df_thisTask['positionIndex'] = df_thisTask.loc[:, ['x_pos', 'y_pos']] \
                    .apply(multiply) \
                    .astype(int) \
                    .astype(str) \
                    .apply(''.join, 1)
                df_thisTask['positionIndex'] = df_thisTask['positionIndex'].astype(int)
                df_thisTask['positionIndex'] = df_thisTask.loc[:, 'positionIndex'] \
                    .replace(
                            np.sort(df_thisTask['positionIndex'].unique()), 
                            range(0, len(df_thisTask['positionIndex'].unique()))
                        )        
                allPositionIndices.append(df_thisTask)
                
    allPositionIndices = pd.concat(allPositionIndices).reset_index(drop=True)
    return allPositionIndices

def addPositionIndex(data):
    if 'positionIndex' in data.columns: 
        print('positionIndex already added')
    else:
        newIndices = positionIndex(data_et) \
            .reset_index(drop=True)
        data = data.merge(newIndices, 
                          on = ['run_id', 'trial_index', 'x_pos', 'y_pos'], 
                          how = 'left')
    return data

data_et = addPositionIndex(data_et)

In [27]:
data_et.loc[(data_et['run_id']==8) & (data_et['trial_index']==18), :]

Unnamed: 0,x,y,t,t_task,run_id,trial_index,condition,rt,stimulus,key_press,...,window_diagonal,count,x_mean,y_mean,offset,deviationFromAVG,deviationFromAVG_square,precision,withinTaskIndex,positionIndex


## data_et_calibration

In [28]:
data_et_calibration = data_et.loc[data_et["trial_type"]=="eyetracking-calibration", :]
data_et_calibration

Unnamed: 0,x,y,t,t_task,run_id,trial_index,condition,rt,stimulus,key_press,...,window_diagonal,count,x_mean,y_mean,offset,deviationFromAVG,deviationFromAVG_square,precision,withinTaskIndex,positionIndex
0,0.260714,0.462839,85956.255,0.000,1.0,18.0,1,,"""",,...,2202.907170,6,0.317473,0.438487,0.599928,0.061763,0.003815,0.068407,1,10.0
1,0.264363,0.461402,86451.430,495.175,1.0,18.0,1,,"""",,...,2202.907170,6,0.317473,0.438487,0.596019,0.057843,0.003346,0.068407,1,10.0
2,0.276912,0.455906,86914.860,958.605,1.0,18.0,1,,"""",,...,2202.907170,6,0.317473,0.438487,0.582330,0.044143,0.001949,0.068407,1,10.0
3,0.307805,0.443429,87395.590,1439.335,1.0,18.0,1,,"""",,...,2202.907170,6,0.317473,0.438487,0.549102,0.010858,0.000118,0.068407,1,10.0
4,0.358234,0.419854,87846.505,1890.250,1.0,18.0,1,,"""",,...,2202.907170,6,0.317473,0.438487,0.493450,0.044818,0.002009,0.068407,1,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260819,0.368741,0.542908,2009569.820,2582.795,9.0,470.0,1,,"""",,...,1835.755975,8,0.428941,0.553246,0.300958,0.061082,0.003731,0.067107,39,9.0
260820,0.351669,0.534063,2010497.050,3510.025,9.0,470.0,1,,"""",,...,1835.755975,8,0.428941,0.553246,0.320067,0.079618,0.006339,0.067107,39,9.0
260821,0.416215,0.565098,2011412.400,4425.375,9.0,470.0,1,,"""",,...,1835.755975,8,0.428941,0.553246,0.248725,0.017390,0.000302,0.067107,39,9.0
260822,0.510409,0.562400,2012340.795,5353.770,9.0,470.0,1,,"""",,...,1835.755975,8,0.428941,0.553246,0.164801,0.081980,0.006721,0.067107,39,9.0


## data_et_fixation

In [29]:
data_et_fixation = data_et.loc[(
           (data_et["trial_type"]=="eyetracking-fix-object") &
           (pd.notna(data_et['withinTaskIndex'])) 
        ), :
    ]
data_et_fixation.columns

Index(['x', 'y', 't', 't_task', 'run_id', 'trial_index', 'condition', 'rt',
       'stimulus', 'key_press', 'trial_type', 'time_elapsed',
       'internal_node_id', 'subject', 'chinFirst',
       'choiceTask_amountLeftFirst', 'webcam_label', 'webcam_fps',
       'webcam_height', 'webcam_width', 'button_pressed', 'window_width',
       'window_height', 'chin', 'success', 'x_pos', 'y_pos', 'task_nr',
       'trial_duration', 'option_topLeft', 'option_bottomLeft',
       'option_topRight', 'option_bottomRight', 'recorded_at', 'ip',
       'user_agent', 'device', 'browser', 'browser_version', 'platform',
       'platform_version', 'Unnamed: 2', 'chosenAmount', 'chosenDelay',
       'prolificID', 'age', 'gender', 'ethnic', 'sight', 'glasses', 'degree',
       'eyeshadow', 'masquara', 'eyeliner', 'browliner', 'vertPosition',
       'triedChin', 'keptHead', 'optionalNote', 'window_width_max',
       'window_height_max', 'window_diagonal_max', 'window_diagonal', 'count',
       'x_mean', 'y_me

## data_et_choice

In [30]:
data_et_choice = data_et.loc[(data_et["trial_type"]=="eyetracking-choice"), :] \
    .reset_index(drop=True)

In [31]:
def lookDirections(data):
    data["look_left"] = (data["x"] < 0.5).astype(int)
    data["look_top"] = (data["y"] < 0.5).astype(int)
    return data

data_et_choice = lookDirections(data_et_choice)

In [32]:
def addChoiceVariables(data):
    data.loc[
                (
                    (data['choiceTask_amountLeftFirst']==1) &
                    (data['withinTaskIndex'] <41)
                ), 'amountLeft'] = 1
    data.loc[
                (
                    (data['choiceTask_amountLeftFirst']==0) &
                    (data['withinTaskIndex'] >40)
                ), 'amountLeft'] = 0

    data.loc[data['amountLeft'] == 1, 'aSS'] = \
        data.loc[data['amountLeft'] == 1,["option_topLeft", "option_bottomLeft"]].values.min(1)
    data.loc[data['amountLeft'] == 0, 'aSS'] = \
        data.loc[data['amountLeft'] == 0,["option_topRight", "option_bottomRight"]].values.min(1)

    data.loc[data['amountLeft'] == 1, 'aLL'] = \
        data.loc[data['amountLeft'] == 1,["option_topLeft", "option_bottomLeft"]].values.max(1)
    data.loc[data['amountLeft'] == 0, 'aLL'] = \
        data.loc[data['amountLeft'] == 0,["option_topRight", "option_bottomRight"]].values.max(1)

    data.loc[:, "tSS"] = 0 

    data.loc[data['amountLeft'] == 1, 'tLL'] = \
        data.loc[data['amountLeft'] == 1,["option_topRight", "option_bottomRight"]].values.max(1)
    data.loc[data['amountLeft'] == 0, 'tLL'] = \
        data.loc[data['amountLeft'] == 0,["option_topLeft", "option_bottomLeft"]].values.max(1)

    data.loc[(data["key_press"]==38), "choseTop"] = 1
    data.loc[(data["key_press"]==40), "choseTop"] = 0

    data.loc[data['amountLeft'] == 1, 'tLL'] = \
        data.loc[data['amountLeft'] == 1,["option_topRight", "option_bottomRight"]].values.max(1)

    data['LL_top'] = (data["option_topLeft"] > data["option_bottomLeft"]).astype(int)

    data.loc[
        (
             (data["choseTop"]==1) & 
             (data["LL_top"] == 1)
        ), "choseLL"] = 1
    data.loc[
        (
             (data["choseTop"]==1) & 
             (data["LL_top"] == 0)
        ), "choseLL"] = 0

    return(data)

data_et_choice = addChoiceVariables(data_et_choice)

In [33]:
def cleanETChoice(data):
    output = data.loc[(data_et["x"]>-1) & 
                      (data_et["y"]>-1) & 
                      (data_et["x"]<data_et['window_width_max']) & 
                      (data_et["y"]<data_et['window_height_max']), :]
    return output
# data_et_choice = cleanETChoice(data_et_choice)

In [34]:
def addAOI(data): 
    aoiCenters = pd.DataFrame(
        [
            [(0.05+0.9*0.2), 0.25],
            [(0.05+0.9*0.8), 0.25],
            [(0.05+0.9*0.2), 0.75],
            [(0.05+0.9*0.8), 0.75]
        ], 
        columns = ['x', 'y'],
        index = ['TL', 'TR', 'BL', 'BR']
    )

    for aoi in aoiCenters.index:
        data.loc[
            (
                (data['x'] > (aoiCenters.loc[aoi, 'x'] - 0.175)) & \
                (data['x'] < (aoiCenters.loc[aoi, 'x'] + 0.175)) & \
                (data['y'] > (aoiCenters.loc[aoi, 'y'] - 0.175)) & \
                (data['y'] < (aoiCenters.loc[aoi, 'y'] + 0.175))
             ), 'aoi'] = aoi
    return data 

data_et_choice = addAOI(data_et_choice)

In [35]:
def createAOIColumns(data):
    # If amounts are on the left side
    # If the gaze point is in the top option
    data.loc[((data['amountLeft']==1) & (data['LL_top']==1) & (data['aoi']=='TL')), 
             'aoi_amount_LL'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==1) & (data['aoi']=='TR')), 
             'aoi_delay_LL'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==1) & (data['aoi']=='BL')), 
             'aoi_amount_SS'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==1) & (data['aoi']=='BR')), 
             'aoi_delay_SS'] = 1
    
    data.loc[((data['amountLeft']==1) & (data['LL_top']==0) & (data['aoi']=='TL')), 
             'aoi_amount_SS'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==0) & (data['aoi']=='TR')), 
             'aoi_delay_SS'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==0) & (data['aoi']=='BL')), 
             'aoi_amount_LL'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==0) & (data['aoi']=='BR')), 
             'aoi_delay_LL'] = 1
    
    # If amounts are on the right side
    # If the gaze point is in the top option
    data.loc[((data['amountLeft']==0) & (data['LL_top']==1) & (data['aoi']=='TL')), 
             'aoi_delay_LL'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==1) & (data['aoi']=='TR')), 
             'aoi_amount_LL'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==1) & (data['aoi']=='BL')), 
             'aoi_delay_SS'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==1) & (data['aoi']=='BR')),
             'aoi_amount_SS'] = 1

    data.loc[((data['amountLeft']==0) & (data['LL_top']==0) & (data['aoi']=='TL')), 
             'aoi_delay_SS'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==0) & (data['aoi']=='TR')), 
             'aoi_amount_SS'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==0) & (data['aoi']=='BL')), 
             'aoi_delay_LL'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==0) & (data['aoi']=='BR')), 
             'aoi_amount_LL'] = 1
    return data

data_et_choice = createAOIColumns(data_et_choice)

In [36]:
def add_transition_type(data):
    data = data.loc[(pd.notna(data['aoi'])), :]
    data['newAOIIndex'] = 0
    data.loc[(data['aoi_amount_LL']==1), 'newAOIIndex'] = 1
    data.loc[(data['aoi_delay_LL']==1), 'newAOIIndex'] = 2
    data.loc[(data['aoi_amount_SS']==1), 'newAOIIndex'] = 4
    data.loc[(data['aoi_delay_SS']==1), 'newAOIIndex'] = 8
    data.sort_values(by=['run_id', 'withinTaskIndex'])
    # Add a 0 due to the way np.diff works
    data['transition_type'] = np.append(np.diff(data['newAOIIndex']), [0])
    data['transition_type'] = abs(data['transition_type']) 
    return(data)

def cleanTransitions(data):
    indices = []
    for subject in data['run_id'].unique():
        df_subj = data.loc[data['run_id']==subject, :]
        for trial in df_subj['withinTaskIndex'].unique():
            df_thisTrial = data.loc[(
                                    (data['run_id'] == subject) &
                                    (data['withinTaskIndex'] == trial)
                                ), 'transition_type']
            indices.append(df_thisTrial.index.max())
    # last gaze point of each trial
    data.loc[indices, 'transition_type'] = 0 
    return(data)

data_et_choice = add_transition_type(data_et_choice)

data_et_choice = cleanTransitions(data_et_choice)
# View(data_et_choice.tail(20))

data_et_choice

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from 

Unnamed: 0,x,y,t,t_task,run_id,trial_index,condition,rt,stimulus,key_press,...,choseTop,LL_top,choseLL,aoi,aoi_amount_LL,aoi_delay_LL,aoi_amount_SS,aoi_delay_SS,newAOIIndex,transition_type
2,0.622740,0.387974,510486.345,939.735,1.0,145.0,1,4311.830,"""",38.0,...,1.0,1,1.0,TR,,,,,0,0
5,0.396658,0.222469,511919.315,2372.705,1.0,145.0,1,4311.830,"""",38.0,...,1.0,1,1.0,TL,,,,,0,0
49,0.349956,0.349347,552865.440,1985.120,1.0,160.0,1,4445.110,"""",38.0,...,1.0,1,1.0,TL,,,,,0,0
52,0.404848,0.330331,554340.740,3460.420,1.0,160.0,1,4445.110,"""",38.0,...,1.0,1,1.0,TL,,,,,0,0
57,0.595567,0.594855,560653.315,1425.190,1.0,163.0,1,4323.775,"""",38.0,...,1.0,1,1.0,BR,,,,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67076,0.376268,0.385248,1572862.385,2403.530,9.0,377.0,1,4058.970,"""",40.0,...,0.0,0,,TL,,,,1.0,8,0
67078,0.370544,0.604963,1578597.765,0.000,9.0,380.0,1,4012.005,"""",38.0,...,1.0,1,1.0,BL,,,,1.0,8,0
67085,0.387504,0.126841,1588263.015,1580.210,9.0,383.0,1,5681.680,"""",40.0,...,0.0,0,,TL,,,,1.0,8,0
67086,0.384424,0.238046,1589092.415,2409.610,9.0,383.0,1,5681.680,"""",40.0,...,0.0,0,,TL,,,,1.0,8,0


# data_trial

In [37]:
data_trial = data_raw.loc[
    :, 
    [
        'run_id', 'subject', 'chinFirst', 'choiceTask_amountLeftFirst', 
        'rt', 'stimulus',
        'window_width', 'window_height', 'trial_type', 'trial_index',
        'time_elapsed', 
        'webcam_label', 'webcam_fps',
        'webcam_height', 'webcam_width', 'key_press',
        'success', 'x_pos', 'y_pos', 'chin', 'task_nr',
        'trial_duration', 'option_topLeft', 'option_bottomLeft',
        'option_topRight', 'option_bottomRight', 'recorded_at',
        'user_agent', 'device', 'browser', 'browser_version', 'platform',
        'platform_version', 
        'chosenAmount', 'chosenDelay',
        'prolificID', 'age', 'gender', 'ethnic', 'sight', 'glasses', 'degree',
        'eyeshadow', 'masquara', 'eyeliner', 'browliner', 'vertPosition',
        'triedChin', 'keptHead', 'optionalNote', 'window_width_max',
        'window_height_max', 'window_diagonal_max', 'window_diagonal'
    ]
]

## Add variables from data_et

In [38]:
def mergeByTrialIndex(data, largeData, varName):
    if (varName in data_trial.columns):
        print(varName + ' already added!')
    else:
        grouped = largeData.groupby(['run_id', 'trial_index'])[varName].mean() \
            .reset_index()
        
        
        data = data.merge(grouped, on=['run_id', 'trial_index'], how='left') 
    return data

for column in ['x_mean', 'y_mean', 'count', 'offset', 'precision', 'withinTaskIndex']:
    data_trial = mergeByTrialIndex(data_trial, data_et, column)

## Trial duration

In [39]:
def checkTimeDeviation(data, column1, column2, maxTimeDiffAllowed):
    diff = data[column1] - data['trial_duration_exact']
    longtrials_runID = data.loc[diff[diff > maxTimeDiffAllowed].index, 'run_id']
    longtrials_previousrunID = pd.DataFrame(data.loc[diff[diff > maxTimeDiffAllowed].index-1, 'run_id']) \
        .rename(columns={'run_id':'previous_run_id'})
    longtrials_previousrunID.index = longtrials_runID.index
    compare_runIDs = pd.concat([longtrials_runID, longtrials_previousrunID], axis=1)

    if sum(compare_runIDs['run_id'] == compare_runIDs['previous_run_id']) > 0: 
        print(column1 + ' and ' + column2 + ' show a deviation of ' +
              '>' + str(maxTimeDiffAllowed) + 
              ' ms. Please check on the following indices: \n')
        print(compare_runIDs.loc[(compare_runIDs['run_id'] == compare_runIDs['previous_run_id']), :].index)

    else:
        print('Success! ' + column1 + ' and ' + column2 + ' do not deviate by ' +
              '>' + str(maxTimeDiffAllowed) + 'ms.')
        
def exactTrialDuration(data):
    output = data
    output["t_startTrial"] = pd.concat([pd.Series([0]), output["time_elapsed"]], ignore_index=True)
    output["trial_duration_exact"] = output.loc[:, ("time_elapsed")] - output.loc[:, ("t_startTrial")]
    output.drop(len(output)-1)
    
    checkTimeDeviation(data, 'rt', 'trial_duration_exact', 50)
    checkTimeDeviation(data, 'trial_duration', 'trial_duration_exact', 50)
    
    return output

data_trial = exactTrialDuration(data_trial)

Success! rt and trial_duration_exact do not deviate by >50ms.
Success! trial_duration and trial_duration_exact do not deviate by >50ms.


In [40]:
data_trial["fps"] = 1000 * data_trial["count"] / data_trial["trial_duration_exact"]
data_trial

Unnamed: 0,run_id,subject,chinFirst,choiceTask_amountLeftFirst,rt,stimulus,window_width,window_height,trial_type,trial_index,...,window_diagonal,x_mean,y_mean,count,offset,precision,withinTaskIndex,t_startTrial,trial_duration_exact,fps
0,1,229.0,1.0,0.0,3169.830,"Welcome, dear participant! Thank you for your ...",1920.0,920.0,html-button-response,1.0,...,2129.037341,,,,,,,0.0,3283.0,
1,1,229.0,1.0,0.0,5270.860,"""",,,survey-html-form,2.0,...,,,,,,,,3283.0,5272.0,
2,1,229.0,1.0,0.0,7162.285,"Please, use only one monitor for this study.If...",,,html-button-response,3.0,...,,,,,,,,8555.0,7165.0,
3,1,229.0,1.0,0.0,7010.125,Please turn your mobile phone into Airplane Mo...,,,html-button-response,4.0,...,,,,,,,,15720.0,7012.0,
4,1,229.0,1.0,0.0,,"""",1920.0,920.0,eyetracking-init,5.0,...,2129.037341,,,,,,,22732.0,13826.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10362,9,288.0,1.0,0.0,2815.080,You completed the study to 90% Press Space to ...,,,html-keyboard-response,514.0,...,,,,,,,,2189781.0,2817.0,
10363,9,288.0,1.0,0.0,24299.165,"""",,,survey-html-form,515.0,...,,,,,,,,2192598.0,24336.0,
10364,9,288.0,1.0,0.0,7227.500,"""",,,survey-html-form,516.0,...,,,,,,,,2216934.0,7230.0,
10365,9,288.0,1.0,0.0,10693.720,"""",,,survey-html-form,517.0,...,,,,,,,,2224164.0,10710.0,


## data_trial_fixation

In [41]:
data_trial_fixation = data_trial.loc[
    (data_trial['trial_type'] == 'eyetracking-fix-object') &
    (pd.notna(data_et['withinTaskIndex'])) &
    (data_trial['task_nr'] != 3), :] \
    .reset_index(drop=True)
# View(data_trial_fixation.drop(columns=['et_data']).tail(20))

## data_trial_choice

In [42]:
data_trial_choice = data_trial.loc[
    data_trial["trial_type"] == "eyetracking-choice", 
    [
        'run_id', 'chinFirst',
        'trial_duration_exact',
        'window_width', 'window_height', 
        'window_width_max', 'window_height_max',
        'window_diagonal_max', 'window_diagonal', 
        
        'trial_index',  'withinTaskIndex',
        'task_nr',
        'time_elapsed',

        'choiceTask_amountLeftFirst', 
        'key_press', 
        'option_topLeft', 'option_bottomLeft',
        'option_topRight', 'option_bottomRight', 
        'x_mean', 'y_mean', 
        'count', 'fps'        
    ]
]

In [43]:
for column in [
                  'choseTop',
                  'LL_top',
                  'choseLL',
                  'aLL',
                  'aSS',
                  'tLL',
                  'tSS',
                  'amountLeft'
              ]:
    data_trial_choice = mergeByTrialIndex(data_trial_choice, data_et_choice, column)

In [44]:
def addOptionIndex(data):
    if "optionIndex" in data.columns:
        print("Option Index already added!")
        data_output = data
    else:
        grouped = data_et_choice.groupby(['run_id', 'trial_index']) \
            ['aoi', 'aoi_amount_SS', 'aoi_amount_LL', 'aoi_delay_SS', 'aoi_delay_LL'].count() \
            .reset_index() \
            .rename(columns={"aoi": "count"})
        grouped['gazePoints_immediate'] = (grouped['aoi_amount_SS'] + grouped['aoi_delay_SS'])
        grouped['gazePoints_delay'] = (grouped['aoi_amount_LL'] + grouped['aoi_delay_LL'])
        grouped['optionIndex'] = (grouped['gazePoints_immediate'] - grouped['gazePoints_delay']) / \
                                 (grouped['gazePoints_immediate'] + grouped['gazePoints_delay'])
        data_output = data.merge(grouped[['run_id', 'trial_index', 'optionIndex']], 
                                          on=['run_id', 'trial_index'])
    return(data_output)

data_trial_choice = addOptionIndex(data_trial_choice)

  import sys


In [45]:
def addAttributeIndex(data):
    if "attributeIndex" in data.columns:
        print('Attribute Index already added!')
        data_output = data
    else:
        grouped = data_et_choice.groupby(['run_id', 'trial_index']) \
            ['aoi', 'aoi_amount_SS', 'aoi_amount_LL', 'aoi_delay_SS', 'aoi_delay_LL'].count() \
            .reset_index() \
            .rename(columns={"aoi": "count"})
        grouped['gazePoints_amount'] = (grouped['aoi_amount_LL'] + grouped['aoi_amount_SS'])
        grouped['gazePoints_time'] = (grouped['aoi_delay_LL'] + grouped['aoi_delay_SS'])
        grouped['attributeIndex'] = (grouped['gazePoints_amount'] - grouped['gazePoints_time']) / \
                                 (grouped['gazePoints_amount'] + grouped['gazePoints_time'])

        data_output = data.merge(grouped[['run_id', 'trial_index', 'attributeIndex']], 
                                          on=['run_id', 'trial_index'])
    return(data_output)

data_trial_choice = addAttributeIndex(data_trial_choice)

  import sys


In [46]:
transition_count = pd.pivot_table(data_et_choice.loc[:, ['run_id', 'trial_index', 'transition_type']], 
                    index = ['run_id', 'trial_index'],
                    columns = ['transition_type'], 
                    aggfunc = len,
                    fill_value = 0
                    ).reset_index()


In [47]:
def addTransition_type(data_trial, data_et):
    if len(data_et.loc[:, 'transition_type'].unique())<2:
        transition_count = pd.DataFrame(
            columns = ['run_id', 'trial_index', 
                    "trans_type_0",
                    "trans_type_1",
                    "trans_type_2",
                    "trans_type_3",
                    "trans_type_4",
                    "trans_type_6",
                    "trans_type_7"]
        )
    else:
        transition_count = pd.pivot_table(data_et.loc[:, ['run_id', 'trial_index', 'transition_type']], 
                            index = ['run_id', 'trial_index'],
                            columns = ['transition_type'], 
                            aggfunc = len,
                            fill_value = 0
                            ).reset_index()
        transition_count.columns = ['run_id', 'trial_index', 
                    "trans_type_0",
                    "trans_type_1",
                    "trans_type_2",
                    "trans_type_3",
                    "trans_type_4",
                    "trans_type_6",
                    "trans_type_7"]

    if "trans_type_0" in data_trial:
        print("Transitions already added!")
        data_trial = data_trial
    else:
        data_trial = data_trial.merge(transition_count, on=['run_id', 'trial_index']) 
    return(data_trial)

data_trial_choice = addTransition_type(data_trial_choice, data_et_choice)

In [48]:
def addPayneIndex(data):
    if "payneIndex" in data.columns:
        print("PayneIndex already added!")
    else: 
        # Option-wise: amount_LL-delay_LL = 1; amount_SS - delay_SS = 4
        # Attribute-wise: amount_LL-amount_SS = 3; delay_LL - delay_SS = 6
        # Cross: amount_LL-delay_SS = 7; delay_LL - amount_SS = 2
        optionWise_transition = data.loc[:, 'trans_type_1'] + data.loc[:, 'trans_type_4']
        attributeWise_transition = data.loc[:, 'trans_type_3'] + data.loc[:, 'trans_type_6']  
        data['payneIndex'] = (optionWise_transition - attributeWise_transition) / \
            (optionWise_transition + attributeWise_transition)      
    return(data)

data_trial_choice = addPayneIndex(data_trial_choice)

In [49]:
def k(aLL, aSS, tLL):
    k = ((aLL / aSS) - 1) / tLL
    return k

data_trial_choice['k'] = k(data_trial_choice['aLL'], data_trial_choice['aSS'], data_trial_choice['tLL']) 

In [50]:
def cleanChoiceData(data):
    output = data.loc[data['trial_duration_exact'] < 10000, :]
    return output

data_trial_choice = cleanChoiceData(data_trial_choice)
# View(data_trial_choice.tail(20)) 

# data_subject

In [51]:
data_subject = data_raw.loc[: , 
          [
               'run_id', 'subject', 'chinFirst',
               'choiceTask_amountLeftFirst', 'webcam_label', 'webcam_fps',
               'webcam_height', 'webcam_width', 
               'user_agent', 'device', 'browser',
               'browser_version', 'platform', 'platform_version', 
               'chosenAmount', 'chosenDelay', 
               'prolificID', 'age',
               'gender', 'ethnic', 'sight', 'glasses', 'degree', 'eyeshadow',
               'masquara', 'eyeliner', 'browliner', 'vertPosition', 'triedChin',
               'keptHead', 'optionalNote', 'window_width_max', 'window_height_max',
               'window_diagonal_max'
          ]
     ].drop_duplicates()

In [54]:
if 'date' in data_subject.columns:
    print('date already added!')
else:
    output = []

    for subject in data_trial['prolificID'].unique():
        thisSubject = data_trial.loc[data_trial['prolificID']==subject] \
            .reset_index(drop=True)
        date_time_str=thisSubject.loc[0, 'recorded_at']
        date_time_obj = datetime.datetime.strptime(date_time_str, '%Y-%m-%d %H:%M:%S')

        output.append([thisSubject.loc[0, 'run_id'], subject, date_time_obj.date()])
        
    output = pd.DataFrame(output,
             columns=['run_id', 'prolificID', 'date']
            )   
    data_subject = data_subject.merge(output, on=['run_id', 'prolificID'], how='left')

In [55]:
def merge_group_means_by_subject(data, sourceData, varName):
    if np.invert(varName in sourceData.columns): 
        print(varName + ' not in source Data!')
    else:
        if varName in data.columns:
            print(varName + ' already added!')
        else:
            grouped = sourceData.groupby(['run_id']).mean() \
                .reset_index() \
                .loc[:, ['run_id', varName]]        
            data = data.merge(grouped.loc[:, ['run_id', varName]], on=['run_id'], how='left')
    return data

In [56]:
for column in [
    'trial_duration_exact', 
    't_startTrial', 
    'window_width', 
    'window_height',
    'fps'
               ]:
    data_subject = merge_group_means_by_subject(data_subject, data_trial, column)    

In [57]:
for column in [
    'choseLL'
               ]:
    data_subject = merge_group_means_by_subject(data_subject, data_trial_choice, column)    

In [58]:
for column in [

                   'offset',
                   'precision', 
               ]:
    data_subject = merge_group_means_by_subject(data_subject, data_et, column)    

In [59]:
df = data_et_fixation.rename(columns={
                                                'x_mean': 'x_mean_fixTask',
                                                'y_mean': 'y_mean_fixTask'
                                            })  
for column in ['x_mean_fixTask', 'y_mean_fixTask']:
    data_subject = merge_group_means_by_subject(data_subject, df, column)

In [60]:
columns = [
            'chinFirst',
            'eyeshadow', 
            'masquara',
            'eyeliner',
            'browliner',
            'triedChin', 
            'keptHead',
        ]

data_subject[columns] = data_subject[columns].replace({'no': 0, 'yes': 1}) 
data_subject

Unnamed: 0,run_id,subject,chinFirst,choiceTask_amountLeftFirst,webcam_label,webcam_fps,webcam_height,webcam_width,user_agent,device,...,trial_duration_exact,t_startTrial,window_width,window_height,fps,choseLL,offset,precision,x_mean_fixTask,y_mean_fixTask
0,1,229.0,1.0,0.0,Microsoft LifeCam VX-3000 (045e:00f5),30.0,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,3154.351351,829392.4,1920.0,1078.252708,2.030567,0.625,0.278014,0.151708,0.537234,0.535832
1,11,915.0,0.0,1.0,Lenovo EasyCamera (174f:14e8),30.0,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,-861.157996,679069.2,1280.0,718.808664,21.45608,0.304348,0.219583,0.180708,0.539989,0.356839
2,12,908.0,0.0,1.0,Fotocamera HD FaceTime,30.0,480.0,640.0,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6...,Macintosh,...,341.380309,742381.4,1280.0,798.407942,17.99721,0.972973,0.279895,0.009254,0.352149,0.37425
3,13,520.0,1.0,0.0,USB2.0 HD UVC WebCam (13d3:56a2),30.0,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,-209.788054,645605.7,2132.512635,1198.299639,21.704953,0.777778,0.170985,0.174057,0.605638,0.517575
4,19,603.0,0.0,0.0,USB 2.0 Webcam Device (04f2:b521),30.0,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,189.930636,734794.0,1920.0,1078.797834,21.565793,0.833333,0.15151,0.151707,0.521838,0.503481
5,24,338.0,1.0,0.0,Full HD webcam (1bcf:2284),30.0,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,1006.212355,1113784.0,1279.927798,720.873646,15.268646,0.567568,0.174037,0.16618,0.541618,0.42306
6,25,973.0,0.0,1.0,Integrated Webcam (0c45:671e),30.0,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,-184.169884,1120496.0,1536.0,862.462094,13.329251,0.75,0.136703,0.176546,0.54511,0.483952
7,28,161.0,1.0,0.0,HD Webcam (5986:1140),30.0,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,-1242.608108,595741.5,1532.33935,861.292419,24.590346,0.210526,0.174646,0.15394,0.467118,0.555563
8,30,337.0,1.0,0.0,DroidCam Source 3,29.97003,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,192.368726,643789.9,1423.061372,824.841155,14.284995,0.315789,0.202931,0.144134,0.48706,0.455283
9,32,373.0,1.0,1.0,Integrated Camera (174f:244c),30.0,480.0,640.0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,WebKit,...,5992.84749,3673092.0,1536.0,862.462094,19.953132,0.972222,0.157183,0.16215,0.491835,0.49989


In [61]:
# View(data_subject)

# Add data_subject variables to data_trial variables

In [62]:
def merge_uniques_by_subject(data, sourceData, varName):
    if np.invert(varName in sourceData.columns): 
        print(varName + ' not in source Data!')
    else:
        if varName in data.columns:
            print(varName + ' already added!')
        else:
            grouped = sourceData.loc[:, 
                [
                    'run_id', varName
                ]
            ] \
            .reset_index(drop=True) \
            .drop_duplicates()     
            data = data.merge(grouped.loc[:, ['run_id', varName]], on=['run_id'], how='left')
    return data

for column in ['age', 'gender', 'ethnic', 'degree']:
    data_trial_choice = merge_uniques_by_subject(data_trial_choice, data_subject, column)
data_trial_choice

Unnamed: 0,run_id,chinFirst,trial_duration_exact,window_width,window_height,window_width_max,window_height_max,window_diagonal_max,window_diagonal,trial_index,...,trans_type_3,trans_type_4,trans_type_6,trans_type_7,payneIndex,k,age,gender,ethnic,degree
0,1,1.0,4326.0,1920.0,1080.0,1920.0,1080.0,2202.907170,2202.907170,145.0,...,0,0,0,0,,,1995.0,male,caucasian,highSchool
1,1,1.0,4457.0,1920.0,1080.0,1920.0,1080.0,2202.907170,2202.907170,160.0,...,0,0,0,0,,,1995.0,male,caucasian,highSchool
2,1,1.0,4334.0,1920.0,1080.0,1920.0,1080.0,2202.907170,2202.907170,163.0,...,0,0,0,0,,,1995.0,male,caucasian,highSchool
3,1,1.0,4380.0,1920.0,1080.0,1920.0,1080.0,2202.907170,2202.907170,196.0,...,0,0,0,0,,,1995.0,male,caucasian,highSchool
4,1,1.0,3521.0,1920.0,1080.0,1920.0,1080.0,2202.907170,2202.907170,199.0,...,0,0,0,0,,,1995.0,male,caucasian,highSchool
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1498,9,1.0,5671.0,1600.0,900.0,1600.0,900.0,1835.755975,1835.755975,365.0,...,0,0,0,0,,inf,1976.0,male,caucasian,grad
1499,9,1.0,4370.0,1600.0,900.0,1600.0,900.0,1835.755975,1835.755975,368.0,...,0,0,0,0,,inf,1976.0,male,caucasian,grad
1500,9,1.0,4078.0,1600.0,900.0,1600.0,900.0,1835.755975,1835.755975,377.0,...,0,0,0,0,,inf,1976.0,male,caucasian,grad
1501,9,1.0,4020.0,1600.0,900.0,1600.0,900.0,1835.755975,1835.755975,380.0,...,0,0,0,0,,inf,1976.0,male,caucasian,grad


# Export data

In [63]:
if not os.path.exists('./data_jupyter'):
    os.mkdir('./data_jupyter')

data_et.to_csv("data_jupyter/data_et.csv", index=False, header=True)
data_et_fixation.to_csv("data_jupyter/data_et_fixation.csv", index=False, header=True)
data_et_choice.to_csv("data_jupyter/data_et_choice.csv", index=False, header=True)

data_trial.to_csv("data_jupyter/data_trial.csv", index=False, header=True)
data_trial_fixation.to_csv("data_jupyter/data_trial_fixation.csv", index=False, header=True)
data_trial_choice.to_csv("data_jupyter/data_trial_choice.csv", index=False, header=True)

data_subject.to_csv("data_jupyter/data_subject.csv", index=False, header=True)

MatLab input

In [64]:
if not os.path.exists('./amasino_dataPrep/data_source'):
    os.mkdir('./amasino_dataPrep/data_source')

data_et_choice['fixationCounter'] = 1
data_et_choice.loc[:, 
                       [
                           'run_id', 
                           'withinTaskIndex', 
                           'x', 
                           'y', 
                           't_task', 
                           'window_width', 
                           'window_height',
                       ]
                  ] \
   .to_csv("amasino_dataPrep/data_source/schneegansEtAl_ET.csv", index=False, header=False)

In [65]:
data_trial_choice.loc[:, 
                          [
                              'run_id', 
                              'aSS', 
                              'aLL', 
                              'tSS', 
                              'tLL', 
                              'choseLL', 
                              'trial_duration_exact', 
                              'LL_top',
                              'choseTop'
                          ]
                     ] \
    .to_csv("amasino_dataPrep/data_source/schneegansEtAl_behavior.csv", index=False, header=False)

In [66]:
data_trial_choice.loc[:, 
                       [
                           'run_id', 
                           'withinTaskIndex', 
                           'optionIndex', 
                           'attributeIndex', 
                           'payneIndex'
                       ]
                  ] \
    .fillna(0) \
    .to_csv("amasino_dataPrep/intermediateCSVs/ET_indices.csv", index=False, header=False)

In [67]:
data_subject.loc[:, ['run_id', 'choseLL']] \
    .to_csv("amasino_dataPrep/intermediateCSVs/percLeft.csv", index=False, header=False)

# Feedback

In [68]:
print('Success! Script ran through')

Success! Script ran through
