In [57]:
# import sys
# !conda install --yes --prefix {sys.prefix} pingouin

In [58]:
import datetime
import numpy as np
import math
import os
import pandas as pd
import pingouin as pg
import re
import seaborn as sns
import json
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm 
import statsmodels.formula.api as smf
import statsmodels.graphics.api as smg
import sys


if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO
    
from IPython.display import HTML
def View(df):
    css = """<style>
    table { border-collapse: collapse; border: 3px solid #eee; }
    table tr th:first-child { background-color: #eeeeee; color: #333; font-weight: bold }
    table thead th { background-color: #eee; color: #000; }
    tr, th, td { border: 1px solid #ccc; border-width: 1px 0 0 1px; border-collapse: collapse;
    padding: 3px; font-family: monospace; font-size: 10px }</style>
    """
    s  = '<script type="text/Javascript">'
    s += 'var win = window.open("", "Title", "toolbar=no, location=no, directories=no, status=no, menubar=no, scrollbars=yes, resizable=yes, width=780, height=200, top="+(screen.height-400)+", left="+(screen.width-840));'
    s += 'win.document.body.innerHTML = \'' + (df.to_html() + css).replace("\n",'\\') + '\';'
    s += '</script>'
    return(HTML(s+css))    

os.chdir(r'C:\Users\User\GitHub\WebET_Analysis')
print("Current Working directory " , os.getcwd())

Current Working directory  C:\Users\User\GitHub\WebET_Analysis


In [59]:
# from IPython.display import HTML

# HTML('''<script>
# code_show=true; 
# function code_toggle() {
#  if (code_show){
#  $('div.input').hide();
#  } else {
#  $('div.input').show();
#  }
#  code_show = !code_show
# } 
# $( document ).ready(code_toggle);
# </script>
# <form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

# Read Data

In [60]:
data_et = pd.read_csv(r'C:/Users/User/GitHub/WebET_Analysis/data_jupyter/data_et.csv')
data_trial = pd.read_csv(r'C:/Users/User/GitHub/WebET_Analysis/data_jupyter/data_trial.csv')
data_subject = pd.read_csv(r'C:/Users/User/GitHub/WebET_Analysis/data_jupyter/data_subject.csv')

print(pd.DataFrame([[len(data_et)], [len(data_trial)], [len(data_subject)]], 
                   columns=['length'],
                   index=['data_et', 'data_trial', 'data_subject'])
     )

              length
data_et       618566
data_trial     30362
data_subject      84


In [61]:
data_trial.columns

Index(['run_id', 'subject', 'prolificID', 'chinFirst', 'trial_index',
       'trial_type', 'task_nr', 'rt', 'stimulus', 'key_press', 'time_elapsed',
       'trial_duration', 'recorded_at', 'window_width', 'window_height',
       'success', 'chin', 'x_pos', 'y_pos', 'choiceTask_amountLeftFirst',
       'option_topLeft', 'option_bottomLeft', 'option_topRight',
       'option_bottomRight', 'chosenAmount', 'chosenDelay',
       'window_height_max_x', 'window_diagonal_max_x', 'window_diagonal',
       't_startTrial', 'trial_duration_exact', 'fps', 'window_height_max_y',
       'window_diagonal_max_y', 'window_width_max', 'window_height_max',
       'window_diagonal_max', 'withinTaskIndex', 'x_count'],
      dtype='object')

# data_trial_choice

In [62]:
data_trial_choice = data_trial.loc[
    data_trial['trial_type']=='eyetracking-choice', 
    [
        'run_id', 'chinFirst', 
        'task_nr', 
        'trial_index', 'trial_type', 'withinTaskIndex', 
        'choiceTask_amountLeftFirst', 
        'option_topLeft', 'option_bottomLeft',
        'option_topRight', 'option_bottomRight',
        'key_press', 'trial_duration_exact',
        'window_width', 'window_height',
        'fps'
    ]
]

## Cleaning

Enough trials?

In [63]:
NTrials = data_trial_choice.groupby(['run_id'])['withinTaskIndex'].max() \
    .reset_index()
subjects_not_enough_trials = NTrials.loc[NTrials['withinTaskIndex']<40, 'run_id']
print('These subjects do not have enough trials: \n' + 
      str(NTrials.loc[NTrials['withinTaskIndex']<40, :])
     )

These subjects do not have enough trials: 
    run_id  withinTaskIndex
55    2002              4.0


Trials too long

In [64]:
data_trial_choice.loc[data_trial_choice['trial_duration_exact']>10000, :]

Unnamed: 0,run_id,chinFirst,task_nr,trial_index,trial_type,withinTaskIndex,choiceTask_amountLeftFirst,option_topLeft,option_bottomLeft,option_topRight,option_bottomRight,key_press,trial_duration_exact,window_width,window_height,fps
787,103,0.0,3.0,270.0,eyetracking-choice,1.0,0.0,180.0,0.0,4.5,3.0,40.0,12336.0,1536.0,864.0,21.400778
6013,25,0.0,3.0,303.0,eyetracking-choice,12.0,1.0,4.5,5.0,0.0,7.0,38.0,10705.0,1536.0,864.0,14.666044
9192,38,1.0,2.0,374.0,eyetracking-choice,77.0,1.0,0.0,1.0,3.0,4.0,40.0,12120.0,1920.0,1080.0,
9728,4,0.0,3.0,391.0,eyetracking-choice,41.0,1.0,90.0,0.0,4.0,2.5,40.0,10375.0,1920.0,1080.0,10.698795
9740,4,0.0,3.0,403.0,eyetracking-choice,45.0,1.0,0.0,30.0,2.5,5.0,40.0,18030.0,1920.0,1007.0,10.704382
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29558,2008,1.0,2.0,231.0,eyetracking-choice,30.0,1.0,5.0,1.5,30.0,0.0,40.0,26293.0,1920.0,1080.0,12.893165
29561,2008,1.0,2.0,234.0,eyetracking-choice,31.0,1.0,4.5,2.0,180.0,0.0,40.0,14377.0,1920.0,1080.0,13.006886
29592,2008,1.0,2.0,265.0,eyetracking-choice,41.0,1.0,30.0,0.0,4.5,4.0,40.0,12630.0,1920.0,1080.0,12.905780
29658,2008,1.0,2.0,331.0,eyetracking-choice,63.0,1.0,0.0,90.0,2.5,5.0,38.0,12765.0,1920.0,1080.0,12.769291


Kept head still?

In [65]:
data_subject.loc[(data_subject['keptHead']==0), ['run_id', 'prolificID', 'keptHead']]

Unnamed: 0,run_id,prolificID,keptHead
81,2002,Tim2,0.0
82,2008,Studie1970,0.0


Filter

In [66]:
cleanSubjects = data_subject.loc[
        (data_subject['status'].isin(['APPROVED', 'NOTPROLIFIC'])) &
        (data_subject['keptHead']!=0) &
        pd.notna(data_subject['fps']) &
        ~(data_subject['run_id'].isin(subjects_not_enough_trials)), 
        'run_id'
    ].unique()
print(cleanSubjects)
print(len(cleanSubjects))

[   1  103   11   12  126  128   13  130  131   19   24   25   28   30
   32   36   37    4   41   42   43   45   47   48    5   58   59    6
   61   63   66   67    7   70   74   80   81   85    9   91   92   93
   94   96   97 1000 1011 1021 1003 1008 2011 2012 2013 2009]
54


In [67]:
data_trial_choice = data_trial_choice.loc[
    (data_trial_choice['trial_duration_exact']<10000) &
    (data_trial_choice['run_id'].isin(cleanSubjects)), :
]

# Choice options

In [68]:
def reformatChoiceOptions(data):
    data['amountLeft']=0
    data.loc[(data['choiceTask_amountLeftFirst']==1) &
             (data['withinTaskIndex'] <41), 
             'amountLeft'] = 1

    data['aSS'] = \
        data.loc[:, ["option_topRight", "option_bottomRight"]].values.min(1)
    data.loc[data['amountLeft'] == 1, 'aSS'] = \
        data.loc[data['amountLeft'] == 1, ["option_topLeft", "option_bottomLeft"]].values.min(1)
    
    data['aLL'] = \
        data.loc[:, ["option_topLeft", "option_bottomLeft"]].values.max(1)
    data.loc[data['amountLeft'] == 0, 'aLL'] = \
        data.loc[data['amountLeft'] == 0, ["option_topRight", "option_bottomRight"]].values.max(1)

    data.loc[:, "tSS"] = 0 
    
    data['tLL'] = \
        data.loc[:, ["option_topRight", "option_bottomRight"]].values.max(1)
    data.loc[data['amountLeft'] == 0, 'tLL'] = \
        data.loc[data['amountLeft'] == 0, ["option_topLeft", "option_bottomLeft"]].values.max(1)
    
    data['LL_top'] = (data["option_topLeft"] > data["option_bottomLeft"]).astype(int)
        
    data["choseTop"] = 0
    data.loc[(data["key_press"]==38), "choseTop"] = 1

    data["choseLL"] = 0
    data.loc[(data["choseTop"]==1) & (data["LL_top"] == 1), "choseLL"] = 1
    
    return(data)

data_trial_choice = reformatChoiceOptions(data_trial_choice)

In [69]:
def merge_by_subject(data, large_data, varName):
    if varName in data.columns: data = data.drop(columns=[varName])
    grouped = large_data.groupby(['run_id'])[varName].mean() \
        .reset_index()        
    data = data.merge(grouped, on=['run_id'], how='left')
    return data

data_subject = merge_by_subject(data_subject, data_trial_choice, 'choseLL')

# k

In [70]:
def k(aLL, aSS, tLL):
    k = ((aLL / aSS) - 1) / tLL
    return k

data_trial_choice['k'] = k(data_trial_choice['aLL'], data_trial_choice['aSS'], data_trial_choice['tLL']) 
data_trial_choice

Unnamed: 0,run_id,chinFirst,task_nr,trial_index,trial_type,withinTaskIndex,choiceTask_amountLeftFirst,option_topLeft,option_bottomLeft,option_topRight,...,fps,amountLeft,aSS,aLL,tSS,tLL,LL_top,choseTop,choseLL,k
144,1,1.0,2.0,145.0,eyetracking-choice,1.0,0.0,30.0,0.0,4.0,...,2.080444,0,3.0,4.0,0,30.0,1,1,1,0.011111
147,1,1.0,2.0,148.0,eyetracking-choice,2.0,0.0,30.0,0.0,5.0,...,2.044990,0,3.5,5.0,0,30.0,1,1,1,0.014286
150,1,1.0,2.0,151.0,eyetracking-choice,3.0,0.0,0.0,180.0,2.5,...,2.045455,0,2.5,5.0,0,180.0,0,0,0,0.005556
153,1,1.0,2.0,154.0,eyetracking-choice,4.0,0.0,180.0,0.0,4.5,...,2.072300,0,4.0,4.5,0,180.0,1,0,0,0.000694
156,1,1.0,2.0,157.0,eyetracking-choice,5.0,0.0,15.0,0.0,4.5,...,2.047782,0,4.0,4.5,0,15.0,1,0,0,0.008333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30339,2009,0.0,3.0,495.0,eyetracking-choice,76.0,0.0,4.0,2.5,90.0,...,13.240858,0,0.0,90.0,0,4.0,1,1,1,inf
30342,2009,0.0,3.0,498.0,eyetracking-choice,77.0,0.0,4.5,1.0,180.0,...,12.753188,0,0.0,180.0,0,4.5,1,1,1,inf
30345,2009,0.0,3.0,501.0,eyetracking-choice,78.0,0.0,4.0,3.0,90.0,...,12.531328,0,0.0,90.0,0,4.0,1,1,1,inf
30348,2009,0.0,3.0,504.0,eyetracking-choice,79.0,0.0,2.0,4.5,0.0,...,12.742718,0,0.0,7.0,0,4.5,0,0,0,inf


# data_et_choice

In [71]:
temp = data_et.merge(data_trial_choice.loc[:, 
                                           [
                                               'run_id', 'trial_index', 'trial_type',
                                               'amountLeft', 'LL_top', 'withinTaskIndex'
                                           ]
                                          ], 
                    on=['run_id', 'trial_index'],
                    how='left')

data_et_choice = temp.loc[temp['trial_type']=='eyetracking-choice', 
                          [
                              'run_id', 'trial_index', 
                              'x', 'y', 't', 't_task',
                              'amountLeft', 'LL_top', 'withinTaskIndex'
                          ]
                         ] \
    .reset_index(drop=True)

## Cleaning

In [72]:
def cleanETData(data, cleanSubjects):
    return data.loc[
        (data['x']>0) & (data['x']<1) &(data['y']>0) & (data['y']<1) &
        (data['run_id'].isin(cleanSubjects)), :] \
        .reset_index(drop=True)

print('data_et_choice')
print('raw: ' + str(len(data_et_choice)))
data_et_choice = cleanETData(data_et_choice, cleanSubjects)
print('cleaned: ' + str(len(data_et_choice)))

data_et_choice
raw: 144884
cleaned: 141719


# Look direction

In [73]:
def lookDirections(data):
    data["look_left"] = (data["x"] < 0.5).astype(int)
    data["look_top"] = (data["y"] < 0.5).astype(int)
    return data

data_et_choice = lookDirections(data_et_choice)

# AOIs

In [74]:
def addAOI(data): 
    aoiCenters = pd.DataFrame(
        [
            [(0.05+0.9*0.2), 0.25],
            [(0.05+0.9*0.8), 0.25],
            [(0.05+0.9*0.2), 0.75],
            [(0.05+0.9*0.8), 0.75]
        ], 
        columns = ['x', 'y'],
        index = ['TL', 'TR', 'BL', 'BR']
    )
    
    data['aoi'] = 0 
    for aoi in aoiCenters.index:
        data.loc[
            (
                (data['x'] > (aoiCenters.loc[aoi, 'x'] - 0.175)) & \
                (data['x'] < (aoiCenters.loc[aoi, 'x'] + 0.175)) & \
                (data['y'] > (aoiCenters.loc[aoi, 'y'] - 0.175)) & \
                (data['y'] < (aoiCenters.loc[aoi, 'y'] + 0.175))
             ), 'aoi'] = aoi
    return data 

data_et_choice = addAOI(data_et_choice)
data_et_choice['aoi'].unique()

array([0, 'TR', 'TL', 'BR', 'BL'], dtype=object)

In [75]:
def createAOIColumns(data):
    data['aoi_aLL'] = 0
    data['aoi_tLL'] = 0 
    data['aoi_aSS'] = 0 
    data['aoi_tSS'] = 0
    
    # If amounts are on the left side
    # If the gaze point is in the top option
    data.loc[((data['amountLeft']==1) & (data['LL_top']==1) & (data['aoi']=='TL')), 
             'aoi_aLL'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==1) & (data['aoi']=='TR')), 
             'aoi_tLL'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==1) & (data['aoi']=='BL')), 
             'aoi_aSS'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==1) & (data['aoi']=='BR')), 
             'aoi_tSS'] = 1
    
    data.loc[((data['amountLeft']==1) & (data['LL_top']==0) & (data['aoi']=='TL')), 
             'aoi_aSS'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==0) & (data['aoi']=='TR')), 
             'aoi_tSS'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==0) & (data['aoi']=='BL')), 
             'aoi_aLL'] = 1
    data.loc[((data['amountLeft']==1) & (data['LL_top']==0) & (data['aoi']=='BR')), 
             'aoi_tLL'] = 1
    
    # If amounts are on the right side
    # If the gaze point is in the top option
    data.loc[((data['amountLeft']==0) & (data['LL_top']==1) & (data['aoi']=='TL')), 
             'aoi_tLL'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==1) & (data['aoi']=='TR')), 
             'aoi_aLL'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==1) & (data['aoi']=='BL')), 
             'aoi_tSS'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==1) & (data['aoi']=='BR')),
             'aoi_aSS'] = 1

    data.loc[((data['amountLeft']==0) & (data['LL_top']==0) & (data['aoi']=='TL')), 
             'aoi_tSS'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==0) & (data['aoi']=='TR')), 
             'aoi_aSS'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==0) & (data['aoi']=='BL')), 
             'aoi_tLL'] = 1
    data.loc[((data['amountLeft']==0) & (data['LL_top']==0) & (data['aoi']=='BR')), 
             'aoi_aLL'] = 1
    return data

data_et_choice = createAOIColumns(data_et_choice)

# Eye-Tracking indices

##  Option Index

In [95]:
def addOptionIndex(data, data_et_choice):

    grouped = data_et_choice.groupby(['run_id', 'trial_index']) \
        ['aoi_aSS', 'aoi_aLL', 'aoi_tSS', 'aoi_tLL'].sum() \
        .reset_index() 
    
    grouped['gazePoints_immediate'] = (grouped['aoi_aSS'] + grouped['aoi_tSS'])
    grouped['gazePoints_delay'] = (grouped['aoi_aLL'] + grouped['aoi_tLL'])
    grouped['optionIndex'] = (grouped['gazePoints_immediate'] - grouped['gazePoints_delay']) / \
                             (grouped['gazePoints_immediate'] + grouped['gazePoints_delay'])

    if "optionIndex" in data.columns: data = data.drop(columns=['optionIndex'])

    data = data.merge(grouped[['run_id', 'trial_index', 'optionIndex']], 
                             on=['run_id', 'trial_index'])
    return(data)

data_trial_choice = addOptionIndex(data_trial_choice, data_et_choice)
data_trial_choice['optionIndex'].describe()

  after removing the cwd from sys.path.


count    3714.000000
mean       -0.047728
std         0.743975
min        -1.000000
25%        -0.826892
50%        -0.085714
75%         0.666667
max         1.000000
Name: optionIndex, dtype: float64

## Attribute Index

In [97]:
def addAttributeIndex(data, data_et_choice):

    grouped = data_et_choice.groupby(['run_id', 'trial_index']) \
        ['aoi', 'aoi_aSS', 'aoi_aLL', 'aoi_tSS', 'aoi_tLL'].sum() \
        .reset_index()
    grouped['gazePoints_amount'] = (grouped['aoi_aLL'] + grouped['aoi_aSS'])
    grouped['gazePoints_time'] = (grouped['aoi_tLL'] + grouped['aoi_tSS'])
    grouped['attributeIndex'] = \
        (grouped['gazePoints_amount'] - grouped['gazePoints_time']) / \
        (grouped['gazePoints_amount'] + grouped['gazePoints_time'])

    if "attributeIndex" in data.columns: data = data.drop(columns=['attributeIndex'])
    data_output = data.merge(grouped[['run_id', 'trial_index', 'attributeIndex']], 
                             on=['run_id', 'trial_index'])
    return(data_output)

data_trial_choice = addAttributeIndex(data_trial_choice, data_et_choice)
data_trial_choice['attributeIndex'].describe()

  after removing the cwd from sys.path.


count    3714.000000
mean        0.172333
std         0.686453
min        -1.000000
25%        -0.304348
50%         0.200000
75%         1.000000
max         1.000000
Name: attributeIndex, dtype: float64

## Payne Index

### Transitions between AOIs

In [137]:
def et_data_transition_type(data):
    data = data.loc[
        pd.notna(data['aoi']) &
        (data['aoi']!=0), :]
    data['newAOIIndex'] = 0
    data.loc[(data['aoi_aLL']==1), 'newAOIIndex'] = 1
    data.loc[(data['aoi_tLL']==1), 'newAOIIndex'] = 2
    data.loc[(data['aoi_aSS']==1), 'newAOIIndex'] = 4
    data.loc[(data['aoi_tSS']==1), 'newAOIIndex'] = 8
    data.sort_values(by=['run_id', 'withinTaskIndex'])
    # Add a 0 due to the way np.diff works
    data['transition_type'] = np.append([0], np.diff(data['newAOIIndex']))
    data['transition_type'] = abs(data['transition_type']) 

    data.loc[data['t_task']==0, 'transition_type'] = 0

    return data.loc[:, ['run_id', 'trial_index', 't_task', 'transition_type']]


In [138]:
def addTransition_type(data, data_et):
    data_et = et_data_transition_type(data_et)
    data_et.loc[:, 'transition_type'] = data_et.loc[:, 'transition_type']
    
    transition_count = pd.pivot_table(
        data_et.loc[:, ['run_id', 'trial_index', 'transition_type']], 
        index = ['run_id', 'trial_index'],
        columns = ['transition_type'], 
        aggfunc = len,
        fill_value = 0) \
        .reset_index() \
        .rename(columns={
        0: "trans_type_0",
        1: "trans_type_aLLtLL",
        2: "trans_type_tLLaSS",
        3: "trans_type_aLLaSS",
        4: "trans_type_aSStSS",
        6: "trans_type_tLLtSS",
        7: "trans_type_aLLtSS"
    })

    if "trans_type_0" in data: data = data.drop(columns=[
        "trans_type_0", "trans_type_aLLtLL", "trans_type_tLLaSS", "trans_type_aLLaSS",
        "trans_type_aSStSS", "trans_type_tLLtSS", "trans_type_aLLtSS"])
        
    data = data.merge(transition_count, on=['run_id', 'trial_index']) 
    return(data)

data_trial_choice = addTransition_type(data_trial_choice, data_et_choice)
data_trial_choice.loc[
    data_trial_choice['fps']>15, 
    [
        'run_id', 'trial_index', 'trans_type_0', 'trans_type_aLLtLL', 'trans_type_tLLaSS',
        'trans_type_aLLaSS', 'trans_type_aSStSS', 'trans_type_tLLtSS',
        'trans_type_aLLtSS'
    ]
]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation

Unnamed: 0,run_id,trial_index,trans_type_0,trans_type_aLLtLL,trans_type_tLLaSS,trans_type_aLLaSS,trans_type_aSStSS,trans_type_tLLtSS,trans_type_aLLtSS
34,103,273.0,44,4,2,3,2,2,0
35,103,276.0,72,7,1,3,4,1,1
36,103,279.0,76,5,1,3,4,1,0
37,103,282.0,45,1,2,1,2,1,0
38,103,285.0,32,2,0,2,2,0,0
...,...,...,...,...,...,...,...,...,...
3630,2013,486.0,11,0,0,0,0,0,0
3631,2013,489.0,2,0,0,1,0,0,0
3632,2013,492.0,12,0,0,0,0,0,0
3633,2013,498.0,10,0,0,1,0,0,0


In [142]:
def addPayneIndex(data):
    if "payneIndex" in data.columns: data = data.drop(columns='payneIndex')
    optionWise_transition = data.loc[:, 'trans_type_aLLtLL'] + data.loc[:, 'trans_type_aSStSS']
    attributeWise_transition = data.loc[:, 'trans_type_aLLaSS'] + data.loc[:, 'trans_type_tLLtSS']  
    data['payneIndex'] = \
        (optionWise_transition - attributeWise_transition) / \
        (optionWise_transition + attributeWise_transition) 
    data['payneIndex'] = data['payneIndex'].fillna(0)
    return(data)

data_trial_choice = addPayneIndex(data_trial_choice)

print(data_trial_choice.columns)
print(data_trial_choice['payneIndex'].describe())

data_trial_choice.loc[
    data_trial_choice['fps']>15, 
    ['run_id', 'trial_duration_exact',
     'trans_type_aLLtLL', 'trans_type_aSStSS', 'trans_type_aLLaSS', 'trans_type_tLLtSS',
     'payneIndex'
    ]
]

Index(['run_id', 'chinFirst', 'task_nr', 'trial_index', 'trial_type',
       'withinTaskIndex', 'choiceTask_amountLeftFirst', 'option_topLeft',
       'option_bottomLeft', 'option_topRight', 'option_bottomRight',
       'key_press', 'trial_duration_exact', 'window_width', 'window_height',
       'fps', 'amountLeft', 'aSS', 'aLL', 'tSS', 'tLL', 'LL_top', 'choseTop',
       'choseLL', 'k', 'optionIndex', 'attributeIndex', 'cluster2', 'cluster3',
       'cluster4', 'trans_type_0', 'trans_type_aLLtLL', 'trans_type_tLLaSS',
       'trans_type_aLLaSS', 'trans_type_aSStSS', 'trans_type_tLLtSS',
       'trans_type_aLLtSS', 'payneIndex'],
      dtype='object')
count    3714.000000
mean        0.031493
std         0.634506
min        -1.000000
25%        -0.312500
50%         0.000000
75%         0.333333
max         1.000000
Name: payneIndex, dtype: float64


Unnamed: 0,run_id,trial_duration_exact,trans_type_aLLtLL,trans_type_aSStSS,trans_type_aLLaSS,trans_type_tLLtSS,payneIndex
34,103,4929.0,4,2,3,2,0.090909
35,103,7106.0,7,4,3,1,0.466667
36,103,6489.0,5,4,3,1,0.384615
37,103,3878.0,1,2,1,1,0.200000
38,103,2967.0,2,2,2,0,0.333333
...,...,...,...,...,...,...,...
3630,2013,1074.0,0,0,0,0,0.000000
3631,2013,974.0,0,0,1,0,-1.000000
3632,2013,1722.0,0,0,0,0,0.000000
3633,2013,1222.0,0,0,1,0,-1.000000


## Aggregate on subject-level

In [None]:
data_subject = merge_by_subject(data_subject, data_trial_choice, 'attributeIndex')
data_subject = merge_by_subject(data_subject, data_trial_choice, 'optionIndex')
data_subject = merge_by_subject(data_subject, data_trial_choice, 'payneIndex')

# Reaction time on subject-level

In [None]:
grouped = data_trial_choice.groupby(['run_id'])['trial_duration_exact'].mean() \
    .reset_index() \
    .rename(columns={'trial_duration_exact': 'choice_rt'})

if 'choice_rt' in data_subject.columns: data_subject = data_subject.drop(columns=['choice_rt'])
data_subject = data_subject.merge(grouped, on='run_id', how='left')
data_subject['choice_rt'].describe()

# Clusters

In [153]:
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data_trial_choice.loc[
    :, 
    [
       'trans_type_0',
       'trans_type_aLLtLL', 'trans_type_tLLaSS', 'trans_type_aLLaSS',
       'trans_type_aSStSS', 'trans_type_tLLtSS']
    ]
)

In [154]:
def clusters(n_clusters):
    kmeans = KMeans(
        init="random",
        n_clusters=n_clusters,
        n_init=10,
        max_iter=300,
        random_state=42
    )
    kmeans.fit(scaled_features)
            
#     https://realpython.com/k-means-clustering-python/
#     print(kmeans.inertia_)
#     print(kmeans.cluster_centers_)
#     print(kmeans.n_iter_)

    return kmeans.labels_

In [155]:
output = []
for n_cluster in range(2, 5):
    data_trial_choice['cluster' + str(n_cluster)] = clusters(n_cluster)
    X = data_trial_choice[["run_id", "withinTaskIndex", 'cluster' + str(n_cluster)]]
    X_ = sm.add_constant(X)
    y = 1-data_trial_choice[["choseLL"]]  
    log_reg = sm.Logit(y, X_).fit() 
    output.append([n_cluster, log_reg.bic, log_reg.aic]) 

output = pd.DataFrame(output, columns = ['n_clusters', 'BIC', 'AIC']) \
    .set_index('n_clusters')
output

Optimization terminated successfully.
         Current function value: 0.657236
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.657528
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.657625
         Iterations 4


Unnamed: 0_level_0,BIC,AIC
n_clusters,Unnamed: 1_level_1,Unnamed: 2_level_1
2,4914.82594,4889.946481
3,4916.995649,4892.11619
4,4917.71695,4892.837491


# Export data

In [156]:
if not os.path.exists('./data_jupyter'):
    os.mkdir('./data_jupyter')

data_et_choice.to_csv("data_jupyter/data_et_choice.csv", index=False, header=True)
data_trial_choice.to_csv("data_jupyter/data_trial_choice.csv", index=False, header=True)
data_subject.to_csv("data_jupyter/data_subject.csv", index=False, header=True)

MatLab input

In [157]:
if not os.path.exists('./amasino_dataPrep/data_source'):
    os.mkdir('./amasino_dataPrep/data_source')

data_et_choice['fixationCounter'] = 1
data_et_choice.loc[:, 
                       [
                           'run_id', 
                           'withinTaskIndex', 
                           'x', 
                           'y', 
                           't_task'
                       ]
                  ] \
   .to_csv("amasino_dataPrep/data_source/schneegansEtAl_ET.csv", index=False, header=False)

In [158]:
data_trial_choice.loc[:, 
                       [
                           'run_id', 
                           'withinTaskIndex', 
                           'optionIndex', 
                           'attributeIndex', 
                           'payneIndex'
                       ]
                  ] \
    .fillna(0) \
    .to_csv("amasino_dataPrep/intermediateCSVs/ET_indices.csv", index=False, header=False)

In [159]:
data_trial_choice.loc[:, 
                          [
                              'run_id', 
                              'aSS', 
                              'aLL', 
                              'tSS', 
                              'tLL', 
                              'choseLL', 
                              'trial_duration_exact', 
                              'LL_top',
                              'choseTop'
                          ]
                     ] \
    .to_csv("amasino_dataPrep/data_source/schneegansEtAl_behavior.csv", index=False, header=False)

In [160]:
data_subject.loc[:, ['run_id', 'choseLL']] \
    .to_csv("amasino_dataPrep/intermediateCSVs/percLeft.csv", index=False, header=False)

# Feedback

In [161]:
print('Success! Script ran through')

Success! Script ran through
