# Load packages

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline

# Define Functions

## Function for preprocessing the gainloss data

In [2]:
def preprocess_gainloss(df):
    
    #drop columns Unnamed, id, AID, HID, session_id, practice, loss_or_reward, instruction_number, est_left_over_right
    df = df.drop(['Unnamed: 0', 'id', 'AID', 'HID', 'session_id', 'practice', 'loss_or_reward', 'instruct_number', 'est_left_over_right', 'prop_left', 'prop_right'], axis = 1)
    #calculate revealed prop_left and prop_right from revealed tokens
    df['prop_left'] = df['revealed_x_l']/(df['revealed_x_l'] + df['revealed_o_l'])
    df['prop_right'] = df['revealed_x_r']/(df['revealed_x_r'] + df['revealed_o_r'])
    #add column with percentage revealed in ambiguous urn (=1 when both urns are unambiguous) and calculate how many tokens were presented in ambigupus urn (info_ambi) + the sqrt transformation (P)
    df['revealed_ambi'] =df[['revealed_left','revealed_right']].min(axis = 1)
    df['info'] = df['revealed_ambi']*50
    df['P'] = np.sqrt(df['info'])
    #indicate whether trial is gain or loss
    df['gain'] = (df['mag_left']>0)
    #add sections 
    df['section'] = df['trial_number']
    df.loc[df['section'] < 51, 'section'] = 1 
    df.loc[df['section'] > 100, 'section'] = 3 
    df.loc[df['section'] > 3, 'section'] = 2
    
    #gainloss_df['sections'] = gainloss_df.loc[gainloss_df['trial_number'] > 50 & gainloss_df['trial_number'] < 101, 'sections'] = 2 

    return(df)

## Functions to calculate no-brainer performance for each subject


create subset with unambiguous trials for no brainer analysis

In [3]:
def drop_ambi_trials(df):
    df = df[df.revealed_ambi == 1]
    return(df)

create variables indicating whether left or right was the better option

In [4]:
def better_choice_gainloss(df):

    index = df.index
    left_better = []
    right_better = []

    for i in index:

        if df['gain'][i].all() == True:
            lb = (df['prop_left'][i]>df['prop_right'][i]) & (df['mag_left'][i]>df['mag_right'][i])
            rb = (df['prop_left'][i]<df['prop_right'][i]) & (df['mag_left'][i]<df['mag_right'][i])
    
        elif df['gain'][i].all() == False:
            lb = (df['prop_left'][i]<df['prop_right'][i]) & (df['mag_left'][i]>df['mag_right'][i])
            rb = (df['prop_left'][i]>df['prop_right'][i]) & (df['mag_left'][i]<df['mag_right'][i])
        
        left_better.append(lb)
        right_better.append(rb)
        
    df['left_better']=left_better
    df['right_better']=right_better   
    return(df)

indicate whether the better box was chosen

In [5]:
def right_choice(df):
    df['choseBetter'] = (df['resp'] == 'left') & (df['left_better']== True) | (df['resp'] == 'right') & (df['right_better']==True)
    return(df)

only keep trials that are 'no brainers'

In [6]:
def keep_nobrainers(df):
    df['noBrainer'] = (df['right_better'] != df['left_better'])
    df = df[df.noBrainer == True]
    return(df)

calculate performance

In [7]:
def vp_perf(df):
    df = df['choseBetter'].mean()
    return(df)

In [8]:

#df['choseBetter'].groupby('sections').mean().add_prefix('mean_')





# Load and preprocess the online data and check no brainer trials

In [9]:
df_list = []
vp_perform_gainloss_list = []
vp_nb_gainloss_list = []
vp_list = ['06', '07', '10', '12', '13', '15', '16', '17', '18', '19', '20', '22', '23_2', '25_2', '26_2', '27_2', '28_2', '29', '30']
for vp in vp_list:
    path = os.path.join(os.getcwd(),'..','data','data_gainloss_logfiles','vp' + vp + '_gainloss_processed.csv')
    df = pd.read_csv(path, sep=",")
    #preprocess data
    df=preprocess_gainloss(df)
    #store prepocessed data in list that contains data for all subjects (for later analysis)
    df_list.append(df)
    #create subset with unambiguous trials for no brainer analysis
    df = drop_ambi_trials(df)
    #create variables indicating whether left or right was the better option
    better_choice_gainloss(df)
    #add whether the better box was chosen
    right_choice(df)
    #only keep trials that are 'no brainers'
    df = keep_nobrainers(df)
    #calculate performance
    vp_perform_gainloss = ['vp' + vp, vp_perf(df)]
    #store each vp performance in list
    vp_perform_gainloss_list.append(vp_perform_gainloss)
    #vp performance sectionwise
    vp_nb_gainloss = df.groupby('section').mean().add_prefix('gainloss_')[['gainloss_choseBetter']]
    vp_nb_gainloss['MID'] = 'vp'+ vp
    vp_nb_gainloss_list.append(vp_nb_gainloss)
    
#Merge dataframe list to single dataframe. "inner": Just take columns which exist in all dataframes    
gainloss_df = pd.concat(df_list, ignore_index = True, join = 'inner')  
#vp_gainloss_perf = pd.concat(vp_performance_list, ignore_index = True, join = 'inner')
    


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [10]:
vp_nb_gainloss_list

[         gainloss_choseBetter   MID
 section                            
 1                    0.833333  vp06
 2                    1.000000  vp06
 3                    0.960000  vp06,          gainloss_choseBetter   MID
 section                            
 1                    1.000000  vp07
 2                    0.666667  vp07
 3                    1.000000  vp07,          gainloss_choseBetter   MID
 section                            
 1                         1.0  vp10
 2                         1.0  vp10
 3                         1.0  vp10,          gainloss_choseBetter   MID
 section                            
 1                        1.00  vp12
 2                        1.00  vp12
 3                        0.92  vp12,          gainloss_choseBetter   MID
 section                            
 1                        1.00  vp13
 2                        1.00  vp13
 3                        0.88  vp13,          gainloss_choseBetter   MID
 section                            
 

In [11]:
section1 = df[df.section == 1]
section2 = df[df.section == 2]
section3 = df[df.section == 3]



In [12]:
len(section3)


36

# Functions for shock data
## preprocessing

In [13]:
def preprocess_shock(df):
    
    #drop columns Unnamed, id, AID, HID, session_id, practice, loss_or_reward, instruction_number, est_left_over_right
    df = df.drop(['br', 'UrnsPresented_duration', 'ChoiceTime_duration', 'ChoiceDisplayed_duration', 'OutcomeDisplayed_duration',
                  'ITI_duration', 'OutcomeHistoryDisplayed_duration', 'ShockOutcomeDisplayed_duration', 'ExtraITI_duration',
                  'time_urns_presented', 'time_participant_choice_presented', 'token_chosen_presented_time', 'shock_time',
                  'resultpicture_time', 'time_ITI_begin', 'time_Extra_ITI_begin', 'choicetime', 'computerchoice_outcome',
                  'numberofshocks', 'outcome_chosen', 'numberbin1', 'numberbin2', 'numberbin3', 'numberbin0', 'outcome_intoarray',
                  'breaktime', 'length_break', 'FIRST_ITI_start', 'ITI_start', 'UrnsPresented_start',
                  'QuestionMark_start', 'ButtonPress_start', 'ChoiceDisplayed_start', 'Outcome_start',
                  'OutcomeHistoryDisplayed_start', 'ShockOutcomeDisplay_start', 'Shock_start', 'ExtraITI_start', 
                  'Trial_starttime', 'p_left', 'p_right'], axis=1)
    
    #### MATCH TO GAIN/LOSS ####
    
    #rename variable names to match gain/loss
    df.rename(columns={'pr': 'revealed_right', 'pr_left': 'revealed_left', 'magnitude_left': 'mag_left', 'magnitude_right': 'mag_right', 'time_button_press': 'reaction_time', 'participantsbet': 'resp', 'outcome': 'mag_outcome', 'trialnumber': 'trial_number', 'result_given1in10': 'five_trials_outcome'}, inplace=True)
    #rename values to match gain/loss
    df['resp'] = df['resp'].map({'bet_left': 'left', 'bet_right': 'right'})
    df['outcome'] = df['mag_outcome']
    df.loc[df['outcome'] > 0, 'outcome'] = 'X' 
    df.loc[df['outcome'] == 0, 'outcome'] = 'O'
    #add variable revealed_x_right etc from colors
    
    #calculate revealed prop_left and prop_right from revealed tokens
    df['prop_left'] = df['revealed_x_l']/(df['revealed_x_l'] + df['revealed_o_l'])
    df['prop_right'] = df['revealed_x_r']/(df['revealed_x_r'] + df['revealed_o_r'])
    #add column with percentage revealed in ambiguous urn (=1 when both urns are unambiguous) and calculate how many tokens were presented in ambigupus urn (info_ambi) + the sqrt transformation (P)
    df['revealed_ambi'] = df[['revealed_left','revealed_right']].min(axis = 1)
    df['info'] = df['revealed_ambi']*50
    df['P'] = np.sqrt(df['info'])
    #indicate whether trial is shock
    df['shock'] = (df['mag_left']>0)

    return(df)

## read in colors with first 50 elements of a line representing the tokens in right box, last 50 elements of left box

In [14]:
def addTokens(df):
    
    path = os.path.join(os.getcwd(),'..','data','data_shock_mscl','colours_Behaviour_Analysis.txt')
    tokens_df = pd.read_csv(path, sep=",", skiprows=[0], header=None)
    tokens_df[0] = tokens_df[0].str.replace('{','')
    tokens_df[99] = tokens_df[99].str.replace('}', '')
    tokens_df = tokens_df.drop(tokens_df.columns[100], axis=1)
    tokens_df = tokens_df.astype('int64')
    freq_right = tokens_df.iloc[:, :50].apply(pd.value_counts, axis=1).fillna(0)
    freq_left = tokens_df.iloc[:, 50:].apply(pd.value_counts, axis=1).fillna(0)
    
    df['revealed_x_r'] = freq_right.loc[:, 0]
    df['revealed_o_r'] = freq_right.loc[:, 1]

    df['revealed_x_l'] = freq_left.loc[:, 0]
    df['revealed_o_l'] = freq_left.loc[:, 1]
    
    return(df)

In [None]:
def better_choice_shock(df):
    
    lb = (df['prop_left']<df['prop_right']) & (df['mag_left']<df['mag_right'])
    rb = (df['prop_left']>df['prop_right']) & (df['mag_left']>df['mag_right'])
    
    #left_better.append(lb)
    #right_better.append(rb)
        
    df['left_better']=lb
    df['right_better']=rb
    
    return(df)

# Load the shock data
- one dataframe per subject
- match column names to match the gain/loss names.

- read in coloursblabl_9_14_17.txt, count 0, 1, 2 and first 50 right box, last 50 are left box, check if 0 correspond to Os
    - read file line by line
    -with file.txt
    -readline

In [None]:
df_list = []
vp_list = ['06', '07', '10', '12', '13', '15', '16', '17', '18', '19', '20', '22', '23', '25', '26', '27', '28', '29', '30']
vp_perform_shock_list = []
vp_nb_shock_list = []
section_list = ['1', '2', '3']
for vp in vp_list:
    df = []
    for sec in section_list:
        path = os.path.join(os.getcwd(),'..','data','data_behavioral','Expt1Pain_Behaviour_vp' + vp + '_' + sec + '.txt')
        df_dummy = pd.read_csv(path, sep="\t", skiprows = [0])
        df_dummy['MID'] = 'vp'+ vp
        df_dummy['section'] = sec
        df_dummy.columns = df_dummy.columns.str.replace(' ','')
        df.append(df_dummy)
    #create a df that contains data from all sections    
    df = pd.concat(df, ignore_index = True, join = 'inner')
    df = addTokens(df)
    #preprocess shock data
    df = preprocess_shock(df)
    #store prepocessed data in list that contains data for all subjects (for later analysis)
    df_list.append(df)
    #create subset with unambiguous trials for no brainer analysis
    df = drop_ambi_trials(df)
    #create variables indicating whether left or right was the better option
    df = better_choice_shock(df)
    #add whether the better box was chosen
    df = right_choice(df)
    #only keep trials that are 'no brainers'
    df = keep_nobrainers(df)
    #calculate performance
    vp_perform_shock = ['vp' + vp, vp_perf(df)]
    #store each vp performance in list
    vp_perform_shock_list.append(vp_perform_shock)
    #vp performance sectionwise
    vp_nb_shock = df.groupby('section').mean().add_prefix('shock_')[['shock_choseBetter']]
    vp_nb_shock['MID'] = 'vp'+ vp
    vp_nb_shock_list.append(vp_nb_shock)

#create complete df for shock condition with all vps        
shock_df = pd.concat(df_list, ignore_index = True, join = 'inner')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [None]:
vp_perform_gainloss_list

In [None]:
vp_nb_gainloss_list

In [None]:
vp_perform_shock_list

In [None]:
vp_nb_shock_list

In [None]:
#section1 = df[df.section == '1']
#section2 = df[df.section == '2']
#section3 = df[df.section == '3']

In [None]:
#df[['mag_left', 'mag_right', 'resp', 'prop_left', 'prop_right', 'left_better', 'right_better', 'choseBetter']]