For the RAID study, this script computes, for each participant, choice proportions (the fraction of answered trials on which the lottery was chosen rather than the reference) for:
* p(risk)
* p(ambig)
* p(ambig_corr)
* p(risk_25)
* p(risk_50)
* p(risk_75)
* p(amb_24)
* p(amb_50)
* p(amb_74)
* p(amb_24_corr)
* p(amb_50_corr)
* p(amb_74_corr)

Each proportion is reported separately for:
* gains
* losses
* combined (the mean of the participant's gains and losses values)

Necessary inputs are the MATLAB data files for each participant, e.g. for `subject 10`:
* `RA_GAINS_10.mat` that has all the data from 2 gain blocks on day 1 and 2 gain blocks on day 2
* `RA_LOSS_10.mat` that has all the data from 2 loss blocks on day 1 and 2 loss blocks on day 2

The files for all participants must be in the **same** directory, which the script refers to as `data_behav_root`.

In [1]:
import pandas as pd
import numpy as np
import scipy.io as spio
from glob import glob
from scipy import stats
import datetime
import time
import os

In [2]:
# --- Paths and participants (edit these for your machine) ---

# Folder holding every participant's RA_GAINS_*.mat / RA_LOSS_*.mat file
data_behav_root = 'D:\\Chelsea\\Projects_in_the_lab\\RAID\\behavioral'

# Folder the Excel summaries are written to
out_root = 'D:\\Chelsea\\Projects_in_the_lab\\RAID\\output\\files_for_megan'

# Participant ids included in the analysis
sub_num = [
    11, 12, 13, 15, 16, 17, 19,
    20, 21, 22, 24, 25, 27, 28, 29,
    30, 31, 32, 36, 39,
    40, 41, 42, 43, 45, 46, 47, 48,
    50, 51, 55, 56, 57,
    61, 62,
]

In [3]:
def _mat_struct_type():
    '''
    Return the class scipy uses for MATLAB structs read with struct_as_record=False.

    SciPy 1.8+ exposes it publicly as ``scipy.io.matlab.mat_struct``; the
    ``mio5_params`` module is a deprecated private location there, so it is
    only used as a fallback for older SciPy releases.
    '''
    try:
        return spio.matlab.mat_struct
    except AttributeError:
        return spio.matlab.mio5_params.mat_struct


def _todict(matobj):
    '''
    Author: Or

    Recursively convert a mat-object into nested dictionaries.

    Every field listed in ``matobj._fieldnames`` becomes a key; fields that
    are themselves mat-objects are converted the same way, all other values
    are stored unchanged.
    '''
    struct_type = _mat_struct_type()
    converted = {}
    for field in matobj._fieldnames:
        elem = getattr(matobj, field)
        if isinstance(elem, struct_type):
            converted[field] = _todict(elem)
        else:
            converted[field] = elem
    return converted


def _check_keys(data):
    '''
    Author: Or

    Check whether the entries of a dictionary are mat-objects. If so,
    _todict is called to change them to nested dictionaries.

    The dictionary is modified in place and also returned.
    '''
    struct_type = _mat_struct_type()
    # Only existing keys are reassigned, so mutating while iterating is safe.
    for key in data:
        if isinstance(data[key], struct_type):
            data[key] = _todict(data[key])
    return data


def loadmat(filename):
    '''
    Author: Or

    This function should be called instead of direct spio.loadmat
    as it cures the problem of not properly recovering python dictionaries
    from mat files. It calls _check_keys to cure all entries
    which are still mat-objects.

    from: `StackOverflow <http://stackoverflow.com/questions/7008608/scipy-io-loadmat-nested-structures-i-e-dictionaries>`_
    '''
    data = spio.loadmat(filename, struct_as_record=False, squeeze_me=True)
    return _check_keys(data)

In [4]:
def readConditions(subNum, domain, matFile):
    """ Compute one participant's lottery-choice proportions for one domain

    Trials without a response (choice == 0) are dropped. Every proportion is
    the mean of a 0/1 response (1 = lottery chosen, 0 = reference chosen) over
    the selected trials, always excluding trials whose value equals 5.
    The ``*_corr`` measures are the matching ambiguity proportion minus the
    proportion for risky trials with probability 0.5.

    Parameters
    -------------
    subNum: subject id
    domain: domain name, 'gains' or 'loss'
    matFile: filename of the MATLAB data file, or the already-loaded
        dictionary (as returned by ``loadmat``) to avoid reading it again

    Return
    -------------
    events: one-row pandas DataFrame (index 0) with the column 'sub' followed
        by the twelve proportion columns. A proportion with no matching
        trials is NaN.

    Raises
    -------------
    ValueError: if the file holds no variable besides scipy's metadata
    """
    metaData = matFile if isinstance(matFile, dict) else loadmat(matFile)

    # Keys such as '__header__', '__version__' and '__globals__' are metadata;
    # the task data is the first remaining variable. Selecting it by name
    # pattern avoids depending on its position among the keys.
    data_keys = [key for key in metaData if not key.startswith('__')]
    if not data_keys:
        raise ValueError('no data variable found in the MATLAB file contents')
    data = metaData[data_keys[0]]

    ambigs = np.asarray(data['ambigs'])
    probs = np.asarray(data['probs'])
    vals = np.asarray(data['vals'])
    if domain != 'gains':
        # NOTE(review): presumably loss amounts are stored as negatives, so
        # flipping the sign lets the same "value equals 5" filter apply - confirm.
        vals = -1 * vals
    choice = np.asarray(data['choice'])
    refside = np.asarray(data['refSide'])

    # calculate response from choice and refside
    resp = np.ones(choice.shape)   # 1-choose lottery
    resp[choice == refside] = 0    # 0-choose reference
    resp[choice == 0] = 2          # 2-no response

    # keep answered trials only
    answered = resp != 2
    resp = resp[answered]
    vals = vals[answered]
    ambigs = ambigs[answered]
    probs = probs[answered]

    def proportion(mask):
        # NaN (without numpy's empty-slice warning) when no trial matches
        chosen = resp[mask]
        return chosen.mean() if chosen.size else np.nan

    # np.isclose guards the level comparisons against float representation noise.
    # NOTE(review): value 5 is presumably the reference amount - confirm.
    included = ~np.isclose(vals, 5)
    no_ambig = np.isclose(ambigs, 0)
    risky = included & no_ambig
    ambiguous = included & ~no_ambig

    risk_25 = proportion(risky & np.isclose(probs, 0.25))
    risk_50 = proportion(risky & np.isclose(probs, 0.5))
    risk_75 = proportion(risky & np.isclose(probs, 0.75))

    amb_24 = proportion(included & np.isclose(ambigs, 0.24))
    amb_50 = proportion(included & np.isclose(ambigs, 0.5))
    amb_74 = proportion(included & np.isclose(ambigs, 0.74))

    p_risk = proportion(risky)
    p_ambig = proportion(ambiguous)

    # index=[0] is needed because every value is a scalar
    events = pd.DataFrame({'sub': subNum,
                           'p(risk)': p_risk,
                           'p(ambig)': p_ambig,
                           'p(ambig_corr)': p_ambig - risk_50,
                           'p(risk_25)': risk_25,
                           'p(risk_50)': risk_50,
                           'p(risk_75)': risk_75,
                           'p(amb_24)': amb_24,
                           'p(amb_50)': amb_50,
                           'p(amb_74)': amb_74,
                           'p(amb_24_corr)': amb_24 - risk_50,
                           'p(amb_50_corr)': amb_50 - risk_50,
                           'p(amb_74_corr)': amb_74 - risk_50}, index=[0])

    return events

In [5]:
def organizeBlocks(subNum):
    """ Compute the gain and loss choice proportions for one participant

    Builds the paths ``RA_GAINS_<subNum>.mat`` and ``RA_LOSS_<subNum>.mat``
    inside the module-level ``data_behav_root`` directory and passes each to
    ``readConditions``. Each file is read once, by ``readConditions``.

    Parameters
    -------------
    subNum: subject id used in the file names

    Return
    -------------
    (totalEvent_gain, totalEvent_loss): the ``readConditions`` results for
        the 'gains' and 'loss' domains, in that order
    """
    mat_gain_name = os.path.join(data_behav_root, 'RA_GAINS_%s.mat' % subNum)
    mat_loss_name = os.path.join(data_behav_root, 'RA_LOSS_%s.mat' % subNum)

    totalEvent_gain = readConditions(subNum, 'gains', mat_gain_name)
    totalEvent_loss = readConditions(subNum, 'loss', mat_loss_name)

    return totalEvent_gain, totalEvent_loss

In [6]:
# Participant ids to run through the analysis below
sub_num = [
    11, 12, 13, 15, 16, 17, 19,
    20, 21, 22, 24, 25, 27, 28, 29,
    30, 31, 32, 36, 39,
    40, 41, 42, 43, 45, 46, 47, 48,
    50, 51, 55, 56, 57,
    61, 62,
]

In [7]:
# Create the output folder up front so the run does not fail at the very end.
os.makedirs(out_root, exist_ok=True)

# Collect one row per participant and concatenate once afterwards.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# copied the whole table on every iteration.
gain_rows = []
loss_rows = []
both_rows = []
for sub_id in sub_num:
    sub_gain, sub_loss = organizeBlocks(sub_id)
    gain_rows.append(sub_gain)
    loss_rows.append(sub_loss)
    # "combined" is the column-wise mean of the participant's gain and loss
    # rows (NaN entries are skipped by DataFrame.mean's default skipna=True).
    both_rows.append(pd.concat([sub_gain, sub_loss]).mean(axis=0).to_frame().T)

total_gain = pd.concat(gain_rows)
total_loss = pd.concat(loss_rows)
total_both = pd.concat(both_rows)

total_gain.to_excel(os.path.join(out_root, 'task_proportions_gains_test.xlsx'),
                    index=False)
total_loss.to_excel(os.path.join(out_root, 'task_proportions_losses_test.xlsx'),
                    index=False)
total_both.to_excel(os.path.join(out_root, 'task_proportions_both_test.xlsx'),
                    index=False)