In [232]:
import os
import csv
import pandas as pd
import numpy as np
import scipy as sp
import pickle
import seaborn as sns

In [233]:
# Relative path of the directory that holds the raw log files read later by create_2df
folder = '../2_PAT/'
# Names of all entries in that directory (os.listdir returns them in arbitrary order)
files = os.listdir(folder)

In [234]:
# Make list of participants
# The log files read further down are named 'MARS_<ID>-PA_europ.log' and
# 'MARS_<ID>-PA_europ1.log'. Any other entry in the folder (e.g. '.DS_Store',
# which would otherwise be parsed into the bogus ID 'Store') is skipped.
IDs = []
for file in files:
    is_log_file = (file.startswith('MARS_')
                   and '-PA_europ' in file
                   and file.endswith('.log'))
    if not is_log_file:
        continue
    # 'MARS_001-PA_europ.log' -> '001-PA' -> '001'
    IDs.append(file.split('_')[1].split('-')[0])

# One entry per participant, in ascending (string) order
uniqueIDs = sorted(set(IDs))
N_part = len(uniqueIDs)
print(N_part)

# Save uniqueIDs list as pickle
with open('uniqueIDs.pkl', 'wb') as f:
    pickle.dump(uniqueIDs, f)

170


In [4]:
def create_2df(file_name):

    '''
    Split a tab-separated log file into two data frames.

    The file must contain the two header rows listed below. The rows between
    the first and the second header become df1, the rows after the second
    header become df2. Each frame takes its header row as column names, and
    rows that contain no value at all are removed.

    Returns the tuple (df1, df2).
    Raises ValueError if one of the two header rows is not found in the file.
    '''

    header_1 = ['Subject', 'Trial', 'Event Type', 'Code', 'Time', 'TTime', 'Uncertainty', 'Duration', 'Uncertainty', 'ReqTime', 'ReqDur', 'Stim Type', 'Pair Index']
    header_2 = ['Event Type', 'Code', 'Type', 'Response', 'RT', 'RT Uncertainty', 'Time', 'Uncertainty', 'Duration', 'Uncertainty', 'ReqTime', 'ReqDur']

    # Load the whole log as a list of rows (each row is a list of cells)
    with open(file_name, newline='') as log_file:
        rows = list(csv.reader(log_file, delimiter='\t'))

    # Locate the two header rows
    start_1 = rows.index(header_1)
    start_2 = rows.index(header_2)

    # Build one frame per table: (header position, rows belonging to it)
    tables = []
    for start, body in ((start_1, rows[start_1 + 1:start_2]),
                        (start_2, rows[start_2 + 1:])):
        table = pd.DataFrame.from_records(body)
        table.columns = rows[start]
        table.dropna(how='all', inplace=True)
        tables.append(table)

    return tables[0], tables[1]

In [5]:
def clean_df2(df2):

    '''
    Return a trimmed copy of df2: rows whose Code is one of the non-trial
    codes listed below are removed, and the columns that are not used later
    are dropped. The input frame is left untouched.
    '''

    codes_to_remove = ['Fix', 'Instruction_Run_1', 'Instruction_Run_2', 'Wait_for_scanner', 'EndText']
    columns_to_remove = ['RT', 'RT Uncertainty', 'Uncertainty', 'ReqTime', 'ReqDur']

    # Rows: collect the index labels of every unwanted row and drop them in one go
    unwanted = df2.Code.isin(codes_to_remove)
    trimmed = df2.drop(df2.index[unwanted.values], inplace=False)

    # Columns: drop unimportant columns
    return trimmed.drop(columns=columns_to_remove, inplace=False)

In [6]:
def reformat_cues_fb(df2):

    '''
    Reformat (have cues and fb in columns instead of rows after each other)

    df2 is read as alternating rows: even positions (0, 2, ...) are cue rows,
    odd positions (1, 3, ...) are the feedback rows that follow them. It must
    have exactly 7 columns (the feedback copy is renamed with the 8 'FB ...'
    names below, the first one being the former index).

    Returns a new frame with one row per cue/feedback pair containing:
      - 'Trial'  : 1-based pair number
      - the cue columns under their original names
      - the feedback columns prefixed with 'FB ' ('FB Run' is dropped,
        'FB index' keeps the feedback row's original index label)
      - 'isHit'  : 1 if the cue row's Type is 'hit', else 0
      - 'FBs'    : integer at the end of 'FB Code' ('FB_no_-5' -> -5)
      - 'rews'   : isHit * FBs

    Raises ValueError if df2 has an odd number of rows (a cue without feedback).
    '''

    if len(df2) % 2 != 0:
        raise ValueError('reformat_cues_fb expects alternating cue/feedback rows, '
                         'but got an odd number of rows: ' + str(len(df2)))

    # reset_index() returns new frames, so the caller's data is never modified
    # and both halves share the same 0..n-1 index for the side-by-side concat
    df2_cues = df2.iloc[0::2].reset_index()
    df2_fb = df2.iloc[1::2].reset_index()

    # Change names to avoid duplicates
    # (plain assignment: set_axis(..., inplace=True) no longer exists in pandas >= 2.0)
    df2_fb.columns = ['FB index', 'FB Event Type', 'FB Code', 'FB Type', 'FB Response', 'FB Time', 'FB Duration', 'FB Run']

    df2_new = pd.concat([df2_cues, df2_fb], axis=1)

    # 'index' is the cue rows' former index; 'FB Run' duplicates 'Run'
    df2_new = df2_new.drop(columns=['index', 'FB Run'])

    # Make new columns
    df2_new['isHit'] = (df2_new['Type']=='hit').astype(int)
    # Take the part after the last underscore. str.strip('FB_no') would strip a
    # set of characters rather than a prefix and only worked by coincidence.
    df2_new['FBs'] = [int(fb.rsplit('_', 1)[-1]) for fb in df2_new['FB Code'].tolist()]
    df2_new['rews'] = df2_new['isHit']*df2_new['FBs']

    # Create a trial column
    df2_new.insert(0, "Trial", list(range(1,len(df2_new)+1)))

    return df2_new

In [7]:
def count_fb_by_cue(df2):

    '''
    Count the received feedback codes separately for each cue.

    Reads the 'Code' (cue) and 'FB Code' (feedback) columns of df2 and returns
    a frame with one row per cue value: the cue is stored in the 'index'
    column and every other column is a feedback code holding how often it
    occurred for that cue. Combinations that never occurred are left as NaN.
    '''

    # Feedback codes that always get a column, even if they never occur
    fb_labels = ['FB_1','FB_-1','FB_5','FB_-5','FB_no_1','FB_no_-1','FB_no_5','FB_no_-5']
    cues = sorted(set(df2['Code']))

    # Empty table: one row per feedback code, one column per cue
    table = pd.DataFrame([], columns=cues, index=fb_labels)

    for cue in cues:
        # Occurrences of each feedback code among the rows of this cue
        is_cue = df2['Code']==cue
        cue_counts = df2[is_cue]['FB Code'].value_counts().to_frame(cue)

        # Fill the still-empty cells of the table with these counts
        table = table.combine_first(cue_counts)

    # Cues as rows, feedback codes as columns; move the cue into a regular column
    by_cue = table.transpose()
    by_cue.reset_index(level=0, inplace=True)

    return by_cue

# Reformat data for each user

In [8]:
# Clean and save data for each user
for ID in uniqueIDs:

    user_folder = 'data/user_' + ID + '/'
    user_file = user_folder + 'df2_cf.pkl'

    # Skip users that were already processed. The test is on the output file and
    # not on the folder: the folder is created before any processing happens, so
    # a user whose processing failed would otherwise be skipped on every re-run
    # and the cells below would fail on the missing 'df2_cf.pkl'.
    if os.path.exists(user_file):
        continue

    # Creates user folder if does not already exist
    os.makedirs(user_folder, exist_ok=True)

    print(ID)

    # Extract data into 2 dfs (for run1 and run2)
    df1_run1, df2_run1 = create_2df(folder + 'MARS_' + ID + '-PA_europ.log')
    df1_run2, df2_run2 = create_2df(folder + 'MARS_' + ID + '-PA_europ1.log')

    # Focus on df2 because it has the hit/miss/FB info

    # Clean (remove useless rows and cols)
    df2_run1_c = clean_df2(df2_run1)
    df2_run2_c = clean_df2(df2_run2)

    # Add run information
    df2_run1_c['Run'] = 1
    df2_run2_c['Run'] = 2

    # Concatenate
    df2_c = pd.concat([df2_run1_c, df2_run2_c])

    # Reformat (have fb in a column instead of row below)
    df2_cf = reformat_cues_fb(df2_c)

    # Save df2_cf as pickle
    df2_cf.to_pickle(user_file)

# Concatenate all users

In [90]:
### Counts all and store in ALL COUNTS

# Creates all_users folder if does not already exist.
# Done once, before the loop, so the name is defined even if the loop never runs.
all_users_folder = 'data/all_users/'
os.makedirs(all_users_folder, exist_ok=True)

# One count table per participant; they are concatenated once after the loop
# (growing a DataFrame with concat inside the loop copies all rows every time)
counts_per_user = []

for ID in uniqueIDs:

    print(ID)

    # Load df2_cf (written by the per-user cleaning cell)
    user_folder = 'data/user_' + ID + '/'
    df2_cf = pd.read_pickle(user_folder + 'df2_cf.pkl')

    ### ALL COUNTS
    # Count fb for each cue
    df2_count = count_fb_by_cue(df2_cf)

    # Combinations that never occurred count as 0; tag the rows with the participant
    tmp = df2_count.fillna(0)
    tmp.insert(0, 'ID', ID)
    counts_per_user.append(tmp)

# Stack all participants with a fresh 0..n-1 index.
# pd.concat raises ValueError if uniqueIDs is empty.
all_counts = pd.concat(counts_per_user, ignore_index=True)

# count_fb_by_cue stores the cue in a column called 'index'
all_counts.rename(columns={"index": "Cue"}, inplace=True)

# Sum hit and miss feedback counts to get one column per reward value
for rew in ['-5', '-1', '1', '5']:
    all_counts['R_' + rew] = all_counts['FB_' + rew]+all_counts['FB_no_' + rew]

# Save all_counts as pickle
all_counts.to_pickle(all_users_folder + 'all_counts.pkl')

001
003
006
007
008
009
010
012
014
023
028
032
033
034
035
036
038
044
045
049
050
054
055
057
058
060
072
073
075
076
077
078
080
084
085
086
088
090
093
096
100
101
103
104
106
107
108
109
112
114
115
118
119
120
121
122
130
132
133
136
137
138
142
143
145
147
149
151
155
158
159
165
170
172
173
175
176
177
178
181
185
186
187
189
199
204
207
208
211
213
217
221
229
230
236
237
239
240
244
251
252
258
259
263
269
271
280
282
283
284
286
287
291
292
293
301
302
303
306
313
314
315
319
321
322
325
326
327
329
335
336
337
339
341
345
349
351
360
361
362
368
375
376
381
384
390
391
393
395
397
400
405
406
412
413
414
421
422
423
427
437
438
440
446
450
453
462
469
470
471


In [89]:
all_counts

Unnamed: 0,ID,Cue,FB_-1,FB_-5,FB_1,FB_5,FB_no_-1,FB_no_-5,FB_no_1,FB_no_5
0,001,Cue_HP,0.0,0.0,0.0,0.0,6.0,14.0,4.0,4.0
1,001,Cue_HR,2.0,3.0,1.0,5.0,2.0,1.0,5.0,9.0
2,001,Cue_LP,0.0,1.0,0.0,0.0,10.0,9.0,4.0,4.0
3,001,Cue_LR,2.0,3.0,7.0,5.0,2.0,1.0,3.0,5.0
4,003,Cue_HP,3.0,3.0,1.0,1.0,3.0,11.0,3.0,3.0
...,...,...,...,...,...,...,...,...,...,...
675,470,Cue_LR,4.0,4.0,10.0,10.0,0.0,0.0,0.0,0.0
676,471,Cue_HP,0.0,2.0,0.0,0.0,6.0,12.0,4.0,4.0
677,471,Cue_HR,4.0,3.0,3.0,7.0,0.0,1.0,3.0,7.0
678,471,Cue_LP,5.0,6.0,2.0,1.0,5.0,4.0,2.0,3.0


# Hits by runs

In [88]:
### Counts all and store in ALL COUNTS (by run)
N_runs = 2

# Output folder: defined here so this cell does not rely on a variable left
# behind by another cell
all_users_folder = 'data/all_users/'
os.makedirs(all_users_folder, exist_ok=True)

# One table per participant (runs stacked); concatenated once after the loop
counts_per_user = []

for ID in uniqueIDs:

    print(ID)

    # Load df2_cf
    user_folder = 'data/user_' + ID + '/'
    df2_cf = pd.read_pickle(user_folder + 'df2_cf.pkl')

    ### ALL COUNTS by run
    # Fresh dict for every participant so nothing carries over from the previous one
    df2_count_Rindiv = {}
    for run_no in range(1, N_runs+1):
        run_counts = count_fb_by_cue(df2_cf[df2_cf['Run']==run_no]).fillna(0)
        run_counts.insert(0, 'Run', run_no)
        run_counts.insert(0, 'ID', ID)
        df2_count_Rindiv[str(run_no)] = run_counts

    # Concat the runs of this participant
    tmp = pd.concat(list(df2_count_Rindiv.values()))
    counts_per_user.append(tmp)

# Stack all participants with a fresh 0..n-1 index.
# pd.concat raises ValueError if uniqueIDs is empty.
all_counts_R = pd.concat(counts_per_user, ignore_index=True)

# count_fb_by_cue stores the cue in a column called 'index'
all_counts_R.rename(columns={"index": "Cue"}, inplace=True)

# Sum hit and miss feedback counts to get one column per reward value
for rew in ['-5', '-1', '1', '5']:
    all_counts_R['R_' + rew] = all_counts_R['FB_' + rew]+all_counts_R['FB_no_' + rew]

# Save all_counts_R as pickle
all_counts_R.to_pickle(all_users_folder + 'all_counts_R.pkl')

001
003
006
007
008
009
010
012
014
023
028
032
033
034
035
036
038
044
045
049
050
054
055
057
058
060
072
073
075
076
077
078
080
084
085
086
088
090
093
096
100
101
103
104
106
107
108
109
112
114
115
118
119
120
121
122
130
132
133
136
137
138
142
143
145
147
149
151
155
158
159
165
170
172
173
175
176
177
178
181
185
186
187
189
199
204
207
208
211
213
217
221
229
230
236
237
239
240
244
251
252
258
259
263
269
271
280
282
283
284
286
287
291
292
293
301
302
303
306
313
314
315
319
321
322
325
326
327
329
335
336
337
339
341
345
349
351
360
361
362
368
375
376
381
384
390
391
393
395
397
400
405
406
412
413
414
421
422
423
427
437
438
440
446
450
453
462
469
470
471


# Split by half runs (= blocks)

In [None]:
# NOTE(review): the section heading says "half runs", which would be 2 blocks
# per run, but this constant is 4 -- confirm which one is intended.
Nblocks_per_run = 4
Nruns = 2

# Output folder: defined here so this cell does not rely on a variable left
# behind by another cell
all_users_folder = 'data/all_users/'
os.makedirs(all_users_folder, exist_ok=True)

# One table per participant (blocks stacked); concatenated once after the loop
counts_per_user = []

for ID in uniqueIDs:

    # Load df2_cf
    user_folder = 'data/user_' + ID + '/'
    df2_cf = pd.read_pickle(user_folder + 'df2_cf.pkl')

    # Make list of trials: one list of trial numbers per block, runs in order
    lst_trials = []

    for run_no in range(1, Nruns+1):

        # extract run data
        df2_cf_R = df2_cf[df2_cf['Run']==run_no]

        # compute Ntrials
        Ntrials = len(df2_cf_R)

        # Compute block size. Stop on a run that cannot be split evenly (or is
        # empty) instead of going on with the block size of a previous run.
        if Ntrials == 0 or Ntrials%Nblocks_per_run != 0:
            raise ValueError('PROBLEM: Nblocks_per_run has to divide Ntrials'
                             + ' (ID ' + ID + ', run ' + str(run_no)
                             + ', Ntrials = ' + str(Ntrials) + ')')
        block_size = Ntrials // Nblocks_per_run

        # Split trials in groups of block_size consecutive trials
        trials = df2_cf_R['Trial'].tolist()
        for start in range(0, Ntrials, block_size):
            lst_trials.append(trials[start:start + block_size])

    # Compute fb_by_cue per block
    # Fresh dict for every participant so no block of a previous participant leaks in
    df2_count_Bindiv = {}
    for i, trials in enumerate(lst_trials):
        block_counts = count_fb_by_cue(df2_cf[df2_cf['Trial'].isin(trials)]).fillna(0)
        # Blocks are numbered 1..Nruns*Nblocks_per_run across both runs
        block_counts.insert(0, 'Block', i+1)
        block_counts.insert(0, 'ID', ID)
        df2_count_Bindiv[i] = block_counts

    # Concat the blocks of this participant
    tmp = pd.concat(list(df2_count_Bindiv.values()))
    counts_per_user.append(tmp)

# Stack all participants with a fresh 0..n-1 index.
# pd.concat raises ValueError if uniqueIDs is empty.
all_counts_B = pd.concat(counts_per_user, ignore_index=True)

# count_fb_by_cue stores the cue in a column called 'index'
all_counts_B.rename(columns={"index": "Cue"}, inplace=True)

# Sum hit and miss feedback counts to get one column per reward value
for rew in ['-5', '-1', '1', '5']:
    all_counts_B['R_' + rew] = all_counts_B['FB_' + rew] + all_counts_B['FB_no_' + rew]

# Save all_counts_B as pickle
all_counts_B.to_pickle(all_users_folder + 'all_counts_B.pkl')