In [3]:
import pandas as pd
from pathlib import Path

In [4]:
BASE_DIR = '../../data/rawdata/'

In [None]:
def check_correct(row):
    if row['big_small'] == 'S' and row['opt'] == 4:
        return True
    elif row['big_small'] == 'B' and row['opt'] == 3:
        return True
    return False

def read_bh(file):
    '''Auxiliary function to read the data from the behavioural file
    in the specified format and with meaningful column names.'''
    df = pd.read_csv(file, header=None,
        names=['o', 'ab', 'show_times', 'stim', 'PER', 'big_small', 'nl', 'onset', 'opt', 'response_time']).drop(
        columns=['o', 'PER']
    )
    df['opt'] = df['opt'].astype('Int64')

    df['correct'] = df[df['opt'].notna()].apply(check_correct, axis=1)
    return df

def add_consecutive(df):
    df['sequence'] = df.sort_values(by='onset').groupby('stim').cumcount() + 1
    return df

def get_trials_condition(df):
    '''Auxiliary function to get the number of trials and unique stimuli
    in the dataset, as well as the number of trials for each condition.'''
    dummy = df[(df['opt'].isna()) | (df['correct'] == False)]
    p11 = df[(df['show_times'] == 1) & (df['correct'] == True)]
    p31 = df[(df['show_times'] == 3) & (df['correct'] == True) & (df['sequence'] == 1)]
    p32 = df[(df['show_times'] == 3) & (df['correct'] == True) & (df['sequence'] == 2)]
    p33 = df[(df['show_times'] == 3) & (df['correct'] == True) & (df['sequence'] == 3)]

    print('Number of trials:', len(df))
    print('Number of unique stimuli:', len(df['stim'].unique()))
    print('------')
    print(f'                Number of 1-show trials: {len(p11)}')
    print(f'Number of 3-show trials with sequence 1: {len(p31)}')
    print(f'Number of 3-show trials with sequence 2: {len(p32)}')
    print(f'Number of 3-show trials with sequence 3: {len(p33)}')
    print(f'                 Number of dummy trials: {len(dummy)}')
    
    return {
        'p11': p11,
        'p31': p31,
        'p32': p32,
        'p33': p33,
        'dummy': dummy
    }

In [17]:
def get_ps_from_file(file):
    '''Auxiliary function to read the data from the behavioural file
    and add the sequence column to it.'''
    print('*'*50)
    print(f'Processing file {file}')
    df = read_bh(file)
    df = add_consecutive(df)
    return get_trials_condition(df)

In [18]:
# Get all subjects' directories
base_path = Path(BASE_DIR)
subjects = sorted([d for d in base_path.iterdir() if d.is_dir()])
data = {}

# Load all behaviour files
for subject in subjects:
    # Behaviour files are CSVs in the beh directory
    beh_files = sorted(subject.glob('beh/*.csv'))
    csvs = {}
    # Each subject should have 2 behaviour files
    if len(beh_files) != 2:
        print(f'Error: {subject.name} does not have 2 behaviour files')
        continue
    # Load the CSVs with pandas.read_csv
    csvs['rep1'] = get_ps_from_file(beh_files[0])
    csvs['rep2'] = get_ps_from_file(beh_files[1])
    data[subject.name] = csvs

**************************************************
Processing file ../../data/rawdata/o01/beh/o01-1.csv
Number of trials: 48
Number of unique stimuli: 24
------
                Number of 1-show trials: 10
Number of 3-show trials with sequence 1: 7
Number of 3-show trials with sequence 2: 7
Number of 3-show trials with sequence 3: 10
                 Number of dummy trials: 14
**************************************************
Processing file ../../data/rawdata/o01/beh/o01-2.csv
Number of trials: 48
Number of unique stimuli: 24
------
                Number of 1-show trials: 10
Number of 3-show trials with sequence 1: 10
Number of 3-show trials with sequence 2: 9
Number of 3-show trials with sequence 3: 9
                 Number of dummy trials: 10
**************************************************
Processing file ../../data/rawdata/o02/beh/o02-1.csv
Number of trials: 48
Number of unique stimuli: 24
------
                Number of 1-show trials: 11
Number of 3-show trials with sequence

In [19]:
print(data.keys())

dict_keys(['o01', 'o02', 'o03', 'o04', 'o05', 'o06', 'o07', 'o08', 'o09', 'o10', 'o11', 'o12', 'o13', 'o14', 'o15', 'o16', 'o17', 'o18', 'y01', 'y02', 'y03', 'y04', 'y05', 'y06', 'y07', 'y08', 'y09', 'y10', 'y11', 'y12', 'y13', 'y14', 'y15', 'y16', 'y17', 'y18', 'y19'])


In [20]:
data['y01']['rep1']

{'p11':    ab  show_times                   stim big_small  nl   onset  opt  \
 2   D           1    stim/flashlight.bmp         S  NL   35988    4   
 11  D           1  stim/sequoia_tree.bmp         B   L   82988    3   
 15  D           1        stim/basket.bmp         B  NL   98988    3   
 21  D           1       stim/blender.bmp         B  NL  116988    3   
 28  D           1          stim/bear.bmp         B   L  141988    3   
 31  D           1     stim/saxophone.bmp         B  NL  166988    3   
 33  D           1          stim/dove.bmp         S   L  188988    4   
 34  D           1          stim/yarn.bmp         S  NL  199988    4   
 36  D           1       stim/grappes.bmp         S   L  221988    4   
 37  D           1     stim/raspberry.bmp         S   L  224988    4   
 39  D           1           stim/dog.bmp         B   L  230989    3   
 47  D           1        stim/camera.bmp         S  NL  262988    4   
 
     response_time  correct  sequence  
 2             

In [39]:
data['y01']['rep1']['dummy']

Unnamed: 0,ab,show_times,stim,big_small,nl,onset,opt,response_time,correct,sequence
5,A,3,stim/penguin.bmp,B,L,44988,4,811,False,1
12,A,3,stim/envelope.bmp,S,NL,89988,3,860,False,1
27,A,3,stim/toilett.bmp,B,NL,134988,4,590,False,2


## Analysis intra-rep

In [52]:
rep_stats_df = pd.DataFrame(columns=[
    'subject',
    'rep',
    'p31',
    'p32',
    'p33',
    'p31_mean_rt',
    'p32_mean_rt',
    'p33_mean_rt',
    'p31_std_rt',
    'p32_std_rt',
    'p33_std_rt',
    'dummy_p3_',
    'mean_rt',
    'std_rt',
    'mean_acc',
    'std_acc'
    ])

In [53]:
for subject in data.keys():
    new_rows = []
    new_rows.append({
        'subject': subject,
        'rep': 'rep1',
        'p31': len(data[subject]['rep1']['p31']),
        'p32': len(data[subject]['rep1']['p32']),
        'p33': len(data[subject]['rep1']['p33']),
        'p31_mean_rt': data[subject]['rep1']['p31']['response_time'].mean(),
        'p32_mean_rt': data[subject]['rep1']['p32']['response_time'].mean(),
        'p33_mean_rt': data[subject]['rep1']['p33']['response_time'].mean(),
        'p31_std_rt': data[subject]['rep1']['p31']['response_time'].std(),
        'p32_std_rt': data[subject]['rep1']['p32']['response_time'].std(),
        'p33_std_rt': data[subject]['rep1']['p33']['response_time'].std(),
        'dummy_p3_': len(data[subject]['rep1']['dummy'][data[subject]['rep1']['dummy']['show_times'] == 3]),
        'mean_acc': 0,
        'std_acc': 0
    })
    new_rows.append({
        'subject': subject,
        'rep': 'rep2',
        'p31': len(data[subject]['rep2']['p31']),
        'p32': len(data[subject]['rep2']['p32']),
        'p33': len(data[subject]['rep2']['p33']),
        'p31_mean_rt': data[subject]['rep2']['p31']['response_time'].mean(),
        'p32_mean_rt': data[subject]['rep2']['p32']['response_time'].mean(),
        'p33_mean_rt': data[subject]['rep2']['p33']['response_time'].mean(),
        'p31_std_rt': data[subject]['rep2']['p31']['response_time'].std(),
        'p32_std_rt': data[subject]['rep2']['p32']['response_time'].std(),
        'p33_std_rt': data[subject]['rep2']['p33']['response_time'].std(),
        'dummy_p3_': len(data[subject]['rep2']['dummy'][data[subject]['rep2']['dummy']['show_times'] == 3]),
        'mean_rt': data[subject]['rep2']['p31']['response_time'].mean(),
        'std_rt': data[subject]['rep2']['p31']['response_time'].std(),
        'mean_acc': 0,
        'std_acc': 0
    })

    rep_stats_df = pd.concat([rep_stats_df, pd.DataFrame(new_rows)])

  rep_stats_df = pd.concat([rep_stats_df, pd.DataFrame(new_rows)])


In [54]:
rep_stats_df

Unnamed: 0,subject,rep,p31,p32,p33,p31_mean_rt,p32_mean_rt,p33_mean_rt,p31_std_rt,p32_std_rt,p33_std_rt,dummy_p3_,mean_rt,std_rt,mean_acc,std_acc
0,o01,rep1,7,7,10,920.571429,803.285714,860.800000,140.580768,153.969385,158.489257,12,,,0,0
1,o01,rep2,10,9,9,965.500000,838.666667,789.444444,207.437943,164.601033,159.204359,8,965.500000,207.437943,0,0
0,o02,rep1,12,12,12,891.000000,644.750000,666.666667,137.895876,112.039867,98.721218,0,,,0,0
1,o02,rep2,12,12,12,807.500000,648.500000,711.333333,150.318752,87.060585,96.502787,0,807.500000,150.318752,0,0
0,o03,rep1,12,12,12,775.583333,667.000000,722.000000,182.404375,111.949664,107.331263,0,,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,y17,rep2,10,12,12,721.400000,665.833333,635.916667,133.544999,103.270900,74.456160,2,721.400000,133.544999,0,0
0,y18,rep1,11,10,11,777.454545,660.600000,738.727273,198.030989,85.381757,168.161286,4,,,0,0
1,y18,rep2,12,12,11,765.833333,637.166667,667.272727,114.015018,102.322870,132.324670,1,765.833333,114.015018,0,0
0,y19,rep1,11,11,11,852.545455,664.272727,663.000000,196.760953,90.145539,72.908161,3,,,0,0


In [59]:
subject_stats_df = rep_stats_df.sort_values(by='rep').groupby('subject').agg({
    'p31': 'sum',
    'p32': 'sum',
    'p33': 'sum',
    'p31_mean_rt': lambda x: x.iloc[0] - x.iloc[1],
    'p32_mean_rt': lambda x: x.iloc[0] - x.iloc[1],
    'p33_mean_rt': lambda x: x.iloc[0] - x.iloc[1],
    'p31_std_rt': lambda x: x.iloc[0] - x.iloc[1],
    'p32_std_rt': lambda x: x.iloc[0] - x.iloc[1],
    'p33_std_rt': lambda x: x.iloc[0] - x.iloc[1],
    'dummy_p3_': 'sum',
    'mean_rt': lambda x: x.iloc[0] - x.iloc[1],
    'std_rt': lambda x: x.iloc[0] - x.iloc[1],
    'mean_acc': lambda x: x.iloc[0] - x.iloc[1],
    'std_acc': lambda x: x.iloc[0] - x.iloc[1]
}).reset_index()

subject_stats_df

Unnamed: 0,subject,p31,p32,p33,p31_mean_rt,p32_mean_rt,p33_mean_rt,p31_std_rt,p32_std_rt,p33_std_rt,dummy_p3_,mean_rt,std_rt,mean_acc,std_acc
0,o01,17,16,19,-44.928571,-35.380952,71.355556,-66.857174,-10.631648,-0.715102,20,,,0,0
1,o02,24,24,24,83.5,-3.75,-44.666667,-12.422876,24.979282,2.218431,0,,,0,0
2,o03,23,21,23,22.128788,-129.666667,14.727273,77.949252,-128.349108,-54.174211,5,,,0,0
3,o04,22,21,21,-122.583333,71.590909,136.583333,-189.576505,227.592514,323.11067,8,,,0,0
4,o05,17,20,22,277.028571,-127.20202,-85.466667,117.14833,-36.86288,-99.212789,13,,,0,0
5,o06,23,22,22,93.666667,70.233333,61.181818,-0.066228,115.386423,91.418164,5,,,0,0
6,o07,23,23,23,-43.69697,10.825758,103.30303,-45.579125,36.833276,54.505093,3,,,0,0
7,o08,23,22,23,-33.348485,7.616667,-157.939394,-247.953389,106.70719,-149.532472,4,,,0,0
8,o09,17,21,22,73.060606,127.427273,26.454545,83.113914,430.750151,11.53606,12,,,0,0
9,o10,22,23,23,-53.666667,-75.681818,11.742424,46.793786,-189.711035,133.400158,4,,,0,0
