In [3]:
import pandas as pd
from pathlib import Path

In [4]:
# Root folder of the raw data, expected to hold one sub-directory per subject.
# The path is relative, so it resolves against the notebook's working directory.
BASE_DIR = '../../data/rawdata/'

In [None]:
def check_correct(row):
    '''Tell whether the chosen option matches the stimulus size category.

    The expected option is 4 for 'S' stimuli and 3 for 'B' stimuli; every
    other combination of `big_small` and `opt` counts as incorrect.'''
    # (size label, option that counts as a correct answer for it)
    for size_label, expected_opt in (('S', 4), ('B', 3)):
        if row['big_small'] == size_label and row['opt'] == expected_opt:
            return True
    return False

def read_bh(file):
    '''Read a headerless behavioural CSV into a dataframe with meaningful
    column names and a per-trial `correct` flag.

    Parameters
    ----------
    file : path or file-like object
        Anything accepted by `pandas.read_csv`. The file is read without a
        header row and its columns receive the ten names listed below.

    Returns
    -------
    pandas.DataFrame
        The trials without the `o` and `PER` columns, with `opt` converted
        to nullable `Int64`, plus a `correct` column that is True/False for
        answered trials and missing where `opt` is missing.
    '''
    df = pd.read_csv(
        file,
        header=None,
        names=['o', 'ab', 'show_times', 'stim', 'PER', 'big_small', 'nl',
               'onset', 'opt', 'response_time'],
    ).drop(columns=['o', 'PER'])
    df['opt'] = df['opt'].astype('Int64')

    opt = df['opt']
    answered = opt.notna()
    # Same rule as check_correct: option 4 is right for 'S' stimuli, option 3
    # for 'B' stimuli. It is evaluated column-wise because a row-wise
    # `apply(..., axis=1)` on an empty selection returns a DataFrame instead
    # of a Series, which breaks the assignment when no trial was answered.
    chose_4 = opt.eq(4).fillna(False).astype(bool)
    chose_3 = opt.eq(3).fillna(False).astype(bool)
    is_correct = ((df['big_small'] == 'S') & chose_4) | ((df['big_small'] == 'B') & chose_3)

    # Assigning only the answered subset lets index alignment leave the
    # unanswered trials as missing values rather than False.
    df['correct'] = is_correct[answered]
    return df

def add_consecutive(df):
    '''Return a copy of `df` with a `sequence` column that numbers the
    presentations of each stimulus (1, 2, 3, ...) in order of `onset`.

    The input dataframe is left untouched, so calling this again on the
    same frame gives the same result. Rows are matched back to their
    original position through the index.'''
    # mergesort is stable: presentations sharing an onset keep their file
    # order, so the numbering is deterministic.
    by_onset = df.sort_values(by='onset', kind='mergesort')
    sequence = by_onset.groupby('stim').cumcount() + 1
    return df.assign(sequence=sequence)

def get_trials_condition(df):
    '''Split the trials into analysis conditions and print a short summary.

    Prints the total number of trials, the number of unique stimuli and the
    size of each condition, then returns a dict of sub-dataframes:

    - 'p11': correct trials with show_times == 1
    - 'p31', 'p32', 'p33': correct trials with show_times == 3 and
      sequence 1, 2 and 3 respectively
    - 'dummy': trials with no response or an incorrect response
    '''
    unanswered_or_wrong = df[df['opt'].isna() | (df['correct'] == False)]

    answered_right = df['correct'] == True
    three_shows = df['show_times'] == 3

    subsets = {'p11': df[(df['show_times'] == 1) & answered_right]}
    for position in (1, 2, 3):
        subsets[f'p3{position}'] = df[three_shows & answered_right & (df['sequence'] == position)]
    subsets['dummy'] = unanswered_or_wrong

    print('Number of trials:', len(df))
    print('Number of unique stimuli:', len(df['stim'].unique()))
    print('------')
    # Labels are left-padded so the colons line up in the printed report.
    report = (
        ('                Number of 1-show trials', 'p11'),
        ('Number of 3-show trials with sequence 1', 'p31'),
        ('Number of 3-show trials with sequence 2', 'p32'),
        ('Number of 3-show trials with sequence 3', 'p33'),
        ('                 Number of dummy trials', 'dummy'),
    )
    for label, key in report:
        print(f'{label}: {len(subsets[key])}')

    return subsets

In [9]:
def get_df_from_file(file):
    '''Load one behavioural file, add the per-stimulus sequence column and
    print the per-condition trial summary for it.

    Returns the loaded dataframe including the sequence column.'''
    banner = '*' * 50
    print(banner)
    print(f'Processing file {file}')
    trials = add_consecutive(read_bh(file))
    # Called for its printed report only; the returned subsets are not kept.
    get_trials_condition(trials)
    return trials

In [10]:
# Every sub-directory of the raw-data root is treated as one subject
base_path = Path(BASE_DIR)
subjects = sorted(entry for entry in base_path.iterdir() if entry.is_dir())
data = {}

# Build data[subject_name] = {'rep1': dataframe, 'rep2': dataframe}
for subject in subjects:
    # Behaviour files are the CSVs inside the subject's beh directory
    beh_files = sorted(subject.glob('beh/*.csv'))
    csvs = {}
    if len(beh_files) == 2:
        # Files are taken in sorted filename order: first -> rep1, second -> rep2
        first_run, second_run = beh_files
        csvs['rep1'] = get_df_from_file(first_run)
        csvs['rep2'] = get_df_from_file(second_run)
        data[subject.name] = csvs
    else:
        # Subjects without exactly 2 behaviour files are reported and skipped
        print(f'Error: {subject.name} does not have 2 behaviour files')

**************************************************
Processing file ../../data/rawdata/o01/beh/o01-1.csv
Number of trials: 48
Number of unique stimuli: 24
------
                Number of 1-show trials: 10
Number of 3-show trials with sequence 1: 7
Number of 3-show trials with sequence 2: 7
Number of 3-show trials with sequence 3: 10
                 Number of dummy trials: 14
**************************************************
Processing file ../../data/rawdata/o01/beh/o01-2.csv
Number of trials: 48
Number of unique stimuli: 24
------
                Number of 1-show trials: 10
Number of 3-show trials with sequence 1: 10
Number of 3-show trials with sequence 2: 9
Number of 3-show trials with sequence 3: 9
                 Number of dummy trials: 10
**************************************************
Processing file ../../data/rawdata/o02/beh/o02-1.csv
Number of trials: 48
Number of unique stimuli: 24
------
                Number of 1-show trials: 11
Number of 3-show trials with sequence

In [40]:
# List the subjects that were loaded; subjects skipped by the loading loop
# (not exactly 2 behaviour files) do not appear here.
print(data.keys())

dict_keys(['o01', 'o02', 'o03', 'o04', 'o05', 'o06', 'o07', 'o08', 'o09', 'o10', 'o11', 'o12', 'o13', 'o14', 'o15', 'o16', 'o17', 'o18', 'y01', 'y02', 'y03', 'y04', 'y05', 'y06', 'y07', 'y08', 'y09', 'y10', 'y11', 'y12', 'y13', 'y14', 'y15', 'y16', 'y17', 'y18', 'y19'])


In [17]:
# Preview the first rows of subject y01's first behaviour file (rep1).
# NOTE(review): the saved output under this cell shows a `delay` column and no
# `correct` column, which does not match the columns read_bh produces; it
# looks stale, so re-run after a kernel restart to confirm.
data['y01']['rep1'].head(11)

Unnamed: 0,ab,show_times,stim,big_small,nl,onset,opt,delay,sequence
0,A,3,stim/faint.bmp,S,NL,29988,4,807,1
1,A,3,stim/toilett.bmp,B,NL,32988,3,636,1
2,D,1,stim/flashlight.bmp,S,NL,35988,4,667,1
3,A,3,stim/broccoli.bmp,S,L,38988,4,550,1
4,A,3,stim/car.bmp,B,NL,41988,3,585,1
5,A,3,stim/penguin.bmp,B,L,44988,4,811,1
6,A,3,stim/playcard.bmp,S,NL,51989,4,568,1
7,A,3,stim/egg.bmp,S,L,62988,4,652,1
8,A,3,stim/playcard.bmp,S,NL,73988,4,570,2
9,A,3,stim/penguin.bmp,B,L,76988,3,595,2


In [16]:
# Preview the first rows of subject y01's second behaviour file (rep2).
# NOTE(review): the saved output under this cell shows a `delay` column and no
# `correct` column, which does not match the columns read_bh produces; it
# looks stale, so re-run after a kernel restart to confirm.
data['y01']['rep2'].head(11)

Unnamed: 0,ab,show_times,stim,big_small,nl,onset,opt,delay,sequence
0,D,1,stim/peach.bmp,S,L,29972,4,601,1
1,A,3,stim/baby.bmp,B,L,36972,3,613,1
2,A,3,stim/orange.bmp,S,L,39972,4,598,1
3,A,3,stim/orange.bmp,S,L,42973,4,458,2
4,A,3,stim/cruiseship.bmp,B,NL,45972,3,578,1
5,A,3,stim/octopus.bmp,B,L,56972,3,745,1
6,A,3,stim/hose.bmp,B,NL,67972,3,702,1
7,A,3,stim/hose.bmp,B,NL,70972,3,560,2
8,A,3,stim/corn.bmp,S,L,73972,4,596,1
9,D,1,stim/parrot.bmp,S,L,76972,4,582,1
