In [1]:
import pandas as pd
import numpy as np

# Turn off pandas' chained-assignment warning for this notebook (safe to ignore this line)
pd.set_option("mode.chained_assignment", None)

# Read every subject file into its own dataframe
subject_files = ["data/subject-3.csv", "data/subject-4.csv"]
subj1, subj2 = [pd.read_csv(csv_path, sep=",") for csv_path in subject_files]

# Stack the subjects on top of each other; ignore_index=True renumbers the rows from 0
df = pd.concat([subj1, subj2], ignore_index=True)

In [2]:
# List the column names that we want to keep for the analysis
include_columns = [
    'subject_nr', 'block', 'session', 'congruency_transition_type', 'congruency_type',
    'correct', 'response_time', 'task_transition_type', 'task_type', "response",
]

# Make a new df, called df_trim, that only contains the columns in "include_columns".
# .copy() makes df_trim an independent dataframe, so the column assignments in the
# following cells modify df_trim itself instead of a selection taken from df
# (this is what pandas' SettingWithCopyWarning is about).
df_trim = df[include_columns].copy()

# show the new df
df_trim

Unnamed: 0,subject_nr,block,session,congruency_transition_type,congruency_type,correct,response_time,task_transition_type,task_type,response
0,3,1,lowswitch,,incongruent,0,1482.9738,,parity,
1,3,1,lowswitch,congruency-switch,congruent,1,706.7057,task-switch,magnitude,a
2,3,1,lowswitch,congruency-repetition,congruent,1,855.6105,task-switch,parity,a
3,3,1,lowswitch,congruency-switch,incongruent,1,867.8947,task-repetition,parity,a
4,3,1,lowswitch,congruency-repetition,incongruent,1,1078.9412,task-switch,magnitude,a
...,...,...,...,...,...,...,...,...,...,...
947,4,12,highswitch,congruency-repetition,congruent,1,699.0107,task-repetition,magnitude,num_6
948,4,12,highswitch,congruency-repetition,congruent,1,582.9837,task-repetition,magnitude,a
949,4,12,highswitch,congruency-repetition,congruent,1,731.3586,task-switch,parity,a
950,4,12,highswitch,congruency-repetition,congruent,1,437.6794,task-repetition,parity,num_6


In [3]:
print("Column types BEFORE changing: \n", df_trim.dtypes, "\n")

# Convert the subject number and the correct/incorrect flag to pandas' category dtype
for label_column in ('subject_nr', 'correct'):
    df_trim[label_column] = df_trim[label_column].astype('category')

print("Column types AFTER changing: \n",df_trim.dtypes)

Column types BEFORE changing: 
 subject_nr                      int64
block                           int64
session                        object
congruency_transition_type     object
congruency_type                object
correct                         int64
response_time                 float64
task_transition_type           object
task_type                      object
response                       object
dtype: object 

Column types AFTER changing: 
 subject_nr                    category
block                            int64
session                         object
congruency_transition_type      object
congruency_type                 object
correct                       category
response_time                  float64
task_transition_type            object
task_type                       object
response                        object
dtype: object


In [4]:
# Here the last block should be 12; let's check by printing the last 5 rows of the block column using the tail function
print("The last block here is 12: \n", df_trim["block"].tail(5), "\n")

# Conditionally select rows based on if the value in the "block" column is lower than 11.
# .copy() gives us an independent dataframe, so the column assignment in the next
# cell does not write into a filtered slice of df_trim.
df_trim_blocks = df_trim[df_trim['block'] < 11].copy()

# Check to see if the last block is now 10 instead of 12
print("Here the last block should be 10: \n", df_trim_blocks["block"].tail(5))

The last block here is 12: 
 947    12
948    12
949    12
950    12
951    12
Name: block, dtype: int64 

Here the last block should be 10: 
 811    10
812    10
813    10
814    10
815    10
Name: block, dtype: int64


In [5]:
# Relabel the subjects: 3 becomes 1 and 4 becomes 2.
# subject_nr was converted to a categorical column earlier, so we rename the
# categories themselves. Using Series.replace to change the categories of a
# categorical column is deprecated in recent pandas versions.
# assign() returns a new dataframe, so we never write into a filtered slice,
# and re-running this cell is harmless (labels other than 3 and 4 are left as-is).
df_trim_blocks = df_trim_blocks.assign(
    subject_nr=df_trim_blocks['subject_nr'].cat.rename_categories({3: 1, 4: 2})
)

# Print out all unique values in subject_nr column, should be 1 and 2
df_trim_blocks['subject_nr'].unique()

[1, 2]
Categories (2, int64): [1, 2]

In [6]:
# Count the number of trials for every combination of session and task transition type
piv_task_transition_exp = pd.pivot_table(
    df_trim_blocks,
    index=['session'],               # one row per session
    columns='task_transition_type',  # one column per task transition type
    aggfunc='size',                  # "size" counts the rows in each group
)

# Show the resulting pivot table
piv_task_transition_exp

task_transition_type,task-repetition,task-switch
session,Unnamed: 1_level_1,Unnamed: 2_level_1
highswitch,82,248
lowswitch,245,85


In [7]:
# Count the number of trials per subject for each condition column, one pivot table per column.
# subject_nr is a categorical column, so we pass observed=True explicitly: recent
# pandas versions warn when the default is relied upon for categorical groupers,
# and the default changes in a future version. observed=True keeps only the
# subject numbers that actually occur in the data.
piv_cong = df_trim_blocks.pivot_table(
    index=['subject_nr'],
    columns=['congruency_type'],
    aggfunc='size',  # Function to aggregate columns on, here we specify "size"
    observed=True)

piv_cong_transition = df_trim_blocks.pivot_table(
    index=['subject_nr'],
    columns=['congruency_transition_type'],
    aggfunc='size',  # Function to aggregate columns on, here we specify "size"
    observed=True)

piv_task = df_trim_blocks.pivot_table(
    index=['subject_nr'],
    columns=['task_type'],
    aggfunc='size',  # Function to aggregate columns on, here we specify "size"
    observed=True)

piv_task_transition = df_trim_blocks.pivot_table(
    index=['subject_nr'],
    columns='task_transition_type',
    aggfunc='size',  # Function to aggregate columns on, here we specify "size"
    observed=True)

# Add all dataframes to a list
dfs = [piv_cong, piv_cong_transition, piv_task, piv_task_transition]

# Merge the dataframes, axis=1 defines that the new dataframes should be added as columns instead of new rows
pd.concat(dfs, axis=1)

Unnamed: 0_level_0,congruent,incongruent,congruency-repetition,congruency-switch,magnitude,parity,task-repetition,task-switch
subject_nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,167,173,166,164,172,168,245,85
2,172,168,163,167,168,172,82,248


In [8]:
# Specify the columns we want to check
columns_to_check = ['task_type', 'congruency_type',
                    'task_transition_type', 'congruency_transition_type']

# Make an empty list so we can populate this later
dfs = []

# Loop over columns_to_check and make a new pivot table for each column
for column in columns_to_check: # Loops over "columns_to_check"
    piv = df_trim_blocks.pivot_table(
        index=['subject_nr'],
        columns=[column], # The for-loop inserts a new column here on every iteration
        aggfunc='size', # Function to aggregate columns on, here we specify "size"
        # subject_nr is categorical: state explicitly that only subject numbers that
        # occur in the data are used, instead of relying on pandas' changing default
        observed=True)

    # Append the pivot table to our "dfs" list (which was empty initially)
    dfs.append(piv)

# Merge the dataframes that are in the "dfs" list, side by side (axis=1 adds them as columns)
pd.concat(dfs, axis=1)

Unnamed: 0_level_0,magnitude,parity,congruent,incongruent,task-repetition,task-switch,congruency-repetition,congruency-switch
subject_nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,172,168,167,173,245,85,166,164
2,168,172,172,168,82,248,163,167


In [9]:
# Keep only the trials where the "correct" column equals 1
is_correct = df_trim_blocks['correct'] == 1
df_correct = df_trim_blocks.loc[is_correct]

# Show the filtered dataframe
df_correct

Unnamed: 0,subject_nr,block,session,congruency_transition_type,congruency_type,correct,response_time,task_transition_type,task_type,response
1,1,1,lowswitch,congruency-switch,congruent,1,706.7057,task-switch,magnitude,a
2,1,1,lowswitch,congruency-repetition,congruent,1,855.6105,task-switch,parity,a
3,1,1,lowswitch,congruency-switch,incongruent,1,867.8947,task-repetition,parity,a
4,1,1,lowswitch,congruency-repetition,incongruent,1,1078.9412,task-switch,magnitude,a
5,1,1,lowswitch,congruency-repetition,incongruent,1,819.4645,task-repetition,magnitude,a
...,...,...,...,...,...,...,...,...,...,...
810,2,10,highswitch,congruency-repetition,congruent,1,321.1855,task-switch,parity,num_6
811,2,10,highswitch,congruency-switch,incongruent,1,844.0940,task-switch,magnitude,a
812,2,10,highswitch,congruency-repetition,incongruent,1,674.5391,task-switch,parity,num_6
814,2,10,highswitch,congruency-switch,congruent,1,1353.6886,task-switch,magnitude,a


In [10]:
# Check switch costs: mean response time per subject for each task transition type
switch_table = pd.pivot_table(
    df_correct,
    values="response_time", # The value that will be summarized
    index=["subject_nr"], # The rows to summarize over
    columns=["task_transition_type"], # The columns to summarize over
    # Use the string "mean" rather than np.mean: passing the numpy function is
    # deprecated in recent pandas versions and the string gives the same result
    aggfunc="mean", # Calculate the mean response time per subject per task transition type
    # subject_nr is categorical; be explicit instead of relying on pandas' changing default
    observed=True,
)

# Print out the pivot table
switch_table

task_transition_type,task-repetition,task-switch
subject_nr,Unnamed: 1_level_1,Unnamed: 2_level_1
1,749.989603,888.007569
2,638.137917,675.488397


In [11]:
# Switch cost = mean response time on task-switch trials minus mean response time on task-repetition trials
switch_table['switch cost'] = switch_table['task-switch'].sub(switch_table['task-repetition'])

# Show the table including the new column
switch_table

task_transition_type,task-repetition,task-switch,switch cost
subject_nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,749.989603,888.007569,138.017966
2,638.137917,675.488397,37.35048


In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) of the response times in df_correct
df_correct.loc[:, 'response_time'].describe()

count     599.000000
mean      725.344337
std       233.487524
min       124.173900
25%       550.588900
50%       655.866000
75%       876.164300
max      1443.708500
Name: response_time, dtype: float64

In [13]:
# Save the cleaned dataframe as a CSV file; index=False leaves the row index out of the file
df_trim_blocks.to_csv(
    path_or_buf='../11_plotting/data/df_cleaned.csv',
    index=False,
)

In [14]:
# Pivot table count

In [15]:
# Pivot table mean

In [16]:
# df_correct

In [17]:
# Outlier method one

In [18]:
# Outlier method two