In [2]:
from pathlib import Path
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns


In [23]:

# Adjust display options
#pd.set_option('display.max_rows', 600)  # Display all rows
#pd.set_option('display.max_columns', None)  # Display all columns
#pd.set_option('display.width', None)  # Allow unlimited width for display

# Load every CSV found (recursively) under data/ and stack them into one
# DataFrame, tagging each row with the 1-based index of the file it came from.
# Each file is treated as one participant.
dfs = []
participant_ids = []

# rglob() yields paths in filesystem-dependent order, so sort them to make the
# file -> participant_id mapping stable across runs and machines.
# NOTE: the sort is lexicographic on the path ("p10.csv" sorts before "p2.csv").
pathlist = sorted(Path("data/").rglob('*.csv'))
if not pathlist:
    # Fail with a clear message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError("No CSV files found under 'data/'")

for participant_id, path in enumerate(pathlist, start=1):
    df = pd.read_csv(path)
    dfs.append(df)
    # One ID entry per row, so the list lines up with the concatenated frame
    participant_ids.extend([participant_id] * len(df))

# Concatenate all DataFrames in the list into one DataFrame
combined_df = pd.concat(dfs, ignore_index=True)
combined_df['participant_id'] = participant_ids

# Reorder columns to place participant ID column first
cols = ['participant_id'] + [
    col for col in combined_df.columns.tolist() if col != 'participant_id'
]
combined_df = combined_df[cols]

# Clean data: drop rows containing any missing value, then drop rows whose
# rt equals -1 (presumably a "no response" sentinel -- confirm against the
# experiment's output format).
combined_df = (
    combined_df
    .dropna()
    .loc[lambda frame: frame['rt'] != -1]
)

# Per-participant tally of correct (True) and incorrect (False) answers.
# IDs run from 1 up to the largest ID still present after cleaning, so an ID
# with no remaining rows is reported with zero counts.
true_false_lst = []
last_participant = max(combined_df.participant_id)
for pid in range(1, last_participant + 1):
    # 'correct' values for this participant only
    answers = combined_df.loc[combined_df.participant_id == pid, 'correct']

    # One summary record per participant
    true_false_lst.append({
        "Participant": pid,
        "True": answers.eq(True).sum(),
        "False": answers.eq(False).sum(),
    })

# Per-task subsets used by the task-specific plots.
# The filters must select the named task type (==). The previous != comparison
# put every row except the named task into each frame, so df_or held the
# non-"or" trials and df_mov the non-"mov" trials.
# Assumes task_type holds the literal values "or" and "mov" -- TODO confirm.
df_or = combined_df[combined_df.task_type == "or"]
df_mov = combined_df[combined_df.task_type == "mov"]

#combined_df
# Print one per-participant summary record per line
for participant_summary in true_false_lst:
    print(participant_summary)

{'Participant': 1, 'True': 84, 'False': 9}
{'Participant': 2, 'True': 71, 'False': 24}
{'Participant': 3, 'True': 78, 'False': 16}
{'Participant': 4, 'True': 82, 'False': 8}
{'Participant': 5, 'True': 82, 'False': 11}
{'Participant': 6, 'True': 86, 'False': 9}
{'Participant': 7, 'True': 79, 'False': 10}
{'Participant': 8, 'True': 49, 'False': 31}
{'Participant': 9, 'True': 88, 'False': 4}
{'Participant': 10, 'True': 91, 'False': 2}
{'Participant': 11, 'True': 94, 'False': 1}
{'Participant': 12, 'True': 59, 'False': 31}
{'Participant': 13, 'True': 85, 'False': 10}
{'Participant': 14, 'True': 71, 'False': 24}
{'Participant': 15, 'True': 75, 'False': 18}
{'Participant': 16, 'True': 75, 'False': 11}
{'Participant': 17, 'True': 47, 'False': 33}
{'Participant': 18, 'True': 72, 'False': 21}
{'Participant': 19, 'True': 87, 'False': 6}
{'Participant': 20, 'True': 76, 'False': 19}
{'Participant': 21, 'True': 67, 'False': 26}
{'Participant': 22, 'True': 88, 'False': 7}
{'Participant': 23, 'True':

In [None]:
# Visualization 1: Distribution of Reaction Times
# Histogram of the rt column (30 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=combined_df, x="rt", kde=True, bins=30, ax=ax)
ax.set_title('Distribution of Reaction Times')
ax.set_xlabel('Reaction Time (ms)')
ax.set_ylabel('Frequency')
plt.show()

# Visualization 2: Accuracy by Task Type
def percent_correct(outcomes):
    """Return the sum of `outcomes` cast to int, divided by its length, times 100."""
    return sum(outcomes.astype(int)) / len(outcomes) * 100


# One bar per task_type; bar height is the percentage of correct trials.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x="task_type",
    y="correct",
    data=combined_df,
    estimator=percent_correct,
    ax=ax,
)
ax.set_title('Accuracy by Task Type')
ax.set_xlabel('Task Type')
ax.set_ylabel('Percentage Correct (%)')
plt.show()

# Visualization 3: Reaction Time by Congruency
# Box plot of rt, one box per congruency level, over all cleaned trials.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x="congruency", y="rt", data=combined_df, ax=ax)
ax.set_title('Reaction Time by Congruency')
ax.set_xlabel('Congruency')
ax.set_ylabel('Reaction Time (ms)')
plt.show()

# Visualization 4: Task Transition Effects on Reaction Time
# Box plot of rt, one box per task_transition value.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x="task_transition", y="rt", data=combined_df, ax=ax)
ax.set_title('Effect of Task Transition on Reaction Time')
ax.set_xlabel('Task Transition')
ax.set_ylabel('Reaction Time (ms)')
plt.show()

# Visualization 5: Reaction Times for Repetition Tasks by Congruency
# Only rows with task_transition == 'repetition'; boxes are split by congruency.
repetition_trials = combined_df[combined_df['task_transition'] == 'repetition']

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(
    x="task_transition",
    y="rt",
    hue="congruency",
    data=repetition_trials,
    ax=ax,
)
ax.set_title('Reaction Times for Repetition Tasks by Congruency')
ax.set_xlabel('Task Transition (Repetition)')
ax.set_ylabel('Reaction Time (ms)')
ax.legend(title='Congruency')
plt.show()

# Visualization 6: Reaction Times for Switch Tasks by Congruency
# Only rows with task_transition == 'switch'; boxes are split by congruency.
switch_trials = combined_df[combined_df['task_transition'] == 'switch']

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(
    x="task_transition",
    y="rt",
    hue="congruency",
    data=switch_trials,
    ax=ax,
)
ax.set_title('Reaction Times for Switch Tasks by Congruency')
ax.set_xlabel('Task Transition (Switch)')
ax.set_ylabel('Reaction Time (ms)')
ax.legend(title='Congruency')
plt.show()

# Visualization 7: Reaction Time for task_type "or" by Congruency
# Box plot of rt per congruency level, drawn from the df_or subset.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x="congruency", y="rt", data=df_or, ax=ax)
ax.set_title('Reaction Time for task_type "or" by Congruency')
ax.set_xlabel('Congruency')
ax.set_ylabel('Reaction Time (ms)')
plt.show()

# Visualization 8: Reaction Time for task_type "mov" by Congruency
# Box plot of rt per congruency level, drawn from the df_mov subset.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x="congruency", y="rt", data=df_mov, ax=ax)
ax.set_title('Reaction Time for task_type "mov" by Congruency')
ax.set_xlabel('Congruency')
ax.set_ylabel('Reaction Time (ms)')
plt.show()

