# Rating analyses (pre task) for all recruited participants (N=415)

## Heterogeneity across items

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Access the shared folder: the loop below expects one sub-folder per
# participant, each holding "<folder name>_ratings.csv".
# NOTE: left blank in this notebook; it has to be set before running.
shared_folder_path = ""

# One (image, response) frame per participant; pooled per food item below.
rating_frames = []

# Iterate through each subject folder
for subject_folder in os.listdir(shared_folder_path):
    subject_folder_path = os.path.join(shared_folder_path, subject_folder)

    # Anything that is not a directory is not a subject folder
    if not os.path.isdir(subject_folder_path):
        continue

    # Path to the ratings CSV file (e.g., 654_ratings.csv)
    task_file_path = os.path.join(subject_folder_path, f"{subject_folder}_ratings.csv")
    if not os.path.exists(task_file_path):
        print(f"No data found for participant {subject_folder}.")
        continue

    df = pd.read_csv(task_file_path)
    subject_ratings = df[['image', 'response']].copy()
    subject_ratings['response'] = pd.to_numeric(subject_ratings['response'], errors='coerce')
    # Rows without an image are not item ratings, and a missing response would
    # otherwise turn that item's mean and SD into NaN.
    subject_ratings = subject_ratings.dropna(subset=['image', 'response'])
    if not subject_ratings.empty:
        rating_frames.append(subject_ratings)

# Per-item list of responses, kept under the same name as before
ratings = {}
# Table consumed by the nutrition-correlation cells further down, which merge
# on 'Food Image' and read 'Average Rating'; defined even when no data is found.
# NOTE(review): assumes the nutrition table's 'Food Image' values use the same
# identifiers as the 'image' column of the ratings files - confirm.
average_ratings_df = pd.DataFrame(columns=['Food Image', 'Average Rating'])

if rating_frames:
    pooled_ratings = pd.concat(rating_frames, ignore_index=True)
    ratings = {
        image: responses.tolist()
        for image, responses in pooled_ratings.groupby('image', sort=False)['response']
    }

# Ensure there is data
if not ratings:
    print("No ratings data found.")
else:
    # Mean and sample SD (ddof=1, the pandas default) for each food item,
    # sorted by mean rating in ascending order
    item_stats = (
        pooled_ratings.groupby('image')['response']
        .agg(['mean', 'std'])
        .sort_values('mean', kind='stable')
    )
    average_ratings = item_stats['mean'].to_dict()
    sd_ratings = item_stats['std'].to_dict()
    average_ratings_df = (
        item_stats['mean']
        .rename('Average Rating')
        .rename_axis('Food Image')
        .reset_index()
    )

    # Separate the data for plotting
    food_items = item_stats.index.tolist()
    average_ratings_values = item_stats['mean'].tolist()
    sd_values = item_stats['std'].tolist()
    sorted_ratings = list(zip(food_items, average_ratings_values))

    # Scatter of item means with +/- 1 SD error bars
    plt.figure(figsize=(14, 8))
    plt.errorbar(range(len(food_items)), average_ratings_values, yerr=sd_values, fmt='o', color='steelblue', ecolor='lightgrey', elinewidth=3, capsize=0)
    plt.xticks(range(len(food_items)), food_items, rotation='vertical', fontsize=14)

    # Bold and increase size of axes titles and main title
    plt.xlabel('Food Items', fontsize=16, fontweight='bold')
    plt.ylabel('Average Rating', fontsize=16, fontweight='bold')
    plt.title('Average Rating for Each Food Item with SD', fontsize=18, fontweight='bold')

    # Remove grid lines for a cleaner look
    plt.grid(False)

    # Fix the y-axis to the 1-5 range with unit ticks instead of the automatic limits
    y_min = 1
    y_max = 5
    y_step = 1
    plt.ylim([y_min, y_max])
    plt.yticks(range(y_min, y_max + 1, y_step), fontsize=16)

    # tight_layout recomputes the subplot margins (it would override any manual
    # subplots_adjust call), so it alone makes room for the vertical tick labels.
    plt.tight_layout()

    plt.show()

## Correlation with caloric and nutritional content

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr

# Nutrition table location (data obtained with the Columbia FoodFolio stimuli set)
nutrition_info_path = ""

# Load the nutrition table and attach it to the per-item average ratings
nutrition_df = pd.read_csv(nutrition_info_path)
merged_df = average_ratings_df.merge(nutrition_df, on='Food Image')

nutrient = 'Total Calories'
spearman_corr, spearman_p_value = spearmanr(merged_df['Average Rating'], merged_df[nutrient])

# Scatter with a fitted regression line, annotated with the Spearman result
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x=nutrient,
    y='Average Rating',
    data=merged_df,
    scatter_kws={'s': 100, 'color': 'steelblue', 'edgecolor': 'w'},
    line_kws={'color': 'grey'},
    ax=ax,
)
ax.set_title('Food Likert Ratings and Caloric Content', fontsize=16, weight='bold')
ax.set_xlabel('Total Calories', fontsize=14, weight='bold')
ax.set_ylabel('Average Rating', fontsize=14, weight='bold')

# Text box in axes coordinates, horizontally centred just under the title
annotation = f'Spearman Correlation: {spearman_corr:.2f}\nP-value: {spearman_p_value:.4f}'
ax.text(0.5, 0.87, annotation,
        fontsize=12, bbox=dict(facecolor='white', alpha=0.5),
        ha='center', transform=ax.transAxes)

ax.grid(True, linestyle='--', alpha=0.7)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('left', 'bottom'):
    ax.spines[side].set_linewidth(1.5)

plt.tight_layout()
plt.show()

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr

# Rank correlation between the per-item average rating and fat content
nutrient = 'Fat (g)'
spearman_corr, spearman_p_value = spearmanr(merged_df['Average Rating'], merged_df[nutrient])

# Scatter with a fitted regression line, annotated with the Spearman result
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x=nutrient,
    y='Average Rating',
    data=merged_df,
    scatter_kws={'s': 100, 'color': 'steelblue', 'edgecolor': 'w'},
    line_kws={'color': 'grey'},
    ax=ax,
)
ax.set_title('Food Likert Ratings and Fat Content', fontsize=16, weight='bold')
ax.set_xlabel('Fat (g)', fontsize=14, weight='bold')
ax.set_ylabel('Average Rating', fontsize=14, weight='bold')

# Text box in axes coordinates, horizontally centred just under the title
annotation = f'Spearman Correlation: {spearman_corr:.2f}\nP-value: {spearman_p_value:.4f}'
ax.text(0.5, 0.87, annotation,
        fontsize=12, bbox=dict(facecolor='white', alpha=0.5),
        ha='center', transform=ax.transAxes)

ax.grid(True, linestyle='--', alpha=0.7)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('left', 'bottom'):
    ax.spines[side].set_linewidth(1.5)

plt.tight_layout()
plt.show()

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr


# Rank correlation between the per-item average rating and protein content
nutrient = 'Protein (g)'
spearman_corr, spearman_p_value = spearmanr(merged_df['Average Rating'], merged_df[nutrient])

# Scatter with a fitted regression line, annotated with the Spearman result
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x=nutrient,
    y='Average Rating',
    data=merged_df,
    scatter_kws={'s': 100, 'color': 'steelblue', 'edgecolor': 'w'},
    line_kws={'color': 'grey'},
    ax=ax,
)
ax.set_title('Food Likert Ratings and Protein Content', fontsize=16, weight='bold')
ax.set_xlabel('Protein (g)', fontsize=14, weight='bold')
ax.set_ylabel('Average Rating', fontsize=14, weight='bold')

# Text box in axes coordinates, horizontally centred just under the title
annotation = f'Spearman Correlation: {spearman_corr:.2f}\nP-value: {spearman_p_value:.4f}'
ax.text(0.5, 0.87, annotation,
        fontsize=12, bbox=dict(facecolor='white', alpha=0.5),
        ha='center', transform=ax.transAxes)

ax.grid(True, linestyle='--', alpha=0.7)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('left', 'bottom'):
    ax.spines[side].set_linewidth(1.5)

plt.tight_layout()
plt.show()

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr


# Rank correlation between the per-item average rating and carbohydrate content
nutrient = 'Carbohydrate (g)'
spearman_corr, spearman_p_value = spearmanr(merged_df['Average Rating'], merged_df[nutrient])

# Scatter with a fitted regression line, annotated with the Spearman result
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x=nutrient,
    y='Average Rating',
    data=merged_df,
    scatter_kws={'s': 100, 'color': 'steelblue', 'edgecolor': 'w'},
    line_kws={'color': 'grey'},
    ax=ax,
)
ax.set_title('Food Likert Ratings and Carbohydrate Content', fontsize=16, weight='bold')
ax.set_xlabel('Carbohydrate (g)', fontsize=14, weight='bold')
ax.set_ylabel('Average Rating', fontsize=14, weight='bold')

# Text box in axes coordinates, horizontally centred just under the title
annotation = f'Spearman Correlation: {spearman_corr:.2f}\nP-value: {spearman_p_value:.4f}'
ax.text(0.5, 0.87, annotation,
        fontsize=12, bbox=dict(facecolor='white', alpha=0.5),
        ha='center', transform=ax.transAxes)

ax.grid(True, linestyle='--', alpha=0.7)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('left', 'bottom'):
    ax.spines[side].set_linewidth(1.5)

plt.tight_layout()
plt.show()

# Preparing the data for pre/post task rating analyses

In [None]:
import os
import pandas as pd

# Root directory whose sub-directories are the participant folders
shared_folder_path = ""

# Subject ID -> DataFrame of that subject's post-task subset ratings
participant_data = {}
# Subject IDs whose post-task file is missing
participants_without_data = []

for subject_folder in os.listdir(shared_folder_path):
    subject_folder_path = os.path.join(shared_folder_path, subject_folder)

    # Skip anything that is not a subject directory
    if not os.path.isdir(subject_folder_path):
        continue

    # Post-task subset ratings file, named after the folder
    task_file_name = f"{subject_folder}_subset_ratings_post.csv"
    task_file_path = os.path.join(subject_folder_path, task_file_name)

    if not os.path.exists(task_file_path):
        participants_without_data.append(subject_folder)
        print(f"No data found for participant {subject_folder}.")
        continue

    df = pd.read_csv(task_file_path)
    participant_data[subject_folder] = df

# Every subject directory counts, whether or not its file was found
total_participants = len(participant_data) + len(participants_without_data)
print(f"\nTotal participants processed: {total_participants}")

# Closing summary: either everything loaded, or list who is missing
if participants_without_data:
    print("Some participants did not have data loaded:")
    for participant in participants_without_data:
        print(f"- {participant}")
else:
    print("Data loaded successfully for all participants.")

In [None]:
import os
import pandas as pd

# Access the shared folder
shared_folder_path = "/Users/alexrich/Documents/onlinedata_fullsample"

# Subject ID -> {'pre': DataFrame, 'post': DataFrame}
participant_data = {}
participants_without_data = []


def _attach_item_ratings(df_subset, df_ratings):
    """Left-merge each image's rating from df_ratings onto df_subset.

    Returns (frame, merged_now). When the subset file has its own 'response'
    column, pandas suffixes the clash, so the subset's answer ends up in
    'response_x' and the rating in 'response_y' (the names the later cells
    read), and the rename below has nothing to rename. Only when the subset
    has no 'response' column does the rating land in 'rating'.

    Because this cell overwrites its own input files, a frame that already
    carries 'response_y' or 'rating' is returned untouched (merged_now=False);
    merging again would append further columns on every re-run.
    """
    if 'response_y' in df_subset.columns or 'rating' in df_subset.columns:
        return df_subset, False

    lookup = (
        df_ratings[['image', 'response']]
        # pandas matches NaN keys with each other, so rating rows without an
        # image would attach to (and duplicate) subset rows without an image.
        .dropna(subset=['image'])
        # NOTE(review): keeps the first rating if an image occurs more than
        # once, so the left merge can never add rows - confirm that is wanted.
        .drop_duplicates(subset='image')
    )
    merged = df_subset.merge(lookup, on='image', how='left')
    merged = merged.rename(columns={'response': 'rating'})
    return merged, True


# Iterate through each subject folder
for subject_folder in os.listdir(shared_folder_path):
    subject_folder_path = os.path.join(shared_folder_path, subject_folder)

    # Check if the item is a directory (subject folder)
    if not os.path.isdir(subject_folder_path):
        continue

    # Construct the paths to the ratings files
    pre_file_path = os.path.join(subject_folder_path, f"{subject_folder}_subset_ratings_pre.csv")
    post_file_path = os.path.join(subject_folder_path, f"{subject_folder}_subset_ratings_post.csv")
    ratings_file_path = os.path.join(subject_folder_path, f"{subject_folder}_ratings.csv")

    # All three files are required for a participant to be processed
    if not all(os.path.exists(path) for path in (pre_file_path, post_file_path, ratings_file_path)):
        participants_without_data.append(subject_folder)
        print(f"No data found for participant {subject_folder}.")
        continue

    # Read the task CSV files into DataFrames
    df_pre = pd.read_csv(pre_file_path)
    df_post = pd.read_csv(post_file_path)
    df_ratings = pd.read_csv(ratings_file_path)

    # Debug statements to confirm file loading
    print(f"Loaded pre data for participant {subject_folder}:")
    print(df_pre.head())
    print(f"Loaded post data for participant {subject_folder}:")
    print(df_post.head())

    # Attach the item ratings, unless an earlier run already did
    df_pre, pre_changed = _attach_item_ratings(df_pre, df_ratings)
    df_post, post_changed = _attach_item_ratings(df_post, df_ratings)

    # Debug statements to confirm merging
    print(f"Pre data with ratings for participant {subject_folder}:")
    print(df_pre.head())
    print(f"Post data with ratings for participant {subject_folder}:")
    print(df_post.head())

    # Write back only the files that actually changed
    if pre_changed:
        df_pre.to_csv(pre_file_path, index=False)
    if post_changed:
        df_post.to_csv(post_file_path, index=False)

    # Store the DataFrames in the dictionary with the subject ID as the key
    participant_data[subject_folder] = {'pre': df_pre, 'post': df_post}
    if pre_changed or post_changed:
        print(f"Data loaded and saved successfully for participant {subject_folder}.")
    else:
        print(f"Ratings already merged for participant {subject_folder}; files left unchanged.")

# Report the number of participants
total_participants = len(participant_data) + len(participants_without_data)
print(f"\nTotal participants processed: {total_participants}")

# Final message based on loading status
if not participants_without_data:
    print("Data loaded successfully for all participants.")
else:
    print("Some participants did not have data loaded:")
    for participant in participants_without_data:
        print(f"- {participant}")

In [None]:
import os
import pandas as pd
import ast  # For converting strings to dictionaries

# Access the shared folder.
# The loop further down lists this directory and treats every sub-directory as
# one participant's folder. The value is left empty here, so it has to be
# filled in before running: os.listdir("") raises FileNotFoundError.
shared_folder_path = ""

# Function to parse the 'response_x' column into separate columns
def parse_response_column(df, column_name='response_x'):
    """Expand a column of dict-like strings into one column per key.

    Args:
        df: DataFrame to expand; it is copied, never modified in place.
        column_name: Column holding the packed responses, e.g.
            "{'paying': 2, 'enjoyable': 4, 'satisfaction': 5}".

    Returns:
        A copy of ``df`` with one extra column per parsed key. Cells that
        cannot be parsed into a dict contribute None for 'paying',
        'enjoyable' and 'satisfaction'. Columns that already exist in ``df``
        are kept as they are rather than overwritten. If ``column_name`` is
        missing, a message is printed and the plain copy is returned.
    """
    import json  # local import: only needed for the JSON fallback below

    fallback = {"paying": None, "enjoyable": None, "satisfaction": None}
    parsed_df = df.copy()

    if column_name not in parsed_df.columns:
        print(f"Column '{column_name}' does not exist in the DataFrame.")
        return parsed_df

    def parse_cell(cell):
        # Already a mapping (frame built in memory rather than read from CSV)
        if isinstance(cell, dict):
            return cell
        # NaN / None / numbers cannot hold a packed response
        if not isinstance(cell, str):
            return dict(fallback)
        # Python-literal syntax first (the original behaviour), then strict
        # JSON so that payloads containing null/true/false are not discarded.
        for loader in (ast.literal_eval, json.loads):
            try:
                value = loader(cell)
            except (ValueError, SyntaxError, TypeError):
                continue
            if isinstance(value, dict):
                return value
        return dict(fallback)

    records = [parse_cell(cell) for cell in parsed_df[column_name]]
    if records:
        parsed_columns = pd.DataFrame(records)
    else:
        # Empty input: still expose the expected columns
        parsed_columns = pd.DataFrame(columns=list(fallback))

    # Assign by position, so a non-unique index cannot multiply rows, and skip
    # names that already exist so a partially parsed file does not raise.
    for name in parsed_columns.columns:
        if name not in parsed_df.columns:
            parsed_df[name] = parsed_columns[name].to_numpy()

    return parsed_df

# Walk every participant folder and expand the packed responses of the
# pre- and post-task subset files in place.
for subject_folder in os.listdir(shared_folder_path):
    subject_folder_path = os.path.join(shared_folder_path, subject_folder)

    # Only directories are subject folders
    if not os.path.isdir(subject_folder_path):
        continue

    try:
        # 'pre' is handled before 'post'; an error in either one abandons the
        # rest of this participant and is reported below.
        for phase in ("pre", "post"):
            phase_file_path = os.path.join(
                subject_folder_path, f"{subject_folder}_subset_ratings_{phase}.csv"
            )
            if not os.path.exists(phase_file_path):
                continue

            print(f"Processing '{phase}' data for participant {subject_folder}")
            phase_df = pd.read_csv(phase_file_path)

            # The file counts as already parsed only when all three columns are present
            if {'paying', 'enjoyable', 'satisfaction'}.issubset(phase_df.columns):
                print(f"{phase.capitalize()} data for participant {subject_folder} already has parsed columns.")
                continue

            # Expand 'response_x' and overwrite the file with the result
            phase_df = parse_response_column(phase_df)
            phase_df.to_csv(phase_file_path, index=False)
            print(f"{phase.capitalize()} data parsed and saved successfully for participant {subject_folder}.")

    except Exception as e:
        print(f"An error occurred while processing participant {subject_folder}: {e}")

# Pre-post task for task-eligible participants (n=279): changes in subjective valuation measures

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel
import seaborn as sns

# Prints the significant paired t-test results and plots the pre/post ratings for both start groups, for each of the three subjective-value scales (willingness to pay, enjoyability, satisfaction).

# Function to calculate statistics and t-tests
def calculate_statistics(pre_data, post_data):
    """Summarise pre/post values per key and run a paired t-test on them.

    Args:
        pre_data: dict mapping a key (e.g. 'Disliked', 'Liked') to a list of
            pre-task values.
        post_data: dict with the same keys holding the post-task values; the
            i-th post value is taken to be the partner of the i-th pre value.

    Returns:
        Tuple ``(avg_pre, avg_post, std_pre, std_post, p_values, t_stats, dfs)``
        of dicts keyed like the inputs.

        * ``avg_*``: mean of the non-NaN values, or None when there are none.
        * ``std_*``: despite the name, the standard error of the mean. It is
          computed from the sample SD (ddof=1), like the per-item SD earlier in
          this notebook; 0.0 for a single value, None when there are none.
        * ``p_values`` / ``t_stats`` / ``dfs``: result of ``ttest_rel`` over
          the pairs in which both values are present, with dfs = pairs - 1.
          They stay None when either side is empty, when the two lists differ
          in length (no valid pairing exists), or when fewer than two complete
          pairs remain.
    """
    def _present(values):
        # Float array without NaN entries (None is converted to NaN first)
        arr = np.asarray(values, dtype=float)
        return arr[~np.isnan(arr)]

    def _mean(values):
        arr = _present(values)
        return np.mean(arr) if arr.size else None

    def _sem(values):
        arr = _present(values)
        if arr.size == 0:
            return None
        if arr.size == 1:
            # The sample SD is undefined for one value; 0.0 matches what the
            # previous population-SD formula returned and keeps plotting safe.
            return 0.0
        return np.std(arr, ddof=1) / np.sqrt(arr.size)

    avg_pre = {key: _mean(values) for key, values in pre_data.items()}
    avg_post = {key: _mean(values) for key, values in post_data.items()}
    std_pre = {key: _sem(values) for key, values in pre_data.items()}
    std_post = {key: _sem(values) for key, values in post_data.items()}

    p_values = {}
    t_stats = {}
    dfs = {}  # Store degrees of freedom
    for key in pre_data.keys():
        p_values[key] = None
        t_stats[key] = None
        dfs[key] = None

        pre = np.asarray(pre_data[key], dtype=float)
        post = np.asarray(post_data[key], dtype=float)
        if pre.size == 0 or post.size == 0:
            continue
        if pre.size != post.size:
            # ttest_rel raises on unequal lengths; report and skip instead
            print(f"Skipping paired t-test for '{key}': {pre.size} pre vs {post.size} post values cannot be paired.")
            continue

        # Keep only the pairs where both measurements are present
        complete = ~(np.isnan(pre) | np.isnan(post))
        n_pairs = int(complete.sum())
        if n_pairs < 2:
            continue

        t_stat, p_value = ttest_rel(pre[complete], post[complete])
        p_values[key] = p_value
        t_stats[key] = t_stat
        dfs[key] = n_pairs - 1  # Degrees of freedom

    return avg_pre, avg_post, std_pre, std_post, p_values, t_stats, dfs

# Helper for process_data: one row per item, with its pre and post value side by side
def _pair_pre_post(df_pre, df_post, response_column, measure_column, image_column='image'):
    """Return a frame with columns 'rating', 'pre', 'post', or None.

    None is returned when either file lacks the rating or the measure column.
    Rows are matched on ``image_column`` when both files have it, so the pre
    and post value in a row belong to the same item regardless of the order in
    which the two files list their items. Without that column the rows are
    matched by position. Pairs with a missing pre or post value are dropped.
    The item's rating is read from the pre file.
    """
    for column in (response_column, measure_column):
        if column not in df_pre.columns or column not in df_post.columns:
            return None

    if image_column in df_pre.columns and image_column in df_post.columns:
        pre = pd.DataFrame({
            'image': df_pre[image_column].to_numpy(),
            'rating': df_pre[response_column].to_numpy(),
            'pre': df_pre[measure_column].astype(float).to_numpy(),
        })
        post = pd.DataFrame({
            'image': df_post[image_column].to_numpy(),
            'post': df_post[measure_column].astype(float).to_numpy(),
        })
        # NaN keys would match each other in a merge, and a repeated image
        # would multiply rows. NOTE(review): the first occurrence of a
        # repeated image is kept - confirm that is the intended one.
        pre = pre.dropna(subset=['image']).drop_duplicates(subset='image')
        post = post.dropna(subset=['image']).drop_duplicates(subset='image')
        paired = pre.merge(post, on='image', how='inner')
    else:
        n_rows = min(len(df_pre), len(df_post))
        paired = pd.DataFrame({
            'rating': df_pre[response_column].to_numpy()[:n_rows],
            'pre': df_pre[measure_column].astype(float).to_numpy()[:n_rows],
            'post': df_post[measure_column].astype(float).to_numpy()[:n_rows],
        })

    return paired.dropna(subset=['pre', 'post'])


# Function for processing data
def process_data(subject_folder_path, pre_file_name, post_file_name, response_column, measure_column):
    """Pool paired pre/post values of one measure, split by start group and liking.

    Args:
        subject_folder_path: Directory containing one sub-folder per participant.
        pre_file_name: Suffix of the pre-task file; the file read is
            "<folder>/<folder>_<pre_file_name>".
        post_file_name: Same for the post-task file.
        response_column: Column holding the item's 1-5 rating.
        measure_column: Column holding the value to collect.

    Returns:
        ``(pre_congruent, post_congruent, pre_incongruent, post_incongruent)``,
        each ``{'Disliked': [...], 'Liked': [...]}``. Ratings 1 and 2 count as
        'Disliked', 4 and 5 as 'Liked'; any other rating is left out. Within a
        group and key, position i of the pre list and of the post list refer
        to the same participant and item, so the lists always have equal
        length and can be handed to a paired test.

    Participants are skipped when the pre or post file is missing, or when the
    first 'Trial type' value of "<folder>_task.csv" is neither 'aligned'
    (congruent start) nor 'unaligned' (incongruent start).
    """
    pre_congruent = {'Disliked': [], 'Liked': []}
    post_congruent = {'Disliked': [], 'Liked': []}
    pre_incongruent = {'Disliked': [], 'Liked': []}
    post_incongruent = {'Disliked': [], 'Liked': []}

    # Sorted so the pooled order does not depend on the file system's listing order
    for subject_folder in sorted(os.listdir(subject_folder_path)):
        subject_path = os.path.join(subject_folder_path, subject_folder)
        if not os.path.isdir(subject_path):
            continue

        pre_file_path = os.path.join(subject_path, f"{subject_folder}_{pre_file_name}")
        post_file_path = os.path.join(subject_path, f"{subject_folder}_{post_file_name}")
        if not (os.path.exists(pre_file_path) and os.path.exists(post_file_path)):
            continue

        # The first trial of the task file tells which block the participant started with
        starting_block = None
        task_file_path = os.path.join(subject_path, f"{subject_folder}_task.csv")
        if os.path.exists(task_file_path):
            df_task = pd.read_csv(task_file_path)
            if 'Trial type' in df_task.columns and not df_task.empty:
                starting_block = df_task['Trial type'].iloc[0]

        if starting_block == 'aligned':
            pre_data, post_data = pre_congruent, post_congruent
        elif starting_block == 'unaligned':
            pre_data, post_data = pre_incongruent, post_incongruent
        else:
            continue

        paired = _pair_pre_post(
            pd.read_csv(pre_file_path),
            pd.read_csv(post_file_path),
            response_column,
            measure_column,
        )
        if paired is None:
            continue

        for rating in (1.0, 2.0, 4.0, 5.0):
            key = 'Disliked' if rating in (1.0, 2.0) else 'Liked'
            rows = paired[paired['rating'] == rating]
            pre_data[key].extend(rows['pre'].tolist())
            post_data[key].extend(rows['post'].tolist())

    return pre_congruent, post_congruent, pre_incongruent, post_incongruent

# Root data directory and the column that carries each item's rating
shared_folder_path = "/Users/alexrich/Documents/onlinedata_fullsample"
response_column = 'response_y'

# One entry per plotted measure. All three measures are read from the same
# pre/post subset files; only the column, axis label and panel title differ.
measures = [
    {
        'measure_column': column,
        'pre_file_name': 'subset_ratings_pre.csv',
        'post_file_name': 'subset_ratings_post.csv',
        'ylabel': axis_label,
        'title': panel_title,
    }
    for column, axis_label, panel_title in (
        ('paying', 'Average Willingness to Pay ($)', 'Willingness to Pay'),
        ('enjoyable', 'Average "Enjoyable" Rating', 'Enjoyable'),
        ('satisfaction', 'Average "Satisfaction" Rating', 'Satisfaction'),
    )
]

# Function to plot results
def plot_measure_results(ax, avg_pre, avg_post, std_pre, std_post, p_values, t_stats, dfs, response_labels, ylabel, title):
    """Draw grouped pre/post bars on ``ax`` and report the significant tests.

    For every label in ``response_labels`` a 'Pre' and a 'Post' bar are drawn
    side by side, with ``std_pre`` / ``std_post`` as error bars. Where the
    label's p-value is below .05 the test is printed as
    "t(df) = ..., p ..." and stars are placed above the bar pair
    (* < .05, ** < .01, *** < .001). All dict arguments are keyed by label;
    only the labels 'Disliked' and 'Liked' have colours assigned.
    """
    # Read all four series first, in the same order as before
    pre_values, post_values, pre_errors, post_errors = (
        [table[label] for label in response_labels]
        for table in (avg_pre, avg_post, std_pre, std_post)
    )

    # "Paired" palette slots: (pre, post) shade for each label
    palette = sns.color_palette("Paired")
    shade_slots = {'Disliked': (6, 7), 'Liked': (8, 9)}
    pre_colors = [palette[shade_slots[label][0]] for label in response_labels]
    post_colors = [palette[shade_slots[label][1]] for label in response_labels]

    x = np.arange(len(response_labels))
    width = 0.35
    ax.bar(x - width/2, pre_values, width, label='Pre', color=pre_colors, yerr=pre_errors, capsize=5)
    ax.bar(x + width/2, post_values, width, label='Post', color=post_colors, yerr=post_errors, capsize=5)

    # Significance stars plus an APA-style line for each significant test
    for position, label in enumerate(response_labels):
        p = p_values[label]
        if p is None:
            continue
        if p < 0.001:
            stars, p_text = "***", "p < .001"
        elif p < 0.01:
            stars, p_text = "**", f"p = {p:.3f}"
        elif p < 0.05:
            stars, p_text = "*", f"p = {p:.3f}"
        else:
            continue

        print(f"{title} ({label}): t({dfs[label]}) = {t_stats[label]:.2f}, {p_text}")
        # Stars sit just above the taller of the two error-bar tips
        highest_tip = max(pre_values[position] + pre_errors[position],
                          post_values[position] + post_errors[position])
        ax.text(position, highest_tip + 0.1,
                stars, ha='center', va='bottom', fontsize=20, color='black')

    # Axis titles, panel title and tick labels
    ax.set_title(title, fontsize=22, fontweight='bold')
    ax.set_xlabel('Food Item Rating', fontsize=20, fontweight='bold')
    ax.set_ylabel(ylabel, fontsize=20, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(response_labels)
    ax.tick_params(axis='both', which='major', labelsize=19)

    # No grid lines, and a fixed 0-5 y-range
    ax.grid(False)
    ax.set_ylim([0, 5])

# 3 x 2 grid: one row per measure, left column for the congruent-start group
# and right column for the incongruent-start group
fig, axes = plt.subplots(3, 2, figsize=(18, 24), constrained_layout=True)

response_labels = ['Disliked', 'Liked']

for i, measure in enumerate(measures):
    pre_congruent, post_congruent, pre_incongruent, post_incongruent = process_data(
        shared_folder_path,
        measure['pre_file_name'],
        measure['post_file_name'],
        response_column,
        measure['measure_column'],
    )

    # Statistics for both groups are computed before anything is drawn
    congruent_stats = calculate_statistics(pre_congruent, post_congruent)
    incongruent_stats = calculate_statistics(pre_incongruent, post_incongruent)

    panels = (
        ("Congruent-start", congruent_stats),
        ("Incongruent-start", incongruent_stats),
    )
    for column, (group_name, group_stats) in enumerate(panels):
        plot_measure_results(
            axes[i, column],
            *group_stats,
            response_labels,
            measure['ylabel'],
            f"{group_name} - {measure['title']}",
        )

# Shared legend built from proxy lines in the same "Paired" shades as the bars
paired_palette = sns.color_palette("Paired")
custom_legend = [
    plt.Line2D([0], [0], color=paired_palette[slot], lw=4)
    for slot in (8, 9, 6, 7)
]
legend_labels = ['Pre-task (Liked)', 'Post-task (Liked)', 'Pre-task (Disliked)', 'Post-task (Disliked)']

fig.legend(custom_legend, legend_labels,
           fontsize=22, loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.03))

plt.show()

# Comprehension check

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Access the shared folder (one sub-folder per participant).
# NOTE: left blank in this notebook; it has to be set before running.
shared_folder_path = ""

# Subject ID -> DataFrame of that subject's practice-test file
participant_data = {}
participants_without_data = []

# Pooled 'accuracy' values from all participants
accuracy_data = []

# Iterate through each subject folder
for subject_folder in os.listdir(shared_folder_path):
    subject_folder_path = os.path.join(shared_folder_path, subject_folder)

    # Check if the item is a directory (subject folder)
    if not os.path.isdir(subject_folder_path):
        continue

    # Unlike the ratings files, this file name carries no subject prefix
    task_file_name = "practice_trials_test.csv"
    task_file_path = os.path.join(subject_folder_path, task_file_name)

    if not os.path.exists(task_file_path):
        participants_without_data.append(subject_folder)
        print(f"No data found for participant {subject_folder}.")
        continue

    # Read the task CSV file and keep it under the subject ID
    df = pd.read_csv(task_file_path)
    participant_data[subject_folder] = df

    # Collect 'accuracy' data
    if 'accuracy' in df.columns:
        accuracy_data.extend(df['accuracy'].dropna().tolist())
    else:
        print(f"'accuracy' column not found in {task_file_name} for participant {subject_folder}")

# Report the number of participants
total_participants = len(participant_data) + len(participants_without_data)
print(f"\nTotal participants processed: {total_participants}")

# Final message based on loading status
if not participants_without_data:
    print("Data loaded successfully for all participants.")
else:
    print("Some participants did not have data loaded:")
    for participant in participants_without_data:
        print(f"- {participant}")

# Summary statistics for 'accuracy' column
if accuracy_data:
    accuracy_series = pd.Series(accuracy_data)
    summary_stats = accuracy_series.describe()
    print("\nSummary Statistics for 'accuracy' column across all participants:")
    print(summary_stats)
    print(f"Range: {accuracy_series.min()} - {accuracy_series.max()}")

    # Count per accuracy level. The four expected levels are always shown, even
    # with a count of 0. Any other observed level (for example 0) is shown as
    # well; reindexing onto the four expected levels alone would silently drop
    # those values from the plot and from the printed counts.
    expected_levels = [25, 50, 75, 100]
    observed_counts = accuracy_series.value_counts().sort_index()
    # Assumes numeric accuracy values, as describe()/min()/max() above already do
    all_levels = sorted(set(expected_levels) | set(observed_counts.index))
    count_data = observed_counts.reindex(all_levels, fill_value=0)

    # NOTE(review): every non-missing 'accuracy' row is counted, so the y-axis
    # equals the number of participants only if each file holds one such row.
    plt.figure(figsize=(10, 6))
    count_data.plot(kind='bar', color='skyblue')
    plt.title('Count Plot of Subject Accuracies')
    plt.xlabel('Accuracy')
    plt.ylabel('Number of Participants')
    plt.xticks(rotation=0)  # Keep the x-tick labels horizontal
    plt.tight_layout()
    plt.show()

    # Printing counts for each category
    print("\nCounts for each accuracy category:")
    for accuracy, count in count_data.items():
        print(f"{accuracy}%: {count} participants")

else:
    print("\nNo 'accuracy' data found across all participants.")