In [None]:
# REMOVE REWARD RATE TRIALS AND RE-SET TRIAL NUMBERS AND BLOCK NUMBERS

import os
import pandas as pd

# Define the input and output folder paths
input_folder = "/Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules"
output_folder = "/Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Iterate through all CSV files in the input folder
for file_name in os.listdir(input_folder):
    if file_name.endswith(".csv"):
        # Construct full file paths
        input_file_path = os.path.join(input_folder, file_name)
        output_file_path = os.path.join(output_folder, file_name)

        # Load the CSV file
        df = pd.read_csv(input_file_path)

        # Remove rows where 'attention_focus' column is 'reward'
        df_filtered = df[df['attention_focus'] != 'reward'].reset_index(drop=True)

        # Re-set the 'trial_in_experiment' column to range from 1 to the number of remaining trials
        df_filtered['trial_in_experiment'] = range(1, len(df_filtered) + 1)

        # Re-set the 'block_number' column based on the new trial_in_experiment
        df_filtered['block_number'] = ((df_filtered['trial_in_experiment'] - 1) // 50) + 1

        # Save the filtered DataFrame to the output folder
        df_filtered.to_csv(output_file_path, index=False)

print("Processing complete. Filtered files are saved in:", output_folder)


Processing complete. Filtered files are saved in: /Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate


In [7]:
# REMOVE DUPLIACTE TRIAL SCHEDULES (no more counter-balancing attention_focus necessary)

import hashlib

# Define the input and output folder paths
input_folder = "/Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate"

# Function to calculate a hash of the DataFrame content excluding 'schedule_version'
def calculate_hash(df):
    # Drop the 'schedule_version' column if it exists
    if 'schedule_version' in df.columns:
        df = df.drop(columns=['schedule_version'])
    # Convert the DataFrame to a string representation and calculate the hash
    return hashlib.md5(pd.util.hash_pandas_object(df, index=False).values).hexdigest()

# Track unique hashes and their associated files
unique_hashes = {}
files_to_remove = []

# Iterate through all CSV files in the folder
for file_name in os.listdir(input_folder):
    if file_name.endswith(".csv"):
        file_path = os.path.join(input_folder, file_name)

        # Load the CSV file
        df = pd.read_csv(file_path)

        # Calculate the hash of the DataFrame content
        file_hash = calculate_hash(df)

        # Check if this hash is already encountered
        if file_hash in unique_hashes:
            # This file is a duplicate; mark it for deletion
            files_to_remove.append(file_path)
        else:
            # This hash is unique; store it with the file name
            unique_hashes[file_hash] = file_name

# Delete duplicate files
for file_path in files_to_remove:
    os.remove(file_path)
    print(f"Deleted duplicate file: {file_path}")

print("Duplicate file removal complete.")


Deleted duplicate file: /Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate/schedule_A_6.csv
Deleted duplicate file: /Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate/schedule_A_5.csv
Deleted duplicate file: /Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate/schedule_A_3.csv
Deleted duplicate file: /Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate/schedule_A_2.csv
Deleted duplicate file: /Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate/schedule_B_6.csv
Deleted duplicate file: /Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate/schedule_B_5.csv
Deleted duplicate file: /Users/majafriedemann/Documents/GitHub/reward-effort-task/final_trial_schedules_without_reward_rate/schedule_B_2.csv
Deleted dupli