### File preparation
This notebook subsets the experiment participants into batches to load as "server inputs", because the trials for all N = 450 participants cannot be uploaded in one go.

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
import os
import shutil
import re

In [3]:
# Load the per-participant triplet definitions into `data`.
# The loops in the later cells iterate over `data.items()` and unpack each
# value as a sequence of 3-element image-name triplets.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is pointed at a local copy of the file.
# The encoding is given explicitly because JSON text is UTF-8, whereas the
# default encoding of open() depends on the platform locale.
with open('/Users/born/Downloads/Triplet files 2/imagenet2012_human_eval_v2.1_category_stratified_triplets.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

### First, I am adding the triplet names (Path to triplet) to the file names that were provided by TU.

Then I am saving the trials to a CSV format, because the server does not accept .json

In [3]:
# Build one row per trial for every participant in `data`, print the resulting
# table and write it to 'triplets_experiment.csv'.

# Folder prefix prepended to every image name from the JSON file
stimulus_prefix = "stimuli/imagenet2012_human_eval_v2.1_category_stratified_triplets/"

# Triplet label, keyed by how many distinct class IDs the three images have
label_by_distinct_ids = {
    1: 'within_class',    # all three IDs are equal
    2: 'class_boundary',  # exactly two IDs are equal
    3: 'between_class',   # all three IDs differ
}

rows = []
for participant_id, image_triplets in data.items():
    # enumerate() restarts at 0 for each participant, so `trial` is the
    # per-participant trial index
    for trial, (first, second, third) in enumerate(image_triplets):
        paths = [stimulus_prefix + image for image in (first, second, third)]

        # ID of an image = the part of its file name before the first '_',
        # with the leading character dropped
        # (presumably the ImageNet class/synset number - TODO confirm)
        class_ids = {path.split('/')[-1].split('_')[0][1:] for path in paths}

        rows.append({
            'id': participant_id,
            'image1': paths[0],
            'image2': paths[1],
            'image3': paths[2],
            'trial': trial,
            'triplet_class': label_by_distinct_ids[len(class_ids)],
        })

df = pd.DataFrame(rows)
print(df)

# Write ALL rows to CSV (no row subsetting happens in this cell)
df.to_csv('triplets_experiment.csv', index=False)

                     id                                             image1  \
0         participant_0  stimuli/imagenet2012_human_eval_v2.1_category_...   
1         participant_0  stimuli/imagenet2012_human_eval_v2.1_category_...   
2         participant_0  stimuli/imagenet2012_human_eval_v2.1_category_...   
3         participant_0  stimuli/imagenet2012_human_eval_v2.1_category_...   
4         participant_0  stimuli/imagenet2012_human_eval_v2.1_category_...   
...                 ...                                                ...   
148495  participant_449  stimuli/imagenet2012_human_eval_v2.1_category_...   
148496  participant_449  stimuli/imagenet2012_human_eval_v2.1_category_...   
148497  participant_449  stimuli/imagenet2012_human_eval_v2.1_category_...   
148498  participant_449  stimuli/imagenet2012_human_eval_v2.1_category_...   
148499  participant_449  stimuli/imagenet2012_human_eval_v2.1_category_...   

                                                   image2  \
0 

### Here I am adding alternatives to subset participants in batches of 50 participants:

For the first 50 participants 

In [None]:
# Export the trials of the first 50 participants (numeric ID 0-49) to
# 'triplets_experiment_first_50_participants.csv'.

# Folder prefix prepended to every image name from the JSON file
stimulus_prefix = "stimuli/imagenet2012_human_eval_v2.1_images/"

# Triplet label, keyed by how many distinct class IDs the three images have
label_by_distinct_ids = {
    1: 'within_class',    # all three IDs are equal
    2: 'class_boundary',  # exactly two IDs are equal
    3: 'between_class',   # all three IDs differ
}

rows = []
for participant_id, image_triplets in data.items():
    # The numeric ID is whatever follows the last '_' in the participant key
    participant_index = int(participant_id.split('_')[-1])
    if participant_index >= 50:
        continue  # not part of this batch

    # enumerate() restarts at 0 for each participant, so `trial` is the
    # per-participant trial index
    for trial, (first, second, third) in enumerate(image_triplets):
        paths = [stimulus_prefix + image for image in (first, second, third)]

        # ID of an image = the part of its file name before the first '_',
        # with the leading character dropped
        # (presumably the ImageNet class/synset number - TODO confirm)
        class_ids = {path.split('/')[-1].split('_')[0][1:] for path in paths}

        rows.append({
            'id': participant_id,
            'image1': paths[0],
            'image2': paths[1],
            'image3': paths[2],
            'trial': trial,
            'triplet_class': label_by_distinct_ids[len(class_ids)],
        })

# One CSV for this batch of participants
df = pd.DataFrame(rows)
df.to_csv('triplets_experiment_first_50_participants.csv', index=False)

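For the following batches (originally 50–100; note that the filter in the cell below currently selects participant IDs 351–450)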

In [15]:
# Export the trials of one batch of participants, selected by numeric ID range.
# NOTE(review): the filter below keeps IDs 351 <= id < 451, but the output file
# is named '..._251_to_450.csv'. One of the two is probably stale; confirm the
# intended range before using this file. Both are left unchanged here.

# Folder prefix prepended to every image name from the JSON file
stimulus_prefix = "stimuli/imagenet2012_human_eval_v2.1_images/"

# Triplet label, keyed by how many distinct class IDs the three images have
label_by_distinct_ids = {
    1: 'within_class',    # all three IDs are equal
    2: 'class_boundary',  # exactly two IDs are equal
    3: 'between_class',   # all three IDs differ
}

rows = []
for participant_id, image_triplets in data.items():
    # The numeric ID is whatever follows the last '_' in the participant key
    participant_index = int(participant_id.split('_')[-1])
    if not (351 <= participant_index < 451):
        continue  # outside the current batch

    # enumerate() restarts at 0 for each participant, so `trial` is the
    # per-participant trial index
    for trial, (first, second, third) in enumerate(image_triplets):
        paths = [stimulus_prefix + image for image in (first, second, third)]

        # ID of an image = the part of its file name before the first '_',
        # with the leading character dropped
        # (presumably the ImageNet class/synset number - TODO confirm)
        class_ids = {path.split('/')[-1].split('_')[0][1:] for path in paths}

        rows.append({
            'id': participant_id,
            'image1': paths[0],
            'image2': paths[1],
            'image3': paths[2],
            'trial': trial,
            'triplet_class': label_by_distinct_ids[len(class_ids)],
        })

# One CSV for this batch of participants
df = pd.DataFrame(rows)
df.to_csv('triplets_experiment_participants_251_to_450.csv', index=False)

#### Here I am specifically extracting the trials for the participants with missing data, to start a re-collection round that uses ALL trials initially provided by the team

In [9]:
# Export every trial of the participants listed in `missing_participants`
# to 'recollecting_participants_triplets.csv'.

# Numeric IDs of the participants to extract
missing_participants = [46, 278, 372, 417]

# Folder prefix prepended to every image name from the JSON file
stimulus_prefix = "stimuli/imagenet2012_human_eval_v2.1_images/"

# Triplet label, keyed by how many distinct class IDs the three images have
label_by_distinct_ids = {
    1: 'within_class',    # all three IDs are equal
    2: 'class_boundary',  # exactly two IDs are equal
    3: 'between_class',   # all three IDs differ
}

rows = []
for participant_id, image_triplets in data.items():
    # The numeric ID is whatever follows the last '_' in the participant key
    participant_index = int(participant_id.split('_')[-1])
    if participant_index not in missing_participants:
        continue  # this participant is not in the list

    # enumerate() restarts at 0 for each participant, so `trial` is the
    # per-participant trial index
    for trial, (first, second, third) in enumerate(image_triplets):
        paths = [stimulus_prefix + image for image in (first, second, third)]

        # ID of an image = the part of its file name before the first '_',
        # with the leading character dropped
        # (presumably the ImageNet class/synset number - TODO confirm)
        class_ids = {path.split('/')[-1].split('_')[0][1:] for path in paths}

        rows.append({
            'id': participant_id,
            'image1': paths[0],
            'image2': paths[1],
            'image3': paths[2],
            'trial': trial,
            'triplet_class': label_by_distinct_ids[len(class_ids)],
        })

# One CSV holding only the listed participants
df = pd.DataFrame(rows)
df.to_csv('recollecting_participants_triplets.csv', index=False)
