In [2]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import re

# Side length, in pixels, of the square images used in this cell: target images
# are resized to PIXEL_SIZE x PIXEL_SIZE and each noise stimulus holds
# PIXEL_SIZE * PIXEL_SIZE values.
PIXEL_SIZE = 16

def generate_whitenoise_image():
    """Draw one flattened white-noise stimulus.

    Returns:
        A 1-D integer array with ``PIXEL_SIZE * PIXEL_SIZE`` entries, each
        sampled uniformly from 0..255 (the upper bound 256 is exclusive)
        using NumPy's global random state.
    """
    n_pixels = PIXEL_SIZE * PIXEL_SIZE
    return np.random.randint(low=0, high=256, size=n_pixels)

def calculate_similarity(vec1, vec2):
    """Return the sum of squared element-wise differences between two vectors.

    Despite the name this is a distance: a smaller value means the two
    vectors are more alike, and 0 means they are identical.

    Integer inputs are widened to int64 before subtracting. Without this,
    two uint8 vectors (the dtype of a greyscale image array) would wrap
    around modulo 256 in both the subtraction and the square, yielding a
    meaningless result.

    Args:
        vec1: Array-like of numbers.
        vec2: Array-like of numbers, broadcast-compatible with ``vec1``.

    Returns:
        NumPy scalar holding ``sum((vec1 - vec2) ** 2)``.
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)
    # Only integer/bool inputs are widened; floats keep their precision.
    if a.dtype.kind in "bui":
        a = a.astype(np.int64)
    if b.dtype.kind in "bui":
        b = b.astype(np.int64)
    return np.sum((a - b) ** 2)

def run_experiment(target_image, num_trials):
    """Simulate ``num_trials`` selection trials against a target image.

    On every trial ten white-noise stimuli are generated and the one with
    the smallest ``calculate_similarity`` value relative to ``target_image``
    is kept.

    Args:
        target_image: Flattened target image the stimuli are compared with.
        num_trials: Number of trials to simulate.

    Returns:
        List with one selected stimulus array per trial, in trial order.
    """
    def pick_closest_stimulus():
        # Generate all ten candidates first, then score them, so the random
        # draws happen in the same order on every trial.
        candidates = [generate_whitenoise_image() for _ in range(10)]
        distances = [calculate_similarity(target_image, candidate) for candidate in candidates]
        return candidates[np.argmin(distances)]

    return [pick_closest_stimulus() for _ in range(num_trials)]

def create_composite_image(images):
    """Average a stack of images and stretch the result to the 0..255 range.

    Args:
        images: Non-empty sequence of equally shaped numeric arrays.

    Returns:
        uint8 array with the shape of a single image. The pixel-wise mean is
        min-max normalised so its smallest value maps to 0 and its largest
        to 255. If the mean image is completely flat (max == min) an
        all-zero image is returned instead of dividing by zero.

    Raises:
        ValueError: If ``images`` is empty.
    """
    stack = np.asarray(images)
    if stack.size == 0:
        raise ValueError("create_composite_image requires at least one image")

    composite = np.mean(stack, axis=0)
    lowest = composite.min()
    span = composite.max() - lowest
    if span == 0:
        # Min-max scaling is undefined for a constant image (0 / 0 -> NaN).
        return np.zeros(composite.shape, dtype=np.uint8)
    return ((composite - lowest) / span * 255).astype(np.uint8)

def load_target_image(image_path):
    """Load an image file as a flattened greyscale vector.

    Args:
        image_path: Path of the image file to read.

    Returns:
        1-D array of ``PIXEL_SIZE * PIXEL_SIZE`` greyscale values obtained by
        converting the image to mode 'L' and resizing it to
        PIXEL_SIZE x PIXEL_SIZE with Lanczos resampling.
    """
    # The context manager releases the file handle even if decoding fails;
    # convert() returns an independent copy that outlives the closed file.
    with Image.open(image_path) as source:
        grey = source.convert('L')  # Convert to grayscale
    grey = grey.resize((PIXEL_SIZE, PIXEL_SIZE), Image.LANCZOS)
    return np.array(grey).flatten()

def process_and_save_images(folder_name, file_pattern, num_trials):
    """Run the experiment for every matching PNG in a folder and save composites.

    For each ``.png`` file in ``folder_name`` whose name matches
    ``file_pattern`` (via ``re.match``), the image is loaded as the target,
    ``num_trials`` trials are simulated, and the resulting composite is
    written to ``composite_images_{num_trials}_trials/composite_{filename}``.

    Args:
        folder_name: Directory containing the target images.
        file_pattern: Regular expression the file name must match.
        num_trials: Number of trials to simulate per image.
    """
    output_folder = f"composite_images_{num_trials}_trials"
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order (and the log output) the same on every machine.
    for filename in sorted(os.listdir(folder_name)):
        if not filename.endswith('.png'):
            continue
        if not re.match(file_pattern, filename):
            continue

        image_path = os.path.join(folder_name, filename)
        target_image = load_target_image(image_path)
        experiment_results = run_experiment(target_image, num_trials)
        composite = create_composite_image(experiment_results)

        output_filename = f"composite_{filename}"
        output_path = os.path.join(output_folder, output_filename)
        # The composite is a flat vector; restore the 2-D layout before saving.
        Image.fromarray(composite.reshape((PIXEL_SIZE, PIXEL_SIZE))).save(output_path)

        print(f"Saved composite for {image_path} with {num_trials} trials")

# Run the experiment once for each trial count listed below
for num_trials in [50, 75, 100, 200, 400, 800, 2000]:
    print(f"\nRunning experiment with {num_trials} trials:")

    # Same pipeline for each image set: original digits, cursive digits,
    # then rotated digits (any angle), in that order.
    for _folder, _pattern in (
        ("original_digit_images", r"digit_(\d+)\.png"),
        ("cursive_digit_images", r"digit_(\d+)_cursive\.png"),
        ("rotated_digit_images", r"digit_(\d+)_angle_(\d+)\.png"),
    ):
        process_and_save_images(_folder, _pattern, num_trials)

print("\nAll composites have been generated and saved.")


Running experiment with 50 trials:
Saved composite for original_digit_images\digit_0.png with 50 trials
Saved composite for original_digit_images\digit_1.png with 50 trials
Saved composite for original_digit_images\digit_2.png with 50 trials
Saved composite for original_digit_images\digit_3.png with 50 trials
Saved composite for original_digit_images\digit_4.png with 50 trials
Saved composite for original_digit_images\digit_5.png with 50 trials
Saved composite for original_digit_images\digit_6.png with 50 trials
Saved composite for original_digit_images\digit_7.png with 50 trials
Saved composite for original_digit_images\digit_8.png with 50 trials
Saved composite for original_digit_images\digit_9.png with 50 trials
Saved composite for cursive_digit_images\digit_0_cursive.png with 50 trials
Saved composite for cursive_digit_images\digit_1_cursive.png with 50 trials
Saved composite for cursive_digit_images\digit_2_cursive.png with 50 trials
Saved composite for cursive_digit_images\digit

The model is woefully underperforming; perhaps we can investigate which observers are giving it a hard time.

In [24]:
import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to built-in Python types.

    Pass as ``cls=NumpyEncoder`` to ``json.dump``/``json.dumps``. NumPy
    integers, floats and booleans become ``int``, ``float`` and ``bool``;
    arrays become (nested) lists. Anything else is deferred to the base
    class, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``."""
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            # np.bool_ is not a Python bool subclass, so json rejects it.
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [27]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import re
import zipfile
import json

# Side length, in pixels, of the square images: targets are resized to
# PIXEL_SIZE x PIXEL_SIZE and noise stimuli hold PIXEL_SIZE * PIXEL_SIZE values.
PIXEL_SIZE = 16
# Number of participant loops run by main(); one pair of JSON files each.
NUM_PARTICIPANTS = 100
# Trial counts; main() runs every image set once per entry for each participant.
TRIALS = [50, 75, 100, 200, 400, 800, 1600]
# Root output directory; main() writes into its 'training_set' and 'test_set'
# subfolders and then zips the whole tree.
training_data_folder = 'training_data'

def generate_whitenoise_image():
    """Return a flattened white-noise image.

    Returns:
        1-D integer array of length ``PIXEL_SIZE * PIXEL_SIZE`` whose values
        are drawn uniformly from 0 to 255 inclusive, using NumPy's global
        random state.
    """
    vector_length = PIXEL_SIZE * PIXEL_SIZE
    noise = np.random.randint(0, 256, size=vector_length)
    return noise

def calculate_similarity(vec1, vec2):
    """Return the sum of squared element-wise differences between two vectors.

    Despite the name this is a distance: a smaller value means the two
    vectors are more alike, and 0 means they are identical.

    Integer inputs are widened to int64 before subtracting. Without this,
    two uint8 vectors (the dtype of a greyscale image array) would wrap
    around modulo 256 in both the subtraction and the square, yielding a
    meaningless result.

    Args:
        vec1: Array-like of numbers.
        vec2: Array-like of numbers, broadcast-compatible with ``vec1``.

    Returns:
        NumPy scalar holding ``sum((vec1 - vec2) ** 2)``.
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)
    # Only integer/bool inputs are widened; floats keep their precision.
    if a.dtype.kind in "bui":
        a = a.astype(np.int64)
    if b.dtype.kind in "bui":
        b = b.astype(np.int64)
    return np.sum((a - b) ** 2)

def run_experiment(target_image, num_trials, trial_num, observer_type, true_digit):
    """Simulate selection trials and record what was chosen on each one.

    On every trial ten white-noise stimuli are generated and the one with
    the smallest ``calculate_similarity`` value relative to ``target_image``
    is selected.

    Args:
        target_image: Flattened target image the stimuli are compared with.
        num_trials: Number of trials to simulate.
        trial_num: Not used by this function; kept so existing positional
            calls continue to work.
        observer_type: Label copied into every selection record.
        true_digit: Digit label copied into every selection record.

    Returns:
        Tuple ``(all_images, selections)``: the selected stimulus array for
        each trial, and one dict per trial with the keys ``trial_number``
        (1-based), ``true_digit``, ``observer_type`` and ``selected_index``
        (position of the chosen stimulus among the ten, as a plain int).
    """
    all_images = []
    selections = []

    for trial in range(num_trials):
        stimuli = [generate_whitenoise_image() for _ in range(10)]
        similarities = [calculate_similarity(target_image, stim) for stim in stimuli]
        # np.argmin yields a NumPy integer, which the standard json encoder
        # rejects; store a built-in int so the record is serialisable as-is.
        most_similar_index = int(np.argmin(similarities))

        all_images.append(stimuli[most_similar_index])
        selections.append({
            'trial_number': trial + 1,
            'true_digit': true_digit,
            'observer_type': observer_type,
            'selected_index': most_similar_index
        })

    return all_images, selections

def create_composite_image(images):
    """Average a stack of images and stretch the result to the 0..255 range.

    Args:
        images: Non-empty sequence of equally shaped numeric arrays.

    Returns:
        uint8 array with the shape of a single image. The pixel-wise mean is
        min-max normalised so its smallest value maps to 0 and its largest
        to 255. If the mean image is completely flat (max == min) an
        all-zero image is returned instead of dividing by zero.

    Raises:
        ValueError: If ``images`` is empty.
    """
    stack = np.asarray(images)
    if stack.size == 0:
        raise ValueError("create_composite_image requires at least one image")

    composite = np.mean(stack, axis=0)
    lowest = composite.min()
    span = composite.max() - lowest
    if span == 0:
        # Min-max scaling is undefined for a constant image (0 / 0 -> NaN).
        return np.zeros(composite.shape, dtype=np.uint8)
    return ((composite - lowest) / span * 255).astype(np.uint8)

def load_target_image(image_path):
    """Load an image file as a flattened greyscale vector.

    Args:
        image_path: Path of the image file to read.

    Returns:
        1-D array of ``PIXEL_SIZE * PIXEL_SIZE`` greyscale values obtained by
        converting the image to mode 'L' and resizing it to
        PIXEL_SIZE x PIXEL_SIZE with Lanczos resampling.
    """
    # The context manager releases the file handle even if decoding fails;
    # convert() returns an independent copy that outlives the closed file.
    with Image.open(image_path) as source:
        grey = source.convert('L')  # Convert to grayscale
    grey = grey.resize((PIXEL_SIZE, PIXEL_SIZE), Image.LANCZOS)
    return np.array(grey).flatten()

def process_and_save_images(folder_name, file_pattern, num_trials, participant_id, training_data, test_data):
    """Run the experiment for every matching PNG in a folder and collect results.

    For each ``.png`` file in ``folder_name`` whose name matches
    ``file_pattern`` (via ``re.match``), the image is loaded as the target
    and ``num_trials`` trials are simulated. Results are appended in place
    to the two lists supplied by the caller.

    Args:
        folder_name: Directory containing the target images. The observer
            type is derived from it: 'original' or 'cursive' if the name
            contains that word, otherwise 'rotated'.
        file_pattern: Regular expression whose first group captures the
            digit label (kept as a string).
        num_trials: Number of trials to simulate per image.
        participant_id: Identifier stored in each test record and forwarded
            to ``run_experiment``.
        training_data: List that receives one list of per-trial selection
            records per image.
        test_data: List that receives one dict per image holding the
            participant id, trial count, digit, observer type and the
            composite as a nested list.
    """
    # The observer type depends only on the folder, so compute it once.
    if 'original' in folder_name:
        observer_type = 'original'
    elif 'cursive' in folder_name:
        observer_type = 'cursive'
    else:
        observer_type = 'rotated'

    # os.listdir returns entries in arbitrary order. Sorting fixes which
    # random draws go to which image, so a seeded run is repeatable across
    # machines and filesystems.
    for filename in sorted(os.listdir(folder_name)):
        if not filename.endswith('.png'):
            continue
        match = re.match(file_pattern, filename)
        if not match:
            continue

        image_path = os.path.join(folder_name, filename)
        target_image = load_target_image(image_path)
        true_digit = match.group(1)

        experiment_results, selections = run_experiment(target_image, num_trials, participant_id, observer_type, true_digit)
        composite = create_composite_image(experiment_results)

        # Save selections to training data
        training_data.append(selections)

        # Save composite to test data
        test_data.append({
            'participant_id': participant_id,
            'num_trials': num_trials,
            'true_digit': true_digit,
            'observer_type': observer_type,
            'composite': composite.tolist()
        })

def main():
    """Generate training and test JSON for every participant, then zip them.

    For each of ``NUM_PARTICIPANTS`` participants, every trial count in
    ``TRIALS`` is run over the original, cursive and rotated digit folders.
    The collected selections are written to
    ``<training_data_folder>/training_set/participant_<n>.json`` and the
    composites to ``<training_data_folder>/test_set/participant_<n>.json``.
    Finally the whole output tree is archived as ``training_data.zip``.
    """
    np.random.seed(42)  # Set random seed for reproducibility

    # open(..., 'w') does not create missing parent directories, so make
    # sure both output folders exist before the first participant is saved.
    os.makedirs(os.path.join(training_data_folder, 'training_set'), exist_ok=True)
    os.makedirs(os.path.join(training_data_folder, 'test_set'), exist_ok=True)

    for participant in range(NUM_PARTICIPANTS):
        print(f"Processing participant {participant + 1}/{NUM_PARTICIPANTS}")

        training_data = []
        test_data = []

        for num_trials in TRIALS:
            process_and_save_images("original_digit_images", r"digit_(\d+)\.png", num_trials, participant, training_data, test_data)
            process_and_save_images("cursive_digit_images", r"digit_(\d+)_cursive\.png", num_trials, participant, training_data, test_data)
            process_and_save_images("rotated_digit_images", r"digit_(\d+)_angle_(\d+)\.png", num_trials, participant, training_data, test_data)

        # Save training data
        with open(os.path.join(training_data_folder, 'training_set', f'participant_{participant}.json'), 'w') as f:
            json.dump(training_data, f, cls=NumpyEncoder)

        # Save test data
        with open(os.path.join(training_data_folder, 'test_set', f'participant_{participant}.json'), 'w') as f:
            json.dump(test_data, f, cls=NumpyEncoder)

    # Create zip file; archive names are relative to the parent of the
    # output folder, so entries start with the folder's own name.
    with zipfile.ZipFile('training_data.zip', 'w') as zipf:
        for root, dirs, files in os.walk(training_data_folder):
            for file in files:
                zipf.write(os.path.join(root, file),
                           os.path.relpath(os.path.join(root, file),
                                           os.path.join(training_data_folder, '..')))

    print("All data has been generated and saved in training_data.zip")

# Run the generation pipeline only when this code executes as the main module.
if __name__ == "__main__":
    main()

Processing participant 1/100
Processing participant 2/100
Processing participant 3/100
Processing participant 4/100
Processing participant 5/100
Processing participant 6/100
Processing participant 7/100
Processing participant 8/100
Processing participant 9/100
Processing participant 10/100
Processing participant 11/100
Processing participant 12/100
Processing participant 13/100
Processing participant 14/100
Processing participant 15/100
Processing participant 16/100
Processing participant 17/100
Processing participant 18/100
Processing participant 19/100
Processing participant 20/100
Processing participant 21/100
Processing participant 22/100
Processing participant 23/100
Processing participant 24/100
Processing participant 25/100
Processing participant 26/100
Processing participant 27/100
Processing participant 28/100
Processing participant 29/100
Processing participant 30/100
Processing participant 31/100
Processing participant 32/100
Processing participant 33/100
Processing particip

We made a critical mistake: we forgot to actually save the training images and saved only the selected index :(

In [28]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import re
import zipfile
import json
import base64
import io

# Side length, in pixels, of the square images: targets are resized to
# PIXEL_SIZE x PIXEL_SIZE and noise stimuli hold PIXEL_SIZE * PIXEL_SIZE values.
PIXEL_SIZE = 16
# Number of participant loops run by main(); one pair of JSON files each.
NUM_PARTICIPANTS = 100
# Trial counts; main() runs every image set once per entry for each participant.
TRIALS = [50, 75, 100, 200, 400, 800, 1000]
# Root output directory; main() creates its 'training_set' and 'test_set'
# subfolders, writes the JSON files there and then zips the whole tree.
training_data_folder = 'training_data'

def generate_whitenoise_image():
    """Produce a single flattened white-noise stimulus.

    Returns:
        1-D integer array containing ``PIXEL_SIZE * PIXEL_SIZE`` values
        sampled uniformly from the half-open range [0, 256) with NumPy's
        global random state.
    """
    lowest_value, upper_bound = 0, 256
    return np.random.randint(lowest_value, upper_bound, PIXEL_SIZE * PIXEL_SIZE)

def calculate_similarity(vec1, vec2):
    """Return the sum of squared element-wise differences between two vectors.

    Despite the name this is a distance: a smaller value means the two
    vectors are more alike, and 0 means they are identical.

    Integer inputs are widened to int64 before subtracting. Without this,
    two uint8 vectors (the dtype of a greyscale image array) would wrap
    around modulo 256 in both the subtraction and the square, yielding a
    meaningless result.

    Args:
        vec1: Array-like of numbers.
        vec2: Array-like of numbers, broadcast-compatible with ``vec1``.

    Returns:
        NumPy scalar holding ``sum((vec1 - vec2) ** 2)``.
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)
    # Only integer/bool inputs are widened; floats keep their precision.
    if a.dtype.kind in "bui":
        a = a.astype(np.int64)
    if b.dtype.kind in "bui":
        b = b.astype(np.int64)
    return np.sum((a - b) ** 2)

def image_to_base64(image):
    """Encode a flattened image as a base64 string of its PNG bytes.

    Args:
        image: Array with ``PIXEL_SIZE * PIXEL_SIZE`` elements; it is
            reshaped to PIXEL_SIZE x PIXEL_SIZE and cast to uint8.

    Returns:
        ``str`` containing the base64-encoded PNG of the greyscale image.
    """
    pixels = image.reshape(PIXEL_SIZE, PIXEL_SIZE).astype(np.uint8)
    png_buffer = io.BytesIO()
    Image.fromarray(pixels, 'L').save(png_buffer, format="PNG")
    encoded_bytes = base64.b64encode(png_buffer.getvalue())
    return encoded_bytes.decode()

def run_experiment(target_image, num_trials, trial_num, observer_type, true_digit):
    """Simulate selection trials and record the chosen stimulus of each one.

    On every trial ten white-noise stimuli are generated and the one with
    the smallest ``calculate_similarity`` value relative to ``target_image``
    is selected.

    Args:
        target_image: Flattened target image the stimuli are compared with.
        num_trials: Number of trials to simulate.
        trial_num: Not used by this function; kept so existing positional
            calls continue to work.
        observer_type: Label copied into every selection record.
        true_digit: Digit label copied into every selection record.

    Returns:
        Tuple ``(all_images, selections)``: the selected stimulus array for
        each trial, and one dict per trial with the keys ``trial_number``
        (1-based), ``true_digit``, ``observer_type``, ``selected_index``
        (position of the chosen stimulus among the ten, as a plain int) and
        ``selected_image`` (the chosen stimulus as a base64 PNG string).
    """
    all_images = []
    selections = []

    for trial in range(num_trials):
        stimuli = [generate_whitenoise_image() for _ in range(10)]
        similarities = [calculate_similarity(target_image, stim) for stim in stimuli]
        # np.argmin yields a NumPy integer, which the standard json encoder
        # rejects; store a built-in int so the record is serialisable as-is.
        most_similar_index = int(np.argmin(similarities))

        selected_image = stimuli[most_similar_index]
        all_images.append(selected_image)
        selections.append({
            'trial_number': trial + 1,
            'true_digit': true_digit,
            'observer_type': observer_type,
            'selected_index': most_similar_index,
            'selected_image': image_to_base64(selected_image)
        })

    return all_images, selections

def create_composite_image(images):
    """Average a stack of images and stretch the result to the 0..255 range.

    Args:
        images: Non-empty sequence of equally shaped numeric arrays.

    Returns:
        uint8 array with the shape of a single image. The pixel-wise mean is
        min-max normalised so its smallest value maps to 0 and its largest
        to 255. If the mean image is completely flat (max == min) an
        all-zero image is returned instead of dividing by zero.

    Raises:
        ValueError: If ``images`` is empty.
    """
    stack = np.asarray(images)
    if stack.size == 0:
        raise ValueError("create_composite_image requires at least one image")

    composite = np.mean(stack, axis=0)
    lowest = composite.min()
    span = composite.max() - lowest
    if span == 0:
        # Min-max scaling is undefined for a constant image (0 / 0 -> NaN).
        return np.zeros(composite.shape, dtype=np.uint8)
    return ((composite - lowest) / span * 255).astype(np.uint8)

def load_target_image(image_path):
    """Load an image file as a flattened greyscale vector.

    Args:
        image_path: Path of the image file to read.

    Returns:
        1-D array of ``PIXEL_SIZE * PIXEL_SIZE`` greyscale values obtained by
        converting the image to mode 'L' and resizing it to
        PIXEL_SIZE x PIXEL_SIZE with Lanczos resampling.
    """
    # The context manager releases the file handle even if decoding fails;
    # convert() returns an independent copy that outlives the closed file.
    with Image.open(image_path) as source:
        grey = source.convert('L')  # Convert to grayscale
    grey = grey.resize((PIXEL_SIZE, PIXEL_SIZE), Image.LANCZOS)
    return np.array(grey).flatten()

def process_and_save_images(folder_name, file_pattern, num_trials, participant_id, training_data, test_data):
    """Run the experiment for every matching PNG in a folder and collect results.

    For each ``.png`` file in ``folder_name`` whose name matches
    ``file_pattern`` (via ``re.match``), the image is loaded as the target
    and ``num_trials`` trials are simulated. Results are appended in place
    to the two lists supplied by the caller.

    Args:
        folder_name: Directory containing the target images. The observer
            type is derived from it: 'original' or 'cursive' if the name
            contains that word, otherwise 'rotated'.
        file_pattern: Regular expression whose first group captures the
            digit label (kept as a string).
        num_trials: Number of trials to simulate per image.
        participant_id: Identifier stored in each test record and forwarded
            to ``run_experiment``.
        training_data: List that receives one list of per-trial selection
            records per image.
        test_data: List that receives one dict per image holding the
            participant id, trial count, digit, observer type and the
            composite encoded as a base64 PNG string.
    """
    # The observer type depends only on the folder, so compute it once.
    if 'original' in folder_name:
        observer_type = 'original'
    elif 'cursive' in folder_name:
        observer_type = 'cursive'
    else:
        observer_type = 'rotated'

    # os.listdir returns entries in arbitrary order. Sorting fixes which
    # random draws go to which image, so a seeded run is repeatable across
    # machines and filesystems.
    for filename in sorted(os.listdir(folder_name)):
        if not filename.endswith('.png'):
            continue
        match = re.match(file_pattern, filename)
        if not match:
            continue

        image_path = os.path.join(folder_name, filename)
        target_image = load_target_image(image_path)
        true_digit = match.group(1)

        experiment_results, selections = run_experiment(target_image, num_trials, participant_id, observer_type, true_digit)
        composite = create_composite_image(experiment_results)

        # Save selections to training data
        training_data.append(selections)

        # Save composite to test data
        test_data.append({
            'participant_id': participant_id,
            'num_trials': num_trials,
            'true_digit': true_digit,
            'observer_type': observer_type,
            'composite': image_to_base64(composite)
        })

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to built-in Python types.

    Pass as ``cls=NumpyEncoder`` to ``json.dump``/``json.dumps``. NumPy
    integers, floats and booleans become ``int``, ``float`` and ``bool``;
    arrays become (nested) lists. Anything else is deferred to the base
    class, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``."""
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            # np.bool_ is not a Python bool subclass, so json rejects it.
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

def main():
    """Generate training and test JSON for every participant, then zip them.

    Seeds NumPy's global random state, ensures the ``training_set`` and
    ``test_set`` folders exist under ``training_data_folder``, and for each
    participant runs every trial count in ``TRIALS`` over the original,
    cursive and rotated digit folders. Each participant's selections and
    composites are written to ``participant_<n>.json`` in the respective
    folder, and the whole output tree is finally archived as
    ``training_data.zip``.
    """
    np.random.seed(42)  # Fixed seed so repeated runs draw the same noise

    train_dir = os.path.join(training_data_folder, 'training_set')
    test_dir = os.path.join(training_data_folder, 'test_set')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # (folder, file-name pattern) for each image set, in processing order.
    image_sets = (
        ("original_digit_images", r"digit_(\d+)\.png"),
        ("cursive_digit_images", r"digit_(\d+)_cursive\.png"),
        ("rotated_digit_images", r"digit_(\d+)_angle_(\d+)\.png"),
    )

    for participant in range(NUM_PARTICIPANTS):
        print(f"Processing participant {participant + 1}/{NUM_PARTICIPANTS}")

        training_data, test_data = [], []
        for num_trials in TRIALS:
            for folder, pattern in image_sets:
                process_and_save_images(folder, pattern, num_trials, participant, training_data, test_data)

        # Training selections first, then test composites, under the same file name.
        json_name = f'participant_{participant}.json'
        for directory, payload in ((train_dir, training_data), (test_dir, test_data)):
            with open(os.path.join(directory, json_name), 'w') as f:
                json.dump(payload, f, cls=NumpyEncoder)

    # Archive names are relative to the parent of the output folder, so
    # every entry starts with the folder's own name.
    archive_base = os.path.join(training_data_folder, '..')
    with zipfile.ZipFile('training_data.zip', 'w') as zipf:
        for root, _dirs, files in os.walk(training_data_folder):
            for file in files:
                member_path = os.path.join(root, file)
                zipf.write(member_path, os.path.relpath(member_path, archive_base))

    print("All data has been generated and saved in training_data.zip")

# Run the generation pipeline only when this code executes as the main module.
if __name__ == "__main__":
    main()

Processing participant 1/100
Processing participant 2/100
Processing participant 3/100
Processing participant 4/100
Processing participant 5/100
Processing participant 6/100
Processing participant 7/100
Processing participant 8/100
Processing participant 9/100
Processing participant 10/100
Processing participant 11/100
Processing participant 12/100
Processing participant 13/100
Processing participant 14/100
Processing participant 15/100
Processing participant 16/100
Processing participant 17/100
Processing participant 18/100
Processing participant 19/100
Processing participant 20/100
Processing participant 21/100
Processing participant 22/100
Processing participant 23/100
Processing participant 24/100
Processing participant 25/100
Processing participant 26/100
Processing participant 27/100
Processing participant 28/100
Processing participant 29/100
Processing participant 30/100
Processing participant 31/100
Processing participant 32/100
Processing participant 33/100
Processing particip