#### Rename Patterns/Attributes

In [4]:
import os
from PIL import Image
import shutil

# Copy every supported file from `patterns_folder` into `output_folder`,
# dropping the substrings 'with_' and 'in_' from the file name on the way.
patterns_folder = 'Attributes'
output_folder = 'Renamed_Patterns2'

# Extensions carry the leading dot so that a name which merely ends in the
# letters "png" or "dae" (without a real extension) is not picked up.
image_extensions = ('.png', '.jpg', '.jpeg')
model_extensions = ('.dae',)

# exist_ok=True replaces the separate exists() check and its check-then-create race.
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(patterns_folder):
    lowered = filename.lower()
    if not lowered.endswith(image_extensions + model_extensions):
        continue  # unsupported file type: leave it alone

    # NOTE: str.replace removes *every* occurrence of these substrings,
    # not only a leading prefix.
    new_filename = filename.replace('with_', '').replace('in_', '')
    old_file_path = os.path.join(patterns_folder, filename)
    new_file_path = os.path.join(output_folder, new_filename)

    if lowered.endswith(image_extensions):
        # Images are re-saved through PIL; the context manager closes the
        # source file handle once the copy has been written.
        with Image.open(old_file_path) as img:
            img.save(new_file_path)
    else:
        # .dae files are copied directly (copy2 also copies file metadata).
        shutil.copy2(old_file_path, new_file_path)

#### Spam some Number of the Samples

In [47]:
import random

def random_size():
    """Return one of the gate sizes, 'small' or 'big', chosen at random."""
    size_options = ['small', 'big']
    return random.choice(size_options)

def random_color():
    """Return one of the gate colors, 'red', 'green' or 'blue', chosen at random."""
    color_options = ['red', 'green', 'blue']
    return random.choice(color_options)


#### Create Celebrities.json

In [5]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Generate a multiple-choice "fly through the gate" dataset and write it as JSON.

    For every sample one ``.dae`` file from ``folder_path`` is picked as the
    correct answer, two other files are added as distractors and the three
    options are shuffled.  ``num_gates`` gates (size, shape, color) are then
    generated; option ``k`` belongs to gate ``k``, so the gate at the correct
    option's position supplies the description used in ``prompt_simpler``.

    Args:
        folder_path: Directory whose ``.dae`` files (case-insensitive
            extension) serve as options.
        output_file: Path of the JSON file to write (UTF-8, indented).
        num_samples: Number of samples to generate.
        num_gates: Number of gates per sample; must be at least 3 because
            each of the three options needs its own gate.
        different_colors: If True, gate colors are drawn without repetition
            until all colors are used, then the pool is refilled.  If False,
            every gate gets an independent random color.
        size_option: 'big' or 'small' to fix the size; any other value
            (e.g. 'both') picks a random size per gate.
        shape_option: 'square', 'round' or 'triangle' to fix the shape; any
            other value (e.g. 'all') picks a random shape per gate.

    Raises:
        ValueError: If ``num_gates`` is below 3 or the folder holds fewer
            than 3 ``.dae`` files.
    """
    num_options = 3  # one correct file plus two distractors

    # Without this check a correct option at position > num_gates would hit a
    # missing gate and fail with a KeyError only on some random draws.
    if num_gates < num_options:
        raise ValueError(f"num_gates must be at least {num_options} (one gate per option).")

    # Only .dae files are eligible as options.
    dae_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.dae')]
    if len(dae_files) < num_options:  # Ensure there are enough files for options
        raise ValueError("Folder must contain at least 3 .dae files.")

    dataset = []
    colors = ['red', 'green', 'blue']  # Available colors
    shapes = ['square', 'round', 'triangle']  # Available shapes

    for _ in range(num_samples):
        # Randomly select the correct file; its name without the extension is
        # what the prompt refers to.  splitext strips only the final
        # extension, so names that contain dots stay intact.
        correct_file = random.choice(dae_files)
        correct_name = os.path.splitext(correct_file)[0]

        # Choose two distractors (different from the correct one)
        distractors = random.sample([f for f in dae_files if f != correct_file], num_options - 1)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # Indexing starts from 1

        gates = {}

        # Generate all gates first
        color_list = colors.copy()  # Fresh copy for each sample
        for j in range(1, num_gates + 1):
            if different_colors:
                if not color_list:  # Reset if we run out
                    color_list = colors.copy()
                color = random.choice(color_list)
                color_list.remove(color)
            else:
                color = random.choice(colors)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both'
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all'
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # Use the properties of the correct gate for prompt_simpler
        correct_gate = gates[str(correct_index)]
        prompt = f"Fly through the gate with {correct_name}".replace('_', ' ')
        prompt_simpler = f"Fly through the {correct_gate['size']} {correct_gate['shape']} {correct_gate['color']} gate with {correct_name}".replace('_', ' ')

        dataset.append({
            "prompt": prompt,
            "prompt_simpler": prompt_simpler,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage: build Celebrities.json from the .dae files in "Celebrities"
create_dataset_with_multiple_gates(
    folder_path="Celebrities",
    output_file="Celebrities.json",
    num_samples=500,  # full dataset size
    num_gates=3,
    different_colors=True,
    size_option='both',
    shape_option='all',
)

#### Create Patterns.json

In [6]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Generate a multiple-choice "gate with a person with ..." dataset and write it as JSON.

    For every sample one ``.dae`` file from ``folder_path`` is picked as the
    correct answer, two other files are added as distractors and the three
    options are shuffled.  ``num_gates`` gates (size, shape, color) are then
    generated; option ``k`` belongs to gate ``k``, so the gate at the correct
    option's position supplies the description used in ``prompt_simpler``.
    The prompts read "... the gate with a person with <file name>".

    Args:
        folder_path: Directory whose ``.dae`` files (case-insensitive
            extension) serve as options.
        output_file: Path of the JSON file to write (UTF-8, indented).
        num_samples: Number of samples to generate.
        num_gates: Number of gates per sample; must be at least 3 because
            each of the three options needs its own gate.
        different_colors: If True, gate colors are drawn without repetition
            until all colors are used, then the pool is refilled.  If False,
            every gate gets an independent random color.
        size_option: 'big' or 'small' to fix the size; any other value
            (e.g. 'both') picks a random size per gate.
        shape_option: 'square', 'round' or 'triangle' to fix the shape; any
            other value (e.g. 'all') picks a random shape per gate.

    Raises:
        ValueError: If ``num_gates`` is below 3 or the folder holds fewer
            than 3 ``.dae`` files.
    """
    num_options = 3  # one correct file plus two distractors

    # Without this check a correct option at position > num_gates would hit a
    # missing gate and fail with a KeyError only on some random draws.
    if num_gates < num_options:
        raise ValueError(f"num_gates must be at least {num_options} (one gate per option).")

    # Only .dae files are eligible as options.
    dae_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.dae')]
    if len(dae_files) < num_options:  # Ensure there are enough files for options
        raise ValueError("Folder must contain at least 3 .dae files.")

    dataset = []
    colors = ['red', 'green', 'blue']  # Available colors
    shapes = ['square', 'round', 'triangle']  # Available shapes

    for _ in range(num_samples):
        # Randomly select the correct file; its name without the extension is
        # what the prompt refers to.  splitext strips only the final
        # extension, so names that contain dots stay intact.
        correct_file = random.choice(dae_files)
        correct_name = os.path.splitext(correct_file)[0]

        # Choose two distractors (different from the correct one)
        distractors = random.sample([f for f in dae_files if f != correct_file], num_options - 1)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # Indexing starts from 1

        gates = {}

        # Generate all gates first
        color_list = colors.copy()  # Fresh copy for each sample
        for j in range(1, num_gates + 1):
            if different_colors:
                if not color_list:  # Reset if we run out
                    color_list = colors.copy()
                color = random.choice(color_list)
                color_list.remove(color)
            else:
                color = random.choice(colors)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both'
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all'
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # Use the properties of the correct gate for prompt_simpler
        correct_gate = gates[str(correct_index)]
        prompt = f"Fly through the gate with a person with {correct_name}".replace('_', ' ')
        prompt_simpler = f"Fly through the {correct_gate['size']} {correct_gate['shape']} {correct_gate['color']} gate with a person with {correct_name}".replace('_', ' ')

        dataset.append({
            "prompt": prompt,
            "prompt_simpler": prompt_simpler,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage: build Patterns.json from the .dae files in "Patterns"
create_dataset_with_multiple_gates(
    folder_path="Patterns",
    output_file="Patterns.json",
    num_samples=500,  # full dataset size
    num_gates=3,
    different_colors=True,
    size_option='both',
    shape_option='all',
)