#### Create .json with dishes

In [25]:
import os
import random
import json

def create_food_dataset(healthy_folder, unhealthy_folder, output_file):
    """Write a three-option "pick the healthy food" dataset to a JSON file.

    One entry is produced per image found in ``healthy_folder``. Each entry
    offers that healthy image plus two distinct images sampled from
    ``unhealthy_folder``, in shuffled order.

    Args:
        healthy_folder: Directory with the healthy-food images.
        unhealthy_folder: Directory with the unhealthy-food images.
        output_file: Path of the JSON file to create or overwrite.

    Raises:
        ValueError: If there is at least one healthy image but fewer than two
            unhealthy images to use as distractors.
    """
    image_suffixes = ('png', 'jpg', 'jpeg')

    def list_images(folder):
        # Case-insensitive suffix filter on the directory listing.
        return [entry for entry in os.listdir(folder) if entry.lower().endswith(image_suffixes)]

    def as_option(folder, filename):
        # Root the path at "/" and normalise Windows separators to "/".
        return os.path.join('/', folder.replace('\\', '/'), filename).replace('\\', '/')

    healthy_images = list_images(healthy_folder)
    unhealthy_images = list_images(unhealthy_folder)

    # random.sample would fail below with an opaque "Sample larger than
    # population" error; fail early with a message that names the cause.
    if healthy_images and len(unhealthy_images) < 2:
        raise ValueError(
            f"Need at least 2 unhealthy images in {unhealthy_folder!r} to pick "
            f"distractors, found {len(unhealthy_images)}."
        )

    unhealthy_options = [as_option(unhealthy_folder, img) for img in unhealthy_images]
    dataset = []

    for img in healthy_images:
        correct_option = as_option(healthy_folder, img)

        # The healthy image plus two random unhealthy ones, in random order.
        choices = [correct_option] + random.sample(unhealthy_options, 2)
        random.shuffle(choices)

        dataset.append({
            "prompt": "Fly to the gate with the healthy food.",
            "options": choices,
            "correct": choices.index(correct_option) + 1  # 1-based position
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Build the healthy-vs-unhealthy food dataset from the two image folders
# (relative paths, resolved against the current working directory).
create_food_dataset('Food/Healthy', 'Food/Unhealthy', "healthy_food_dataset.json")

#### Create .json with Animals

In [26]:
import os
import json
import random

def create_dataset(folder_path, output_file, prompt_template="Fly through the gate with a {name}"):
    """Write a three-option image-recognition dataset to a JSON file.

    One entry is produced per image in ``folder_path``. Each entry offers the
    image itself plus two other images from the same folder, in shuffled order.

    Args:
        folder_path: Directory containing the images.
        output_file: Path of the JSON file to create or overwrite.
        prompt_template: ``str.format`` template for the prompt; ``{name}`` is
            replaced by the file name without extension, with underscores
            turned into spaces.

    Raises:
        ValueError: If the folder holds one or two images, which is too few
            to pick two distractors.
    """
    images = [img for img in os.listdir(folder_path) if img.lower().endswith(('png', 'jpg', 'jpeg'))]

    # random.sample would fail below with an opaque "Sample larger than
    # population" error; fail early with a message that names the cause.
    if images and len(images) < 3:
        raise ValueError("Folder must contain at least 3 image files.")

    dataset = []

    for img in images:
        name = os.path.splitext(img)[0].replace('_', ' ')  # human-readable label

        # The image itself plus two different images, in random order.
        distractors = random.sample([other for other in images if other != img], 2)
        choices = [img] + distractors
        random.shuffle(choices)

        dataset.append({
            "prompt": prompt_template.format(name=name),
            "options": choices,
            "correct": choices.index(img) + 1  # 1-based position
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Build the animals dataset: one entry per image found in Symbols/Animals.
create_dataset("Symbols/Animals", "animals_dataset.json")

#### Create .json with the Symbols


In [27]:
import os
import json
import random

def create_dataset(folder_path, output_file, prompt_template="Fly through the gate with {name}"):
    """Write a three-option symbol-recognition dataset to a JSON file.

    One entry is produced per image in ``folder_path``. Each entry offers the
    image itself plus two other images from the same folder, in shuffled order.

    Args:
        folder_path: Directory containing the images.
        output_file: Path of the JSON file to create or overwrite.
        prompt_template: ``str.format`` template for the prompt; ``{name}`` is
            replaced by the file name without extension, with underscores
            turned into spaces.

    Raises:
        ValueError: If the folder holds one or two images, which is too few
            to pick two distractors.
    """
    images = [img for img in os.listdir(folder_path) if img.lower().endswith(('png', 'jpg', 'jpeg'))]

    # random.sample would fail below with an opaque "Sample larger than
    # population" error; fail early with a message that names the cause.
    if images and len(images) < 3:
        raise ValueError("Folder must contain at least 3 image files.")

    dataset = []

    for img in images:
        name = os.path.splitext(img)[0].replace('_', ' ')  # human-readable label

        # The image itself plus two different images, in random order.
        distractors = random.sample([other for other in images if other != img], 2)
        choices = [img] + distractors
        random.shuffle(choices)

        dataset.append({
            "prompt": prompt_template.format(name=name),
            "options": choices,
            "correct": choices.index(img) + 1  # 1-based position
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Build the letters dataset: one entry per image found in Symbols/Letters.
create_dataset("Symbols/Letters", "letters_dataset.json")

In [28]:
import os
import random
import json

def create_dataset_with_multiple_gates(output_file, num_samples, num_gates, different_colors=True, size_option='both'):
    """Write placeholder "test N" samples with gate descriptions to a JSON file.

    Every sample lists the ten fixed number images in shuffled order, marks
    the 1-based position of ``"ten.jpg"`` as correct, and describes
    ``num_gates`` round gates.

    Args:
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample.
        different_colors: If true, colors are drawn without replacement from
            red/green/blue, and the pool is refilled once it is exhausted.
            Otherwise each gate's color comes from ``random_color()``.
        size_option: ``'big'`` or ``'small'`` fixes the size of every gate;
            any other value defers to ``random_size()`` per gate.
    """
    # Fixed list of image options for simplicity (adjust as needed)
    image_options = ["one.jpg", "two.jpg", "three.jpg", "four.jpg", "five.jpg", "six.jpg", "seven.jpg", "eight.jpg", "nine.jpg", "ten.jpg"]
    palette = ['red', 'green', 'blue']
    dataset = []

    for i in range(1, num_samples + 1):  # prompts are numbered from 1
        choices = image_options.copy()
        random.shuffle(choices)
        correct_index = choices.index("ten.jpg") + 1  # "ten.jpg" is always the correct one

        gates = {}
        remaining_colors = palette.copy()
        for j in range(1, num_gates + 1):
            if different_colors:
                # Refill the pool when it runs dry; otherwise more than three
                # gates would make randint(0, -1) raise ValueError.
                if not remaining_colors:
                    remaining_colors = palette.copy()
                color = remaining_colors.pop(random.randint(0, len(remaining_colors) - 1))
            else:
                color = random_color()  # defined elsewhere in the notebook

            if size_option in ('big', 'small'):
                size = size_option
            else:
                size = random_size()  # defined elsewhere in the notebook

            gates[str(j)] = {  # keys are "1", "2", ... rather than "gate_1"
                "size": size,
                "shape": "round",  # every gate is round in this variant
                "color": color,
            }

        dataset.append({
            "prompt": f"test {i}",
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Define these functions if you want random colors or sizes
def random_color():
    """Return one gate color drawn uniformly from red, green and blue."""
    palette = ['red', 'green', 'blue']
    return random.choice(palette)

def random_size():
    """Return one gate size, 'big' or 'small', with equal probability."""
    sizes = ['big', 'small']
    return random.choice(sizes)

# Example usage: 2 samples with 3 gates each, a distinct color per gate,
# and every gate forced to size 'big'.
create_dataset_with_multiple_gates("new_dataset2.json", 2, 3, different_colors=True, size_option='big')

#### Create .json with the Digits

In [54]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Write "test N" samples built from a folder of images to a JSON file.

    Each sample holds three shuffled image options (one drawn as correct, two
    distractors), the 1-based position of the correct one, and ``num_gates``
    gate descriptions.

    Args:
        folder_path: Directory scanned for png/jpg/jpeg files.
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample.
        different_colors: If true, colors are drawn without replacement from
            red/green/blue (refilled when exhausted); otherwise each color
            comes from ``random_color()``.
        size_option: ``'big'`` or ``'small'`` fixes the size; anything else
            defers to ``random_size()``.
        shape_option: ``'square'``, ``'round'`` or ``'triangle'`` fixes the
            shape; anything else picks one of the three at random.

    Raises:
        ValueError: If the folder contains fewer than three image files.
    """
    image_suffixes = ('png', 'jpg', 'jpeg')
    images = [entry for entry in os.listdir(folder_path) if entry.lower().endswith(image_suffixes)]
    if len(images) < 3:
        raise ValueError("Folder must contain at least 3 image files.")

    samples = []
    for sample_no in range(1, num_samples + 1):
        # One correct image and two distractors, in random order.
        correct_img = random.choice(images)
        others = [candidate for candidate in images if candidate != correct_img]
        choices = [correct_img] + random.sample(others, 2)
        random.shuffle(choices)

        palette = ['red', 'green', 'blue']
        shapes = ['square', 'round', 'triangle']
        gates = {}
        for gate_no in range(1, num_gates + 1):
            if not different_colors:
                color = random_color()
            else:
                if len(palette) == 0:  # refill once every color has been used
                    palette = ['red', 'green', 'blue']
                color = palette.pop(random.randint(0, len(palette) - 1))

            size = size_option if size_option in ('big', 'small') else random_size()
            shape = shape_option if shape_option in shapes else random.choice(shapes)

            gates[str(gate_no)] = {"size": size, "shape": shape, "color": color}

        samples.append({
            "prompt": f"test {sample_no}",
            "options": choices,
            "correct": choices.index(correct_img) + 1,  # 1-based position
            "gates": gates,
            "background": "room",
        })

    with open(output_file, "w", encoding="utf-8") as handle:
        json.dump(samples, handle, indent=4, ensure_ascii=False)

# Define these functions if you want random colors or sizes
def random_color():
    """Return one gate color drawn uniformly from red, green and blue."""
    palette = ['red', 'green', 'blue']
    return random.choice(palette)

def random_size():
    """Return one gate size, 'big' or 'small', with equal probability."""
    sizes = ['big', 'small']
    return random.choice(sizes)

# Example usage
# NOTE(review): the output file is spelled "letterrs_dataset.json" while the
# input folder is "Symbols/Animals" — confirm both are intended.
create_dataset_with_multiple_gates(
    "Symbols/Animals", 
    "letterrs_dataset.json", 
    500,  # num_samples
    3,  # num_gates
    different_colors=True, 
    size_option='big',  # any value other than 'big'/'small' (e.g. 'both') picks a random size per gate
    shape_option='all'  # Can be 'square', 'round', 'triangle', or 'all'
)

In [55]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Generate "test N" samples from a folder of images and save them as JSON.

    A sample consists of three shuffled image options (one correct, two
    distractors), the 1-based index of the correct option, and a description
    of ``num_gates`` gates.

    Args:
        folder_path: Directory scanned for png/jpg/jpeg files.
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample.
        different_colors: If true, colors are popped at random from a
            red/green/blue pool that is refilled when empty; otherwise each
            color comes from ``random_color()``.
        size_option: ``'big'`` or ``'small'`` fixes the size; anything else
            defers to ``random_size()``.
        shape_option: ``'square'``, ``'round'`` or ``'triangle'`` fixes the
            shape; anything else picks one of the three at random.

    Raises:
        ValueError: If the folder contains fewer than three image files.
    """
    all_shapes = ('square', 'round', 'triangle')

    def is_image(filename):
        return filename.lower().endswith(('png', 'jpg', 'jpeg'))

    images = list(filter(is_image, os.listdir(folder_path)))
    if len(images) < 3:
        raise ValueError("Folder must contain at least 3 image files.")

    records = []
    sample_no = 1
    while sample_no <= num_samples:
        # Pick the answer, then two different images, then shuffle all three.
        correct_img = random.choice(images)
        distractors = random.sample([name for name in images if name != correct_img], 2)
        choices = [correct_img] + distractors
        random.shuffle(choices)

        color_pool = ['red', 'green', 'blue']
        shapes = ['square', 'round', 'triangle']
        gates = {}
        for gate_no in range(1, num_gates + 1):
            if different_colors:
                if not color_pool:  # every color used once: start over
                    color_pool = ['red', 'green', 'blue']
                color = color_pool.pop(random.randint(0, len(color_pool) - 1))
            else:
                color = random_color()

            if size_option in ('big', 'small'):
                size = size_option
            else:
                size = random_size()

            if shape_option in all_shapes:
                shape = shape_option
            else:
                shape = random.choice(shapes)

            gates[str(gate_no)] = dict(size=size, shape=shape, color=color)

        records.append({
            "prompt": f"test {sample_no}",
            "options": choices,
            "correct": choices.index(correct_img) + 1,  # 1-based position
            "gates": gates,
            "background": "room",
        })
        sample_no += 1

    with open(output_file, "w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=4, ensure_ascii=False)

# Define these functions if you want random colors or sizes
def random_color():
    """Return one gate color drawn uniformly from red, green, blue and yellow."""
    palette = ['red', 'green', 'blue', 'yellow']
    return random.choice(palette)

def random_size():
    """Return one gate size, 'big' or 'small', with equal probability."""
    sizes = ['big', 'small']
    return random.choice(sizes)

# Example usage: a small 2-sample run over the "Digits" folder.
create_dataset_with_multiple_gates(
    "Digits", 
    "new_dataset.json", 
    2,  # num_samples
    3,  # num_gates
    different_colors=True, 
    size_option='big', 
    shape_option='all'  # Can be 'square', 'round', 'triangle', or 'all'
)

In [56]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Write "fly through the gate with a number ..." samples to a JSON file.

    Each sample draws one .jpg file as the correct answer, adds two other
    .jpg files as distractors, shuffles the three options, and describes
    ``num_gates`` gates. The prompt names the part of the correct file name
    that precedes its first dot.

    Args:
        folder_path: Directory scanned for ``.jpg`` files (case-insensitive).
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample.
        different_colors: If true, colors are drawn without replacement from
            red/green/blue (refilled when exhausted); otherwise each gate gets
            an independent random color.
        size_option: ``'big'`` or ``'small'`` fixes the size; anything else
            picks one of the two at random per gate.
        shape_option: ``'square'``, ``'round'`` or ``'triangle'`` fixes the
            shape; anything else picks one of the three at random per gate.

    Raises:
        ValueError: If the folder contains fewer than three ``.jpg`` files.
    """
    jpg_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.jpg')]
    if len(jpg_files) < 3:  # one correct option plus two distractors
        # The filter above is on .jpg, so the message must say .jpg too.
        raise ValueError("Folder must contain at least 3 .jpg files.")

    palette = ['red', 'green', 'blue']
    shapes = ['square', 'round', 'triangle']
    dataset = []

    for _ in range(num_samples):
        # Randomly select the correct file and derive its label for the prompt
        correct_file = random.choice(jpg_files)
        correct_number_word = correct_file.split('.')[0]  # e.g. "one" from "one.jpg"
        prompt = f"Fly through the gate with a number {correct_number_word}"

        # Two distractors that differ from the correct file
        distractors = random.sample([f for f in jpg_files if f != correct_file], 2)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # 1-based position

        gates = {}
        remaining_colors = palette.copy()
        for j in range(1, num_gates + 1):
            if different_colors:
                if not remaining_colors:  # refill once every color has been used
                    remaining_colors = palette.copy()
                color = remaining_colors.pop(random.randint(0, len(remaining_colors) - 1))
            else:
                color = random.choice(palette)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both'
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all'
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        dataset.append({
            "prompt": prompt,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage
create_dataset_with_multiple_gates(
    "Symbols/Letters",  # Folder to scan; the function above keeps only .jpg files
    "numbers_dataset.json",
    5,  # Set to 500 for full dataset
    3,  # num_gates
    different_colors=True,
    size_option='both',
    shape_option='all'
)

In [57]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Write samples whose prompt describes the correct gate to a JSON file.

    Each sample draws one .jpg file as the correct answer, adds two other
    .jpg files as distractors, shuffles the three options, and describes
    ``num_gates`` gates. The ``prompt_auto`` text names the size, shape and
    color of the gate whose number equals the 1-based position of the correct
    option, so the prompt and the ``correct`` field point at the same gate.

    Args:
        folder_path: Directory scanned for ``.jpg`` files (case-insensitive).
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample; must be at least 3
            so that every option position has a gate.
        different_colors: If true, colors are drawn without replacement from
            red/green/blue (refilled when exhausted); otherwise each gate gets
            an independent random color.
        size_option: ``'big'`` or ``'small'`` fixes the size; anything else
            picks one of the two at random per gate.
        shape_option: ``'square'``, ``'round'`` or ``'triangle'`` fixes the
            shape; anything else picks one of the three at random per gate.

    Raises:
        ValueError: If the folder contains fewer than three ``.jpg`` files,
            or if ``num_gates`` is smaller than the number of options.
    """
    num_options = 3  # one correct file plus two distractors

    jpg_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.jpg')]
    if len(jpg_files) < num_options:
        # The filter above is on .jpg, so the message must say .jpg too.
        raise ValueError("Folder must contain at least 3 .jpg files.")
    if num_gates < num_options:
        raise ValueError(
            f"num_gates must be at least {num_options} so that every option has a gate, got {num_gates}."
        )

    palette = ['red', 'green', 'blue']
    shapes = ['square', 'round', 'triangle']
    dataset = []

    for _ in range(num_samples):
        # Randomly select the correct file and derive its label for the prompt
        correct_file = random.choice(jpg_files)
        correct_number_word = correct_file.split('.')[0]  # e.g. "four" from "four.jpg"

        # Two distractors that differ from the correct file
        distractors = random.sample([f for f in jpg_files if f != correct_file], num_options - 1)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # 1-based position

        # Describe every gate first; the prompt is derived afterwards.
        gates = {}
        remaining_colors = palette.copy()
        for j in range(1, num_gates + 1):
            if different_colors:
                if not remaining_colors:  # refill once every color has been used
                    remaining_colors = palette.copy()
                color = remaining_colors.pop(random.randint(0, len(remaining_colors) - 1))
            else:
                color = random.choice(palette)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both'
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all'
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # Describe the gate at the correct option's position. Always using
        # gate "1" would contradict "correct" whenever the shuffle put the
        # correct file in position 2 or 3.
        target = gates[str(correct_index)]
        prompt = f"Fly through the {target['size']} {target['shape']} {target['color']} gate with number {correct_number_word}"

        dataset.append({
            "prompt_auto": prompt,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage
create_dataset_with_multiple_gates(
    "Symbols/Letters",  # Folder to scan; the function above keeps only .jpg files
    "numbers_dataset2.json",
    5,  # Set to 500 for full dataset
    3,  # num_gates
    different_colors=True,
    size_option='both',
    shape_option='all'
)

In [None]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Write samples with a plain and a gate-describing prompt to a JSON file.

    Each sample draws one .jpg file as the correct answer, adds two other
    .jpg files as distractors, shuffles the three options, and describes
    ``num_gates`` gates. ``prompt`` names only the number; ``prompt_simpler``
    additionally names the size, shape and color of the gate whose number
    equals the 1-based position of the correct option.

    Args:
        folder_path: Directory scanned for ``.jpg`` files (case-insensitive).
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample; must be at least 3
            so that every option position has a gate.
        different_colors: If true, colors are drawn without replacement from
            red/green/blue (refilled when exhausted); otherwise each gate gets
            an independent random color.
        size_option: ``'big'`` or ``'small'`` fixes the size; anything else
            picks one of the two at random per gate.
        shape_option: ``'square'``, ``'round'`` or ``'triangle'`` fixes the
            shape; anything else picks one of the three at random per gate.

    Raises:
        ValueError: If the folder contains fewer than three ``.jpg`` files,
            or if ``num_gates`` is smaller than the number of options.
    """
    num_options = 3  # one correct file plus two distractors

    jpg_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.jpg')]
    if len(jpg_files) < num_options:
        # The filter above is on .jpg, so the message must say .jpg too.
        raise ValueError("Folder must contain at least 3 .jpg files.")
    if num_gates < num_options:
        raise ValueError(
            f"num_gates must be at least {num_options} so that every option has a gate, got {num_gates}."
        )

    dataset = []
    colors = ['red', 'green', 'blue']  # Available colors
    shapes = ['square', 'round', 'triangle']  # Available shapes

    for _ in range(num_samples):
        # Randomly select the correct file and derive its label for the prompts
        correct_file = random.choice(jpg_files)
        correct_number_word = correct_file.split('.')[0]  # e.g. "four" from "four.jpg"

        # Two distractors that differ from the correct file
        distractors = random.sample([f for f in jpg_files if f != correct_file], num_options - 1)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # 1-based position

        # Describe every gate first; the prompts are derived afterwards.
        gates = {}
        color_list = colors.copy()  # fresh pool for each sample
        for j in range(1, num_gates + 1):
            if different_colors:
                if not color_list:  # refill once every color has been used
                    color_list = colors.copy()
                color = random.choice(color_list)
                color_list.remove(color)
            else:
                color = random.choice(colors)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both'
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all'
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # Describe the gate at the correct option's position. Always using
        # gate "1" would contradict "correct" whenever the shuffle put the
        # correct file in position 2 or 3.
        target = gates[str(correct_index)]
        prompt = f"Fly through the gate with number {correct_number_word}"
        prompt_simpler = f"Fly through the {target['size']} {target['shape']} {target['color']} gate with number {correct_number_word}"

        dataset.append({
            "prompt": prompt,
            "prompt_simpler": prompt_simpler,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage
create_dataset_with_multiple_gates(
    "Digits",  # Folder to scan; the function above keeps only .jpg files
    "numbers_dataset3.json",
    500,  # num_samples (full dataset)
    3,  # num_gates
    different_colors=True,
    size_option='both',
    shape_option='all'  # Can set to 'square', 'round', or 'triangle' for fixed shape
)

#### 26.02 Create .json with the Letters

In [62]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Write "fly through the gate with a letter ..." samples to a JSON file.

    Each sample draws one .dae file as the correct answer, adds two other
    .dae files as distractors, shuffles the three options, and describes
    ``num_gates`` gates. ``prompt`` names only the letter; ``prompt_simpler``
    additionally names the size, shape and color of the gate whose number
    equals the 1-based position of the correct option.

    Args:
        folder_path: Directory scanned for ``.dae`` files (case-insensitive).
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample; must be at least 3
            so that every option position has a gate.
        different_colors: If true, colors are drawn without replacement from
            red/green/blue (refilled when exhausted); otherwise each gate gets
            an independent random color.
        size_option: ``'big'`` or ``'small'`` fixes the size; anything else
            picks one of the two at random per gate.
        shape_option: ``'square'``, ``'round'`` or ``'triangle'`` fixes the
            shape; anything else picks one of the three at random per gate.

    Raises:
        ValueError: If the folder contains fewer than three ``.dae`` files,
            or if ``num_gates`` is smaller than the number of options.
    """
    num_options = 3  # one correct file plus two distractors

    # Only .dae files are considered.
    dae_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.dae')]
    if len(dae_files) < num_options:
        raise ValueError("Folder must contain at least 3 .dae files.")
    # The gate lookup by correct position below would raise KeyError when
    # fewer gates than options exist, so reject that configuration up front.
    if num_gates < num_options:
        raise ValueError(
            f"num_gates must be at least {num_options} so that every option has a gate, got {num_gates}."
        )

    dataset = []
    colors = ['red', 'green', 'blue']  # Available colors
    shapes = ['square', 'round', 'triangle']  # Available shapes

    for _ in range(num_samples):
        # Randomly select the correct file and derive its label for the prompts
        correct_file = random.choice(dae_files)
        correct_label = correct_file.split('.')[0]  # text before the first dot

        # Two distractors that differ from the correct file
        distractors = random.sample([f for f in dae_files if f != correct_file], num_options - 1)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # 1-based position

        # Describe every gate first; the prompts are derived afterwards.
        gates = {}
        color_list = colors.copy()  # fresh pool for each sample
        for j in range(1, num_gates + 1):
            if different_colors:
                if not color_list:  # refill once every color has been used
                    color_list = colors.copy()
                color = random.choice(color_list)
                color_list.remove(color)
            else:
                color = random.choice(colors)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both'
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all'
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # prompt_simpler describes the gate at the correct option's position.
        correct_gate = gates[str(correct_index)]
        prompt = f"Fly through the gate with a letter {correct_label}"
        prompt_simpler = f"Fly through the {correct_gate['size']} {correct_gate['shape']} {correct_gate['color']} gate with a letter {correct_label}"

        dataset.append({
            "prompt": prompt,
            "prompt_simpler": prompt_simpler,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage
create_dataset_with_multiple_gates(
    "Symbols/Letters",  # Folder to scan; the function above keeps only .dae files
    "Letters.json",
    500,  # num_samples (full dataset)
    3,  # num_gates
    different_colors=True,
    size_option='both',
    shape_option='all'
)

#### 26.02 Create .json with the Animals

In [60]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Write "fly through the gate with a <animal>" samples to a JSON file.

    Each sample draws one .dae file as the correct answer, adds two other
    .dae files as distractors, shuffles the three options, and describes
    ``num_gates`` gates. ``prompt`` names only the object; ``prompt_simpler``
    additionally names the size, shape and color of the gate whose number
    equals the 1-based position of the correct option.

    Args:
        folder_path: Directory scanned for ``.dae`` files (case-insensitive).
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample; must be at least 3
            so that every option position has a gate.
        different_colors: If true, colors are drawn without replacement from
            red/green/blue (refilled when exhausted); otherwise each gate gets
            an independent random color.
        size_option: ``'big'`` or ``'small'`` fixes the size; anything else
            picks one of the two at random per gate.
        shape_option: ``'square'``, ``'round'`` or ``'triangle'`` fixes the
            shape; anything else picks one of the three at random per gate.

    Raises:
        ValueError: If the folder contains fewer than three ``.dae`` files,
            or if ``num_gates`` is smaller than the number of options.
    """
    num_options = 3  # one correct file plus two distractors

    # Only .dae files are considered.
    dae_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.dae')]
    if len(dae_files) < num_options:
        raise ValueError("Folder must contain at least 3 .dae files.")
    # The gate lookup by correct position below would raise KeyError when
    # fewer gates than options exist, so reject that configuration up front.
    if num_gates < num_options:
        raise ValueError(
            f"num_gates must be at least {num_options} so that every option has a gate, got {num_gates}."
        )

    dataset = []
    colors = ['red', 'green', 'blue']  # Available colors
    shapes = ['square', 'round', 'triangle']  # Available shapes

    for _ in range(num_samples):
        # Randomly select the correct file and derive its label for the prompts
        correct_file = random.choice(dae_files)
        correct_label = correct_file.split('.')[0]  # text before the first dot

        # Two distractors that differ from the correct file
        distractors = random.sample([f for f in dae_files if f != correct_file], num_options - 1)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # 1-based position

        # Describe every gate first; the prompts are derived afterwards.
        gates = {}
        color_list = colors.copy()  # fresh pool for each sample
        for j in range(1, num_gates + 1):
            if different_colors:
                if not color_list:  # refill once every color has been used
                    color_list = colors.copy()
                color = random.choice(color_list)
                color_list.remove(color)
            else:
                color = random.choice(colors)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both'
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all'
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # prompt_simpler describes the gate at the correct option's position.
        correct_gate = gates[str(correct_index)]
        prompt = f"Fly through the gate with a {correct_label}"
        prompt_simpler = f"Fly through the {correct_gate['size']} {correct_gate['shape']} {correct_gate['color']} gate with a {correct_label}"

        dataset.append({
            "prompt": prompt,
            "prompt_simpler": prompt_simpler,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage
create_dataset_with_multiple_gates(
    "Symbols/Animals",  # Folder to scan; the function above keeps only .dae files
    "Animals.json",
    500,  # num_samples (full dataset)
    3,  # num_gates
    different_colors=True,
    size_option='both',
    shape_option='all'
)

#### 26.02 Create .json with the Logos

In [61]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Write "fly through the gate with <logo>" samples to a JSON file.

    Each sample draws one .dae file as the correct answer, adds two other
    .dae files as distractors, shuffles the three options, and describes
    ``num_gates`` gates. ``prompt`` names only the object; ``prompt_simpler``
    additionally names the size, shape and color of the gate whose number
    equals the 1-based position of the correct option.

    Args:
        folder_path: Directory scanned for ``.dae`` files (case-insensitive).
        output_file: Path of the JSON file to create or overwrite.
        num_samples: Number of samples to generate.
        num_gates: Number of gates described per sample; must be at least 3
            so that every option position has a gate.
        different_colors: If true, colors are drawn without replacement from
            red/green/blue (refilled when exhausted); otherwise each gate gets
            an independent random color.
        size_option: ``'big'`` or ``'small'`` fixes the size; anything else
            picks one of the two at random per gate.
        shape_option: ``'square'``, ``'round'`` or ``'triangle'`` fixes the
            shape; anything else picks one of the three at random per gate.

    Raises:
        ValueError: If the folder contains fewer than three ``.dae`` files,
            or if ``num_gates`` is smaller than the number of options.
    """
    num_options = 3  # one correct file plus two distractors

    # Only .dae files are considered.
    dae_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.dae')]
    if len(dae_files) < num_options:
        raise ValueError("Folder must contain at least 3 .dae files.")
    # The gate lookup by correct position below would raise KeyError when
    # fewer gates than options exist, so reject that configuration up front.
    if num_gates < num_options:
        raise ValueError(
            f"num_gates must be at least {num_options} so that every option has a gate, got {num_gates}."
        )

    dataset = []
    colors = ['red', 'green', 'blue']  # Available colors
    shapes = ['square', 'round', 'triangle']  # Available shapes

    for _ in range(num_samples):
        # Randomly select the correct file and derive its label for the prompts
        correct_file = random.choice(dae_files)
        correct_label = correct_file.split('.')[0]  # text before the first dot

        # Two distractors that differ from the correct file
        distractors = random.sample([f for f in dae_files if f != correct_file], num_options - 1)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # 1-based position

        # Describe every gate first; the prompts are derived afterwards.
        gates = {}
        color_list = colors.copy()  # fresh pool for each sample
        for j in range(1, num_gates + 1):
            if different_colors:
                if not color_list:  # refill once every color has been used
                    color_list = colors.copy()
                color = random.choice(color_list)
                color_list.remove(color)
            else:
                color = random.choice(colors)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both'
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all'
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # prompt_simpler describes the gate at the correct option's position.
        correct_gate = gates[str(correct_index)]
        prompt = f"Fly through the gate with {correct_label}"
        prompt_simpler = f"Fly through the {correct_gate['size']} {correct_gate['shape']} {correct_gate['color']} gate with {correct_label}"

        dataset.append({
            "prompt": prompt,
            "prompt_simpler": prompt_simpler,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage
create_dataset_with_multiple_gates(
    "Symbols/Logos",  # Folder to scan; the function above keeps only .dae files
    "Logos.json",
    500,  # num_samples (full dataset)
    3,  # num_gates
    different_colors=True,
    size_option='both',
    shape_option='all'
)

#### 26.02 Create .json with the Digits


In [2]:
import os
import random
import json

def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', shape_option='all'):
    """Build a "fly through the gate" multiple-choice dataset from .dae files and save it as JSON.

    Every sample picks one .dae file as the correct answer and two other files
    as distractors, shuffles the three options, and generates ``num_gates``
    gate descriptions keyed "1".."num_gates". Option ``k`` (1-based) belongs to
    gate ``"k"``; the properties of the correct option's gate are embedded in
    ``prompt_simpler``.

    Args:
        folder_path: Directory scanned (non-recursively) for files whose name
            ends in ``.dae`` (case-insensitive).
        output_file: Path of the JSON file to write (opened in "w" mode).
        num_samples: Number of samples to generate.
        num_gates: Number of gates per sample. Must be at least 3 so that each
            of the three options has a gate.
        different_colors: If True, colors are drawn without replacement from
            red/green/blue (the pool is refilled once exhausted); otherwise each
            gate's color is drawn independently.
        size_option: 'big' or 'small' forces that size for every gate; any other
            value (e.g. 'both') picks a size at random per gate.
        shape_option: 'square', 'round' or 'triangle' forces that shape; any
            other value (e.g. 'all') picks a shape at random per gate.

    Raises:
        ValueError: If the folder holds fewer than 3 .dae files, or if
            ``num_gates`` is smaller than the number of options (3).
    """
    num_options = 3  # one correct file + two distractors per sample

    # Sorted so the candidate order does not depend on the arbitrary order
    # returned by os.listdir (relevant when the caller seeds `random`).
    dae_files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith('.dae'))
    if len(dae_files) < num_options:  # Ensure there are enough files for options
        raise ValueError("Folder must contain at least 3 .dae files.")
    if num_gates < num_options:
        # Option k is tied to gate "k"; with fewer gates the lookup of the
        # correct gate further down would fail for some samples.
        raise ValueError(
            f"num_gates must be at least {num_options} (one gate per option), got {num_gates}."
        )

    dataset = []
    colors = ['red', 'green', 'blue']  # Available colors
    shapes = ['square', 'round', 'triangle']  # Available shapes

    for _ in range(num_samples):
        # Randomly select the correct file and derive its number word (e.g. "ten").
        correct_file = random.choice(dae_files)
        # splitext strips only the final extension, so names containing dots stay intact.
        correct_number_word = os.path.splitext(correct_file)[0]

        # Choose two distractors (different from the correct one)
        distractors = random.sample([f for f in dae_files if f != correct_file], num_options - 1)
        choices = [correct_file] + distractors
        random.shuffle(choices)
        correct_index = choices.index(correct_file) + 1  # Indexing starts from 1

        gates = {}

        # Generate all gates first
        color_list = colors.copy()  # Fresh copy for each sample
        for j in range(1, num_gates + 1):
            if different_colors:
                if not color_list:  # Refill once every color has been used
                    color_list = colors.copy()
                color = random.choice(color_list)
                color_list.remove(color)
            else:
                color = random.choice(colors)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both' (or any unrecognised value)
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all' (or any unrecognised value)
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # Use the properties of the correct gate for prompt_simpler
        correct_gate = gates[str(correct_index)]
        prompt = f"Fly through the gate with {correct_number_word}"
        prompt_simpler = f"Fly through the {correct_gate['size']} {correct_gate['shape']} {correct_gate['color']} gate with {correct_number_word}"

        dataset.append({
            "prompt": prompt,
            "prompt_simpler": prompt_simpler,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage: generate the Digits dataset when this cell is executed.
create_dataset_with_multiple_gates(
    "Digits",  # Folder containing .dae files (the function filters on the .dae extension)
    "Digits.json",  # Output JSON file
    500,  # Number of samples (500 for the full dataset)
    3,  # Number of gates per sample
    different_colors=True,
    size_option='both',
    shape_option='all'
)

#### Healthy Food

In [4]:
import os
import random
import json

def create_food_gate_dataset(healthy_folder, unhealthy_folder, output_file, num_samples, num_gates=3, different_colors=True, size_option='both', shape_option='all'):
    """Build a "healthy food" gate dataset from two folders of .dae files and save it as JSON.

    Every sample offers three options: one file from ``healthy_folder`` (the
    correct answer) and two files from ``unhealthy_folder`` (distractors), in
    shuffled order. Option ``k`` (1-based) belongs to gate ``"k"``; the
    properties of the correct option's gate are embedded in ``prompt_simpler``.

    Note that option paths are always written with the fixed prefixes
    ``Healthy/`` and ``Unhealthy/``, regardless of the folder arguments.

    Args:
        healthy_folder: Directory scanned (non-recursively) for ``.dae`` files
            (case-insensitive) used as correct answers.
        unhealthy_folder: Directory scanned the same way for distractors.
        output_file: Path of the JSON file to write (opened in "w" mode).
        num_samples: Number of samples to generate.
        num_gates: Number of gates per sample. Must be at least 3 so that each
            of the three options has a gate.
        different_colors: If True, colors are drawn without replacement from
            red/green/blue (the pool is refilled once exhausted); otherwise each
            gate's color is drawn independently.
        size_option: 'big' or 'small' forces that size for every gate; any other
            value (e.g. 'both') picks a size at random per gate.
        shape_option: 'square', 'round' or 'triangle' forces that shape; any
            other value (e.g. 'all') picks a shape at random per gate.

    Raises:
        ValueError: If there is no healthy .dae file, fewer than two unhealthy
            .dae files, or ``num_gates`` is smaller than the number of options (3).
    """
    num_options = 3  # one healthy answer + two unhealthy distractors

    # Read .dae files from both folders; sorted so the candidate order does not
    # depend on the arbitrary order returned by os.listdir.
    healthy_files = sorted(f for f in os.listdir(healthy_folder) if f.lower().endswith('.dae'))
    unhealthy_files = sorted(f for f in os.listdir(unhealthy_folder) if f.lower().endswith('.dae'))

    # Ensure there are enough files
    if len(healthy_files) < 1 or len(unhealthy_files) < 2:
        raise ValueError("Healthy folder must contain at least 1 .dae file, and Unhealthy folder must contain at least 2 .dae files.")
    if num_gates < num_options:
        # Option k is tied to gate "k"; with fewer gates the lookup of the
        # correct gate further down would fail for some samples.
        raise ValueError(
            f"num_gates must be at least {num_options} (one gate per option), got {num_gates}."
        )

    dataset = []
    colors = ['red', 'green', 'blue']  # Available colors
    shapes = ['square', 'round', 'triangle']  # Available shapes

    for _ in range(num_samples):
        # Randomly select one healthy file (correct answer) and two unhealthy files (distractors)
        correct_file = random.choice(healthy_files)
        distractors = random.sample(unhealthy_files, num_options - 1)

        # Combine and shuffle the choices (only the fixed subfolder prefix is kept in the path)
        healthy_path = f"Healthy/{correct_file}"
        unhealthy_paths = [f"Unhealthy/{f}" for f in distractors]
        choices = [healthy_path] + unhealthy_paths
        random.shuffle(choices)
        correct_index = choices.index(healthy_path) + 1  # Indexing starts from 1

        gates = {}

        # Generate all gates
        color_list = colors.copy()  # Fresh copy for each sample
        for j in range(1, num_gates + 1):
            if different_colors:
                if not color_list:  # Refill once every color has been used
                    color_list = colors.copy()
                color = random.choice(color_list)
                color_list.remove(color)
            else:
                color = random.choice(colors)

            if size_option in ('big', 'small'):
                size = size_option
            else:  # 'both' (or any unrecognised value)
                size = random.choice(['big', 'small'])

            if shape_option in shapes:
                shape = shape_option
            else:  # 'all' (or any unrecognised value)
                shape = random.choice(shapes)

            gates[str(j)] = {
                "size": size,
                "shape": shape,
                "color": color,
            }

        # Use the properties of the correct gate for prompt_simpler
        correct_gate = gates[str(correct_index)]
        prompt = "Fly to the gate with healthy food"
        prompt_simpler = f"Fly through the {correct_gate['size']} {correct_gate['shape']} {correct_gate['color']} gate with healthy food"

        dataset.append({
            "prompt": prompt,
            "prompt_simpler": prompt_simpler,
            "options": choices,
            "correct": correct_index,
            "gates": gates,
            "background": "room"
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage: every argument is passed by keyword so the call documents itself.
create_food_gate_dataset(
    healthy_folder="Food/Healthy",      # directory with the healthy food files
    unhealthy_folder="Food/Unhealthy",  # directory with the unhealthy food files
    output_file="Food.json",            # JSON file that receives the dataset
    num_samples=500,                    # how many samples to generate
    num_gates=3,
    different_colors=True,
    size_option='both',
    shape_option='all',
)