#### Create .json with celebs

In [44]:
import os
import json
import random

def create_dataset(folder_path, output_file):
    """Write a multiple-choice dataset with one entry per image in a folder.

    Every entry holds a prompt built from the image's base name (underscores
    shown as spaces), three candidate file names (the image itself plus two
    other randomly chosen images, shuffled) and the 1-based position of the
    correct file name among those candidates.

    Args:
        folder_path: Directory containing ``.png``/``.jpg``/``.jpeg`` images.
        output_file: Path of the JSON file to write (UTF-8, indented).

    Raises:
        ValueError: If the folder holds only one or two images, which is too
            few to draw two distractors. An empty folder produces an empty
            dataset.
    """
    # Sorted because os.listdir returns entries in arbitrary order; a stable
    # order makes the output reproducible for a seeded random generator.
    # Suffixes include the dot so that a file such as "fakepng" is not
    # mistaken for an image.
    images = sorted(
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    if 0 < len(images) < 3:
        raise ValueError(
            f"Need at least 3 images in {folder_path!r} to build 3 options, "
            f"found {len(images)}"
        )

    dataset = []
    for img in images:
        name = os.path.splitext(img)[0].replace('_', ' ')  # Replace underscores with spaces
        prompt = f"Fly through the gate with {name}"

        # Choose two random file names that are not the current one
        distractors = random.sample([other for other in images if other != img], 2)
        choices = [img, *distractors]
        random.shuffle(choices)

        dataset.append({
            "prompt": prompt,
            "options": choices,
            "correct": choices.index(img) + 1,  # Indexing starts from 1
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Build the celebrity dataset from the images in the "Celebrities" folder.
create_dataset(folder_path="Celebrities", output_file="hr_celeb_dataset.json")

#### Rename Patterns/Attributes

In [34]:
import os
from PIL import Image

# Copy every image from the patterns folder into the output folder under a
# shortened file name (the "with_" and "in_" fragments are dropped).
patterns_folder = 'Patterns/Attributes'
output_folder = 'Renamed_Patterns'

# exist_ok makes the call a no-op when the folder is already there, replacing
# the separate existence check.
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(patterns_folder):
    # Suffixes include the dot so only real image extensions match.
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    # NOTE(review): str.replace removes these fragments anywhere in the name,
    # not only at the start (e.g. "skin_tone" would become "sktone") -
    # confirm that no source file name is affected.
    new_filename = filename.replace('with_', '').replace('in_', '')

    # The context manager closes the source file handle once the copy has been
    # written; Image.open otherwise keeps it open.
    with Image.open(os.path.join(patterns_folder, filename)) as img:
        img.save(os.path.join(output_folder, new_filename))

#### Create .json with patterns

In [43]:
def create_dataset(folder_path, output_file):
    """Write a multiple-choice attribute dataset with one entry per image.

    Every entry holds a prompt built from the image's base name (underscores
    shown as spaces), three candidate file names (the image itself plus two
    other randomly chosen images, shuffled) and the 1-based position of the
    correct file name among those candidates.

    Args:
        folder_path: Directory containing ``.png``/``.jpg``/``.jpeg`` images.
        output_file: Path of the JSON file to write (UTF-8, indented).

    Raises:
        ValueError: If the folder holds only one or two images, which is too
            few to draw two distractors. An empty folder produces an empty
            dataset.
    """
    # Sorted because os.listdir returns entries in arbitrary order; a stable
    # order makes the output reproducible for a seeded random generator.
    # Suffixes include the dot so that a file such as "fakepng" is not
    # mistaken for an image.
    images = sorted(
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    if 0 < len(images) < 3:
        raise ValueError(
            f"Need at least 3 images in {folder_path!r} to build 3 options, "
            f"found {len(images)}"
        )

    dataset = []
    for img in images:
        # Extract the name without extension and replace underscores with spaces
        name = os.path.splitext(img)[0].replace('_', ' ')
        prompt = f"Fly through the gate with a person with {name}"

        # Select two random file names that do not match the current one
        distractors = random.sample([other for other in images if other != img], 2)
        choices = [img, *distractors]
        random.shuffle(choices)

        dataset.append({
            "prompt": prompt,
            "options": choices,
            "correct": choices.index(img) + 1,  # Indexing starts from 1
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)
        
# os.path.join keeps the path portable and avoids a backslash separator inside
# a plain string literal, where '\A' is an invalid escape sequence that newer
# Python versions warn about.
# NOTE(review): the rename cell in this file saves images directly into
# 'Renamed_Patterns' - confirm that the 'Attributes' subfolder is the intended
# input here.
create_dataset(os.path.join('Renamed_Patterns', 'Attributes'), "hr_patterns_attributes_dataset.json")


#### Generate a Given Number of Random Samples

In [47]:
import random

def random_size():
    """Return a gate size picked at random: either 'small' or 'big'."""
    available_sizes = ['small', 'big']
    picked = random.choice(available_sizes)
    return picked

def random_color():
    """Return a gate color picked at random: 'red', 'green' or 'blue'."""
    available_colors = ['red', 'green', 'blue']
    picked = random.choice(available_colors)
    return picked


In [None]:
def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both'):
    """Write a dataset of random image-choice samples, each with several gates.

    Each sample picks one image at random and stores a prompt built from its
    base name, three candidate file names (the image plus two other random
    images, shuffled), the 1-based position of the correct candidate, and a
    ``gates`` mapping ``gate_1`` .. ``gate_<num_gates>`` describing the size
    and color of every gate.

    Args:
        folder_path: Directory containing ``.png``/``.jpg``/``.jpeg`` images.
        output_file: Path of the JSON file to write (UTF-8, indented).
        num_samples: Number of samples to generate.
        num_gates: Number of gates per sample.
        different_colors: When true, every gate of a sample gets a distinct
            color; otherwise each color is drawn with ``random_color()``.
        size_option: ``'big'`` or ``'small'`` fixes the size of all gates; any
            other value draws each size with ``random_size()``.

    Raises:
        ValueError: If samples are requested but fewer than three images are
            available, or if distinct colors are requested for more gates
            than there are colors.
    """
    all_colors = ['red', 'green', 'blue']

    # Sorted because os.listdir returns entries in arbitrary order; a stable
    # order makes the output reproducible for a seeded random generator.
    # Suffixes include the dot so that a file such as "fakepng" is not
    # mistaken for an image.
    images = sorted(
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # Validate up front so a bad configuration fails with a clear message
    # instead of an error raised from inside the random module.
    if num_samples > 0:
        if len(images) < 3:
            raise ValueError(
                f"Need at least 3 images in {folder_path!r} to build 3 options, "
                f"found {len(images)}"
            )
        if different_colors and num_gates > len(all_colors):
            raise ValueError(
                f"Cannot give {num_gates} gates distinct colors: "
                f"only {len(all_colors)} colors are available"
            )

    dataset = []
    for _ in range(num_samples):
        img = random.choice(images)
        name = os.path.splitext(img)[0].replace('_', ' ')  # Replace underscores with spaces
        prompt = f"Fly through the gate with {name}"

        # Choose two random file names that are not the current one
        distractors = random.sample([other for other in images if other != img], 2)
        choices = [img, *distractors]
        random.shuffle(choices)

        # A shuffled copy of the palette yields distinct colors by position.
        palette = random.sample(all_colors, len(all_colors)) if different_colors else None

        gates = {}
        for i in range(1, num_gates + 1):
            color = palette[i - 1] if different_colors else random_color()
            size = size_option if size_option in ('big', 'small') else random_size()
            gates[f"gate_{i}"] = {
                "size": size,
                "color": color,
            }

        dataset.append({
            "prompt": prompt,
            "options": choices,
            "correct": choices.index(img) + 1,  # Indexing starts from 1
            "gates": gates,
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage: 10 samples with 3 small gates each, every gate a different color
create_dataset_with_multiple_gates(
    folder_path="Celebrities",
    output_file="hr_celeb_dataset_with_multiple_gates.json",
    num_samples=10,
    num_gates=3,
    different_colors=True,
    size_option='small',
)

In [53]:
def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both'):
    """Write a dataset of random image-choice samples with textured gates.

    Each sample picks one image at random and stores a prompt built from its
    base name, three candidate file names (the image plus two other random
    images, shuffled), the 1-based position of the correct candidate, and a
    ``gates`` mapping ``gate_1`` .. ``gate_<num_gates>``. Every gate records
    its size, its color and the fixed texture ``"textures/room"``.

    Args:
        folder_path: Directory containing ``.png``/``.jpg``/``.jpeg`` images.
        output_file: Path of the JSON file to write (UTF-8, indented).
        num_samples: Number of samples to generate.
        num_gates: Number of gates per sample.
        different_colors: When true, every gate of a sample gets a distinct
            color; otherwise each color is drawn with ``random_color()``.
        size_option: ``'big'`` or ``'small'`` fixes the size of all gates; any
            other value draws each size with ``random_size()``.

    Raises:
        ValueError: If samples are requested but fewer than three images are
            available, or if distinct colors are requested for more gates
            than there are colors.
    """
    all_colors = ['red', 'green', 'blue']

    # Sorted because os.listdir returns entries in arbitrary order; a stable
    # order makes the output reproducible for a seeded random generator.
    # Suffixes include the dot so that a file such as "fakepng" is not
    # mistaken for an image.
    images = sorted(
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # Validate up front so a bad configuration fails with a clear message
    # instead of an error raised from inside the random module.
    if num_samples > 0:
        if len(images) < 3:
            raise ValueError(
                f"Need at least 3 images in {folder_path!r} to build 3 options, "
                f"found {len(images)}"
            )
        if different_colors and num_gates > len(all_colors):
            raise ValueError(
                f"Cannot give {num_gates} gates distinct colors: "
                f"only {len(all_colors)} colors are available"
            )

    dataset = []
    for _ in range(num_samples):
        img = random.choice(images)
        name = os.path.splitext(img)[0].replace('_', ' ')  # Replace underscores with spaces
        prompt = f"Fly through the gate with {name}"

        # Choose two random file names that are not the current one
        distractors = random.sample([other for other in images if other != img], 2)
        choices = [img, *distractors]
        random.shuffle(choices)

        # A shuffled copy of the palette yields distinct colors by position.
        palette = random.sample(all_colors, len(all_colors)) if different_colors else None

        gates = {}
        for i in range(1, num_gates + 1):
            color = palette[i - 1] if different_colors else random_color()
            size = size_option if size_option in ('big', 'small') else random_size()
            gates[f"gate_{i}"] = {
                "size": size,
                "color": color,
                "texture": "textures/room",
            }

        dataset.append({
            "prompt": prompt,
            "options": choices,
            "correct": choices.index(img) + 1,  # Indexing starts from 1
            "gates": gates,
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage: 10 samples with 3 small gates each, every gate a different color
create_dataset_with_multiple_gates(
    folder_path="Celebrities",
    output_file="hr_celeb_dataset_with_multiple_gates.json",
    num_samples=10,
    num_gates=3,
    different_colors=True,
    size_option='small',
)

In [58]:
def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both', different_shapes=True):
    """Write a dataset of random image-choice samples with shaped gates.

    Each sample picks one image at random and stores a prompt built from its
    base name, three candidate file names (the image plus two other random
    images, shuffled), the 1-based position of the correct candidate, and a
    ``gates`` mapping ``gate_1`` .. ``gate_<num_gates>`` describing the size,
    color and shape of every gate.

    Args:
        folder_path: Directory containing ``.png``/``.jpg``/``.jpeg`` images.
        output_file: Path of the JSON file to write (UTF-8, indented).
        num_samples: Number of samples to generate.
        num_gates: Number of gates per sample.
        different_colors: When true, every gate of a sample gets a distinct
            color; otherwise each color is drawn with ``random_color()``.
        size_option: ``'big'`` or ``'small'`` fixes the size of all gates; any
            other value draws each size with ``random_size()``.
        different_shapes: When true, every gate of a sample gets a distinct
            shape; otherwise each shape is drawn independently at random.

    Raises:
        ValueError: If samples are requested but fewer than three images are
            available, or if distinct colors or shapes are requested for more
            gates than there are colors or shapes.
    """
    all_colors = ['red', 'green', 'blue']
    all_shapes = ['round', 'square', 'triangle']

    # Sorted because os.listdir returns entries in arbitrary order; a stable
    # order makes the output reproducible for a seeded random generator.
    # Suffixes include the dot so that a file such as "fakepng" is not
    # mistaken for an image.
    images = sorted(
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # Validate up front so a bad configuration fails with a clear message
    # instead of an error raised from inside the random module.
    if num_samples > 0:
        if len(images) < 3:
            raise ValueError(
                f"Need at least 3 images in {folder_path!r} to build 3 options, "
                f"found {len(images)}"
            )
        if different_colors and num_gates > len(all_colors):
            raise ValueError(
                f"Cannot give {num_gates} gates distinct colors: "
                f"only {len(all_colors)} colors are available"
            )
        if different_shapes and num_gates > len(all_shapes):
            raise ValueError(
                f"Cannot give {num_gates} gates distinct shapes: "
                f"only {len(all_shapes)} shapes are available"
            )

    dataset = []
    for _ in range(num_samples):
        img = random.choice(images)
        name = os.path.splitext(img)[0].replace('_', ' ')  # Replace underscores with spaces
        prompt = f"Fly through the gate with {name}"

        # Choose two random file names that are not the current one
        distractors = random.sample([other for other in images if other != img], 2)
        choices = [img, *distractors]
        random.shuffle(choices)

        # Shuffled copies of the pools yield distinct values by position.
        palette = random.sample(all_colors, len(all_colors)) if different_colors else None
        shape_pool = random.sample(all_shapes, len(all_shapes)) if different_shapes else None

        gates = {}
        for i in range(1, num_gates + 1):
            color = palette[i - 1] if different_colors else random_color()
            size = size_option if size_option in ('big', 'small') else random_size()
            shape = shape_pool[i - 1] if different_shapes else random.choice(all_shapes)
            gates[f"gate_{i}"] = {
                "size": size,
                "color": color,
                "shape": shape,
            }

        dataset.append({
            "prompt": prompt,
            "options": choices,
            "correct": choices.index(img) + 1,  # Indexing starts from 1
            "gates": gates,
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage: 10 samples with 3 small gates each, all colors and shapes distinct
create_dataset_with_multiple_gates(
    folder_path="Celebrities",
    output_file="hr_celeb_dataset_with_multiple_gates.json",
    num_samples=10,
    num_gates=3,
    different_colors=True,
    size_option='small',
    different_shapes=True,
)

In [60]:
def create_dataset_with_multiple_gates(folder_path, output_file, num_samples, num_gates, different_colors=True, size_option='both'):
    """Write a dataset of random image-choice samples, each with several gates.

    Each sample picks one image at random and stores a prompt built from its
    base name, three candidate file names (the image plus two other random
    images, shuffled), the 1-based position of the correct candidate, the
    fixed texture ``"textures/room"``, and a ``gates`` mapping ``gate_1`` ..
    ``gate_<num_gates>`` describing the size and color of every gate.

    Args:
        folder_path: Directory containing ``.png``/``.jpg``/``.jpeg`` images.
        output_file: Path of the JSON file to write (UTF-8, indented).
        num_samples: Number of samples to generate.
        num_gates: Number of gates per sample.
        different_colors: When true, every gate of a sample gets a distinct
            color; otherwise each color is drawn with ``random_color()``.
        size_option: ``'big'`` or ``'small'`` fixes the size of all gates; any
            other value draws each size with ``random_size()``.

    Raises:
        ValueError: If samples are requested but fewer than three images are
            available, or if distinct colors are requested for more gates
            than there are colors.
    """
    all_colors = ['red', 'green', 'blue']

    # Sorted because os.listdir returns entries in arbitrary order; a stable
    # order makes the output reproducible for a seeded random generator.
    # Suffixes include the dot so that a file such as "fakepng" is not
    # mistaken for an image.
    images = sorted(
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # Validate up front so a bad configuration fails with a clear message
    # instead of an error raised from inside the random module.
    if num_samples > 0:
        if len(images) < 3:
            raise ValueError(
                f"Need at least 3 images in {folder_path!r} to build 3 options, "
                f"found {len(images)}"
            )
        if different_colors and num_gates > len(all_colors):
            raise ValueError(
                f"Cannot give {num_gates} gates distinct colors: "
                f"only {len(all_colors)} colors are available"
            )

    dataset = []
    for _ in range(num_samples):
        img = random.choice(images)
        name = os.path.splitext(img)[0].replace('_', ' ')  # Replace underscores with spaces
        prompt = f"Fly through the gate with {name}"

        # Choose two random file names that are not the current one
        distractors = random.sample([other for other in images if other != img], 2)
        choices = [img, *distractors]
        random.shuffle(choices)

        # A shuffled copy of the palette yields distinct colors by position.
        palette = random.sample(all_colors, len(all_colors)) if different_colors else None

        gates = {}
        for i in range(1, num_gates + 1):
            color = palette[i - 1] if different_colors else random_color()
            size = size_option if size_option in ('big', 'small') else random_size()
            gates[f"gate_{i}"] = {
                "size": size,
                "color": color,
            }

        dataset.append({
            "prompt": prompt,
            "options": choices,
            "correct": choices.index(img) + 1,  # Indexing starts from 1
            "texture": "textures/room",
            "gates": gates,
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# Example usage: 10 samples with 3 gates each, distinct colors and random sizes
create_dataset_with_multiple_gates(
    folder_path="Celebrities",
    output_file="hr_celeb_dataset_with_multiple_gates.json",
    num_samples=10,
    num_gates=3,
    different_colors=True,
    size_option='both',
)