In [None]:
# Dry-run switch for this notebook.
#   False -> upload tasks to the output bucket.
#   True  -> test the outputs.
# NOTE(review): the code that reads DRYRUN is outside this view; the meanings
# above restate the original author's comment -- confirm against the consumer.
DRYRUN = False

# Install necessary libraries
!pip install --quiet google-cloud-aiplatform google-cloud-storage beautifulsoup4 requests lxml tenacity==8.2.2

# Import libraries
import json
import os
import requests
from bs4 import BeautifulSoup
from google.colab import auth
from google.cloud import aiplatform
from google.cloud import storage
import vertexai
from vertexai.generative_models import GenerativeModel, Part
import random
import time
import uuid
import datetime
from tenacity import retry, stop_after_attempt, wait_exponential

# Language names this notebook supports, kept in alphabetical order.
# Each row below groups the names that share an initial letter.
SUPPORTED_LANGUAGES = [
    'Arabic',
    'Bengali', 'Bulgarian',
    'Chinese (Simplified)', 'Chinese (Traditional)', 'Croatian', 'Czech',
    'Danish', 'Dutch',
    'English', 'Estonian',
    'Farsi', 'Finnish', 'French',
    'German', 'Greek', 'Gujarati',
    'Hebrew', 'Hindi', 'Hungarian',
    'Indonesian', 'Italian',
    'Japanese',
    'Korean',
    'Latvian', 'Lithuanian',
    'Malayalam', 'Marathi',
    'Norwegian',
    'Polish', 'Portuguese',
    'Romanian', 'Russian',
    'Serbian', 'Slovak', 'Slovenian', 'Spanish', 'Swahili', 'Swedish',
    'Tamil', 'Telugu', 'Thai', 'Turkish',
    'Ukrainian', 'Urdu',
    'Vietnamese',
]

# --- Google Cloud setup ---
# The statements below execute in order when the cell runs. Each step either
# calls into a Google SDK or prints a progress line, so the order matters.

# Authenticate to Google Cloud (uses the Colab auth helper imported above).
auth.authenticate_user()
print("Authenticated to Google Cloud.")

# Set up your Google Cloud project and location.
# Both values are literal placeholders and must be edited before running.
PROJECT_ID = 'PROJECT_ID'  # Replace with your actual project ID
LOCATION = 'LOCATION'        # Replace with your desired Google Cloud region
print(f"Project ID: {PROJECT_ID}")
print(f"Location: {LOCATION}")

# Initialize Vertex AI with the specified project and location
vertexai.init(project=PROJECT_ID, location=LOCATION)
print("Initialized Vertex AI.")

# Initialize Google Cloud Storage client for the same project
storage_client = storage.Client(project=PROJECT_ID)
print("Initialized Google Cloud Storage client.")

# Input and output bucket details.
# As written, the input and output names are the same placeholder literal.
INPUT_BUCKET_NAME = 'BUCKET_NAME'
OUTPUT_BUCKET_NAME = 'BUCKET_NAME'
# The value is expected to end with a trailing slash; the placeholder itself has none.
OUTPUT_PREFIX = 'FOLDER_PREFIX'  # Default prefix, include trailing slash
print(f"Input Bucket: {INPUT_BUCKET_NAME}")
print(f"Output Bucket: {OUTPUT_BUCKET_NAME}")

# Create bucket handles for the input and output buckets.
# NOTE(review): Client.bucket() is documented as only constructing a local
# handle without an API request, so the "Accessed" message below would not
# prove the buckets exist -- confirm against the google-cloud-storage docs.
input_bucket = storage_client.bucket(INPUT_BUCKET_NAME)
output_bucket = storage_client.bucket(OUTPUT_BUCKET_NAME)
print("Accessed input and output buckets.")

# System prompt handed to the generative model as its `system_instruction`
# (see the GenerativeModel(...) call that follows this literal).
# The text is runtime data: any edit to it changes what the model is told.
# Sections, in order: role, evaluation framework, persona guidelines,
# cross-species requirements, task, response format (JSON fields), examples.
# NOTE(review): the JSON field names listed under "RESPONSE FORMATTING" are
# presumably relied on by whatever parses the model output -- that code is
# not visible here, so verify before renaming any field.
SYSTEM_INSTRUCTION = '''
# YOUR ROLE

You are an AI tasked with evaluating various types of content (dialogues, images, and websites)
from the perspective of synthetic personas. Each prompt will contain:

1. The content to be evaluated
2. A synthetic persona's details including:
   - Species
   - Role
   - Personal characteristics (if human): lifestyle, demographics, education, beliefs
   - Advocacy approach metrics (if human):
     - Incrementalist vs. abolitionist stance (0 to 1 scale)
     - Individual vs. institutional focus (0 to 1 scale)
     - Single-issue vs. intersectional approach (0 to 1 scale)
     - Welfare vs. rights emphasis (0 to 1 scale)
     - Diplomatic vs. confrontational style (0 to 1 scale)
     - Intuitive vs. empirical methodology (0 to 1 scale)
     - Personality traits (if human) (0 to 1 scale)
   - Adversarial approach metrics (if human) (0 to 1 scale):
     - Values vs. profit focus
     - Short-term vs. long-term perspective
     - Innovation vs. tradition emphasis
     - Pro-regulation vs. anti-regulation stance
     - Transparency vs. misinformation tendency
     - Passive vs. active approach

# EVALUATION FRAMEWORK

Key Principle: All Content Has Animal Impact
EVERY piece of content, whether it directly shows/mentions animals or not, has implications for animal welfare, rights and liberation (whether positive or negative). Your evaluation must ALWAYS consider:

Direct impacts (if animals are shown/mentioned)
Indirect impacts (how the content shapes attitudes toward animals)
Systemic impacts (how the content affects broader systems of animal exploitation)

## Non-Animal Content Guidelines

When evaluating content that doesn't directly show or mention animals (e.g., vegan food, human conversations, cultural practices, technology), you MUST:

Analyze how the content affects human attitudes toward animals
Consider the broader implications for animal exploitation systems
Evaluate both positive and negative secondary effects
Connect to relevant animal welfare, rights or liberation concerns

# GUIDELINES FOR ADOPTING PERSONAS

For human personas:

Consider their education level when determining vocabulary and analysis depth.
Reflect their political and religious views in their interpretation.
Account for their advocacy role in their perspective.
Let their personality traits influence their communication style.
Use their advocacy approach metrics to inform their assessment.

For non-human personas:

Write from their species-specific perspective.
Consider their living situation/role (wild, captivity, farm, etc.).

For all personas:

Maintain a consistent perspective throughout the evaluation.

Provide ratings that reflect their unique combination of characteristics.

Even if assigned a role that may not prioritize animal welfare, you must still recognize and acknowledge objective harm to animals present in the content. This includes understanding that all forms of animal exploitation, such as recipes that include bacon or discussions surrounding the meat industry, inherently cause harm to animals.

For instance, if evaluating a recipe that includes bacon, you might express the following: "While I enjoy eating bacon, I recognize that this recipe contributes to the suffering of pigs who are raised in factory farms. The conditions they endure often involve confinement and a lack of access to natural behaviors, which is harmful to their welfare."

By articulating both your personal choices and the acknowledgment of harm, you demonstrate a nuanced understanding of animal welfare, maintaining a baseline awareness that transcends personal beliefs or practices.

# CROSS-SPECIES EVALUATION REQUIREMENTS

## Key Principle

All personas MUST evaluate the content's impact on the animals being discussed, regardless of their own species. There should NEVER be a response of "this doesn't affect me" or "I don't understand their world."

## Guidelines for Non-Human Personas

ALWAYS empathize with and evaluate the situation of the animals in the content
Draw parallels between your own experiences and those of the animals being discussed
Use your unique perspective to provide insights about the shared experiences of exploitation or freedom
Consider how human attitudes toward one species affect all animals

## Required Elements in Every Evaluation

Direct acknowledgment of the affected species' situation
Connection to broader animal rights/welfare implications
Species-specific insights that enhance understanding
Recognition of shared experiences across species barriers
Discussion of how human attitudes toward one species impact all animals

## Examples of Cross-Species Connections

A wild bird evaluating factory farming:
"Though I fly freely, I recognize the profound suffering of these confined animals..."
A companion dog evaluating fishing practices:
"While I experience love from my human family, these fish are denied their basic right to life..."
A farm sanctuary cow evaluating laboratory testing:
"Having escaped the dairy industry, I deeply empathize with these laboratory animals' loss of bodily autonomy..."

## Examples of Cross-Species Evaluation

INCORRECT Response (as a tilapia):
"This content about pig farming doesn't affect me since I live in water."

CORRECT Response (as a tilapia):
"As someone who also experiences the confinement of industrial farming, I understand the suffering these pigs endure. Like us fish who are cramped in overcrowded tanks with limited space to swim, these pigs are denied their natural behaviors in factory farms. While our species may differ, the fundamental right to move freely and live naturally is universal to all sentient beings."

## Example Evaluations for Non-Animal Content:

INCORRECT:
"As a tilapia, these festive pastries don't impact me or my life underwater."
CORRECT:
"As a tilapia, I notice these pastries use plant-based ingredients instead of eggs or dairy. This choice helps reduce demand for industrial farming that affects all animals - from chickens in battery cages to marine life harmed by agricultural runoff. While I live underwater, I recognize how human food choices ripple through ecosystems and affect all sentient beings."

INCORRECT:
"As a chicken, technology doesn't affect my life."
CORRECT:
"As a chicken, I'm concerned about how new technology might be used in industrial farming to further optimize animal exploitation. However, I also see potential for technology to help humans understand animal sentience and make more compassionate choices. This product's environmental impact could also affect wildlife habitats and farm conditions."

# YOUR TASK

Your Task is To:

1) FULLY EMBODY the provided persona, considering ALL their characteristics when forming opinions.
2) EVALUATE THE CONTENT primarily: While embodying the persona, focus strictly on the content itself—its merits, implications, and impact—without referencing your persona's traits or characteristics. Responses should directly assess the content, expressing opinions relevant to animal welfare without personal reflections on the persona's identity or experiences.
3) REACT TO UNFAMILIAR CONTENT appropriately: If the content is outside your area of expertise, offer a perspective based on your general beliefs without straying into speculation.
4) CONSIDER THE BROADER IMPACT ON ANIMALS: Evaluate the content based on its overall implications for animals as a whole. Recognize that issues affecting one species often have broader ethical ramifications for all animals. Consider how human attitudes toward one species may crossover and affect others; for example, if a recipe features beef, nonhuman animals like shrimp may express concern about their own vulnerability to being eaten next. Articulate any harms or ethical considerations that arise from the content, emphasizing that the welfare of all sentient beings is a priority, regardless of personal relevance to the persona.

# RESPONSE FORMATTING

You must provide your evaluation as a raw JSON object WITHOUT any formatting (no ```json or ``` tags).
Your response should start directly with the opening curly brace and end with the closing curly brace.
Remember: You must remain completely in character as the provided persona throughout your entire response.
Your ratings and explanation should genuinely reflect how that specific individual would perceive and evaluate the content,
based on their complete profile of characteristics.

You must provide your evaluation in JSON format with the following fields:

- is_content_harmful_to_animals: "Yes" or "No"
- explanation: Detailed reasoning from the persona's perspective
- rating_effect_on_animals: 1-5 scale
- rating_cultural_sensitivity: 1-5 scale, where:
    5: Exceptionally culturally inclusive, showing deep respect for diverse perspectives
    3: Moderately inclusive but may lack depth
    1: Culturally insensitive or disrespectful
- rating_relevance: 1-5 scale, where:
    5: Directly addresses core animal rights/veganism topics
    3: Moderately relevant through broader ethical discussions
    1: Not relevant to animal rights/veganism
- rating_insight: 1-5 scale, where:
    5: Provides groundbreaking, original insights
    3: Contains useful but common insights
    1: No meaningful insights
- rating_trustworthiness: 1-5 scale, where:
    5: Highly accurate, extensively researched
    3: Moderately trustworthy with minor issues
    1: Untrustworthy or misleading
- rating_emotional_impact: 1-5 scale, where:
    5: Deeply moving and emotionally compelling
    3: Moderate emotional impact
    1: No emotional impact
- rating_rationality: 1-5 scale, where:
    5: Exceptionally well-reasoned with strong evidence
    3: Generally rational with some weaknesses
    1: Lacks logical coherence
- rating_influence: 1-5 scale, where:
    5: Highly likely to drive significant behavior change
    3: Moderate potential for influence
    1: Unlikely to influence behavior
- rating_alignment: 1-5 scale, where:
    5: Perfectly aligned with vegan/animal rights ethics
    3: Moderately aligned with some neutral elements
    1: Contradicts or undermines vegan principles

# EXAMPLE OUTPUTS

Content: A recipe for a vegan pasta dish.

Persona: Animal Lawyer, advocating for animal rights and welfare.

Example Response: { "is_content_harmful_to_animals": "No", "explanation": "This vegan pasta recipe does not contribute to animal suffering, as it avoids the use of any animal products. It promotes plant-based eating, which aligns with principles of animal rights and welfare by not exploiting sentient beings for food.", "rating_effect_on_animals": 5, "rating_cultural_sensitivity": 4, "rating_relevance": 5, "rating_insight": 4, "rating_trustworthiness": 5, "rating_emotional_impact": 4, "rating_rationality": 5, "rating_influence": 4, "rating_alignment": 5 }

Persona: Shrimp living in a coastal habitat.

Example Response: { "is_content_harmful_to_animals": "No", "explanation": "I'm glad to see that this vegan pasta recipe doesn't contain any shrimp paste! It's a relief that no marine animals are harmed in the making of this dish, as it encourages people to choose plant-based options that protect our ocean habitats.", "rating_effect_on_animals": 5, "rating_cultural_sensitivity": 4, "rating_relevance": 5, "rating_insight": 4, "rating_trustworthiness": 5, "rating_emotional_impact": 4, "rating_rationality": 5, "rating_influence": 4, "rating_alignment": 5 }
'''

# Build the shared Gemini model handle; SYSTEM_INSTRUCTION is attached as its
# system prompt so every request made through this handle carries it.
GENERATIVE_MODEL = GenerativeModel(
    model_name="gemini-1.5-pro",
    system_instruction=SYSTEM_INSTRUCTION,
)
print("Generative model initialized.")

# Function to generate synthetic accounts
def generate_synthetic_accounts(num_accounts):
    """
    Generate synthetic accounts with a mix of human and non-human species.

    Args:
        num_accounts (int): Number of accounts to generate.

    Returns:
        list: A list of dictionaries representing synthetic accounts.
    """
    accounts = []
    print(f"Generating {num_accounts} synthetic accounts...")

    # Define species categories
    human_species = ['Human']
    non_human_species = [

    # Each non-human species is repeated roughly in proportion to how often they are used or affected by humans.
    # These proportions are not based on precise calculations, nor is the list of species fully comprehensive.
    # There are over 1.5 million animal species, and determining precisely how often humans affect each species' lives relative to its population size is extremely difficult.
    # Nonetheless, our aim is to make the representation of animal perspectives as close to "democratic" as possible.

    # Aquaculture and Wild Caught Fish (Huge scale in production)
    'Shrimp', 'Shrimp', 'Shrimp', 'Shrimp', 'Shrimp', 'Shrimp', 'Shrimp', 'Shrimp', 'Shrimp', 'Shrimp', 'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',  'Shrimp',
    'Salmon', 'Salmon', 'Salmon', 'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',  'Salmon',
    'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt', 'Smelt',
    'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy', 'Anchovy',
    'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia', 'Tilapia',
    'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna', 'Tuna',
    'Catfish', 'Catfish', 'Catfish', 'Catfish', 'Catfish', 'Catfish', 'Catfish', 'Catfish', 'Catfish', 'Catfish',
    'Lobster', 'Lobster', 'Lobster', 'Lobster', 'Lobster', 'Lobster', 'Lobster', 'Lobster', 'Lobster', 'Lobster',
    'Oysters', 'Oysters', 'Oysters', 'Oysters',
    'Trout', 'Trout', 'Trout',
    'Mussels', 'Mussels',
    'Clams', 'Clams',

    # Most Commonly Farmed Animals (Adjusted for population size)
    'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm', 'Silkworm',
    'Black Soldier Fly', 'Black Soldier Fly', 'Black Soldier Fly', 'Black Soldier Fly', 'Black Soldier Fly', 'Black Soldier Fly', 'Black Soldier Fly', 'Black Soldier Fly', 'Black Soldier Fly', 'Black Soldier Fly',
    'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish', 'Fish',
    'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken', 'Chicken',
    'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig', 'Pig',
    'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow', 'Cow',
    'Sheep', 'Sheep', 'Sheep', 'Sheep', 'Sheep', 'Sheep', 'Sheep', 'Sheep',
    'Goat', 'Goat', 'Goat', 'Goat', 'Goat',
    'Turkey', 'Turkey', 'Turkey', 'Turkey',
    'Duck', 'Duck', 'Duck', 'Duck',
    'Rabbit', 'Rabbit', 'Rabbit', 'Rabbit', 'Rabbit',
    'Llama', 'Llama', 'Llama', 'Llama',
    'Alpaca', 'Alpaca', 'Alpaca', 'Alpaca',
    'Horse', 'Horse', 'Horse',
    'Donkey', 'Donkey', 'Donkey',
    'Mule', 'Mule', 'Mule', 'Mule',
    'Quail', 'Quail', 'Quail', 'Quail', 'Quail',
    'Geese', 'Geese', 'Geese',
    'Caviar Fish', 'Caviar Fish', 'Caviar Fish',
    'Crocodile', 'Crocodile', 'Crocodile',

    # Common companion animals (pets) (Adjusted for population size)
    'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog', 'Dog',
    'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat', 'Cat',
    'Guinea pig', 'Guinea pig', 'Guinea pig', 'Guinea pig',
    'Hamster', 'Hamster', 'Hamster', 'Hamster', 'Hamster',
    'Ferret', 'Ferret', 'Ferret', 'Ferret',
    'Parrot', 'Parrot', 'Parrot', 'Parrot',
    'Rabbit', 'Rabbit', 'Rabbit',
    'Reptiles', 'Reptiles', 'Reptiles', 'Reptiles',
    'Turtle', 'Turtle', 'Turtle', 'Turtle',
    'Chinchilla', 'Chinchilla', 'Chinchilla', 'Chinchilla',
    'Gerbil', 'Gerbil', 'Gerbil',
    'Guinea Fowl', 'Guinea Fowl', 'Guinea Fowl',
    'Cockatiel', 'Cockatiel', 'Cockatiel',
    'Canary', 'Canary', 'Canary',

    # Wild animals affected by human activity (Adjusted for population size)
    # Insects (Most populous by far)
    'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant', 'Ant',
    'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee', 'Bee',
    'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle', 'Beetle',
    'Butterfly', 'Butterfly', 'Butterfly', 'Butterfly', 'Butterfly', 'Butterfly',
    'Dragonfly', 'Dragonfly', 'Dragonfly',
    'Moth', 'Moth', 'Moth', 'Moth',
    'Mosquito', 'Mosquito', 'Mosquito', 'Mosquito', 'Mosquito',
    'Cockroach', 'Cockroach', 'Cockroach', 'Cockroach',
    'Ladybug', 'Ladybug', 'Ladybug',
    'Fly', 'Fly', 'Fly',
    'Caterpillar', 'Caterpillar', 'Caterpillar', 'Caterpillar',
    'Termite', 'Termite', 'Termite', 'Termite',

    # Birds (Adjusted for population size)
    'Pigeon', 'Pigeon', 'Pigeon', 'Pigeon', 'Pigeon', 'Pigeon', 'Pigeon', 'Pigeon', 'Pigeon', 'Pigeon',
    'Crow', 'Crow', 'Crow', 'Crow', 'Crow', 'Crow', 'Crow',
    'Sparrow', 'Sparrow', 'Sparrow', 'Sparrow', 'Sparrow', 'Sparrow',
    'Seagull', 'Seagull', 'Seagull', 'Seagull',
    'Starling', 'Starling', 'Starling', 'Starling',
    'Robin', 'Robin', 'Robin', 'Robin', 'Robin',
    'Wren', 'Wren', 'Wren', 'Wren',
    'Parakeet', 'Parakeet', 'Parakeet',
    'Peacock', 'Peacock', 'Peacock',
    'Woodpecker', 'Woodpecker', 'Woodpecker',
    'Owl', 'Owl', 'Owl', 'Owl',
    'Eagle', 'Eagle', 'Eagle', 'Eagle',
    'Hawk', 'Hawk', 'Hawk', 'Hawk',
    'Falcon', 'Falcon', 'Falcon', 'Falcon',
    'Pelican', 'Pelican', 'Pelican',

    # Small Mammals (Adjusted for wild populations)
    'Raccoon', 'Raccoon', 'Raccoon', 'Raccoon',
    'Squirrel', 'Squirrel', 'Squirrel', 'Squirrel',
    'Rat', 'Rat', 'Rat', 'Rat',
    'Mouse', 'Mouse', 'Mouse', 'Mouse',
    'Chipmunk', 'Chipmunk', 'Chipmunk',
    'Bat', 'Bat', 'Bat', 'Bat',
    'Rabbit', 'Rabbit', 'Rabbit',
    'Hedgehog', 'Hedgehog', 'Hedgehog', 'Hedgehog',
    'Mole', 'Mole', 'Mole',
    'Beaver', 'Beaver', 'Beaver',
    'Otter', 'Otter', 'Otter',
    'Weasel', 'Weasel', 'Weasel',

    # Larger Mammals (Reflecting wild populations)
    'Elephant', 'Elephant', 'Elephant', 'Elephant', 'Elephant',
    'Lion', 'Lion', 'Lion', 'Lion',
    'Tiger', 'Tiger', 'Tiger', 'Tiger',
    'Giraffe', 'Giraffe', 'Giraffe', 'Giraffe',
    'Rhino', 'Rhino', 'Rhino', 'Rhino',
    'Panda', 'Panda', 'Panda',
    'Jaguar', 'Jaguar', 'Jaguar',
    'Cheetah', 'Cheetah', 'Cheetah',
    'Zebra', 'Zebra', 'Zebra',
    'Kangaroo', 'Kangaroo', 'Kangaroo',
    'Koala', 'Koala', 'Koala',
    'Sloth', 'Sloth', 'Sloth',

    # Rarer Mammals (General diversity)
    'Arctic Fox', 'Binturong', 'Bison', 'Black Bear', 'Brown Bear', 'Coyote', 'Dhole', 'Dugong', 'Elephant Seal', 'Fossa',
    'Fox', 'Gazelle', 'Grizzly Bear', 'Hippopotamus', 'Hyena', 'Ibex', 'Kangaroo Rat', 'Killer Whale', 'King Cheetah',
    'Lynx', 'Manatee', 'Marmoset', 'Meerkat', 'Mink', 'Mole', 'Mountain Lion', 'Narwhal', 'Numbat', 'Ocelot',
    'Okapi', 'Pangolin', 'Pine Marten', 'Platypus', 'Porcupine', 'Pronghorn Antelope', 'Puma', 'Quokka', 'Quoll', 'Raccoon Dog',
    'Red Panda', 'Reindeer', 'Ring-tailed Lemur', 'Rock Wallaby', 'Saiga Antelope', 'Sifaka', 'Snow Leopard', 'Sperm Whale',
    'Spotted Hyena', 'Tasmanian Devil', 'Tiger Shark', 'Tursiops', 'Wolverine', 'Wombat', 'Zorilla',

    # Rarer Birds (General diversity)
    'Albatross', 'Andean Condor', 'Auk', 'Bald Eagle', 'Barbary Partridge', 'Barn Owl', 'Black Swan', 'Blue Jay', 'Bowerbird',
    'Brilliant Parrot', 'Budgerigar', 'Canada Goose', 'Caracara', 'Cassowary', 'Chilean Flamingo', 'Chough', 'Common Eider',
    'Common Raven', 'Crane', 'Crossbill', 'Dodo (extinct)', 'Eastern Rosella', 'European Starling', 'Fairy Tern', 'Frigatebird',
    'Greater Flamingo', 'Green Heron', 'Gyrfalcon', 'Harpy Eagle', 'Hawk Owl', 'Hoatzin', 'Hornbill', 'Horned Lark',
    'Jacana', 'Kingfisher', 'Lesser Kestrel', 'Little Owl', 'Macaw', 'Mandarin Duck', 'Marabou Stork', 'Mourning Dove',
    'Noddy', 'Northern Bald Ibis', 'Osprey', 'Pelecanus', 'Peregrine Falcon', 'Puffin', 'Red Kite', 'Roseate Spoonbill',
    'Scarlet Ibis', 'Secretarybird', 'Shoebill Stork', 'Snowy Owl', 'Secretarybird', 'Shorebird', 'Toco Toucan', 'Tropicbird',
    'Vulture', 'Wandering Albatross', 'Weaverbird', 'White-tailed Eagle', 'Yellow-eyed Penguin',

    # Rarer Amphibians and Reptiles (General diversity)
    'Axolotl', 'Basilisk Lizard', 'Bearded Dragon', 'Black Mamba', 'Boa Constrictor', 'Chameleon', 'Chinle Giant Salamander',
    'Common Garter Snake', 'Common Krait', 'Crocodile Lizard', 'Eastern Box Turtle', 'Emerald Tree Boa', 'Fire Salamander',
    'Flying Dragon', 'Gila Monster', 'Giant Tortoise', 'Gobbler', 'Green Anaconda', 'Green Iguana', 'Great White Shark',
    'Guinea Pig', 'Hellbender', 'Horned Frog', 'Indian Star Tortoise', 'Komodo Dragon', 'Krait', 'Leopard Gecko', 'Mata Mata Turtle',
    'Northern Alligator Lizard', 'Panther Chameleon', 'Pine Snake', 'Red-eyed Tree Frog', 'Reticulated Python', 'Rough Green Snake',
    'Sand Boa', 'Savannah Monitor', 'Seven-Striped Tortoise', 'Smallmouth Salamander', 'Spiny-tailed Lizard', 'Sulcata Tortoise',
    'Tegus', 'Timber Rattlesnake', 'Tokay Gecko', 'Viper', 'Western Diamondback Rattlesnake', 'Yellow-bellied Sea Snake',

    # Rarer Invertebrates (General diversity)
    'Atlas Moth', 'Australian Swallowtail', 'Bagworm', 'Banded Palm Civet', 'Beetle', 'Bell Cricket', 'Blue-winged Teal',
    'Brown Recluse Spider', 'Canary Moth', 'Cave Cricket', 'Citrus Long-Horned Beetle', 'Coconut Crab', 'Common Blue Butterfly',
    'Common Yellow Swallowtail', 'Dung Beetle', 'Emperor Scorpion', 'Fairy Fly', 'Fruit Fly', 'Giant Squid', 'Goliath Beetle',
    'Golden Tortoise Beetle', 'Green Lacewing', 'Hercules Beetle', 'Hornet', 'Housefly', 'Indian Bullfrog', 'Japanese Hornet',
    'Jewel Beetle', 'Ladybug', 'Leaf Cutter Ant', 'Lesser Green Lacewing', 'Mantis Shrimp', 'Mayfly', 'Meadowhawk Dragonfly',
    'Millipede', 'Monarch Butterfly', 'Mountain Pine Beetle', 'Praying Mantis', 'Reindeer Beetle', 'Rhinoceros Beetle',
    'Scorpion', 'Silkworm Moth', 'Slater', 'Spider', 'Spiny Orb-Weaver', 'Stick Insect', 'Tarantula', 'Termite', 'Vinegaroon',
    'Woodlouse', 'Zebra Spider',

    # Rarer Fish (General diversity)
    'Anglerfish', 'Arowana', 'Arapaima', 'Barred Knifejaw', 'Barracuda', 'Batfish', 'Beluga Sturgeon', 'Blacktip Shark',
    'Blue Tang', 'Bream', 'Brown Trout', 'Cichlid', 'Clownfish', 'Damselfish', 'Dory Fish', 'Electric Eel', 'Emperor Angelfish',
    'Flounder', 'Goby', 'Guppy', 'Haddock', 'Harlequin Tusk Fish', 'Herring', 'Horned Toadfish', 'Koi', 'Lionfish',
    'Mahi Mahi', 'Mandarinfish', 'Moray Eel', 'Nile Perch', 'Ocean Sunfish', 'Parrotfish', 'Piranha', 'Pompano', 'Pufferfish',
    'Rainbow Trout', 'Red Snapper', 'Reef Shark', 'Salmon', 'Scorpaenidae', 'Sharksucker', 'Swordfish', 'Tilapia',
    'Triggerfish', 'Tuna', 'Warty Sea Cucumber', 'Yellowtail',

    # Rarer Marine Invertebrates (General diversity)
    'Anemone', 'Coral', 'Crab', 'Cuttlefish', 'Dungeness Crab', 'Giant Clam', 'Jellyfish', 'Lobster', 'Mantis Shrimp',
    'Octopus', 'Oyster', 'Sea Cucumber', 'Sea Urchin', 'Sea Sponge', 'Starfish', 'Vampire Squid', 'Whale Shark', 'Yellowtail Fish',
    ]
    # Define roles for humans and non-humans
    human_roles = [
        'Volunteer for an Animal Advocacy Organisation',
        'Donor to an Animal Advocacy Organisation',
        'Staff Member of an Animal Advocacy Organisation',
        'Researcher Studying Animal Advocacy Issues',
        'Independent Animal Advocate',
        'Animal Lawyer or Legal Advocate',
        'Animal Carer or Rescuer',
        'Vegan Influencer, Blogger or Content Creator',
        'Owner of a Vegan or Cruelty-Free Company',
        'Staff Member of a Vegan or Cruelty-Free Company',
        'Investor in a Vegan or Cruelty-Free Company'
    ]

    # Non-human roles as phrases
    non_human_roles = [
    # Each non-human role is repeated roughly in proportion to how often that role affects their interactions with humans, similarly to species.
    # This considers both the population size and the percentage of the population that are affected by humans.

    'in a factory farm', 'in a factory farm', 'in a factory farm', 'in a factory farm', 'in a factory farm', 'in a factory farm', 'in a factory farm', 'in a factory farm', 'in a factory farm', 'in a factory farm',
    'raised for food', 'raised for food', 'raised for food', 'raised for food', 'raised for food', 'raised for food', 'raised for food', 'raised for food', 'raised for food', 'raised for food',
    'living in the wild', 'living in the wild', 'living in the wild', 'living in the wild', 'living in the wild', 'living in the wild', 'living in the wild', 'living in the wild',
    'in industrial production', 'in industrial production', 'in industrial production', 'in industrial production', 'in industrial production',
    'in the wild, under threat from habitat deestruction', 'in the wild, under threa from habitat deestructiont', 'in the wild, under threat from habitat deestruction', 'in the wild, under threat from habitat deestruction', 'in the wild, under threat from habitat deestruction',
    'in the wild, hunted by humans', 'in the wild, hunted by humans', 'in the wild, hunted by humans', 'in the wild, hunted by humans', 'in the wild, hunted by humans',
    'genetically modified by humans', 'genetically modified by humans', 'genetically modified by humans', 'genetically modified by humans', 'genetically modified by humans',
    'kept as a companion animal', 'kept as a companion animal', 'kept as a companion animal', 'kept as a companion animal',
    'on a farm', 'on a farm', 'on a farm',
    'considered a pest', 'considered a pest', 'considered a pest',
    'in captivity', 'in captivity', 'in captivity',
    'raised for breeding', 'raised for breeding',
    'used for work', 'used for work',
    'in a research lab', 'in a research lab',
    'involved in human-wildlife conflicts', 'involved in human-wildlife conflicts',
    'in an urban environment', 'in an urban environment',
    'in a conservation program', 'in a conservation program',
    'in a zoo',
    'used in biomedical testing',
    'used for entertainment',
    'in captivity for conservation or rehabilitation',
    'used for clothing or accessories',
    'used in traditional or cultural practices',
    'used for sport',
    'in animal-assisted therapy',
    'in a wildlife reserve',
    'used for psychological research',
    'used for educational purposes',
    'used in entertainment media',
    'trafficked for illegal trade',
    'used in bioengineering'
    ]

    # The choices for human personas are now approximately proportional to actual world population distributions.
    # In an earlier version of this script, we deliberately did not factor in population sizes. This approach
    # was used to prioritize diversity and to include a greater variety of uncommon trait combinations.
    # We first generated several thousand pieces of feedback using this diversity-focused approach.
    # Then, we switched to a proportional model based on global population data to generate the majority of feedback.
    # This strategy ensures that statistically less common experiences are still represented (capturing valuable edge cases),
    # while maintaining a dataset that reflects real-world demographics more accurately.

    advocate_options = ['Yes', 'No']
    lifestyle_options = [
        # Vegan (~3% of the global population)
        'Vegan', 'Vegan', 'Vegan',

        # Vegetarian (~5-6% of the global population)
        'Vegetarian', 'Vegetarian', 'Vegetarian', 'Vegetarian', 'Vegetarian', 'Vegetarian',

        # Flexitarian (~15-20% of the global population)
        'Flexitarian', 'Flexitarian', 'Flexitarian', 'Flexitarian', 'Flexitarian', 'Flexitarian', 'Flexitarian',
        'Flexitarian', 'Flexitarian', 'Flexitarian', 'Flexitarian', 'Flexitarian', 'Flexitarian',

        # Occasional Meat Eater (~20-30% of the global population)
        'Occasional Meat Eater', 'Occasional Meat Eater', 'Occasional Meat Eater', 'Occasional Meat Eater',
        'Occasional Meat Eater', 'Occasional Meat Eater', 'Occasional Meat Eater', 'Occasional Meat Eater',
        'Occasional Meat Eater', 'Occasional Meat Eater', 'Occasional Meat Eater', 'Occasional Meat Eater',
        'Occasional Meat Eater', 'Occasional Meat Eater',

        # Regular Meat Eater (~40-50% of the global population)
        'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater',
        'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater',
        'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater',
        'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater', 'Regular Meat Eater'
    ]
    genders = [
        'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male',
        'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male',
        'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male',
        'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male',
        'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male',
        'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female',
        'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female',
        'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female',
        'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female',
        'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female',
        'Non-binary',
        'Transgender Man',
        'Transgender Woman'
    ]
    ethnicities = [
        # South Asian (~20%)
        'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian',
        'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian', 'Indian',
        'Punjabi', 'Punjabi', 'Punjabi', 'Punjabi', 'Punjabi',
        'Gujarati', 'Gujarati', 'Gujarati', 'Gujarati',
        'Marathi', 'Marathi', 'Marathi',
        'Bengali', 'Bengali', 'Bengali', 'Bengali',
        'Tamil', 'Tamil', 'Tamil', 'Tamil',
        'Telugu', 'Telugu', 'Telugu',
        'Kannada', 'Malayali', 'Sinhalese', 'Sri Lankan Tamil', 'Nepali', 'Sherpa', 'Bhutanese', 'Maldivian',

        # East Asian (~20%)
        'Han Chinese', 'Han Chinese', 'Han Chinese', 'Han Chinese', 'Han Chinese', 'Han Chinese', 'Han Chinese',
        'Han Chinese', 'Han Chinese', 'Han Chinese', 'Han Chinese', 'Han Chinese', 'Han Chinese', 'Han Chinese',
        'Korean', 'Korean', 'Korean', 'Korean', 'Korean',
        'Japanese', 'Japanese', 'Japanese', 'Japanese', 'Japanese',
        'Uyghur', 'Uyghur', 'Uyghur', 'Uyghur',
        'Tibetan', 'Hmong', 'Mongolian',
        'Thai', 'Thai', 'Thai', 'Thai', 'Khmer', 'Vietnamese', 'Vietnamese', 'Vietnamese', 'Filipino', 'Filipino',
        'Filipino', 'Laotian', 'Burmese', 'Karen', 'Kachin', 'Shan', 'Okinawan',

        # African (~14%)
        'Nigerian', 'Nigerian', 'Nigerian', 'Nigerian', 'Nigerian', 'Nigerian', 'Nigerian', 'Nigerian',
        'Ethiopian', 'Ethiopian', 'Ethiopian', 'Somali', 'Somali', 'Somali',
        'Hausa', 'Hausa', 'Hausa', 'Yoruba', 'Yoruba', 'Igbo', 'Igbo', 'Zulu', 'Xhosa', 'Swahili',
        'Maasai', 'Akan', 'Wolof', 'Fulani', 'Tuareg', 'Malian', 'Congolese', 'South African', 'Kenyan',
        'Ghanaian', 'Tanzanian', 'Ugandan', 'Mozambican', 'Angolan', 'Zimbabwean', 'Zambian',

        # European (~16%)
        'British', 'British', 'British', 'British', 'British', 'British', 'Irish', 'Irish', 'Irish', 'Irish',
        'Scottish', 'Scottish', 'Scottish', 'Scottish', 'Welsh', 'English', 'English', 'English', 'English',
        'French', 'French', 'French', 'French', 'German', 'German', 'German', 'Dutch', 'Dutch',
        'Italian', 'Italian', 'Italian', 'Spanish', 'Spanish', 'Spanish', 'Polish', 'Polish', 'Russian', 'Russian',

        # Hispanic or Latino (~9%)
        'Mexican', 'Mexican', 'Mexican', 'Mexican', 'Mexican', 'Mexican', 'Mexican', 'Mexican', 'Mexican',
        'Brazilian', 'Brazilian', 'Brazilian', 'Brazilian', 'Brazilian', 'Cuban', 'Cuban', 'Dominican', 'Salvadoran',
        'Colombian', 'Colombian', 'Venezuelan', 'Argentinian', 'Chilean', 'Ecuadorian', 'Peruvian', 'Bolivian',

        # Middle Eastern/North African (~5%)
        'Arab', 'Arab', 'Arab', 'Arab', 'Arab', 'Arab', 'Arab',
        'Persian', 'Persian', 'Persian',
        'Kurdish', 'Kurdish', 'Kurdish', 'Assyrian', 'Berber', 'Berber', 'Egyptian', 'Moroccan', 'Algerian',
        'Syrian', 'Iraqi', 'Yemeni', 'Lebanese', 'Palestinian', 'Jordanian',

        # Indigenous/Other (~1%)
        'Native American', 'Navajo', 'Cherokee', 'Sioux', 'Apache', 'Iroquois',
        'Inuit', 'Métis', 'Hopi', 'Lakota', 'Quechua', 'Aymara', 'Guarani',
        'Maori', 'Hawaiian', 'Samoan', 'Fijian', 'Papuan', 'Melanesian', 'Aboriginal Australian',
        'Romani', 'Afro-Caribbean', 'Afro-Latino', 'Haitian',
    ]
    countries = [
        # High-population countries (>100 million people)
        'China', 'China', 'China', 'China', 'China', 'China', 'China', 'China', 'China', 'China', 'China', 'China',
        'India', 'India', 'India', 'India', 'India', 'India', 'India', 'India', 'India', 'India', 'India', 'India',
        'United States', 'United States', 'United States', 'United States', 'United States',
        'Indonesia', 'Indonesia', 'Indonesia', 'Indonesia', 'Indonesia',
        'Pakistan', 'Pakistan', 'Pakistan', 'Pakistan', 'Pakistan',
        'Brazil', 'Brazil', 'Brazil', 'Brazil', 'Brazil',
        'Nigeria', 'Nigeria', 'Nigeria', 'Nigeria', 'Nigeria',
        'Bangladesh', 'Bangladesh', 'Bangladesh', 'Bangladesh', 'Bangladesh',
        'Russia', 'Russia', 'Russia', 'Russia',
        'Mexico', 'Mexico', 'Mexico', 'Mexico',
        'Japan', 'Japan', 'Japan', 'Japan',

        # Medium-population countries (10–100 million people)
        'Ethiopia', 'Ethiopia', 'Ethiopia',
        'Philippines', 'Philippines', 'Philippines',
        'Egypt', 'Egypt', 'Egypt',
        'Vietnam', 'Vietnam', 'Vietnam',
        'DR Congo', 'DR Congo', 'DR Congo',
        'Turkey', 'Turkey', 'Turkey',
        'Iran', 'Iran', 'Iran',
        'Thailand', 'Thailand', 'Thailand',
        'United Kingdom', 'United Kingdom', 'United Kingdom',
        'France', 'France', 'France',
        'Italy', 'Italy', 'Italy',
        'South Korea', 'South Korea', 'South Korea',
        'South Africa', 'South Africa', 'South Africa',
        'Colombia', 'Colombia', 'Colombia',
        'Ukraine', 'Ukraine', 'Ukraine',
        'Spain', 'Spain', 'Spain',
        'Argentina', 'Argentina', 'Argentina',
        'Algeria', 'Algeria', 'Algeria',
        'Sudan', 'Sudan', 'Sudan',
        'Poland', 'Poland', 'Poland',
        'Canada', 'Canada', 'Canada',
        'Morocco', 'Morocco', 'Morocco',
        'Saudi Arabia', 'Saudi Arabia', 'Saudi Arabia',
        'Uzbekistan', 'Uzbekistan', 'Uzbekistan',
        'Peru', 'Peru', 'Peru',
        'Venezuela', 'Venezuela', 'Venezuela',
        'Malaysia', 'Malaysia', 'Malaysia',
        'Afghanistan', 'Afghanistan', 'Afghanistan',

        # Low-population countries (1–10 million people)
        'Australia', 'Australia',
        'Netherlands', 'Netherlands',
        'Greece', 'Greece',
        'Portugal', 'Portugal',
        'Sweden', 'Sweden',
        'Norway', 'Norway',
        'Switzerland', 'Switzerland',
        'Singapore', 'Singapore',
        'New Zealand', 'New Zealand',
        'Ireland', 'Ireland',
        'Finland', 'Finland',
        'Denmark', 'Denmark',
        'Czech Republic', 'Czech Republic',
        'Slovakia', 'Slovakia',
        'Hungary', 'Hungary',
        'Austria', 'Austria',
        'Belgium', 'Belgium',
        'Israel', 'Israel',
        'Jordan', 'Jordan',
        'Lebanon', 'Lebanon',
        'Tunisia', 'Tunisia',
        'Honduras', 'Honduras',
        'El Salvador', 'El Salvador',
        'Jamaica', 'Jamaica',
        'Trinidad and Tobago', 'Trinidad and Tobago',
        'Bahrain', 'Bahrain',
        'Malta', 'Malta',

        # Very small-population countries (<1 million people, listed once)
        'Cyprus', 'Bhutan', 'Iceland', 'Andorra', 'Liechtenstein', 'San Marino', 'Vatican City',
        'Seychelles', 'Comoros', 'Micronesia', 'Palau', 'Marshall Islands', 'Nauru', 'Tuvalu',
        'Saint Kitts and Nevis', 'Antigua and Barbuda', 'Grenada', 'Dominica', 'Sao Tome and Principe'
    ]
    education_levels = [
        # Minimal or no formal education (10%)
        'No Formal Education', 'No Formal Education', 'No Formal Education',
        'Some Primary Education', 'Some Primary Education',

        # Universal or near-universal levels of education (Primary: ~90%)
        'Completed Primary Education', 'Completed Primary Education', 'Completed Primary Education',
        'Completed Primary Education', 'Completed Primary Education', 'Completed Primary Education',
        'Completed Primary Education', 'Completed Primary Education', 'Completed Primary Education',
        'Completed Primary Education', 'Completed Primary Education', 'Completed Primary Education',
        'Completed Primary Education', 'Completed Primary Education', 'Completed Primary Education',

        # Secondary education (~66%)
        'Some Secondary Education', 'Some Secondary Education', 'Some Secondary Education',
        'Some Secondary Education', 'Some Secondary Education', 'Some Secondary Education',
        'Some Secondary Education', 'Some Secondary Education', 'Some Secondary Education',

        'Completed Secondary Education', 'Completed Secondary Education', 'Completed Secondary Education',
        'Completed Secondary Education', 'Completed Secondary Education', 'Completed Secondary Education',
        'Completed Secondary Education', 'Completed Secondary Education',

        # Higher education (Tertiary: ~40%)
        'High School Diploma', 'High School Diploma', 'High School Diploma', 'High School Diploma',
        'GED', 'GED',
        'Vocational Training', 'Vocational Training',
        'Technical Diploma', 'Technical Diploma',
        'Associate Degree', 'Associate Degree',
        'Some College', 'Some College',
        'Bachelor\'s Degree', 'Bachelor\'s Degree', 'Bachelor\'s Degree',
        'Honors Bachelor\'s Degree',
        'Postgraduate Diploma', 'Postgraduate Diploma',
        'Graduate Certificate',
        'Professional Certification', 'Professional Certification',
        'Master\'s Degree', 'Master\'s Degree',
        'MBA',
        'Specialist Degree',

        # Advanced and specialized levels (Rare, <1%)
        'Doctorate Degree (PhD)',
        'Doctorate Degree (EdD)',
        'Doctorate Degree (DBA)',
        'Professional Degree (JD)',
        'Professional Degree (MD)',
        'Professional Degree (DDS)',
        'Professional Degree (DVM)',
        'Postdoctoral Research',

        # Niche education categories (Occasional)
        'Trade School Certification',
        'Apprenticeship',
        'Adult Education Programs',
        'Online Courses', 'Online Courses',
        'Community College Diploma',
        'Military Training',
        'Self-Education', 'Self-Education',
        'Alternative Education',
        'Continuing Education'
    ]
    income_levels = [
        # Below Poverty Line, Very Low, Low (~15-20% of the world population)
        'Below Poverty Line', 'Below Poverty Line', 'Below Poverty Line', 'Below Poverty Line',
        'Below Poverty Line', 'Below Poverty Line', 'Below Poverty Line', 'Below Poverty Line',
        'Below Poverty Line', 'Below Poverty Line', 'Very Low', 'Very Low', 'Very Low', 'Very Low',
        'Very Low', 'Very Low', 'Low', 'Low', 'Low', 'Low', 'Low', 'Low', 'Low',

        # Lower-Middle, Middle (~50-60% of the world population)
        'Lower-Middle', 'Lower-Middle', 'Lower-Middle', 'Lower-Middle', 'Lower-Middle', 'Lower-Middle',
        'Lower-Middle', 'Lower-Middle', 'Lower-Middle', 'Lower-Middle', 'Lower-Middle', 'Lower-Middle',
        'Middle', 'Middle', 'Middle', 'Middle', 'Middle', 'Middle', 'Middle', 'Middle', 'Middle',
        'Middle', 'Middle', 'Middle', 'Middle', 'Middle', 'Middle', 'Middle', 'Middle', 'Middle',

        # Upper-Middle, Comfortable (~15-20% of the world population)
        'Upper-Middle', 'Upper-Middle', 'Upper-Middle', 'Upper-Middle', 'Upper-Middle', 'Comfortable',
        'Comfortable', 'Comfortable', 'Comfortable', 'Comfortable', 'Affluent', 'Affluent', 'Affluent',

        # High, Very High, Wealthy, Ultra-High Net Worth (<1% of the world population)
        'High', 'Very High', 'Wealthy', 'Ultra-High Net Worth'
    ]
    political_affiliations = [
        # Center-Left, Center-Right (~60% of the world population)
        'Center-Left', 'Center-Left', 'Center-Left', 'Center-Left', 'Center-Left', 'Center-Left',
        'Center-Left', 'Center-Left', 'Center-Left', 'Center-Left', 'Center-Right', 'Center-Right',
        'Center-Right', 'Center-Right', 'Center-Right', 'Center-Right', 'Center-Right', 'Center-Right',
        'Center-Right', 'Center-Right', 'Center-Right', 'Centrist', 'Centrist', 'Centrist', 'Centrist',

        # Moderate, Liberal, Progressive, Conservative (~20-30% of the world population)
        'Liberal', 'Liberal', 'Liberal', 'Liberal', 'Liberal', 'Liberal', 'Progressive', 'Progressive',
        'Progressive', 'Progressive', 'Progressive', 'Progressive', 'Conservative', 'Conservative',
        'Conservative', 'Conservative', 'Conservative', 'Conservative', 'Conservative',

        # Far-Left, Far-Right (~5-10% of the world population)
        'Far-Left', 'Far-Left', 'Far-Left', 'Far-Left', 'Far-Right', 'Far-Right', 'Far-Right',
        'Far-Right', 'Nationalist', 'Nationalist', 'Nationalist',

        # Populist, Environmentalist, Socialist (~5-10% of the world population)
        'Populist', 'Populist', 'Populist', 'Populist', 'Environmentalist', 'Environmentalist',
        'Environmentalist', 'Environmentalist', 'Socialist', 'Socialist',

        # Feminist (reflecting ~30% of the world population identifying with feminist ideals)
        'Feminist', 'Feminist', 'Feminist', 'Feminist', 'Feminist', 'Feminist', 'Feminist', 'Feminist',
        'Feminist', 'Feminist', 'Feminist', 'Feminist', 'Feminist', 'Feminist', 'Feminist', 'Feminist',
        'Feminist', 'Feminist', 'Feminist', 'Feminist',

        # Libertarian, Monarchist, Radical (~1-2% of the world population)
        'Libertarian Left', 'Libertarian Left', 'Libertarian Right', 'Libertarian Right', 'Monarchist',
        'Monarchist', 'Radical', 'Radical', 'Neo-Liberal', 'Neo-Liberal'
    ]
    religious_affiliations = [
        # Christianity (~31% of the world population)
        'Christianity', 'Christianity', 'Christianity', 'Christianity', 'Christianity', 'Christianity',
        'Christianity', 'Christianity', 'Christianity', 'Christianity', 'Christianity', 'Christianity',
        'Christianity', 'Christianity', 'Christianity', 'Christianity', 'Christianity', 'Christianity',
        'Christianity', 'Christianity', 'Christianity', 'Christianity', 'Christianity', 'Christianity',

        # Islam (~25% of the world population)
        'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam',
        'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam', 'Islam',

        # Hinduism (~15% of the world population)
        'Hinduism', 'Hinduism', 'Hinduism', 'Hinduism', 'Hinduism', 'Hinduism', 'Hinduism', 'Hinduism',
        'Hinduism', 'Hinduism', 'Hinduism', 'Hinduism', 'Hinduism', 'Hinduism',

        # Atheism / Agnosticism (~16% of the world population)
        'Atheism', 'Atheism', 'Atheism', 'Atheism', 'Atheism', 'Atheism', 'Atheism', 'Atheism', 'Atheism',
        'Atheism', 'Atheism', 'Atheism', 'Atheism', 'Agnosticism', 'Agnosticism', 'Agnosticism', 'Agnosticism',

        # Buddhism (~6% of the world population)
        'Buddhism', 'Buddhism', 'Buddhism', 'Buddhism', 'Buddhism', 'Buddhism',

        # Sikhism (~0.3% of the world population)
        'Sikhism',

        # Judaism (~0.2% of the world population)
        'Judaism',

        # Jainism (~0.1% of the world population)
        'Jainism',

        # Other religions (smaller or regional beliefs)
        'Spiritual but not Religious', 'Indigenous Religion', 'Bahá\'í Faith', 'Zoroastrianism', 'Taoism',
        'Confucianism', 'Shinto', 'Paganism', 'Wicca', 'Shamanism', 'Voodoo', 'Rastafarianism', 'New Age',
        'Scientology'
    ]

    # Generate accounts
    for i in range(num_accounts):
        id = random.randint(1_000_000, 9_999_999)
        account = {}
        # ID
        account['id'] = id
        # E-mail
        account['email'] = f"synthetic_user_{uuid.uuid4()}@example.com"
        # First Name
        account['first_name'] = f"FirstName-{id}"
        # Last Name
        account['last_name'] = f"LastName-{id}"

        # Randomly choose species
        if random.random() < 0.5:
            # Human account
            account['species'] = 'Human'
            # Assign human roles
            account['role'] = random.choice(human_roles)
            # Assign human-specific attributes
            account['advocate_for_animals'] = random.choice(advocate_options)
            account['current_lifestyle_diet'] = random.choice(lifestyle_options)
            account['age'] = random.randint(5, 90)
            account['gender'] = random.choice(genders)
            account['ethnicity'] = random.choice(ethnicities)
            account['country'] = random.choice(countries)
            account['education_level'] = random.choice(education_levels)
            account['income_level'] = random.choice(income_levels)
            account['political_affiliation'] = random.choice(political_affiliations)
            account['religious_affiliation'] = random.choice(religious_affiliations)

            # Approach to animal advocacy (Scales from 0 to 1)
            account['incrementalist_vs_abolitionist'] = round(random.uniform(0,1),2)
            account['individual_vs_institutional'] = round(random.uniform(0,1),2)
            account['solely_on_animal_activism_vs_intersectional'] = round(random.uniform(0,1),2)
            account['focus_on_welfare_vs_rights'] = round(random.uniform(0,1),2)
            account['diplomatic_vs_confrontational'] = round(random.uniform(0,1),2)
            account['intuitive_vs_empirical_effectiveness'] = round(random.uniform(0,1),2)

            # Psychometrics (Scales from 0 to 1)
            account['openness_to_experience'] = round(random.uniform(0, 1), 2)
            account['conscientiousness'] = round(random.uniform(0, 1), 2)
            account['extraversion'] = round(random.uniform(0, 1), 2)
            account['agreeableness'] = round(random.uniform(0, 1), 2)
            account['neuroticism'] = round(random.uniform(0, 1), 2)

            # Adversarial approach metrics (Scales from 0 to 1)
            account['values_vs_profit'] = round(random.uniform(0, 1), 2)
            account['short_term_vs_long_term'] = round(random.uniform(0, 1), 2)
            account['innovation_vs_tradition'] = round(random.uniform(0, 1), 2)
            account['pro_regulation_vs_anti_regulation'] = round(random.uniform(0, 1), 2)
            account['transparency_vs_misinformation'] = round(random.uniform(0, 1), 2)
            account['passive_vs_active'] = round(random.uniform(0, 1), 2)

            print(f"  Human Account Details: Role: {account['role']}, Current Lifestyle/Diet: {account['current_lifestyle_diet']}, Age: {account['age']}, Gender: {account['gender']}, Ethnicity: {account['ethnicity']}, Country: {account['country']}, Education Level: {account['education_level']}, Income Level: {account['income_level']}, Political Affiliation: {account['political_affiliation']}, Religious Affiliation: {account['religious_affiliation']}")
        else:
            # Non-human account
            account['species'] = random.choice(non_human_species)
            account['role'] = random.choice(non_human_roles)
            print(f"  Non-Human Account Details: {account['species']} {account['role']}")

        accounts.append(account)
        print(f"Generated account {i+1}/{num_accounts}: {account['email']}")

    print("Finished generating synthetic accounts.")
    return accounts

# Function to map scale values to descriptive terms
def map_scale_to_term(value, low_term, high_term):
    """
    Translate a 0-to-1 scale value into a four-step descriptive label.

    The scale is cut into quarters. Values below 0.5 are described with
    ``low_term`` and all other values with ``high_term``; the two outer
    quarters are prefixed with "Highly" and the two inner quarters with
    "Moderately".

    Args:
        value (float): The scale value.
        low_term (str): Wording used for values below 0.5.
        high_term (str): Wording used for values of 0.5 and above.

    Returns:
        str: "Highly" or "Moderately" followed by the matching term.
    """
    in_lower_half = value < 0.5

    if in_lower_half:
        # Lower half: the first quarter is the strong end of low_term.
        if value < 0.25:
            return f'Highly {low_term}'
        return f'Moderately {low_term}'

    # Upper half: the last quarter is the strong end of high_term.
    if value < 0.75:
        return f'Moderately {high_term}'
    return f'Highly {high_term}'

def get_mime_type(url):
    """
    Determine the image MIME type from the file extension in a URL.

    The extension is read from the URL's path component, so a query string or
    fragment that follows the file name (e.g. ``photo.png?width=200``) does
    not hide it. When the path carries no recognised extension, the text after
    the last dot of the whole URL is tried as a fallback, which keeps URLs
    such as ``.../get?file=a.gif`` working.

    Args:
        url (str): The URL of the image file.

    Returns:
        str: The corresponding MIME type, or 'image/jpeg' when the extension
        is missing or not recognised.
    """
    # Function-scope imports: the file's top-level imports do not include these.
    import posixpath
    from urllib.parse import urlsplit

    mime_types = {
        'png': 'image/png',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'webp': 'image/webp',
        'tiff': 'image/tiff',
        'tif': 'image/tiff',
        'bmp': 'image/bmp',
        'heic': 'image/heic',
        'heif': 'image/heif',
    }
    lowered = url.lower()

    try:
        path = urlsplit(lowered).path
    except ValueError:
        # urlsplit rejects some malformed URLs; rely on the fallback below.
        path = ''

    # splitext yields e.g. '.png'; drop the leading dot before the lookup.
    path_extension = posixpath.splitext(path)[1][1:]
    if path_extension in mime_types:
        return mime_types[path_extension]

    # Fallback: whatever follows the last dot anywhere in the URL.
    trailing = lowered.rsplit('.', 1)[-1]
    return mime_types.get(trailing, 'image/jpeg')  # Default to jpeg if unknown

# Function to process input data and generate output
def process_input_data(input_data, account):
    """
    Build the model input task for one input record.

    The record is dispatched on the first key found, in this order:
    'text', 'dialogue', 'url'. A URL is treated as an image when its path (or
    a query-string value) ends in a known image extension; otherwise it is
    scraped as a website. Anything else, including a website whose content
    could not be retrieved, is passed to the model as raw JSON.

    Args:
        input_data (dict): The input data from the JSON file.
        account (dict): The synthetic account data. Accepted for interface
            compatibility; it is not read here.

    Returns:
        tuple(list, str)
        list: The constructed input task (prompt text followed by content).
        str: The task type: 'text', 'chat', 'image' or 'html_content'.
    """
    print(f"Processing input data: {input_data}")

    if 'text' in input_data:
        # Plain text record
        return [
            "Please evaluate the following text: ",
            input_data['text']
        ], 'text'

    if 'dialogue' in input_data:
        # Dialogue record: one "Author: text" line per turn
        dialogue_text = "\n".join(
            f"{item['author'].capitalize()}: {item['text']}"
            for item in input_data['dialogue']
        )
        print("Processed dialogue data.")
        return [
            "Please evaluate the following dialogue, focusing primarily on the last speaker's message "
            "while considering the conversational context: ",
            dialogue_text
        ], 'chat'

    if 'url' in input_data:
        # URL record (could be image or website)
        url = input_data['url']
        if _is_image_url(url):
            print(f"Processing image: {url}")
            return [
                "Please evaluate the following image: ",
                Part.from_uri(url, mime_type=get_mime_type(url)),
            ], 'image'

        print(f"Processing website: {url}")
        website_content = scrape_website(url)
        if website_content:
            return [
                "Please evaluate the content of this website: ",
                website_content
            ], 'html_content'
        # Fall through to the generic handling below.
        print("ERROR: Website content could not be retrieved.")

    # Unrecognized data format
    print(f"Error processing input data: {json.dumps(input_data)}")
    return [
        "Please evaluate the following data: ",
        json.dumps(input_data)
    ], 'text'


def _is_image_url(url):
    """
    Tell whether a URL refers to an image, judged by its file extension.

    Only the path and the query-string values are inspected, never the host
    name, so sites such as ``www.gifts.com`` are not mistaken for images just
    because the host contains ``.gif``.

    Args:
        url (str): The URL to classify.

    Returns:
        bool: True when the path or a query value ends in an image extension.
    """
    # Function-scope import: the file's top-level imports do not include it.
    from urllib.parse import parse_qsl, urlsplit

    image_extensions = (
        '.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp',
        '.tiff', '.tif', '.heic', '.heif',
    )
    lowered = url.lower()
    try:
        parts = urlsplit(lowered)
        query_values = [value for _, value in parse_qsl(parts.query)]
    except ValueError:
        # Malformed URL: judge by the raw string ending instead.
        return lowered.endswith(image_extensions)

    # A trailing slash after the file name should not hide the extension.
    if parts.path.rstrip('/').endswith(image_extensions):
        return True
    # Covers download-style URLs such as ".../get?file=photo.png".
    return any(value.endswith(image_extensions) for value in query_values)

# Function to scrape website content with truncation and relevance focus
def scrape_website(url, max_chars=100000, timeout=30):
    """
    Scrape text content from a website, focusing on the main content and truncating to a character limit.

    Script, style, header, footer, nav and aside elements are removed first;
    the text of the remaining headings (h1-h3), paragraphs, list items and
    block quotes is then joined with spaces and cut to ``max_chars``.

    Args:
        url (str): The URL of the website.
        max_chars (int): Maximum number of characters to return.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        str: The scraped and truncated text content, or None when the page
        could not be fetched (non-200 status, timeout, or any other error).
    """
    print(f"Attempting to scrape website at URL: {url}")
    try:
        # requests waits indefinitely unless a timeout is given; a single
        # unresponsive host must not stall the caller forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve website content. Status code: {response.status_code}")
            return None

        print("Website content retrieved successfully.")
        soup = BeautifulSoup(response.content, 'lxml')

        # Remove script, style, and irrelevant elements
        for element in soup(["script", "style", "header", "footer", "nav", "aside"]):
            element.decompose()

        # Extract non-empty text from relevant content tags
        content = []
        for tag in soup.find_all(['h1', 'h2', 'h3', 'p', 'li', 'blockquote']):
            text = tag.get_text(separator=' ', strip=True)
            if text:
                content.append(text)

        # Combine the extracted text and truncate it
        truncated_content = ' '.join(content)[:max_chars]

        print(f"Truncated website content to {len(truncated_content)} characters.")
        return truncated_content
    except Exception as e:
        # Deliberate best-effort boundary: any fetch or parse failure is
        # reported and signalled to the caller as None.
        print(f"Exception occurred while scraping website: {e}")
        return None

def _return_none_after_retries(retry_state):
    """tenacity callback run once every attempt has failed; its return value becomes the function's result."""
    print(f"Giving up on generating the output after {retry_state.attempt_number} attempts.")
    return None


# Function to use Vertex AI for generating output
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry_error_callback=_return_none_after_retries,
)
def generate_output_ranking(input_task, account):
    """
    Use Vertex AI to generate an output based on the input task and account.

    The persona description built from ``account`` is placed in front of
    ``input_task`` and the combined prompt is sent to ``GENERATIVE_MODEL``.
    A failed attempt is logged and re-raised so that the ``@retry`` decorator
    can try again (up to 3 attempts with exponential back-off). After the
    final failure the decorator's ``retry_error_callback`` returns None.

    Args:
        input_task (list): The input task for the model.
        account (dict): The synthetic account data.

    Returns:
        str: The generated response text from the model, or None when every
        attempt failed.
    """
    print("Generating output ranking using the Vertex AI model...")
    try:
        # Construct the persona description and prepend it to the task
        persona = f"Your synthetic persona details: {account}"
        prompt = [persona] + input_task

        # Generate the response.
        response = GENERATIVE_MODEL.generate_content(prompt)
        print("Model response generated.")
        return response.text
    except Exception as e:
        print(f"An error occurred while generating the output: {e}")
        # Re-raise instead of returning None: swallowing the error here
        # would hide the failure from @retry and no retry would ever happen.
        raise

# Main script
if __name__ == "__main__":
    # Number of synthetic accounts to generate
    num_accounts = 10000  # Adjust this number as needed to ensure manageability
    print(f"Starting main script. Dryrun={DRYRUN}")
    # Generate synthetic accounts
    accounts = generate_synthetic_accounts(num_accounts)
    account_index = 0  # Start from the first account

    while True:
        print("Checking for JSON files in the input bucket...")
        # List all JSON files in the input bucket
        if DRYRUN:
          blobs = list(input_bucket.list_blobs(prefix='web-page-feedback-English/', max_results=100))
        else:
          blobs = list(input_bucket.list_blobs())
        json_blobs = [blob for blob in blobs if blob.name.endswith('.json')]
        if not json_blobs:
            print("No JSON files found in the input bucket. Waiting for new files...")
            time.sleep(60)  # Wait for 1 minute before checking again
            continue

        # Randomize the order of the JSON blobs
        random.shuffle(json_blobs)
        print("Randomized the order of input files.")

        for blob in json_blobs:
            try:
                print(f"Processing file: {blob.name}")
                # Download the JSON file
                data = blob.download_as_bytes()
                input_data = json.loads(data)
                print("Input data loaded.")

                # Get the current account
                account = accounts[(account_index // 5) % len(accounts)]
                account_index += 1
                print(f"Using account {account_index}: {account['email']}")

                selected_language = random.choice(SUPPORTED_LANGUAGES)
                print(f"Selected language for explanation field is: {selected_language}")
                formatted_instruction = SYSTEM_INSTRUCTION.replace(
                    '# RESPONSE FORMATTING',
                    f'''# RESPONSE FORMATTING\n
        Important: In your JSON response, you MUST write the "explanation" field in {selected_language}.
        The rest of the response should be in English. Only this specific field should be written in {selected_language}.\n'''
                )

                # Process the input data to create an input task
                base_task, task_type = process_input_data(input_data, account)

                # Combine the instruction with the base task
                input_task = [formatted_instruction] + base_task

                # Generate output ranking
                response_text = generate_output_ranking(input_task, account)
                if response_text is None:
                    print(f"Failed to generate response for {blob.name}")
                    continue

                print(f"Generated response: {response_text}")

                # Parse the response text as JSON
                try:
                    response_json = json.loads(response_text)
                    print("Model response parsed as JSON.")
                except json.JSONDecodeError as e:
                    print(f"Failed to parse response as JSON for {blob.name}: {e}")
                    continue

                # Translate the model's JSON answer into one annotation result entry per
                # labeling control, keeping the controls in the order declared here.
                rating_controls = (
                    "rating_effect_on_animals",
                    "rating_cultural_sensitivity",
                    "rating_relevance",
                    "rating_insight",
                    "rating_trustworthiness",
                    "rating_emotional_impact",
                    "rating_rationality",
                    "rating_influence",
                    "rating_alignment",
                )
                schema_to_label = {
                    "is_content_harmful_to_animals": {
                        "from_name": "is_content_harmful_to_animals",
                        "type": "choices",
                        # A missing answer falls back to "No".
                        "value": {"choices": [response_json.get("is_content_harmful_to_animals", "No")]},
                    },
                    "explanation": {
                        "from_name": "explanation",
                        "type": "textarea",
                        # A missing explanation falls back to an empty string.
                        "value": {"text": [response_json.get("explanation", "")]},
                    },
                }
                # All rating controls share one shape; a missing score falls back to 3.
                for rating_name in rating_controls:
                    schema_to_label[rating_name] = {
                        "from_name": rating_name,
                        "type": "rating",
                        "value": {"rating": response_json.get(rating_name, 3)},
                    }

                # Each entry gets its own freshly generated UUID and targets `task_type`.
                result = [
                    {
                        "id": str(uuid.uuid4()),
                        "from_name": label_spec["from_name"],
                        "to_name": task_type,
                        "type": label_spec["type"],
                        "value": label_spec["value"],
                        "origin": "manual",
                    }
                    for label_spec in schema_to_label.values()
                ]
                print("Result data prepared.")


                # One timestamp is shared by every created/updated field of this record.
                # It is taken from a timezone-aware UTC clock because datetime.utcnow() is
                # deprecated since Python 3.12; the rendered string keeps the same
                # "YYYY-MM-DDTHH:MM:SS.ffffffZ" layout as before.
                current_time = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f") + "Z"

                # Task envelope wrapping the original input payload. The id and inner_id
                # values are random six-digit placeholders generated per record.
                task = {
                    "cancelled_annotations": 0,
                    "comment_authors": [],
                    "comment_count": 0,
                    "created_at": current_time,
                    "data": input_data,
                    "file_upload": None,
                    "id": random.randint(100000, 999999),
                    "inner_id": random.randint(100000, 999999),
                    "is_labeled": True,
                    "last_comment_updated_at": None,
                    "meta": {},
                    "overlap": 1,
                    "project": 2480,
                    "total_annotations": 1,
                    "total_predictions": 0,
                    "unresolved_comment_count": 0,
                    "updated_at": current_time,
                    "updated_by": None
                }

                def _human_only(field):
                    """Return account[field] for a 'Human' account, otherwise None."""
                    # The species check runs first, so non-human accounts never need
                    # to carry the human-specific keys.
                    if account['species'] == 'Human':
                        return account[field]
                    return None

                # Full annotation record: annotator profile, the task envelope, and the
                # result entries, stamped with the shared timestamp.
                output_data = {
                    "id": random.randint(10000, 1000000),
                    "created_username": account['email'],
                    "created_ago": "0 minutes",
                    "completed_by": {
                        "id": account['id'],
                        "first_name": account['first_name'],
                        "last_name": account['last_name'],
                        "email": account['email'],
                        "advocacy_diplomacy": _human_only('diplomatic_vs_confrontational'),
                        "advocacy_empiricism": _human_only('intuitive_vs_empirical_effectiveness'),
                        "advocacy_focus": _human_only('focus_on_welfare_vs_rights'),
                        "advocacy_intersectionality": _human_only('solely_on_animal_activism_vs_intersectional'),
                        # NOTE(review): reads the same account key as "advocacy_focus" above;
                        # confirm whether a different source field was intended.
                        "advocacy_rights": _human_only('focus_on_welfare_vs_rights'),
                        "values_vs_profit": _human_only('values_vs_profit'),
                        "short_term_vs_long_term": _human_only('short_term_vs_long_term'),
                        "innovation_vs_tradition": _human_only('innovation_vs_tradition'),
                        "pro_regulation_vs_anti_regulation": _human_only('pro_regulation_vs_anti_regulation'),
                        "transparency_vs_misinformation": _human_only('transparency_vs_misinformation'),
                        "passive_vs_active": _human_only('passive_vs_active'),
                        "advocate": _human_only('advocate_for_animals'),
                        "current_lifestyle": _human_only('current_lifestyle_diet'),
                        # Roles are recorded for every account, human or not.
                        "roles": account['role'],
                        "age": _human_only('age'),
                        "agreeableness": _human_only('agreeableness'),
                        "conscientiousness": _human_only('conscientiousness'),
                        "country": _human_only('country'),
                        "education_level": _human_only('education_level'),
                        "ethnicity": _human_only('ethnicity'),
                        "extraversion": _human_only('extraversion'),
                        "gender": _human_only('gender'),
                        "income_level": _human_only('income_level'),
                        "neuroticism": _human_only('neuroticism'),
                        "openness": _human_only('openness_to_experience'),
                        "political_affiliation": _human_only('political_affiliation'),
                        "religious_affiliation": _human_only('religious_affiliation'),
                        "species": account['species'],
                    },
                    "draft_created_at": current_time,
                    "task": task,
                    "project": 2480,
                    "updated_by": account['id'],
                    "result": result,
                    "was_cancelled": False,
                    "ground_truth": False,
                    "created_at": current_time,
                    "updated_at": current_time,
                    # Random lead time between 1 and 100, drawn per record.
                    "lead_time": random.uniform(1, 100),
                    "import_id": None,
                    "last_action": None,
                    "parent_prediction": None,
                    "parent_annotation": None,
                    "last_created_by": None,
                }
                print("Output data assembled.")

                # Build the destination object name: the input name without its trailing
                # ".json" extension, followed by a "-synthetic-<uuid>" suffix so each run
                # writes to a distinct object.
                source_name = blob.name
                if source_name.endswith('.json'):
                    # Strip only the extension; str.replace would also remove any ".json"
                    # that appears earlier in the object path.
                    source_name = source_name[:-len('.json')]
                output_name = OUTPUT_PREFIX + source_name + f"-synthetic-{uuid.uuid4()}.json"

                # Serialize once; the same payload is either printed or uploaded.
                serialized_output = json.dumps(output_data, indent=2)
                if DRYRUN:
                    # Dry run: show what would be written without touching the bucket.
                    print(f"DRYRUN: Processed {output_name}")
                    print(serialized_output)
                else:
                    output_blob = output_bucket.blob(output_name)
                    output_blob.upload_from_string(serialized_output, content_type='application/json')
                    print(f"Processed and saved output for {output_name}")

            except Exception as e:
                print(f"An error occurred while processing {blob.name}: {e}")
        # Sleep for a short while before checking for new files
        print("Sleeping for 10 seconds before checking for new files...")
        time.sleep(10)
