The sociodemographic attributes used below are taken from Beck, Tilman, et al. (2024). “Sensitivity, Performance, Robustness: Deconstructing the Effect of Sociodemographic
Prompting”. In: Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2589–2615.

https://github.com/UKPLab/arxiv2023-sociodemographic-prompting/blob/main/Dataset_Preparation_Sampling.ipynb

In [8]:
# Base sociodemographic attribute values, selected from the
# toxicity_diverseperspectives dataset.

# 'nonbinary' is left out for now.
genders = ["male", "female"]

races = [
    "Black or African American",
    "White",
    "Asian",
    "American Indian or Alaska Native",
    "Hispanic",
    "Native Hawaiian or Pacific Islander",
]

educations = [
    "Some college but no degree",
    "Associate degree in college (2-year)",
    "Bachelor's degree in college (4-year)",
    "Doctoral degree",
    "Master's degree",
    "Professional degree (JD, MD)",
    "High school graduate (high school diploma or equivalent including GED)",
    "Less than high school degree",
]

# Age-bucket label -> (lowest, highest) age in years for that bucket.
# The open-ended "65 or older" bucket is capped at 80.
age_ranges = {
    "18 - 24": (18, 24),
    "25 - 34": (25, 34),
    "35 - 44": (35, 44),
    "45 - 54": (45, 54),
    "55 - 64": (55, 64),
    "65 or older": (65, 80),
}

political_affiliations = ["Liberal", "Independent", "Conservative"]

Persona names and their genders (the gender is determined by the randomly chosen persona). Profiles for the remaining characteristics now need to be randomly generated.

In [35]:
# personas = {
#     "Adam Smith": "Male",
#     "Ayesha Khan": "Female",
#     "Carlos Gomez": "Male",
#     "Eddy Lin": "Male",
#     "Isabella Rodriguez": "Female",
#     "Klaus Mueller": "Male",
#     "Sam Moore": "Male",
#     "Tamara Taylor": "Female",
#     "Tom Moreno": "Male",
#     "Wolfgang Schulz": "Male"
# }

In [39]:
# Personas used for profile generation, mapped to their gender.
# The gender strings here are capitalised ("Female"/"Male"), whereas the
# `genders` list defined earlier uses lowercase values.
# NOTE(review): confirm which casing the consumers of scratch.json expect.
personas = {
    "Isabella Rodriguez": "Female",
    "Klaus Mueller": "Male",
    "Maria Lopez": "Female"
}

In [41]:
import random

# Seed the module-level RNG so that re-running this cell draws the same
# profiles again.
random.seed(50)

def generate_profile(gender):
    """Build a random sociodemographic profile for one persona.

    The gender is taken over unchanged; race, education, age and political
    affiliation are drawn with the module-level ``random`` generator from
    the attribute collections defined earlier in this file.

    Args:
        gender: Gender label stored in the profile as-is.

    Returns:
        A dict with the keys ``gender``, ``race_div``, ``education``,
        ``age`` and ``political_affiliation``.
    """
    # The order of the draws determines which values a given seed produces.
    drawn_race = random.choice(races)
    drawn_education = random.choice(educations)

    # Pick an age bucket first, then a concrete age within its bounds.
    # Only the age itself, not the bucket label, ends up in the profile.
    bucket_label = random.choice(list(age_ranges))
    youngest, oldest = age_ranges[bucket_label]
    drawn_age = random.randint(youngest, oldest)

    drawn_affiliation = random.choice(political_affiliations)

    return dict(
        gender=gender,
        race_div=drawn_race,
        education=drawn_education,
        age=drawn_age,
        political_affiliation=drawn_affiliation,
    )

# Generate profiles for each persona: the gender comes from `personas`,
# every other attribute is drawn at random by generate_profile.
profiles = {persona: generate_profile(gender) for persona, gender in personas.items()}

# Print profiles, one "<name>: <profile dict>" line per persona
for persona, profile in profiles.items():
    print(f"{persona}: {profile}")

Isabella Rodriguez: {'gender': 'Female', 'race_div': 'American Indian or Alaska Native', 'education': "Master's degree", 'age': 38, 'political_affiliation': 'Conservative'}
Klaus Mueller: {'gender': 'Male', 'race_div': 'American Indian or Alaska Native', 'education': 'Professional degree (JD, MD)', 'age': 22, 'political_affiliation': 'Independent'}
Maria Lopez: {'gender': 'Female', 'race_div': 'White', 'education': 'Associate degree in college (2-year)', 'age': 30, 'political_affiliation': 'Liberal'}


In [37]:
import json
import os

# Name of the simulation folder; it is interpolated into the scratch-file
# path built in update_scratch_file.
simulation = "base_the_ville_n10"

def update_scratch_file(persona_name, profile, *, simulation_name=None,
                        storage_root="../environment/frontend_server/storage"):
    """Merge a generated profile into a persona's ``scratch.json``.

    The file is read, the five sociodemographic fields are overwritten with
    the values from ``profile``, and the result is written back with a
    4-space indent. All other fields in the file are left untouched.

    Args:
        persona_name: Name of the persona; used as the folder name inside
            the simulation's ``personas`` directory.
        profile: Mapping that provides ``gender``, ``race_div``,
            ``education``, ``age`` and ``political_affiliation``.
        simulation_name: Simulation folder to update. Defaults to the
            module-level ``simulation`` setting when omitted.
        storage_root: Directory that contains the simulation folders.

    Returns:
        None. If the scratch file does not exist, a message is printed and
        nothing is written.

    Raises:
        KeyError: If ``profile`` lacks one of the expected keys.
        TypeError: If the merged data cannot be serialised to JSON; the
            file on disk is left unchanged in that case.
    """
    if simulation_name is None:
        # Fall back to the notebook-level setting.
        simulation_name = simulation

    # Construct the path to the scratch file
    path = (
        f"{storage_root}/{simulation_name}/personas/{persona_name}"
        "/bootstrap_memory/scratch.json"
    )

    # Try to open directly instead of checking for existence first, which
    # avoids a check-then-use race. JSON is read as UTF-8 regardless of the
    # platform's default locale encoding.
    try:
        with open(path, 'r', encoding="utf-8") as f:
            scratch_data = json.load(f)
    except FileNotFoundError:
        print(f"File not found for {persona_name}: {path}")
        return

    # Update the scratch data with new characteristics
    for key in ("gender", "race_div", "education", "age",
                "political_affiliation"):
        scratch_data[key] = profile[key]

    # Serialise before opening the file for writing: opening with 'w'
    # truncates it, so a serialisation error must surface first.
    serialized = json.dumps(scratch_data, indent=4)
    with open(path, 'w', encoding="utf-8") as f:
        f.write(serialized)
    print(f"Updated scratch file for {persona_name}")

# Iterate over each persona and update their scratch file
# NOTE(review): the saved output of this cell lists ten personas, while the
# `personas` dict currently defined has only three entries. The output
# appears to predate that change; re-run the cell to refresh it.
for persona_name, profile in profiles.items():
    update_scratch_file(persona_name, profile)


Updated scratch file for Adam Smith
Updated scratch file for Ayesha Khan
Updated scratch file for Carlos Gomez
Updated scratch file for Eddy Lin
Updated scratch file for Isabella Rodriguez
Updated scratch file for Klaus Mueller
Updated scratch file for Sam Moore
Updated scratch file for Tamara Taylor
Updated scratch file for Tom Moreno
Updated scratch file for Wolfgang Schulz
