### Prompt Engineering for FairFace

In [1]:
""" 
    PROMPT SCHEME:
    "A photo of a(n) [RACE:1] [GENDER & AGE] [RACE:2]"

    
    GENDER & AGE:
    baby girl/boy
    girl/boy
    teenage girl/boy
    young woman/man
    woman/man
    woman/man in her/his forties/fifties/sixties
    elderly woman/man

    
    RACE:
    either a description standing before or after GENDER & AGE (concrete descriptions: see below)

"""

' \n    PROMPT SCHEME:\n    "A photo of a(n) [RACE:1] [GENDER & AGE] [RACE:2]\n\n    \n    GENDER & AGE:\n    baby girl/boy\n    girl/boy\n    teenage girl/boy\n    young woman/man\n    woman/man\n    woman/man in her/his fourties/fifties/sixties\n    elderly woman/man\n\n    \n    RACE:\n    either a description standing before or after GENDER & AGE (concret description: see below)\n\n'

In [2]:
import pandas as pd
import numpy as np
from numpy.random import randint

In [3]:
# Race labels as they appear in the FairFace attribute file. The order matters:
# races[i] is described by race_descs[i].
races = ["Black", "Indian", "Latino_Hispanic", "Middle Eastern", "Southeast Asian", "East Asian", "White"]

# Every descriptor list below has the same layout: the first `race_desc_pos_sep`
# entries are adjective forms that go BEFORE the gender/age phrase
# ("a Korean elderly man"); the remaining entries are "from ..." forms that go
# AFTER it ("an elderly woman from Central India").
black_desc = ["Black", "African American", "Northern African", "Eastern African", 
              "from Southern Africa", "from Western Africa", "from Central Africa"]

indian_desc = ["Indian", "Pakistani", "Northern Indian", "Eastern Indian",
               "from Southern India", "from Western India", "from Central India"]

lat_desc = ["Latino", "Hispanic", "Mexican", "Latin American",
            "from the Caribbean", "from South America", "from Central America"]

me_desc = ["Middle Eastern", "Arab", "Iranian", "Turkish",
           "from the Middle East", "from the Arabian Peninsula", "from Egypt"]

sea_desc = ["Southeast Asian", "Indonesian", "Vietnamese", "Filipino",
            "from Mainland Southeast Asia", "from Maritime Southeast Asia", "from Indonesia"]

easian_desc = ["East Asian", "Chinese", "Korean", "Han Chinese",
               "from East Asia", "from China", "from Japan"]

white_desc = ["White", "European American", "European Australian", "Northern European", 
              "from Eastern Europe", "from Southern Europe", "from Western Europe"]


# Indexed in parallel with `races`.
race_descs = [black_desc, indian_desc, lat_desc, me_desc, sea_desc, easian_desc, white_desc]
# Index of the first descriptor that is placed after the gender/age phrase.
race_desc_pos_sep = 4


# Age buckets; male_desc[i] / female_desc[i] is the wording used for age_steps[i].
age_steps = ["0-2", "3-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "more than 70"]
male_desc = ["baby boy", "boy", "teenage boy", "young man", "man", "man in his forties", "man in his fifties", "man in his sixties", "elderly man"]
female_desc = ["baby girl", "girl", "teenage girl", "young woman", "woman", "woman in her forties", "woman in her fifties", "woman in her sixties", "elderly woman"]


In [4]:
# Sanity check: show how many descriptors each race has.
print(list(map(len, race_descs)))

[7, 7, 7, 7, 7, 7, 7]


In [5]:
# Vowel-initial spellings that are pronounced with a leading consonant sound and
# therefore take "a" instead of "an" (e.g. "a European American").
_CONSONANT_SOUND_PREFIXES = ("eu",)


def _article_filler(phrase):
    """Return the text that joins "a photo of a" to `phrase`: "n " for "an", else " "."""
    lowered = phrase.lower()
    takes_an = (lowered.startswith(("a", "e", "i", "o", "u"))
                and not lowered.startswith(_CONSONANT_SOUND_PREFIXES))
    return "n " if takes_an else " "


# build a FairFace prompt from the age, gender and race labels of one image
def build_ff_prompt(age, gender, race, random_race_desc):
    """Build a prompt of the form "a photo of a(n) ..." for one image.

    Parameters
    ----------
    age : str
        Age bucket; must be an entry of `age_steps`.
    gender : str
        "Male" selects the male wording; any other value selects the female wording.
    race : str
        Race label; must be an entry of `races`.
    random_race_desc : bool
        If true, pick one of the race's descriptors at random (via
        `numpy.random.randint`); otherwise always use the first descriptor.

    Returns
    -------
    str
        The prompt. Descriptors with an index below `race_desc_pos_sep` are
        placed before the gender/age phrase, the others after it.

    Raises
    ------
    ValueError
        If `age` is not in `age_steps` or `race` is not in `races`.
    """
    prefix = "a photo of a"

    # get gender & age descriptor
    age_index = age_steps.index(age)
    if gender == "Male":
        gender_age_desc = male_desc[age_index]
    else:
        gender_age_desc = female_desc[age_index]

    candidates = race_descs[races.index(race)]

    # pick race descriptor; bound the draw by THIS race's list so the lists
    # are not required to all have the same length
    if random_race_desc:
        rand = randint(len(candidates))
    else:
        rand = 0

    race_desc = candidates[rand]

    # put race descriptor at the correct position in the sentence and finish the prompt;
    # the article ("a"/"an") is decided by whichever phrase directly follows it
    if rand < race_desc_pos_sep:
        prompt = prefix + _article_filler(race_desc) + race_desc + " " + gender_age_desc
    else:
        prompt = prefix + _article_filler(gender_age_desc) + gender_age_desc + " " + race_desc

    return prompt

In [6]:
# build diverse prompts for the inserted image_numbers based on the attributes available for FairFace images
# returns dict: (image_number: prompt)
def get_ff_prompts(image_numbers, random_race_desc=True,
                   attr_path="fairface/dataset/fairface_label_train.csv"):
    """Build one prompt per requested image from the attribute CSV.

    Parameters
    ----------
    image_numbers : iterable of int
        1-based image numbers; image number k is looked up in row k-1 of the CSV.
    random_race_desc : bool, default True
        Forwarded to `build_ff_prompt`.
    attr_path : str, default "fairface/dataset/fairface_label_train.csv"
        Path of the attribute CSV. Its "age", "gender" and "race" columns are read.

    Returns
    -------
    dict
        Maps each image number to its prompt.

    Raises
    ------
    IndexError
        If an image number is smaller than 1 or larger than the number of CSV rows.
    """
    attributes = pd.read_csv(attr_path)
    n_rows = len(attributes)
    prompts = {}
    for img_nmb in image_numbers:
        # Without this check, 0 or a negative number would wrap around in
        # `iloc` and silently return the prompt of an unrelated image.
        if not 1 <= img_nmb <= n_rows:
            raise IndexError(
                f"image number {img_nmb} is outside the valid range 1..{n_rows}"
            )
        current = attributes.iloc[img_nmb - 1]
        prompts[img_nmb] = build_ff_prompt(current["age"], current["gender"], current["race"], random_race_desc)
    return prompts

In [9]:
# Seed NumPy's global RNG (the one `numpy.random.randint` draws from) so the
# randomly chosen race descriptors are the same on every run of this cell.
np.random.seed(0)

# Example: prompts for the images numbered 1 through 99.
img_numbers = np.arange(1, 100)
get_ff_prompts(img_numbers, random_race_desc=True)

{108: 'a photo of an elderly woman from Central India',
 144: 'a photo of a Korean elderly man'}