In [1]:
# Bias category this notebook run generates postings for. It selects the output
# sub-directory, narrows `categories` before the generation loop, and decides
# which label_* column is set for biased samples.
category = 'masculine'

In [18]:
import huggingface_hub
from openai import OpenAI

# Sampling temperature sent with every chat-completion request in openai_chat().
temperature = 0.8

def read_file(file_path):
    """Return the text of ``file_path`` with trailing newline characters removed.

    Only ``'\\n'`` characters at the very end are stripped; other whitespace and
    interior newlines are returned untouched.
    """
    with open(file_path, 'r') as handle:
        return handle.read().rstrip('\n')

# Backend switch: True -> OpenAI 'gpt-4o'; False -> Llama 3 70B Instruct reached
# through a Hugging Face URL using the same OpenAI client.
use_gpt_model = False

if use_gpt_model:
    model = 'gpt-4o'
    model_shortname = 'gpt4o'  # short name embedded in the output file names
    base_url = None  # no endpoint override is passed to the OpenAI client
    # NOTE(review): absolute, user-specific path to the key file; consider an
    # environment variable or a configurable location instead.
    api_key=read_file("/home/teveritt/OpenAI-API-DCU-AI.key")
else:
    model = 'meta-llama/Meta-Llama-3-70B-Instruct'
    model_shortname = 'llama3b70'  # short name embedded in the output file names
    base_url = f"https://api-inference.huggingface.co/models/{model}/v1/"
    # Alternative endpoint kept for reference:
    #base_url='https://ylzx7jabydlt5hql.us-east-1.aws.endpoints.huggingface.cloud/v1/'
    # Token comes from the local Hugging Face login rather than a file in the repo.
    api_key=huggingface_hub.get_token()

def openai_chat(messages):
    """Send ``messages`` to the configured chat model and return the raw response.

    A new client is built for each call from the module-level ``base_url`` and
    ``api_key``; ``model`` and ``temperature`` are also read from module scope.
    The request is non-streaming and limited to 1525 completion tokens.
    """
    request = dict(
        model=model,
        temperature=temperature,
        messages=messages,
        stream=False,
        max_tokens=1525,
    )
    return OpenAI(base_url=base_url, api_key=api_key).chat.completions.create(**request)

In [19]:
# Fetch a job title from job-phrase-list.csv
# Original source of list: https://github.com/microsoft/LUIS-Samples/blob/master/documentation-samples/tutorials/job-phrase-list.csv
# https://www.kaggle.com/datasets/estasney/job-titles?select=titles.csv

import pandas as pd
import random

def get_job_titles():
    """Load the de-duplicated job titles used as roles for the synthetic postings.

    Reads ``microsoft-LUIS-job-phrase-list.csv`` from the working directory, one
    title per line, each optionally terminated by a single trailing comma.

    Returns:
        list[str]: unique, non-empty titles in sorted order, so the list is the
        same on every run instead of depending on set iteration order.
    """
    titles = set()
    with open("microsoft-LUIS-job-phrase-list.csv", "r") as file:
        for line in file:
            title = line.rstrip('\n')
            # Drop the trailing CSV comma. Matching on the stripped line (rather
            # than on ',\n') also covers a last line that has no newline.
            if title.endswith(','):
                title = title[:-1]
            if title:  # a blank line is not a job title
                titles.add(title)

    #kaggle_titles = pd.read_csv('kaggle-titles.csv')
    #for col in kaggle_titles.columns:
    #    if col.startswith('Title_'):
    #        titles.update(kaggle_titles[col].dropna().unique())

    return sorted(titles)


# Load the title pool once; random_job_title() draws from this module-level list.
positions = get_job_titles()


def random_job_title():
    """Return one job title chosen at random from the module-level ``positions``."""
    return random.choice(positions)


# Sanity check: how many distinct titles were loaded.
print(f'Positions: {len(positions)}')

Positions: 1216


In [20]:
random_job_title()

'school mental health training coordinator'

In [21]:
def load_definition(category):
    """Return the bias definition for ``category`` as a single line of text.

    Reads ``definitions/<category>.txt`` (UTF-8), trims surrounding whitespace
    and removes every remaining newline character.
    """
    path = f"definitions/{category}.txt"
    with open(path, 'r', encoding="utf-8") as source:
        raw = source.read()
    trimmed = raw.strip()
    return trimmed.replace('\n', '')

def load_polarity_definition(category, polarity):
    """Return the definition for a ``category``/``polarity`` pair as one line.

    Reads ``polarity-definitions/<category>-<polarity>.txt`` (UTF-8), trims
    surrounding whitespace and removes every remaining newline character.
    """
    path = f"polarity-definitions/{category}-{polarity}.txt"
    with open(path, 'r', encoding="utf-8") as source:
        raw = source.read()
    trimmed = raw.strip()
    return trimmed.replace('\n', '')


def random_phrases(category, polarity, num_lines=None):
    """Return example phrases for a ``category``/``polarity`` pair.

    Reads ``polarity-phrases/<category>-<polarity>.txt`` (UTF-8), one phrase per
    line. Surrounding whitespace is stripped, blank lines are skipped, and a
    full stop is appended to any phrase that does not already end with one.

    Args:
        category: bias category used in the file name.
        polarity: polarity used in the file name.
        num_lines: how many phrases to draw without replacement; ``None``
            returns every phrase in file order.

    Returns:
        list[str]: the selected phrases.

    Raises:
        ValueError: from ``random.sample`` when ``num_lines`` exceeds the number
            of phrases available.
    """
    phrases = []
    with open(f"polarity-phrases/{category}-{polarity}.txt", 'r', encoding="utf-8") as f:
        for line in f:
            phrase = line.strip()
            if not phrase:
                # A blank line would otherwise turn into a bare "." example.
                continue
            phrases.append(phrase if phrase.endswith('.') else phrase + '.')

    if num_lines is None:
        return phrases
    return random.sample(phrases, num_lines)

In [22]:
# System prompt shared by every request. The per-category definitions are read
# from the definitions/ directory when this cell runs, so re-run the cell after
# editing those files. The text is sent to the model verbatim.
system = f"""
This GPT generates 200 word synthetic job postings across seven categories: age, disability, feminine, masculine, racial, sexuality, and general. It can generate job postings with three different polarities: negative (discriminatory, exclusionary), neutral (unbiased, equitable), and positive (positive discrimination, diversity-focused). The job post builder adheres to the following definitions:

- Age bias: {load_definition(category='age')}
- Disability bias: {load_definition(category='disability')}
- Feminine bias: {load_definition(category='feminine')}
- General bias: {load_definition(category='general')}
- Masculine bias: {load_definition(category='masculine')}
- Racial bias: {load_definition(category='racial')}
- Sexuality bias: {load_definition(category='sexuality')}

The generator will provide company background, job type, job description, and responsibilities, qualifications and experience needed, perks and benefits. 

The generator will be given the company name, the role, and a list of categories along with the polarities (negative/neutral/positive). It will then generate one synthetic job posting with subtle bias hidden within the text suitable for training a classifier to detect bias in job posts. The subtle bias should not be similar to each other and obviously bias. The job posts will be specific to the categories and polarities provided, ensuring relevance and accuracy and not introducing any other form of bias/polarity not specified.

The generator is free to choose specific elements or language to highlight or exclude when generating job postings and will try to fill in missing information based on context when needed.
"""

In [23]:
import json
from faker import Faker

# Every bias category the pipeline knows about; the label_* and analysis_*
# column names are derived from this list further down.
categories = ['age', 'disability', 'feminine', 'masculine', 'racial', 'sexuality', 'general']

# Faker instance used to invent a company name for each prompt.
fake = Faker()


def get_inputs(position, inject_bias_types):
    """Build the first user message: company, role and the bias/polarity brief.

    Args:
        position: job title to put in the ``Role:`` line.
        inject_bias_types: strings ending in ``<category>/<polarity>``. Each one
            becomes a numbered entry holding its polarity definition followed by
            five randomly drawn example phrases.

    Returns:
        str: the assembled prompt text, with a company name from ``fake``.
    """
    sections = [f"""Company: {fake.company()}
Role: {position}
Bias/Polarities:"""]

    for number, bias_type in enumerate(inject_bias_types, start=1):
        segments = bias_type.split('/')
        bias_category = segments[-2]
        bias_polarity = segments[-1]
        definition = load_polarity_definition(bias_category, bias_polarity)
        sections.append(f"\n  {number}. {definition} Examples are:")
        for phrase in random_phrases(bias_category, bias_polarity, 5):
            phrase = phrase.replace('\n', '')
            sections.append(f"\n    - {phrase}")

    return ''.join(sections)


def get_output_format(inject_bias_types):
    """Build the follow-up user message with review and output-format instructions.

    The fixed review checklist ends with step 1 (wrap the posting in ``<j>``).
    One further numbered step, starting at 2, is added per entry of
    ``inject_bias_types``, asking for a rationale inside a tag named after that
    entry's category (the second-to-last ``/``-separated segment).
    """
    checklist = """Review the job posting to make sure it has not introduced any other form of bias not specified and the rationale matches the bias/polarities specified. Review the job post so that the requirements make sense with the job role. Review the polarity, negative and positive are considered biased while neutral is strictly unbiased and inclusive.  Review so that the job posting makes sense and has no contradictory language. Review the benefits/offer and if there is a lack of transparency (e.g. Competitive pay/salary), then adjust it to add more transparency (e.g "We are committed to fair and equitable pay practices. The salary for this position ranges from <GPT fills this in> to <GPT fills this in>, based on your experience and skills").  Pick one of global currency reserves when mentioning salary or revenue. Once reviewed and corrected, output with the following format (tag names are lowercase):
  1. Wrap the job posting within the <j>...</j> tag."""

    rationale_steps = []
    for step, bias_type in enumerate(inject_bias_types, start=2):
        tag = bias_type.split('/')[-2]
        rationale_steps.append(
            f"\n  {step}. Summarise, using third-person, the {tag} rationale within one <{tag}>...</{tag}> tag."
        )

    return checklist + ''.join(rationale_steps)


def generate_text(position, inject_bias_types):
    """Run one four-message chat exchange and return the reply with its bookkeeping.

    The conversation is: system prompt, the generation brief from
    ``get_inputs``, a scripted assistant turn asking how to respond, and the
    formatting instructions from ``get_output_format``.

    Returns:
        tuple: ``(messages_json, reply_text, model_name, seconds, prompt_tokens,
        completion_tokens, total_tokens)`` where ``model_name`` is the value
        reported on the response and ``seconds`` is the wall-clock time of the
        request.
    """
    system_turn = {"role": "system", "content": system}
    brief_turn = {"role": "user", "content": get_inputs(position, inject_bias_types)}
    # Scripted assistant turn so the format instructions arrive as a follow-up.
    scripted_turn = {"role": "assistant", "content": "I have the job posting ready, how should I respond?"}
    format_turn = {"role": "user", "content": get_output_format(inject_bias_types)}
    conversation = [system_turn, brief_turn, scripted_turn, format_turn]

    started_at = time.time()
    response = openai_chat(conversation)
    elapsed = time.time() - started_at

    usage = response.usage
    tokens_in = usage.prompt_tokens
    tokens_out = usage.completion_tokens
    tokens_all = usage.total_tokens
    reply = response.choices[0].message.content

    return (
        json.dumps(conversation),
        reply,
        response.model,
        elapsed,
        tokens_in,
        tokens_out,
        tokens_all,
    )

In [24]:
print(get_inputs(random_job_title(), [f'{category}/negative', f'{category}/neutral']))

Company: Russell-Gonzalez
Role: public health adviser
Bias/Polarities:
  1. Masculine/Negative: Discourages non-male candidates from applying by using gender-specific language, referencing male-centric experiences, or assuming certain traits or roles are inherently male. This can occur through the exclusive use of pronouns like "he" or "him," or by assuming physical strength or competitiveness is necessary, thereby discouraging females and non-binary individuals. Examples are:
    - An experienced mailman is needed for our delivery team.
    - The ideal candidate will be a dominant force in their field.
    - We seek a skilled foreman for our construction site.
    - A field artillery fire control man is needed for operations.
    - We prefer male candidates for this technical role.
  2. Masculine/Neutral: Language that neither disadvantages nor gives undue preference to any gender. This approach focuses on inclusivity by ensuring job requirements are essential and relevant, avoiding g

In [25]:
print(get_output_format([f'{category}/negative']))

Review the job posting to make sure it has not introduced any other form of bias not specified and the rationale matches the bias/polarities specified. Review the job post so that the requirements make sense with the job role. Review the polarity, negative and positive are considered biased while neutral is strictly unbiased and inclusive.  Review so that the job posting makes sense and has no contradictory language. Review the benefits/offer and if there is a lack of transparency (e.g. Competitive pay/salary), then adjust it to add more transparency (e.g "We are committed to fair and equitable pay practices. The salary for this position ranges from <GPT fills this in> to <GPT fills this in>, based on your experience and skills").  Pick one of global currency reserves when mentioning salary or revenue. Once reviewed and corrected, output with the following format (tag names are lowercase):
  1. Wrap the job posting within the <j>...</j> tag.
  2. Summarise, using third-person, the masc

In [26]:
import re
import time
import datetime
import random
import hashlib


def create_hash(input_string):
    """Return the first 10 hex characters of the SHA-256 digest of ``input_string``."""
    digest = hashlib.sha256()
    digest.update(input_string.encode())
    hex_digest = digest.hexdigest()
    return hex_digest[:10]


def lowercase_tags(text):
    """Return ``text`` with the name of every ``<tag`` / ``</tag`` lowercased.

    Each match of ``<``, an optional ``/`` and a run of word characters is
    lowercased in a single pass; everything else is left as it was.

    A single-pass substitution is used because replacing the found tags one
    after another breaks when one tag is a prefix of a later one: lowercasing
    ``<A`` first turns ``<AB>`` into ``<aB>``, and the later replacement of
    ``<AB`` then no longer finds anything to fix.
    """
    return re.sub(r'<\/?\w+', lambda match: match.group(0).lower(), text)


def extract_job_posting(text):
    """Return the stripped content of the first ``<j>...</j>`` pair in ``text``.

    Tag names are lowercased first. When no ``<j>...</j>`` pair is present, the
    whole tag-lowercased text is returned instead.
    """
    normalised = lowercase_tags(text)
    first_match = re.search(r'<j>(.*?)</j>', normalised, re.DOTALL)
    if first_match is None:
        return normalised
    return first_match.group(1).strip()

def fix_closing_tag(file_content, tag):
    """Append ``</tag>`` when ``<tag>`` is present but its closing tag is not.

    The content is returned unchanged when the opening tag is absent or a
    closing tag already appears anywhere in it.
    """
    opening = f'<{tag}>'
    closing = f'</{tag}>'
    if opening not in file_content or closing in file_content:
        return file_content
    return file_content + closing

def find_first_between_tags(file_content, tag):
    """Return the text inside the first ``<tag>...</tag>`` pair, or ``None``.

    Args:
        file_content: text to search (here, the raw model reply).
        tag: tag name without angle brackets, matched case-sensitively.

    Returns:
        str | None: the content between the first opening tag and the first
        closing tag that follows it, stripped of surrounding whitespace and with
        every ``*`` removed. If no closing tag follows the opening tag (for
        example a reply that was cut off), everything after the opening tag is
        used. ``None`` when the opening tag is absent.
    """
    start_tag = f"<{tag}>"
    end_tag = f"</{tag}>"

    start_index = file_content.find(start_tag)
    if start_index == -1:
        return None  # opening tag not present
    start_index += len(start_tag)  # move to the first character after the opening tag

    # Look for the closing tag only after the opening tag, so a stray closing
    # tag earlier in the text cannot produce an empty slice.
    end_index = file_content.find(end_tag, start_index)
    if end_index == -1:
        end_index = len(file_content)  # unclosed tag: take the rest of the text

    result = file_content[start_index:end_index].strip()
    return result.replace('*', '')  # drop '*' characters from the extracted text

In [30]:
import os
import pandas as pd

# Output locations: one directory per bias category, one JSONL + Parquet pair per model.
# NOTE(review): absolute, user-specific path; consider a configurable base directory.
output_dir = f'/home/teveritt/Datasets/2024-mcm-everitt-ryan/datasets/synthetic-job-postings/polarity-synthetic/{category}'
jsonl_file = f'{output_dir}/polarity-synthetic-{model_shortname}.jsonl'
parquet_file = f'{output_dir}/polarity-synthetic-{model_shortname}.parquet'

# Column names are built from `categories` as it is when this cell runs. A later
# cell narrows `categories` to one entry, so re-running this cell after that one
# produces a single label/analysis column instead of seven.
label_categories = ['label_' + category for category in categories]
analysis_categories = ['analysis_' + category for category in categories]

os.makedirs(output_dir, exist_ok=True)
# Resume from a previous run when its JSONL exists; otherwise start an empty frame.
if os.path.exists(jsonl_file):
    synthetic_df = pd.read_json(jsonl_file, lines=True)
else:
    synthetic_df = pd.DataFrame(
        columns=["document_id", "position"] + label_categories + analysis_categories + ["inference_time",
                                                                                        "prompt_tokens",
                                                                                        "completion_tokens",
                                                                                        "total_tokens", "text", "input",
                                                                                        "output"])

# Discard rows with a missing or empty posting text.
synthetic_df = synthetic_df.dropna(subset=['text'])
synthetic_df = synthetic_df[synthetic_df['text'] != '']

# Keep only the first occurrence of each posting text.
synthetic_df = synthetic_df.drop_duplicates(subset='text', keep='first')
synthetic_df

Unnamed: 0,id,document_id,position,label_age,label_disability,label_feminine,label_masculine,label_racial,label_sexuality,label_general,...,text,analysis_age,analysis_disability,analysis_feminine,analysis_masculine,analysis_racial,analysis_sexuality,analysis_general,input,output
0,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,program coordinator for early childhood health,False,False,False,True,False,False,False,...,Program Coordinator for Early Childhood Health...,,,,The job posting uses masculine-biased language...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed job posting:\n\n<j>\n\n**...
1,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,postal service clerk,False,False,False,False,False,False,False,...,"Company: Thomas, Hardin and Frey\nRole: Postal...",,,,"The job posting uses neutral language, avoidin...",,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...
2,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,e-learning content developer,False,False,False,True,False,False,False,...,"Company: Thomas, Mccarthy and Williams\nRole: ...",,,,The job posting uses masculine-biased language...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the generated job posting:\n\n<j>\n\nC...
3,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,data and technology analyst,False,False,False,False,False,False,False,...,"Data and Technology Analyst at Hall, Johnson a...",,,,The masculine/neutral approach ensures that th...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed job posting:\n\n<j>\n**Da...
4,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,community service manager,False,False,False,True,False,False,False,...,"Community Service Manager\nCompany: Collins, P...",,,,The job posting contains masculine bias by usi...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...
5,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,courier,False,False,False,False,False,False,False,...,Company: Kim Group\nRole: Courier\n\nJob Descr...,,,,The job posting avoids masculine bias by using...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...
6,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,kiln operator,False,False,False,True,False,False,False,...,"Job Title: Kiln Operator\nCompany: Jacobson, M...",,,,The job posting uses masculine-biased language...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...
7,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,solutions director,False,False,False,False,False,False,False,...,"Company: White, Zamora and Smith\n\nJob Title:...",,,,The job posting uses neutral language to descr...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the generated job posting:\n\n<j>\n\nC...


In [31]:
# Narrow generation to the single category selected at the top of the notebook.
categories = [category]

size = 65  # number of sample groups; the loop below generates one posting per polarity in each group

In [32]:
polarities = ['negative', 'neutral']  # one biased and one unbiased posting per group

total_records = size * len(polarities)

# Rationale tags looked up in every reply. Listed explicitly because `categories`
# has been narrowed to the single category being generated.
analysis_tags = ['age', 'disability', 'feminine', 'masculine', 'racial', 'sexuality', 'general']

for i in range(size):
    for category in categories:

        for polarity in polarities:
            # The progress counter includes rows already loaded from a previous run.
            count = len(synthetic_df) + 1
            formatted_percentage = "{:.2f}%".format((count / total_records) * 100)
            print(
                f'Generating synthetic for category {category}/{polarity}: {count}/{total_records} [ {formatted_percentage} ]',
                end=' ')

            position = random_job_title()
            # Unpack the reported model name into `response_model`, not `model`:
            # rebinding the global `model` would change what openai_chat() requests
            # on every later iteration.
            (prompt, output, response_model, inference_time,
             prompt_tokens, completion_tokens, total_tokens) = generate_text(position, [f'{category}/{polarity}'])

            text = find_first_between_tags(output, 'j')
            # One rationale per known tag; tags the model did not emit yield None.
            analyses = {f'analysis_{tag}': find_first_between_tags(output, tag) for tag in analysis_tags}

            timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
            # `record_id` instead of `id`, so the builtin id() is not shadowed.
            record_id = f"{timestamp}:{polarity}:{create_hash(output)}"
            id_m = response_model.replace('/', ':')
            data = {
                "id": f'Synthetic:{id_m}:{record_id}',
                "document_id": f'Synthetic:{id_m}:{record_id}',
                "position": position
            }

            for label in label_categories:
                data[label] = False

            # Only non-neutral polarities count as biased for the target category.
            data[f'label_{category}'] = polarity != 'neutral'

            data['inference_time'] = inference_time
            data['prompt_tokens'] = prompt_tokens
            data['completion_tokens'] = completion_tokens
            data['total_tokens'] = total_tokens
            data['model'] = id_m
            data['text'] = text
            data.update(analyses)  # analysis_age ... analysis_general, in tag order
            data['input'] = prompt
            data['output'] = output

            # Append to the JSONL immediately so an interrupted run keeps its results.
            with open(jsonl_file, 'a') as file:
                if not os.stat(jsonl_file).st_size == 0:
                    file.write('\n')
                file.write(json.dumps(data))

            synthetic_df = pd.concat([synthetic_df, pd.DataFrame(data, index=[0])], ignore_index=True)
            synthetic_df = synthetic_df.drop_duplicates(subset='text', keep='first')
            # Rewrite the Parquet snapshot after every sample.
            synthetic_df.to_parquet(parquet_file, compression='gzip')

            mean = synthetic_df['inference_time'].mean()
            print(f'inference: {"{:.2f}s".format(inference_time)}; {"{:.2f}s".format(mean)} avg')

synthetic_df

Generating synthetic for category masculine/negative: 9/130 [ 6.92% ] inference: 10.91s; 17.55s avg
Generating synthetic for category masculine/neutral: 10/130 [ 7.69% ] inference: 9.49s; 16.74s avg
Generating synthetic for category masculine/negative: 11/130 [ 8.46% ] inference: 14.14s; 16.51s avg
Generating synthetic for category masculine/neutral: 12/130 [ 9.23% ] inference: 10.77s; 16.03s avg
Generating synthetic for category masculine/negative: 13/130 [ 10.00% ] inference: 11.45s; 15.68s avg
Generating synthetic for category masculine/neutral: 14/130 [ 10.77% ] inference: 10.16s; 15.28s avg
Generating synthetic for category masculine/negative: 15/130 [ 11.54% ] inference: 9.84s; 14.92s avg
Generating synthetic for category masculine/neutral: 16/130 [ 12.31% ] inference: 14.48s; 14.89s avg
Generating synthetic for category masculine/negative: 17/130 [ 13.08% ] inference: 14.26s; 14.85s avg
Generating synthetic for category masculine/neutral: 18/130 [ 13.85% ] inference: 16.27s; 14.

Unnamed: 0,id,document_id,position,label_age,label_disability,label_feminine,label_masculine,label_racial,label_sexuality,label_general,...,text,analysis_age,analysis_disability,analysis_feminine,analysis_masculine,analysis_racial,analysis_sexuality,analysis_general,input,output
0,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,program coordinator for early childhood health,False,False,False,True,False,False,False,...,Program Coordinator for Early Childhood Health...,,,,The job posting uses masculine-biased language...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed job posting:\n\n<j>\n\n**...
1,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,postal service clerk,False,False,False,False,False,False,False,...,"Company: Thomas, Hardin and Frey\nRole: Postal...",,,,"The job posting uses neutral language, avoidin...",,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...
2,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,e-learning content developer,False,False,False,True,False,False,False,...,"Company: Thomas, Mccarthy and Williams\nRole: ...",,,,The job posting uses masculine-biased language...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the generated job posting:\n\n<j>\n\nC...
3,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,data and technology analyst,False,False,False,False,False,False,False,...,"Data and Technology Analyst at Hall, Johnson a...",,,,The masculine/neutral approach ensures that th...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed job posting:\n\n<j>\n**Da...
4,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,community service manager,False,False,False,True,False,False,False,...,"Community Service Manager\nCompany: Collins, P...",,,,The job posting contains masculine bias by usi...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,physicist,,,,False,,,,...,"Physicist Position at Stewart, Spears and Harp...",,,,The job posting avoids using language that sub...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...
134,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,industrial program compliance analyst,,,,True,,,,...,"Company: Dunn, Jones and Archer\nRole: Industr...",,,,The masculine rationale in this job posting di...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed job posting:\n\n<j>\nComp...
135,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,probation assistant,,,,False,,,,...,Job Title: Probation Assistant\nCompany: Colli...,,,,The masculine/neutral job posting rationale fo...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...
136,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,funeral service worker,,,,True,,,,...,"Company: Woods, Lee and Bailey\n\nRole: Funera...",,,,The job posting uses masculine-biased language...,,,,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed and corrected job posting...


In [56]:
synthetic_df

Unnamed: 0,document_id,position,label_age,label_disability,label_feminine,label_masculine,label_racial,label_sexuality,label_general,analysis_age,...,analysis_general,inference_time,prompt_tokens,completion_tokens,total_tokens,text,input,output,id,model
0,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,operations customer service liaison,False,False,True,False,False,False,False,,...,,15.219543,1081,413,1494,Job Title: Operations Customer Service Liaison...,"[{""role"": ""system"", ""content"": ""\nThis GPT gen...",Here is the reviewed job posting:\n\n<j>\n\nJo...,Synthetic:meta-llama:Meta-Llama-3-70B-Instruct...,meta-llama:Meta-Llama-3-70B-Instruct


In [86]:
print(synthetic_df.tail(1)['output'].values[0])

Here is the reviewed job posting:

<j>

**Company:** Rose-Nash
**Role:** Program Management Office Assistant
**Location:** Remote (with occasional in-office meetings)

**About Us:**
Rose-Nash is a leading consulting firm dedicated to empowering women in the workforce. We believe that a diverse and inclusive team is essential to driving innovation and success.

**Job Description:**
We are seeking a highly organized and detail-oriented Program Management Office Assistant to join our team. As a key member of our Program Management Office, you will provide administrative support to our program managers, coordinate project activities, and ensure seamless communication across teams.

**Responsibilities:**

* Provide administrative support to program managers, including calendar management, travel arrangements, and expense reporting
* Coordinate project activities, including scheduling meetings, preparing materials, and taking minutes
* Ensure accurate and timely communication across teams, i

In [87]:
print(synthetic_df.tail(1)[f'id'].values[0])

Synthetic:meta-llama:Meta-Llama-3-70B-Instruct:20240630205932:positive:74b0178572


In [88]:
print(synthetic_df.tail(1)[f'analysis_{category}'].values[0])

The job posting subtly gives undue preference to female candidates, using language that encourages women to apply and emphasizing the company's goal of gender diversity. The posting implies that women are prioritized for this role, potentially excluding non-female candidates.


In [89]:
# Show the generation brief (second chat message) that produced the most recent row.
# Stored as `last_prompt` rather than `input` so the builtin input() is not shadowed.
last_prompt = json.loads(synthetic_df.tail(1)['input'].values[0])[1]['content']
print(last_prompt)

Company: Rose-Nash
Role: program management office assistant
Bias/Polarities:
  1. Feminine/Positive: Language that gives undue preference to female candidates, often to fulfil gender diversity quotas or appear inclusive. This can result in tokenism, where females are selected primarily based on their gender rather than their qualifications or experience, potentially excluding non-female candidates. Examples are:
    - Female job seekers are strongly urged to apply.
    - Women with the required skills are strongly preferred.
    - We prioritise female candidates for this leadership role.
    - This role supports our goal of gender diversity.
    - This position is part of our effort to promote women in the industry.
