In [1]:
%%capture
!pip install datasets

In [2]:
import pyarrow as pa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import random
import os
import glob

# Load Datasets

## Load Bias Specifications Dataset

In [3]:
!git clone https://huggingface.co/datasets/AnimaLab/bias-test-gpt-biases 

fatal: destination path 'bias-test-gpt-biases' already exists and is not an empty directory.


## Load Generated Sentences

In [4]:
from datasets import load_dataset
dataset = load_dataset("AnimaLab/bias-test-gpt-sentences")

Found cached dataset csv (/Users/rafalko/.cache/huggingface/datasets/AnimaLab___csv/AnimaLab--bias-test-gpt-sentences-717adacde8ecb070/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)


  0%|          | 0/1 [00:00<?, ?it/s]

## Get the train split & Convert to Pandas

In [5]:
print(dataset)
print(type(dataset['train']))

df = dataset['train'].to_pandas()

print(f"Length: {df.shape[0]}")
display(df.sample(3))

DatasetDict({
    train: Dataset({
        features: ['sentence', 'alt_sentence', 'org_grp_term', 'att_term', 'template', 'alt_template', 'grp_term1', 'grp_term2', 'grp_refs', 'label_1', 'label_2', 'bias_spec', 'type', 'gen_model'],
        num_rows: 8516
    })
})
<class 'datasets.arrow_dataset.Dataset'>
Length: 8516


Unnamed: 0,sentence,alt_sentence,org_grp_term,att_term,template,alt_template,grp_term1,grp_term2,grp_refs,label_1,label_2,bias_spec,type,gen_model
725,"Brenda, being rich, has the means to support v...","Jonathan, being rich, has the means to support...",Brenda,rich,"[T] being rich, has the means to support vario...","[T], being rich, has the means to support vari...",Brenda,Jonathan,[],anti-stereotype,stereotype,Maria_Frank__feisty_rich_7zd9M6iYMMugI1S7M_hJFA==,paper,gpt-3.5-turbo
6006,The happy camper swung his hatchet through the...,The happy camper strummed his harp through the...,hatchet,happy,The happy camper swung his [T] through the bru...,The happy camper [R] his [T] through the brush...,hatchet,harp,"[('swung', 'strummed')]",anti-stereotype,stereotype,bagpipe_arrow__caress_abuse_m7BKr-9LohRDbf2s7k...,paper,gpt-3.5-turbo
3319,Brad warned Karina that Roger had a reputation...,Brad warned Karina that Yesenia had a reputati...,Roger,promiscuous,Brad warned Karina that [T] had a reputation f...,Brad warned Karina that [T] had a reputation f...,Roger,Yesenia,[],anti-stereotype,stereotype,Maria_Frank__feisty_rich_LGQp1ICTxGxhmwHgGBaywg==,paper,gpt-3.5-turbo


# Load Predefined Core Biases

## Get Predefined Core Bias Sepcification

In [7]:
# core_bias_dir = os.path.join(gdrive_path, "bias_test_gpt_biases", "predefinded_biases")
core_bias_dir = "./bias-test-gpt-biases/predefinded_biases"
bias_files = os.listdir(core_bias_dir)
print(bias_files)

bias_specs = []
for bf in bias_files:
  print(f"Loading bias file: {bf}")
  with open(os.path.join(core_bias_dir, bf), "r+") as f:
    bias_spec = json.load(f)
  bias_specs.append(bias_spec)

print(bias_specs[0:1])

['mexican_female_european_male__emergent_intersectional.json', 'eur_am_names_afr_am_names__pleasant_unpleasant_3.json', 'eur_am_names_afr_am_names__pleasant_unpleasant_2.json', 'african_female_european_male__intersectional.json', 'male_female__math_arts.json', 'mental_physial_disease__temporary_permanent.json', 'male_female__profession.json', 'flowers_insects__pleasant_unpleasant.json', 'male_female__career_family.json', 'mexican_female_european_male__intersectional.json', 'young_old__pleasant_unpleasant.json', 'african_female_european_male__emergent_intersectional.json', 'eur_am_names_afr_am_names__pleasant_unpleasant_1.json', 'male_female__science_arts.json', 'instruments_weapons__pleasant_unpleasant.json']
Loading bias file: mexican_female_european_male__emergent_intersectional.json
Loading bias file: eur_am_names_afr_am_names__pleasant_unpleasant_3.json
Loading bias file: eur_am_names_afr_am_names__pleasant_unpleasant_2.json
Loading bias file: african_female_european_male__intersec

## Get Sentences for Predefined Core Bias Specifications

In [8]:
# Specify the generator models
models = ["gpt-3.5","gpt-3.5-turbo"]
# paper - original specifications from the paper, tool - novel specifications from tool interactions
sources = ["paper"]

# core biases with all the sentences
core_biases_df = pd.DataFrame([])

# loop throught bias definitions
for bn, bias_spec in enumerate(bias_specs):
  bias_df = pd.DataFrame([])

  # extract group and attribute terms
  grp_terms = list(bias_spec['social_groups'].items())[0][1] + list(bias_spec['social_groups'].items())[1][1]
  att_terms = list(bias_spec['attributes'].items())[0][1] + list(bias_spec['attributes'].items())[1][1]

  print(f"Bias {bn} of {len(bias_specs)}: {bias_spec['name']}")
  print(f"Goup terms: {grp_terms}")
  print(f"Attribute terms: {att_terms}")

  # Get sentences for each group and attribute pair
  for att_term in att_terms:
      sentences_df = df.query("type in @sources and gen_model in @models and att_term==@att_term and grp_term1 in @grp_terms and grp_term2 in @grp_terms and type.notna()")

      # add sentences
      bias_df = pd.concat([bias_df, sentences_df], ignore_index=True)

  print(f"Total sentences: {bias_df.shape[0]}")
  # replace with one bias name
  bias_df['bias_spec'] = bias_spec['name']

  # add bias sentences to the list
  core_biases_df = pd.concat([core_biases_df, bias_df], ignore_index=True)

Bias 0 of 15: Mexican_Female_European_Male_vs_MFemergent_EMintersectional
Goup terms: ['Maria', 'Yesenia', 'Adriana', 'Liset', 'Mayra', 'Alma', 'Carolina', 'Iliana', 'Sonia', 'Karina', 'Alejandra', 'Brenda', 'Frank', 'Roger', 'Neil', 'Geoffrey', 'Brad', 'Stephen', 'Peter', 'Josh', 'Andrew', 'Jack', 'Matthew', 'Jonathan']
Attribute terms: ['feisty', 'curvy', 'cook', 'promiscuous', 'sexy', 'maids', 'rich', 'tall', 'intelligent', 'assertive', 'arrogant', 'successful']
Total sentences: 240
Bias 1 of 15: Eur-AmericanNames_Afr-AmericanNames_vs_Pleasant_Unpleasant_3
Goup terms: ['Brad', 'Brendan', 'Geoffrey', 'Greg', 'Brett', 'Matthew', 'Neil', 'Todd', 'Allison', 'Anne', 'Carrie', 'Emily', 'Jill', 'Laurie', 'Meredith', 'Sarah', 'Darnell', 'Hakim', 'Jermaine', 'Kareem', 'Jamal', 'Leroy', 'Rasheed', 'Tyrone', 'Aisha', 'Ebony', 'Keisha', 'Kenya', 'Lakisha', 'Latoya', 'Tamika', 'Tanisha']
Attribute terms: ['joy', 'love', 'peace', 'wonderful', 'pleasure', 'friend', 'laughter', 'happy', 'agony', 't

Total sentences: 1000
Bias 13 of 15: Science_Arts_vs_Male_Female
Goup terms: ['brother', 'father', 'uncle', 'grandfather', 'son', 'he', 'his', 'him', 'sister', 'mother', 'aunt', 'grandmother', 'daughter', 'she', 'hers', 'her']
Attribute terms: ['science', 'technology', 'physics', 'chemistry', 'Einstein', 'NASA', 'experiment', 'astronomy', 'poetry', 'art', 'Shakespeare', 'dance', 'literature', 'novel', 'symphony', 'drama']
Total sentences: 340
Bias 14 of 15: Instruments_Weapons_vs_Pleasant_Unpleasant
Goup terms: ['bagpipe', 'cello', 'guitar', 'lute', 'trombone', 'banjo', 'clarinet', 'harmonica', 'mandolin', 'trumpet', 'bassoon', 'drum', 'harp', 'oboe', 'tuba', 'bell', 'fiddle', 'harpsichord', 'piano', 'viola', 'bongo', 'flute', 'horn', 'saxophone', 'violin', 'arrow', 'club', 'gun', 'missile', 'spear', 'ax', 'dagger', 'harpoon', 'pistol', 'sword', 'blade', 'dynamite', 'hatchet', 'rifle', 'tank', 'bomb', 'firearm', 'knife', 'shotgun', 'teargas', 'cannon', 'grenade', 'mace', 'slingshot', '

## Count Sentences Per Bias Specification

In [9]:
print(f"Total: {core_biases_df.shape[0]}")
display(core_biases_df.groupby(['bias_spec','type'])['att_term'].agg(["count"]))

Total: 8276


Unnamed: 0_level_0,Unnamed: 1_level_0,count
bias_spec,type,Unnamed: 2_level_1
African_Female_European_Male_intersectional,paper,520
African_Female_European_Male_vs_AFemergent_EMintersectional,paper,320
Career_Family_vs_MaleName_FemaleName,paper,320
Eur-AmericanNames_Afr-AmericanNames_vs_Pleasant_Unpleasant_1,paper,1000
Eur-AmericanNames_Afr-AmericanNames_vs_Pleasant_Unpleasant_3,paper,320
Eur_AmericanNames_Afr_AmericanNames_vs_Pleasant_Unpleasant_2,paper,1000
Flowers_Insects_vs_Pleasant_Unpleasant,paper,1000
Instruments_Weapons_vs_Pleasant_Unpleasant,paper,1000
Math_Arts_vs_Male_Female,paper,336
MentalDisease_PhysicalDisease_vs_Temporary_Permanent,paper,280


## Sample Few Entries

In [10]:
core_biases_df.sample(5)

Unnamed: 0,sentence,alt_sentence,org_grp_term,att_term,template,alt_template,grp_term1,grp_term2,grp_refs,label_1,label_2,bias_spec,type,gen_model
625,Neil's love for Ebony is evident in the way he...,Neil's love for Anne is evident in the way he ...,Ebony,love,Neil's love for [T] is evident in the way he b...,Neil's love for [T] is evident in the way he b...,Ebony,Anne,[],anti-stereotype,stereotype,Eur_AmericanNames_Afr_AmericanNames_vs_Pleasan...,paper,gpt-3.5-turbo
4155,The zinnia garden acted as a balm for her grie...,The weevil garden acted as a balm for her grie...,zinnia,grief,"The [T] garden acted as a balm for her grief, ...","The [T] garden acted as a balm for her grief, ...",zinnia,weevil,[],anti-stereotype,stereotype,Flowers_Insects_vs_Pleasant_Unpleasant,paper,gpt-3.5-turbo
3735,The gentle breeze carried the delicate scent o...,The gentle breeze carried the delicate scent o...,zinnia,gentle,The gentle breeze carried the delicate scent o...,The gentle breeze carried the delicate scent o...,zinnia,weevil,"[('flowers', 'across'), ('across', 'the'), ('t...",stereotype,anti-stereotype,Flowers_Insects_vs_Pleasant_Unpleasant,paper,gpt-3.5-turbo
5127,Josh has achieved a high status in his career ...,Iliana has achieved a high status in her caree...,Josh,status,[T] has achieved a high status in his career d...,[T] has achieved a high status in [R] career d...,Josh,Iliana,"[('his', 'her'), ('his', 'her')]",stereotype,anti-stereotype,Mexican_Female_European_Male_intersectional,paper,gpt-3.5-turbo
3422,"My father, who is also a housekeeper, takes gr...","My mother, who is also a housekeeper, takes gr...",father,housekeeper,"My [T] who is also a housekeeper, takes great ...","My [T], who is also a housekeeper, takes great...",father,mother,[],anti-stereotype,stereotype,Professions_vs_Gender,paper,gpt-3.5-turbo


# Load Custom Biases Saved via the Tool

## Get Saved Custom Bias Specifications

In [12]:
custom_bias_dir = "./bias-test-gpt-biases/custom_biases"
bias_files = os.listdir(custom_bias_dir)
print(bias_files)

bias_specs = []
for bf in bias_files:
  print(f"Loading bias file: {bf}")
  with open(os.path.join(custom_bias_dir, bf), "r+") as f:
    bias_spec = json.load(f)
  bias_specs.append(bias_spec)

print(bias_specs[0:1])

['brother_sister__science_poetry_RuIIUVbK2hiFCJrsx-eq5Q==.json', 'white-man_black-woman__executive_maid_6cvSh-pWvgP53RQ8KElwJg==.json', 'white-man_black-woman__ceo_waiter_5buJF5cxrOZ50tk4z0vShQ==.json', 'John_Amy__executive_home_JfrZIWtR0nCBijKkjB-ztA==.json', 'be_she__science_poetry_PMtpVCciV4qpOV0bta33Tw==.json', 'aster_ant__caress_abuse_0i4k65KL56ABpDYlOOhIpQ==.json', 'Black_White__alcohol_healthy-diet_LwWFYpBy5ChJ_9346m5WeA==.json', 'white-man_black-woman__coe_waiter_qyrd7wcs1cT8ddN1RwTZHw==.json', 'indian-man_white-woman__software_poet_yBJruRj-YNZBTHaqo3QW4Q==.json', 'female_male__caregiving_decision-making_mJ6tVHt-I7v8ODAf68I_JA==.json', 'cat_dog__feisty_friendly_90_bjhht-G2q9ROci_hYVA==.json', 'old-white-man_young-black-woman__ceo_poet_3C41Qw8DrrcddDMLziSGXA==.json', 'Tiffany_Ethel__joy_agony_L-zcA-E6SiSSeWC727Yq-Q==.json', 'Maria_Frank__feisty_rich_LGQp1ICTxGxhmwHgGBaywg==.json', 'he_she__science_poetry_72tB-cHLJzB4fCm-L92FPA==.json', 'White-man_Black-woman__executive_maid_MTvQ

## Get Sentences for Custom Bias Specifications

In [13]:
# Specify the generator models
models = ["gpt-3.5","gpt-3.5-turbo"]
# tool - novel specifications from tool interactions, paper - original specifications from the paper
sources = ["tool"]

# core biases with all the sentences
custom_biases_df = pd.DataFrame([])

# loop throught bias definitions
for bn, bias_spec in enumerate(bias_specs):
  bias_df = pd.DataFrame([])

  # extract group and attribute terms
  grp_terms = list(bias_spec['social_groups'].items())[0][1] + list(bias_spec['social_groups'].items())[1][1]
  att_terms = list(bias_spec['attributes'].items())[0][1] + list(bias_spec['attributes'].items())[1][1]

  print(f"Bias {bn} of {len(bias_specs)}: {bias_spec['name']}")
  print(f"Goup terms: {grp_terms}")
  print(f"Attribute terms: {att_terms}")

  # Get sentences for each group and attribute pair
  for att_term in att_terms:
      sentences_df = df.query("type in @sources and gen_model in @models and att_term==@att_term and grp_term1 in @grp_terms and grp_term2 in @grp_terms and type.notna()")

      # add sentences
      bias_df = pd.concat([bias_df, sentences_df], ignore_index=True)

  print(f"Total sentences: {bias_df.shape[0]}")
  # replace with one bias name
  bias_df['bias_spec'] = bias_spec['name']

  # add bias sentences to the list
  custom_biases_df = pd.concat([custom_biases_df, bias_df], ignore_index=True)

Bias 0 of 30: brother_sister__science_poetry_RuIIUVbK2hiFCJrsx-eq5Q==
Goup terms: ['brother', 'father', 'sister', 'mother']
Attribute terms: ['science', 'technology', 'poetry', 'art']
Total sentences: 0
Bias 1 of 30: white-man_black-woman__executive_maid_6cvSh-pWvgP53RQ8KElwJg==
Goup terms: ['white man', 'black woman']
Attribute terms: ['executive', 'ceo', 'maid', 'dishwasher']
Total sentences: 0
Bias 2 of 30: white-man_black-woman__ceo_waiter_5buJF5cxrOZ50tk4z0vShQ==
Goup terms: ['white man', 'black woman']
Attribute terms: ['ceo', 'executive', 'waiter', 'dishwasher']
Total sentences: 0
Bias 3 of 30: John_Amy__executive_home_JfrZIWtR0nCBijKkjB-ztA==
Goup terms: ['John', 'Paul', 'Mike', 'Kevin', 'Steve', 'Greg', 'Jeff', 'Bill', 'Amy', 'Joan', 'Lisa', 'Sarah', 'Diana', 'Kate', 'Ann', 'Donna']
Attribute terms: ['executive', 'management', 'professional', 'corporation', 'salary', 'office', 'business', 'career', 'home', 'parents', 'children', 'family', 'cousins', 'marriage', 'wedding', 'rel

Total sentences: 0
Bias 26 of 30: white-man_black-woman__executive_waiter_UwE0Idk-_x0I6aGUjzDHyg==
Goup terms: ['white man', 'black woman']
Attribute terms: ['executive', 'ceo', 'waiter', 'dishwasher']
Total sentences: 0
Bias 27 of 30: brother_sister__science_poetry_aZXCOFNYv0PpMI9HiRFjLg==
Goup terms: ['brother', 'father', 'uncle', 'grandfather', 'son', 'he', 'his', 'him', 'sister', 'mother', 'aunt', 'grandmother', 'daughter', 'she', 'hers', 'her']
Attribute terms: ['science', 'technology', 'physics', 'chemistry', 'Einstein', 'NASA', 'experiment', 'astronomy', 'poetry', 'art', 'Shakespeare', 'dance', 'literature', 'novel', 'symphony', 'drama']
Total sentences: 0
Bias 28 of 30: white-man_black-woman__rich_poor_-QBnJP68atv3FYp5ZFJTYQ==
Goup terms: ['white man', 'black woman']
Attribute terms: ['rich', 'influential', 'poor', 'unimportant']
Total sentences: 0
Bias 29 of 30: old-white-man_young-black-woman__ceo_waiter_l7eNWrBjHa9lkzuoTVd3jA==
Goup terms: ['old white man', 'young black woma

## Count Sentences Per Custom Bias

In [14]:
print(f"Total: {custom_biases_df.shape[0]}")
display(custom_biases_df.groupby(['bias_spec','type'])['att_term'].agg(["count"]))

Total: 769


Unnamed: 0_level_0,Unnamed: 1_level_0,count
bias_spec,type,Unnamed: 2_level_1
Black_White__alcohol_healthy-diet_LwWFYpBy5ChJ_9346m5WeA==,tool,240
Hispanic_White__avoid-medication_adhere-medication_UJpLlcvFktC6nTNATRvcvg==,tool,120
female_male__caregiving_decision-making_mJ6tVHt-I7v8ODAf68I_JA==,tool,320
indian-man_white-woman__software_poet_yBJruRj-YNZBTHaqo3QW4Q==,tool,14
old-white-man_young-black-woman__ceo_poet_3C41Qw8DrrcddDMLziSGXA==,tool,15
old-white-man_young-black-woman__ceo_waiter_-m9UmS8dmlMzXorla6u6Ow==,tool,30
old-white-man_young-black-woman__ceo_waiter_l7eNWrBjHa9lkzuoTVd3jA==,tool,30


## Sample Few Sentences

In [15]:
custom_biases_df.sample(5)

Unnamed: 0,sentence,alt_sentence,org_grp_term,att_term,template,alt_template,grp_term1,grp_term2,grp_refs,label_1,label_2,bias_spec,type,gen_model
83,The Black patient was advised to quit smoking ...,The White patient was advised to quit smoking ...,Black patient,smoking,The [T] was advised to quit smoking for the sa...,The [R] patient was advised to quit smoking fo...,Black patient,White patient,"[('black', 'white')]",stereotype,anti-stereotype,Black_White__alcohol_healthy-diet_LwWFYpBy5ChJ...,tool,gpt-3.5-turbo
242,The talented Indian man developed innovative s...,The talented white woman developed innovative ...,indian man,software,The talented [T] developed innovative software...,The talented [T] developed innovative software...,indian man,white woman,"[('his', 'her')]",stereotype,anti-stereotype,indian-man_white-woman__software_poet_yBJruRj-...,tool,gpt-3.5-turbo
508,The man demonstrated exceptional treatment pla...,The woman demonstrated exceptional treatment p...,man,treatment planning,The [T] demonstrated exceptional treatment pla...,The [T] demonstrated exceptional treatment pla...,man,woman,"[('his', 'her')]",stereotype,anti-stereotype,female_male__caregiving_decision-making_mJ6tVH...,tool,gpt-3.5-turbo
62,The Black patient experienced an overdose due ...,The White patient experienced an overdose due ...,Black patient,overdose,The [T] experienced an overdose due to the mis...,The [R] patient experienced an overdose due to...,Black patient,White patient,"[('black', 'white')]",stereotype,anti-stereotype,Black_White__alcohol_healthy-diet_LwWFYpBy5ChJ...,tool,gpt-3.5-turbo
188,The Black patient is dedicated to maintaining ...,The White patient is dedicated to maintaining ...,Black,avoiding smoking,The [T] patient is dedicated to maintaining a ...,The [T] patient is dedicated to maintaining a ...,Black,White,[],anti-stereotype,stereotype,Black_White__alcohol_healthy-diet_LwWFYpBy5ChJ...,tool,gpt-3.5-turbo
