In [None]:
import pickle
from embedding import BertHuggingface
import math
from geometrical_bias import SAME, DirectBias, WEAT, RIPA, MAC, GeneralizedWEAT
import numpy as np
from lipstick import BiasGroupTest, NeighborTest, ClusterTest, ClassificationTest

## Usage example

This is a minimalistic example of how to use the implemented bias scores. This includes reporting individual words' biases,
biases for one set of neutral words (SAME, MAC, Direct Bias, RIPA) or several sets of neutral words representing different stereotypes (WEAT, generalized WEAT).

In [None]:
# Neutral target words: occupations whose bias is measured.
jobs = ['nurse', 'doctor', 'teacher', 'police officer', 'firefighter', 'secretary', 'programmer', 'engineer', 'caretaker', 'salesclerk']

# Occupations stereotypically associated with the male / female attribute groups.
jobs_m = ['doctor', 'police officer', 'firefighter', 'programmer', 'engineer', 'surgeon', 'rapper', 'businessman', 'pastor']
# NOTE(review): 'teacher' appears twice in this list, so it is counted twice in
# group statistics -- confirm whether that is intended.
jobs_f = ['nurse', 'teacher', 'secretary', 'caretaker', 'salesclerk', 'model', 'paralegal', 'dietitian', 'teacher']

# Occupations stereotypically associated with the three race attribute groups.
jobs_black = ['taxi driver', 'basketball player']
jobs_white = ['police officer', 'lawyer']
jobs_asian = ['programmer', 'mathematician']

# Attribute word groups that define the bias spaces; the order of the groups
# fixes the group indices used by the pairwise functions later on.
gender_attributes = [['he', 'man', 'his', 'boy', 'son', 'himself', 'father'], ['she', 'woman', 'her', 'girl', 'daughter', 'herself', 'mother']]
race_attributes = [['black', 'african'], ['white', 'caucasian'], ['asian', 'chinese']]

In [None]:
# Embedding model used for every word list in this notebook.
# NOTE(review): the meaning of the positional argument 2 is not visible here --
# confirm against embedding.BertHuggingface.
bert = BertHuggingface(2)

# Embed the target word lists.
job_emb, job_m_emb, job_f_emb = (
    bert.embed(words) for words in (jobs, jobs_m, jobs_f)
)
jobs_black_emb, jobs_white_emb, jobs_asian_emb = (
    bert.embed(words) for words in (jobs_black, jobs_white, jobs_asian)
)

# Embed the attribute groups; one embedding result per attribute group.
gender_attr = list(map(bert.embed, gender_attributes))
race_attr = list(map(bert.embed, race_attributes))

## Defining the bias space

Each geometrical bias score implements a `define_bias_space` method that takes an attribute set. The number of supported attribute groups depends on the score.
For the Direct Bias and RIPA, one can set the number of bias dimensions `k` and the parameter `c`, which determines the strictness of bias measurements (see the paper/implementation).

In [None]:
# Generalized WEAT with its bias space defined from the two gender attribute groups.
gweat = GeneralizedWEAT()
gweat.define_bias_space(gender_attr)

# Separate generalized WEAT instance for the three race attribute groups.
gweat2 = GeneralizedWEAT()
gweat2.define_bias_space(race_attr)

In [None]:
# MAC score with its bias space defined from the gender attribute groups.
mac = MAC()
mac.define_bias_space(gender_attr)

In [None]:
# WEAT score with its bias space defined from the two gender attribute groups.
weat = WEAT()
weat.define_bias_space(gender_attr)

In [None]:
# SAME score with its bias space defined from the gender attribute groups.
# NOTE: the SAME section further down calls define_bias_space on this same
# instance again, with the race attributes.
same = SAME()
same.define_bias_space(gender_attr)

In [None]:
# Direct Bias with k=1 bias dimension and c=1 (c sets the strictness of the
# bias measurement), defined on the gender attribute groups.
db1 = DirectBias(k=1,c=1)
db1.define_bias_space(gender_attr)

In [None]:
# Direct Bias with k=7 bias dimensions and c=1 (c sets the strictness of the
# bias measurement), defined on the gender attribute groups.
db7 = DirectBias(k=7,c=1)
db7.define_bias_space(gender_attr)

In [None]:
# RIPA with k=1 bias dimension and c=1 (c sets the strictness of the bias
# measurement), defined on the gender attribute groups.
ripa1 = RIPA(k=1,c=1)
ripa1.define_bias_space(gender_attr)

In [None]:
# RIPA with k=7 bias dimensions and c=1 (c sets the strictness of the bias
# measurement), defined on the gender attribute groups.
ripa7 = RIPA(k=7,c=1)
ripa7.define_bias_space(gender_attr)

## Individual word biases

In [None]:
# Display names and the matching score objects, kept in the same order.
# WEAT stays last: the group-bias cell further down skips the final entry.
score_names = ['mac', 'db1', 'db7', 'ripa1', 'ripa7', 'same', 'weat']
scores = [mac, db1, db7, ripa1, ripa7, same, weat]

# Report every score's bias for each job embedding individually.
for name, score in zip(score_names, scores):
    print(name, ": ", [score.individual_bias(emb) for emb in job_emb])


## Group biases

Most bias scores implement a mean bias over one set of words. WEAT requires several groups of words, matching the number of attribute groups: WEAT itself supports only 2 groups, while the generalized WEAT can handle an arbitrary number.

In [None]:
# Most scores implement a mean bias over a single set of words.
# The last entry (WEAT) is skipped here and reported through group_bias below.
for name, score in zip(score_names[:-1], scores[:-1]):
    print(name, ": ", score.mean_individual_bias(job_emb))

# WEAT implements an effect size over two groups stereotypically associated
# with the gender attribute groups; the generalized WEAT takes one word group
# per attribute group (two for gender, three for race).
print("weat: ", weat.group_bias([job_m_emb, job_f_emb]))
print("gweat (gender): ", gweat.group_bias([job_m_emb, job_f_emb]))
print("gweat (race): ", gweat2.group_bias([jobs_black_emb, jobs_white_emb, jobs_asian_emb]))

## SAME

SAME implements additional functions to measure skew and stereotype, and for multiclass bias (n>2) it can return the pairwise signed biases used to obtain the overall bias.

Skew and Stereotype are only implemented pairwise, so the user has to specify which attributes to use (according to the order of attribute groups).

In [None]:
# Define the bias space once with the three race attribute groups. The group
# indices follow the order of race_attributes: 0 = black, 1 = white, 2 = asian.
# NOTE: this reuses the `same` instance that was set up with the gender
# attributes earlier in the notebook.
same.define_bias_space(race_attr)

# Skew and stereotype are pairwise, so each comparison names the two
# attribute-group indices to contrast.
pairwise_comparisons = [("Black vs. White", 0, 1), ("Asian vs. White", 2, 1)]
for label, group_a, group_b in pairwise_comparisons:
    print(label)
    print("Skew: ", same.skew_pairwise(job_emb, group_a, group_b))
    print("Stereotype: ", same.stereotype_pairwise(job_emb, group_a, group_b))
    print()

# job_emb[0] is the embedding of 'nurse', the first entry of jobs.
# In the per-pair vector the first entry is black/white, the second black/asian.
print("Multiclass bias vector for 'nurse': ", same.individual_bias_per_pair(job_emb[0]))
print("bias magnitude for 'nurse': ", same.individual_bias(job_emb[0]))

## Cluster, neighbor and classification test

In [None]:
neighborTest = NeighborTest(k=5)

# Setup used in the paper: rank the jobs by their individual WEAT bias, then
# take the five lowest-scoring jobs as one group (jobs_f_weat) and the five
# highest-scoring jobs as the other (jobs_m_weat).
weats = list(map(weat.individual_bias, job_emb))
sort_idx = np.argsort(weats)
jobs_f_weat = [job_emb[i] for i in sort_idx[:5]]
jobs_m_weat = [job_emb[i] for i in sort_idx[-5:]]
print("bias by neighbor (as in the paper):")
weat_groups = [jobs_f_weat, jobs_m_weat]
print(neighborTest.bias_by_neighbor(weat_groups))

# Alternative: hand-defined stereotypical groups instead of the WEAT ranking.
jobs_gender = [job_m_emb, job_f_emb]
print("bias by neighbor (without weat):")
print(neighborTest.bias_by_neighbor(jobs_gender))

# Alternative: define the bias space from the gender attribute embeddings and
# score words that have no predefined group.
neighborTest.define_bias_space(gender_attr)
print("bias by neighbor (without predefined groups): ")
biases = list(map(neighborTest.individual_bias, job_emb))
print(biases)

print("mean bias by neighbor (without predefined groups): ")
print(neighborTest.mean_individual_bias(job_emb))

In [None]:
clusterTest = ClusterTest()

# Setup used in the paper: split the jobs into two groups by their individual
# WEAT bias (five lowest-scoring vs. five highest-scoring jobs).
weats = list(map(weat.individual_bias, job_emb))
sort_idx = np.argsort(weats)
jobs_f_weat = [job_emb[i] for i in sort_idx[:5]]
jobs_m_weat = [job_emb[i] for i in sort_idx[-5:]]
print("cluster test accuracy (weat): ")
weat_groups = [jobs_f_weat, jobs_m_weat]
print(clusterTest.cluster_test(weat_groups))

# Alternative: hand-defined stereotypical groups instead of the WEAT ranking.
jobs_gender = [job_m_emb, job_f_emb]
print("cluster test accuracy (predefined groups): ")
print(clusterTest.cluster_test(jobs_gender))

# Disabled usage, kept for reference: define the bias space with a subset of
# known stereotypical words, then test words without known categories.
#clusterTest.define_bias_space(gender_attr)
#clusterTest.mean_individual_bias(job_emb)


In [None]:
# Classification test on the hand-defined gender job groups (jobs_gender is
# set in the preceding cells); prints the mean and standard deviation of the
# returned scores.
clfTest = ClassificationTest()
cv_scores = clfTest.classification_test(jobs_gender)
mean_score, std_score = np.mean(cv_scores), np.std(cv_scores)
print(mean_score, std_score)