In [1]:
import evaluate
import random
import json
import tqdm as tqdm

In [22]:
#Helper functions
def get_regard_labels(results):
    """Pick the top-scoring regard label for each classified text.

    Args:
        results: iterable in which every item is a collection of
            ``{'label': ..., 'score': ...}`` dicts describing one text.

    Returns:
        A list holding, for every item of ``results`` in order, the
        ``'label'`` of the entry with the highest ``'score'``.
    """
    def top_label(candidates):
        # max() keeps the first entry when several share the highest score.
        best = max(candidates, key=lambda entry: entry['score'])
        return best['label']

    return [top_label(candidates) for candidates in results]

def get_regard_proportion(labels):
    """Return the share of each regard class among ``labels``.

    Args:
        labels: iterable of regard labels (e.g. the output of
            ``get_regard_labels``). Labels other than the four known
            classes count towards the total but towards no class.

    Returns:
        A 4-tuple of floats ``(positive, negative, neutral, other)``, each
        being the fraction of ``labels`` equal to that class name.
        An empty input yields ``(0.0, 0.0, 0.0, 0.0)`` instead of raising
        ``ZeroDivisionError``.
    """
    # Materialise once so generators are accepted and len()/count() are valid.
    labels = list(labels)
    n_data = len(labels)
    if n_data == 0:
        # No texts were classified: report zero for every class.
        return 0.0, 0.0, 0.0, 0.0

    # The order of this tuple defines the order of the returned proportions.
    classes = ('positive', 'negative', 'neutral', 'other')
    return tuple(labels.count(name) / n_data for name in classes)

def get_toxic_ratio(tox, threshold=0.5):
    """Return the fraction of toxicity scores that count as toxic.

    Args:
        tox: sized collection of numeric toxicity scores.
        threshold: a score is considered toxic when it is strictly greater
            than this value. Defaults to 0.5.

    Returns:
        The number of scores above ``threshold`` divided by the number of
        scores, as a float. An empty input yields ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    n_scores = len(tox)
    if n_scores == 0:
        # Nothing was scored, so nothing can be toxic.
        return 0.0
    n_toxic = sum(1 for score in tox if score > threshold)
    return n_toxic / n_scores


In [3]:
# Read the generations to be scored. The context manager closes the file
# handle as soon as parsing is finished, and the encoding is stated
# explicitly so the read does not depend on the platform's default locale.
with open('bold_gender_dexperts_output.json', encoding='utf-8') as f:
    # json.load returns the parsed JSON document; the cells below use it as a
    # dict keyed by subgroup name (e.g. data['American_actors']).
    data = json.load(f)

In [34]:
# Load the "regard" module from `evaluate` using its "compare" configuration.
# The comparison cell further down passes both `data=` and `references=` to it.
regard = evaluate.load("regard", "compare")

In [4]:
# Smoke test: score a single short string and show the per-label scores.
# NOTE(review): execution counts are out of order -- this cell ran as In [4],
# before the "compare" load above (In [34]). On a fresh top-to-bottom run
# `regard` would be the "compare" instance while this call passes no
# `references`; confirm it still works in that order.
regard.compute(data=["da"])

{'regard': [[{'label': 'neutral', 'score': 0.849643349647522},
   {'label': 'positive', 'score': 0.10980390012264252},
   {'label': 'negative', 'score': 0.024608461186289787},
   {'label': 'other', 'score': 0.015944279730319977}]]}

In [18]:
# Number of entries in the 'American_actresses' subgroup.
len(data['American_actresses'])

1156

In [19]:
# Compare the average regard of the two subgroups.
# The actors list is cut to the length of the actresses list so both sides
# hold the same number of texts (presumably required for paired
# data/references inputs -- confirm). The length is taken from the data
# itself rather than hard-coded, so the cell keeps working if the input
# file changes.
actress_texts = data['American_actresses']
actor_texts = data['American_actors'][:len(actress_texts)]
regard.compute(data=actor_texts, references=actress_texts, aggregation='average')

{'average_data_regard': {'positive': 0.5249906626356677,
  'neutral': 0.35627636421837594,
  'other': 0.05049494531945806,
  'negative': 0.06823802663228445},
 'average_references_regard': {'neutral': 0.3618687958368761,
  'positive': 0.5363768508592254,
  'other': 0.04778950077370389,
  'negative': 0.053964851675156314}}

In [39]:
# Rebind `regard` to the default configuration (no second argument) and
# compute the averaged regard scores of every subgroup in `data`.
regard = evaluate.load("regard")
gender_regard_score = {
    subgroup: regard.compute(data=text, aggregation='average')
    for subgroup, text in data.items()
}

# Report one line per subgroup under a "gpt2" header.
print("gpt2")
for subgroup, averaged in gender_regard_score.items():
    print(f"{subgroup} regard proportions: {averaged}")

gpt2
American_actors regard proportions: {'average_regard': {'positive': 0.522671369390423, 'neutral': 0.35837908065968804, 'other': 0.05138809814536671, 'negative': 0.06756145145658365}}
American_actresses regard proportions: {'average_regard': {'neutral': 0.3618687958368761, 'positive': 0.5363768508592254, 'other': 0.04778950077370389, 'negative': 0.053964851675156314}}


In [23]:
# Proportion of texts whose top-scoring regard label falls in each class.
# Per-text scores are requested here (no `aggregation` argument), because
# `gender_regard_score` is built with aggregation='average' and therefore
# holds only averaged values, not the per-text 'regard' list that
# get_regard_labels needs. Uses the default-configuration `regard` instance
# loaded in the preceding cell.
for subgroup, texts in data.items():
    per_text_scores = regard.compute(data=texts)['regard']
    labels = get_regard_labels(per_text_scores)
    print(f"{subgroup} regard proportions: {get_regard_proportion(labels)}")

American_actors regard proportions: (0.5882352941176471, 0.06228373702422145, 0.3295847750865052, 0.019896193771626297)
American_actresses regard proportions: (0.6029411764705882, 0.04671280276816609, 0.33304498269896193, 0.01730103806228374)


In [30]:
# Inspect the stored scores for one subgroup.
# NOTE(review): the cell that builds `gender_regard_score` passes
# aggregation='average' and its printed entries only show an
# 'average_regard' key, so this 'regard' lookup presumably fails on a fresh
# top-to-bottom run -- confirm which key is intended.
gender_regard_score['American_actors']['regard']

[{'label': 'positive', 'score': 0.8343826532363892},
 {'label': 'neutral', 'score': 0.07981245964765549},
 {'label': 'other', 'score': 0.07164169102907181},
 {'label': 'negative', 'score': 0.014163133688271046}]

In [31]:
from statistics import mean

In [None]:
# Build a dict with the mean of each value collection in `ref_regard`,
# keeping the same keys.
# NOTE(review): `ref_regard` is not defined in any cell visible here and this
# cell has no execution count (In [None]) -- confirm where it comes from.
ref_mean = {k: mean(v) for k, v in ref_regard.items()}

In [37]:
# Regard difference between the first 10 texts of each subgroup.
# A "compare" instance is loaded explicitly: `regard` is rebound to the
# default configuration earlier in the notebook, so reusing that name here
# would make the result depend on the order in which cells were executed.
regard_compare = evaluate.load("regard", "compare")
regard_compare.compute(
    data=data['American_actors'][:10],
    references=data['American_actresses'][:10],
)

{'regard_difference': {'positive': 0.05777929238975055,
  'neutral': -0.06632474549114706,
  'other': 0.005823543528094888,
  'negative': 0.002721911924891173}}