Set Up

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import torch

# Confirm that the GPU is detected
assert torch.cuda.is_available()

# Get the GPU device name.
device_name = torch.cuda.get_device_name()
n_gpu = torch.cuda.device_count()
print(f"Found device: {device_name}, n_gpu: {n_gpu}")

In [None]:
from model_wrappers.gpt2_wrapper import GPT2Wrapper
wrapper = GPT2Wrapper(model_name = "gpt2-medium", use_cuda = True)

# Confirm Conditional Probabilities on GPU

In [None]:
prompt = (f"What color is the sky? Answer: blue\n" f"What color is grass? Answer:")
choices =("red", "blue", "green")

scores = wrapper.cond_log_prob(inputs=prompt, targets=choices)

print("\n")
print(f"prompt:\n{prompt}")
print(f"scores:")
for c, s in zip(choices, scores):
    print(f"  {c:>8}: {s:0.2f}")

# Gender Sensitivity

In [None]:
from bias_evaluation.gender_sensitivity_english.task import gender_sensitivity_test_English

In [None]:
task = gender_sensitivity_test_English(verbose = True)

In [None]:
result = task.evaluate_model(wrapper)

In [None]:
for score in task.score_data:
  print(score.preferred_score)
  print(score.score_dict.keys())

print(task.score_data[0].score_dict['gender_bias_score'])
print(task.score_data[0].score_dict['gender_bias'])

print(task.score_data[0].score_dict['gender_minority_bias_score'])
print(task.score_data[0].score_dict['gender_bias'])

In [None]:
import pickle
file = open("gpt2-gender-sensitivity-config-1", 'wb')

pickle.dump(task.score_data, file)

In [None]:
test_file = open("gpt2-gender-sensitivity-config-1", "rb")
test = pickle.load(test_file)
test

# UnQover

In [13]:
from bias_evaluation.unqover.task import UnQover
unqover_task = UnQover()
results = unqover_task.evaluate_model(wrapper, max_examples=100)

Running task gender


  logits = torch.tensor(logits, dtype=torch.float32)


Running task nationality
Running task ethnicity
Running task religion


In [14]:
results

[ScoreData(score_dict={'fairness': 0.9968668694042665, 'consistency': 0.020589326423762246, 'average_answer_probability': 0.5000000000500001, 'positional_error': 0.856510187045549, 'attributive_error': 0.8565101870455492, 'bias_intensity': 0.0031331305957334604, 'count-based_bias': 1.0}, preferred_score='fairness', number_of_shots=-1, low_score=0, high_score=1, subtask_description='Measurements of biases for the gender data'),
 ScoreData(score_dict={'fairness': 0.9652515195998621, 'consistency': 0.40638113647020224, 'average_answer_probability': 0.5000000000499999, 'positional_error': 0.07812599433539111, 'attributive_error': 0.5591793087423147, 'bias_intensity': 0.03474848040013795, 'count-based_bias': 1.0}, preferred_score='fairness', number_of_shots=-1, low_score=0, high_score=1, subtask_description='Measurements of biases for the nationality data'),
 ScoreData(score_dict={'fairness': 0.9398463943796996, 'consistency': 0.4109940973480587, 'average_answer_probability': 0.500000000050

# BBQ

In [24]:
from bias_evaluation.bbq_lite.task import BBQLite
bbq_lite = BBQLite()
results = bbq_lite.evaluate_model(model=wrapper, max_examples=1000)

  logits = torch.tensor(logits, dtype=torch.float32)


In [25]:
results

[ScoreData(score_dict={'accuracy': 0.21649484536082475}, preferred_score='accuracy', number_of_shots=0, low_score=0.0, high_score=1.0, subtask_description='Age score'),
 ScoreData(score_dict={'accuracy': 0.2054794520547945}, preferred_score='accuracy', number_of_shots=0, low_score=0.0, high_score=1.0, subtask_description='Disability_status score'),
 ScoreData(score_dict={'accuracy': 0.2887700534759358}, preferred_score='accuracy', number_of_shots=0, low_score=0.0, high_score=1.0, subtask_description='Gender_identity score'),
 ScoreData(score_dict={'accuracy': 0.2627118644067797}, preferred_score='accuracy', number_of_shots=0, low_score=0.0, high_score=1.0, subtask_description='Nationality score'),
 ScoreData(score_dict={'accuracy': 0.28125}, preferred_score='accuracy', number_of_shots=0, low_score=0.0, high_score=1.0, subtask_description='Physical_appearance score'),
 ScoreData(score_dict={'accuracy': 0.21568627450980393}, preferred_score='accuracy', number_of_shots=0, low_score=0.0, h

# Diverse Social Bias

In [27]:
from bias_evaluation.diverse_social_bias.task import FairnessTask
fairness_task = FairnessTask()
results = fairness_task.evaluate_model(model=wrapper, max_examples=1000)

ValueError: ignored