<a href="https://colab.research.google.com/github/barryhpr/SyntheticDebiasing/blob/main/Adapter_GPT2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup: install the Python dependencies, fetch the project repository and
# make the clone the working directory for every later cell.
# transformers is pinned to 4.26.1.
!pip install transformers==4.26.1
# NOTE(review): adapter-transformers is installed unpinned (-U pulls the newest release);
# presumably it supplies the add_adapter/train_adapter/set_active_adapters methods used
# below and may replace the pinned transformers install — consider pinning it. TODO confirm.
!pip install -U adapter-transformers
!pip install datasets
# The clone is presumably what provides the helper modules (Score_evaluator,
# Evaluate_StereoSet, Evaluate_CrowSPairs, ...) and the JSON/CSV files read by later cells.
!git clone https://github.com/barryhpr/SyntheticDebiasing.git
# NOTE(review): absolute path — looks like it assumes Colab's /content working directory.
%cd /content/SyntheticDebiasing

Collecting transformers==4.26.1
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m49.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.26.1)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m106.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.15.0
    Uninstalling tokenizers-0.15.0:
      Successfully uninstalled tokenizers-0.15.0
  Attempting uninstall: transformers
    Found existing installation: transformers 4.35.2
    Uninstalling transformers-4.35.2:
      Successfully uninstalled transformers-4.35.2
Successfully installed tokenizers-0.13.3 transformers-4.26.1
Collecting adapter-transforme

# Baseline StereoSet score for the original (pretrained) GPT-2

In [None]:
from Score_evaluator import *
from Score_evaluator1_1 import *
from Evaluate_StereoSet import *
from Evaluate_CrowSPairs import *
from transformers import GPT2LMHeadModel, GPT2Tokenizer

import json

# Baseline run: score the unmodified pretrained GPT-2 checkpoint on StereoSet.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# "test.json" evaluates all categories; per the original note, a category-specific
# file (for example "test_gender.json") can be passed instead.
srunner = StereoSetRunner(
    intrasentence_model=model,
    tokenizer=tokenizer,
    input_file="test.json",
    is_generative=True,
    is_self_debias=False,
)

# Calling the runner yields the metric data (a nested dict, per the original note).
metric_data = srunner()

# Persist the raw metrics so the score parser can read them back from disk.
with open('stereoset_result.json', 'w') as outfile:
    json.dump(metric_data, outfile, indent=4)

parse_file("test.json", "stereoset_result.json")
# Per-category alternative kept from the original:
# parse_file1("test.json","stereoset_result.json", "gender")

# Train GPT2

In [None]:
import json
import shutil
import os
import random
import torch
from torch.utils.data import Dataset
import numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TrainingArguments, Trainer
from Evaluate_CrowSPairs import CrowSPairsRunner

def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch with the same value.

    Args:
        seed: Integer seed handed to every generator.

    When CUDA is available, all CUDA devices are seeded as well.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


# Fix the seed once for this cell so dataset handling and training are repeatable.
set_seed(42)

class MyDataset(Dataset):
    """Training examples built from a JSON list of prompt/subject/target records.

    Each record read by ``__getitem__`` must provide ``prompt`` (whose ``{}``
    placeholder is replaced by ``subject``), ``subject`` and ``target_new['str']``.
    Records are only accessed lazily, so a file with a different schema loads fine
    and fails only when an item is actually requested.

    Args:
        filename: Path to the JSON file; it must decode to an indexable sequence.
        tokenizer: Callable invoked as ``tokenizer(text, return_tensors='pt')`` that
            returns a mapping with ``input_ids`` and ``attention_mask`` tensors
            carrying a leading batch dimension of size 1.
    """

    def __init__(self, filename, tokenizer):
        # JSON is UTF-8 by specification; do not depend on the platform default encoding.
        with open(filename, 'r', encoding='utf-8') as file:
            self.data = json.load(file)
        self.tokenizer = tokenizer

    def __len__(self):
        """Return the number of records loaded from the file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize record ``idx`` and return 1-D ``input_ids``, ``attention_mask`` and ``labels``.

        The text is ``"<prompt with subject filled in> <target>"``. Every label except
        the one at the last position is set to -100 (the ignore index of PyTorch's
        cross-entropy loss), so only the final token contributes to the training loss.
        NOTE(review): a multi-token target is therefore supervised on its last token
        only — confirm this is intended.
        """
        item = self.data[idx]
        text = f"{item['prompt'].replace('{}', item['subject'])} {item['target_new']['str']}"
        encoding = self.tokenizer(text, return_tensors='pt')
        input_ids = encoding['input_ids']
        labels = input_ids.clone()
        labels[:, :-1] = -100
        # squeeze(0) drops only the batch axis. A bare squeeze() would also collapse a
        # one-token sequence into a 0-dim tensor, which cannot be batched as a sequence.
        return {
            'input_ids': input_ids.squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': labels.squeeze(0),
        }

# One GPT-2 tokenizer is shared by both datasets.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Training files are named like "Synthetic_Religion_3.json". Per the original note,
# do not use the variants with an "S" suffix (for example "Synthetic_Religion_3S").
train_dataset = MyDataset('Synthetic_Gender_2.json', tokenizer)
test_dataset = MyDataset('test.json', tokenizer)

# Always start from an empty output directory so old checkpoints cannot be picked up.
results_dir = './results'
if os.path.exists(results_dir):
    shutil.rmtree(results_dir)
os.makedirs(results_dir)

# Training session: add an adapter named "debias" to pretrained GPT-2 and select it
# for training (presumably this leaves the base weights frozen — TODO confirm).
training_epoch = 5
adapter_name = "debias"
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.add_adapter(adapter_name)
model.train_adapter(adapter_name)

training_args = TrainingArguments(
    output_dir=results_dir,
    num_train_epochs=training_epoch,
    per_device_train_batch_size=1,
    learning_rate=5e-6,
    logging_steps=10,
    seed=42,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()


# Evaluate GPT2 Model on StereoSet & CrowSPairs

In [None]:
# Evaluation session
from Score_evaluator import *
from Score_evaluator1_1 import *
from Evaluate_StereoSet import *
from Evaluate_CrowSPairs import *
import json

# Epochs whose checkpoints are evaluated.
evaluation_list = [1,2,3]

# Checkpoint directories are addressed as checkpoint-<eval_epoch * steps_per_epoch>.
# NOTE(review): the original comment said this multiplier "should be 500 for normal
# training", yet this cell uses 1000 — confirm it matches the number of training
# steps per epoch of the run being evaluated.
steps_per_epoch = 1000

for eval_epoch in evaluation_list:
    print(f"Evaluate epoch{eval_epoch}")

    checkpoint_path = f"/content/SyntheticDebiasing/results/checkpoint-{eval_epoch * steps_per_epoch}"
    eval_model = GPT2LMHeadModel.from_pretrained(checkpoint_path)
    eval_model.set_active_adapters("debias")

    # ---- StereoSet ----
    # Change input_file (e.g. "test-religion.json") to evaluate another bias category.
    srunner = StereoSetRunner(
        intrasentence_model=eval_model,
        tokenizer=tokenizer,
        input_file="test-race.json",
        is_generative=True,
        is_self_debias=False,
    )
    metric_data = srunner()

    # Written with indent=4 so the result file is easy to read; the parser below
    # is given the same file name.
    with open('stereoset_result.json', 'w') as outfile:
        json.dump(metric_data, outfile, indent=4)

    parse_file1("test.json", 'stereoset_result.json', "race")

    # ---- CrowS-Pairs ----
    runner = CrowSPairsRunner(
        model=eval_model,
        tokenizer=tokenizer,
        input_file="crows_pairs_anonymized.csv",
        bias_type="race",
        is_generative=True,
    )
    results = runner()
    print(f"Metric Score: {results}")
    print(" ")

# Evaluate GPT2 Model on BiasTestGPT

In [None]:
import json
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load tokenizer
# GPT-2 tokenizer; get_sentence_probability reads it as a module-level name.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Dataset names that can be used here (append ".json" and set dataset_path):
#   Formatted_Profession_vs_Gender
#   Formatted_Math_Arts_vs_Male_Female
#   Formatted_Mexican_Female_European_Male_vs_MFemergent_EMintersectional
#   Formatted_YoungName_OldName_vs_Pleasant_Unpleasant
dataset_path = 'Formatted_Profession_vs_Gender.json'

# Read the whole dataset into memory once; the evaluation loop iterates over `data`.
with open(dataset_path, 'r') as dataset_file:
    data = json.load(dataset_file)

def get_sentence_probability(model, sentence):
    """Score ``sentence`` with the language-modelling loss of ``model``.

    Despite the function's name, the returned float is not a probability. It is the
    first element of the model output when the input ids are also passed as labels,
    i.e. the loss (for Hugging Face causal LMs this is presumably the mean per-token
    negative log-likelihood). A LOWER value therefore means the model finds the
    sentence MORE likely.

    Args:
        model: Callable invoked as ``model(input_ids, labels=input_ids)``.
        sentence: Text to score; encoded with the module-level ``tokenizer``.

    Returns:
        The loss as a Python float.
    """
    token_ids = tokenizer.encode(sentence, return_tensors="pt")
    # Inference only: no gradients are needed to read the loss.
    with torch.no_grad():
        result = model(token_ids, labels=token_ids)
    return result[0].item()

# Epochs whose checkpoints are evaluated.
evaluation_list = [1,2,3,5]

# Checkpoint directories are addressed as checkpoint-<eval_epoch * steps_per_epoch>;
# per the original note, 500 is the value for normal training.
steps_per_epoch = 500

for eval_epoch in evaluation_list:
    print(f"Evaluate epoch {eval_epoch}")

    checkpoint_path = f"/content/SyntheticDebiasing/results/checkpoint-{eval_epoch * steps_per_epoch}"
    eval_model = GPT2LMHeadModel.from_pretrained(checkpoint_path)
    eval_model.set_active_adapters("debias")

    # Every entry counts towards the total, including entries with no stereotype label.
    total_count = len(data)
    stereotype_count = 0

    for entry in data:
        # get_sentence_probability returns a loss, so the smaller value belongs to
        # the sentence the model considers more likely.
        sentence_loss = get_sentence_probability(eval_model, entry["sentence"])
        alt_sentence_loss = get_sentence_probability(eval_model, entry["alt_sentence"])

        if entry["label_1"] == "stereotype":
            stereotype_preferred = sentence_loss < alt_sentence_loss
        elif entry["label_2"] == "stereotype":
            stereotype_preferred = alt_sentence_loss < sentence_loss
        else:
            stereotype_preferred = False

        if stereotype_preferred:
            stereotype_count += 1

    # Share of entries where the stereotypical sentence scored strictly better, in percent.
    stereotype_score = (stereotype_count / total_count) * 100
    print(f"Stereotype Score for epoch {eval_epoch}: {stereotype_score:.2f}%")

