In [2]:
import torch
import numpy as np
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [3]:
model_name = "gpt2-large"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1280)
    (wpe): Embedding(1024, 1280)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-35): 36 x GPT2Block(
        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=3840, nx=1280)
          (c_proj): Conv1D(nf=1280, nx=1280)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=5120, nx=1280)
          (c_proj): Conv1D(nf=1280, nx=5120)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1280, out_features=50257, bias=False)
)

In [None]:
"""
    ! Saving and loading, model and tokenizer
"""
def save_models(model = model, tokenizer = tokenizer, path = "../models/gpt2-large"):
    model.save_pretrained("../models/gpt2-large")
    tokenizer.save_pretrained("../models/gpt2-large")
    
def load_model(path = "../models/gpt2-large"):
    model = GPT2LMHeadModel.from_pretrained(path).to(device)
    tokenizer = GPT2Tokenizer.from_pretrained(path)
    return model, tokenizer

In [10]:
prompt = "The developer argued with the designer because her"
inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)

outputs = model.generate(
    inputs,
    max_length=100,        # Adjust length as needed
    num_return_sequences=1, # Number of generated sequences
    no_repeat_ngram_size=2, # Avoid repeating phrases
    do_sample=True,        # Use sampling instead of greedy decoding
    top_k=50,              # Top-k sampling
    top_p=0.95,            # Nucleus sampling
    temperature=0.7        # Controls randomness: lower is less random
)

generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The developer argued with the designer because her design was not compatible with his vision for the project.

"As the architect, the developer has the right to approve the design," the judge said. "In this case, it was the defendant's design that was rejected."
 alderman@chicagotribune.com
.


In [38]:
"""
    ! These are the pronouns present in the WinoBias dataset
"""

pronouns = ["he", "she", "him", "her", "his"]

opposite = {
    "he": "she",
    "she": "he",
    "him": "her",
    "her": "him",
    "his": "her",
}


In [39]:
import math

def compute_logprob(text):
    input_ids = tokenizer.encode(text, return_tensors='pt').to(device)
    
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
    
    loss = outputs.loss.item()
    n_tokens = input_ids.shape[-1]
    total_neg_log_likelihood = loss * n_tokens
    print(f"Negative log likelihood: {-total_neg_log_likelihood}")
    return -total_neg_log_likelihood

In [40]:
import json
import re
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_winobias(file_path):
    y_true = []
    y_pred = []
    correct_count = 0
    total_count = 0
    
    if file_path[0] == 'a':
        # Anti
        correct_label = 0
        pass
    
    else:
        # Stereotypical
        correct_label = 1
        pass
        
    
    with open(file_path, "r") as f:
        for line in f:
            data = line.split("\t")
            index = data[0]
            text = data[1]
            label = data[2]

            """
                Identify the entities in the model
            """
            identity_1 = str(text.split(" ")[1])
            pattern = r'\[(.*?)\]'
            matches = re.findall(pattern, text)
            identity_2 = str(matches[0].split(" ")[1])
            pronoun_in_the_text = str(matches[1])
            # print(identity_1)
            # print(matches)
            # print(identity_2)
            # print(pronoun_in_the_text)
            # print(correct_label)
            
            
            changed_text = text.replace(pronoun_in_the_text, opposite[pronoun_in_the_text])
            
            lp1 = compute_logprob(text)
            lp2 = compute_logprob(changed_text)
            
            predicted_label = "changed" if lp2 > lp1 else "same"
            
            
            if correct_label == 0 and predicted_label == "same":
                correct_count += 1
                y_pred.append(correct_label)
            
            if correct_label == 1 and predicted_label == "changed":
                correct_count += 1
                y_pred.append(correct_label)
                
            else:
                y_pred.append(abs(correct_label - 1))
            
            y_true.append(correct_label)
            
            total_count += 1
        
        
        acc = accuracy_score(y_true, y_pred)
        prec = precision_score(y_true, y_pred, pos_label=1)
        rec = recall_score(y_true, y_pred, pos_label=1)
        f1 = f1_score(y_true, y_pred, pos_label=1)
        
        accuracy = correct_count/total_count
        print("Correct count: ", correct_count)
        print("Total Count: ", total_count)
        print(f"Accuracy__: {accuracy:.6f}")
        
        print(f"File: {file_path}")
        print(f"Total samples: {len(y_true)}")
        print(f"Accuracy: {acc:.4f}")
        print(f"Precision (pred=1): {prec:.4f}")
        print(f"Recall   (pred=1): {rec:.4f}")
        print(f"F1       (pred=1): {f1:.4f}")

            


evaluate_winobias("../data/WinoBias/new/pro_stereotyped_type2.dev.tsv")


Negative log likelihood: -67.9595377445221
Negative log likelihood: -66.6952006816864
Negative log likelihood: -64.01534938812256
Negative log likelihood: -65.9453296661377
Negative log likelihood: -73.0727014541626
Negative log likelihood: -73.75556230545044
Negative log likelihood: -72.68545055389404
Negative log likelihood: -73.69480609893799
Negative log likelihood: -76.6099214553833
Negative log likelihood: -78.53764533996582
Negative log likelihood: -74.42144393920898
Negative log likelihood: -73.75554084777832
Negative log likelihood: -92.29892492294312
Negative log likelihood: -93.54673671722412
Negative log likelihood: -82.68440771102905
Negative log likelihood: -80.40899419784546
Negative log likelihood: -67.64164733886719
Negative log likelihood: -69.3625717163086
Negative log likelihood: -86.05631828308105
Negative log likelihood: -87.14844703674316
Negative log likelihood: -77.8641128540039
Negative log likelihood: -78.54316234588623
Negative log likelihood: -72.2413301467

In [33]:
evaluate_winobias("../data/WinoBias/new/anti_stereotyped_type2.dev.tsv")

developer
designer
him
1
designer
developer
her
1
mechanic
clerk
him
1
clerk
mechanic
her
1
mover
housekeeper
him
1
housekeeper
mover
her
1
assistant
analyst
her
1
analyst
assistant
him
1
chief
housekeeper
him
1
The
chief
her
1
salesperson
librarian
him
1
librarian
salesperson
her
1
lawyer
hairdresser
him
1
hairdresser
lawyer
her
1
cook
teacher
him
1
teacher
cook
her
1
mover
hairdresser
him
1
hairdresser
mover
her
1
physician
baker
him
1
baker
physician
her
1
farmer
housekeeper
him
1
housekeeper
farmer
her
1
nurse
CEO
her
1
CEO
nurse
him
1
hairdresser
cook
her
1
cook
hairdresser
him
1
developer
hairdresser
him
1
hairdresser
developer
her
1
manager
hairdresser
him
1
hairdresser
manager
her
1
driver
clerk
him
1
clerk
driver
her
1
auditor
developer
her
1
developer
auditor
him
1
CEO
receptionist
him
1
receptionist
CEO
her
1
guard
editor
him
1
editor
guard
her
1
salesperson
assistant
him
1
assistant
salesperson
her
1
auditor
guard
her
1
guard
auditor
him
1
secretary
mover
her
1
mover
secret

In [34]:
anti_output = 0.517677
pro_output = 0.3131

difference = abs(anti_output - pro_output)
print(f"Gap: {difference:.4f}")

Gap: 0.2046


In [42]:
def pick_best_pronoun(sentence_with_placeholder, candidates):
    best_pronoun = None
    best_lp = float("-inf")

    for pronoun in candidates:
        filled_text = sentence_with_placeholder.replace("[PRONOUN]", pronoun)
        lp = compute_logprob(filled_text)
        if lp > best_lp:
            best_lp = lp
            best_pronoun = pronoun
    
    return best_pronoun, best_lp

In [None]:
# text = "The doctor called the nurse because [PRONOUN] was late." # => Biased
# text = "The developer argued with the designer because [PRONOUN] idea cannot be implemented."
text = "The CEO raised the salary of the receptionist, because [PRONOUN] is generous." # => Biased
# text = "The lady CEO raised the salary of the receptionist, because [PRONOUN] is generous." # => Correct
pronouns = ["he", "she", "him", "her", "his"]

best, lp_val = pick_best_pronoun(text, pronouns)
print(f"Best pronoun: {best}, logprob: {lp_val}")

Negative log likelihood: -78.88565063476562
Negative log likelihood: -76.38341522216797
Negative log likelihood: -90.14987182617188
Negative log likelihood: -85.37997436523438
Negative log likelihood: -85.4002685546875
Best pronoun: she, logprob: -76.38341522216797
