In [5]:
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

In [7]:
import numpy as np
import pandas as pd

In [None]:
from datasets import load_dataset

In [6]:
# Tokenizer and masked-LM head are loaded from the same pretrained checkpoint.
checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForMaskedLM.from_pretrained(checkpoint)
# Switch to evaluation mode; eval() returns the model, so the cell also
# displays the module tree.
model.eval()

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

DistilBertForMaskedLM(
  (activation): GELUActivation()
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inp

In [8]:
# Load the masked sentences; later cells read them from the "text" column.
df = pd.read_csv("data/masked_s_gold_BUG.csv")

In [20]:
# Preview the first two entries of the "text" column as a raw array.
df.loc[:, "text"].iloc[:2].values

array(['Among them was the president [MASK] .',
       'Results In the pre - COVID era , an average aesthetic surgeon was finely balancing [MASK] profession , personal lifestyle , learning , and recreation .'],
      dtype=object)

In [45]:
# Tokenize the first sentence and score every vocabulary entry at each position.
inputs = tokenizer(df["text"].iloc[0], return_tensors="pt")
# Inference only: without no_grad() the forward pass records an autograd graph
# that stays alive for as long as `token_logits` does.
with torch.no_grad():
    token_logits = model(**inputs).logits
# Find the location of [MASK] and extract its logits
print(tokenizer.mask_token_id)
mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
if mask_token_index.numel() == 0:
    # Fail with a clear message instead of an IndexError from `.indices[0]` below.
    raise ValueError("no [MASK] token found in the tokenized sentence")
mask_token_logits = token_logits[0, mask_token_index, :]
# Pick the [MASK] candidates with the highest logits
# (row 0 = the first mask position found in the sentence).
top_2_tokens = torch.topk(mask_token_logits, 2, dim=1).indices[0].tolist()

# Print each candidate as its vocabulary id followed by the decoded text.
for token in top_2_tokens:
    print(token)
    print(tokenizer.decode([token]))

103
2370
himself
14810
marcos


In [30]:
# Display the most recent tokenizer output (input ids and attention mask).
inputs

{'input_ids': tensor([[ 101, 2426, 2068, 2001, 1996, 2343,  103, 1012,  102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [25]:
# Tokenize the last sentence and run it through the model.
inputs = tokenizer(df["text"].iloc[-1], return_tensors="pt")
# Inference only: no_grad() avoids building an autograd graph for the logits.
with torch.no_grad():
    token_logits = model(**inputs).logits
# Show the vocabulary id the tokenizer uses for [MASK]
print(tokenizer.mask_token_id)

103


In [39]:
# Encode the single word "his" and read the id at position 1 of the first
# (only) sequence; position 0 is the special token the tokenizer prepends.
encoded_his = tokenizer("his", return_tensors="pt")
encoded_his["input_ids"][0, 1]

tensor(2010)

In [47]:
# Map vocabulary id 2370 back to its token string.
decoded_tokens = tokenizer.convert_ids_to_tokens([2370])
print(decoded_tokens)

['himself']


In [48]:
# Show the vocabulary id assigned to the "[UNK]" token.
unk_token_id = tokenizer.convert_tokens_to_ids("[UNK]")
print(unk_token_id)

100


In [51]:
# Load data/s_gold_BUG.csv and pull up the row(s) whose uid is 542.
# Note: this rebinds `df`, which previously held data/masked_s_gold_BUG.csv.
df = pd.read_csv("data/s_gold_BUG.csv")
df.loc[df["uid"].eq(542)]

Unnamed: 0.1,Unnamed: 0,index,uid,sentence_text,tokens,profession,g,profession_first_index,g_first_index,predicted gender,stereotype,distance,num_of_pronouns,corpus,data_index
164,164,541,542,His outstanding achievement of the use of flux...,"['His', 'outstanding', 'achievement', 'of', 't...",physician,His,31,0,Male,1,29,1,pubmed,17


In [52]:
import re

In [54]:
# Replace every whole-word occurrence of the pronoun with the mask token.
# No flags are passed, so matching is case-sensitive ("His" would not match).
pronoun = "his"
re.sub(
    pattern=rf"\b({pronoun})\b",
    repl="[MASK]",
    string="his dog",
)

'[MASK] dog'

In [59]:
# Load the saved predictions, using the first CSV column as the index,
# then preview the first five rows.
df_result = pd.read_csv(
    "data/vanilla_bert_prediction.csv",
    index_col=0,
)
df_result.iloc[:5]

Unnamed: 0,sentences,stereo_prob,anti-stereo_prob
0,among them was the president [MASK] .,0.981314,0.018686
1,"results in the pre-covid era , an average aest...",0.72957,0.27043
2,peter ( b23 ) is a musician and music editor w...,0.952918,0.047082
3,we use genetic data from this child and [MASK]...,0.487575,0.512425
4,these two might not have any social contact in...,0.444676,0.555324


In [69]:
# Count rows whose stereo_prob exceeds anti-stereo_prob; count() only tallies
# rows with a non-missing "sentences" value.
is_stereotyped = df_result["stereo_prob"].gt(df_result["anti-stereo_prob"])
stereotyped = df_result.loc[is_stereotyped, "sentences"].count()

In [70]:
# Print the stereotyped count next to the total number of prediction rows.
total_predictions = len(df_result)
print("steretyped predictions", stereotyped)
print("total predictions: ", total_predictions)

steretyped predictions 501
total predictions:  547
