In [None]:
!pip install -r requirements.txt

---

In [6]:
import numpy as np
from tqdm import tqdm
import transformers
from transformers import (
    BertTokenizer,
    BertForMaskedLM
)
import pandas as pd
import torch
from torch.nn import functional as F

### Ideas

- Try a BERTLM from transformer
- Can you separate the encoder and decoder?
- Can we show that the embeddings from BERT have the same pattern as GLOVE?
    - Generate word vectors for vocabulary in glove embeddings
    - Get gender biased words
    - Get Null Projection
    - Plot diagrams showing cluster change after null projection
    - Acquire other polarised words, hot-cold, up-down, soft-hard, nature-manmade, etc
    - Show that the clusters for these words are still separable after null projection
- Can we use the language model encoder / decoder and apply null projection in between to show the change in output with projection?
    - e.g. input -> tokeniser -> encoder -> (null projection OR identity) -> decoder -> tokeniser -> output

# BERT Transformer

In [9]:
# Load the pretrained 'bert-base-uncased' tokenizer and masked-language-model;
# both objects are used as notebook-level globals by the cells below.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# return_dict=True: later cells read model outputs by attribute (e.g. `.hidden_states`).
bert_mlm = BertForMaskedLM.from_pretrained('bert-base-uncased', return_dict = True)

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


![title](images/bert_architecture.png)

Source: [**BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding**: Jacob Devlin, Ming-Wei Chang, Kenton Lee, Kristina Toutanova](https://arxiv.org/abs/1810.04805)

In [10]:
def chunker(input_list, chunk_size):
    """Lazily yield consecutive slices of `input_list`, each at most `chunk_size` long.

    The final slice is shorter when `len(input_list)` is not a multiple of
    `chunk_size`; an empty input yields nothing.
    """
    total = len(input_list)
    yield from (
        input_list[start:start + chunk_size]
        for start in range(0, total, chunk_size)
    )
        
def get_embeddings(input_sequences, model, tokenizer):
    """Encode a batch of strings and return one hidden-state layer as NumPy arrays.

    Args:
        input_sequences: list of strings to encode as one padded batch.
        model: model called with the tokenized batch and
            `output_hidden_states=True`; its output must expose `.hidden_states`.
        tokenizer: tokenizer providing `batch_encode_plus`.

    Returns:
        A tuple `(embeddings, input_ids)`: the hidden states selected by the
        notebook-level `BERT_BIAS_LAYER` index, and the padded token ids of the
        batch, both as NumPy arrays.
    """
    # Use the tokenizer/model that were passed in rather than the notebook globals.
    tokenized_input = tokenizer.batch_encode_plus(
        input_sequences, return_tensors="pt", padding=True, truncation=False
    )
    # Inference only: skip building the autograd graph for the whole batch.
    with torch.no_grad():
        model_output = model(**tokenized_input, output_hidden_states=True)
    embeddings = model_output.hidden_states[BERT_BIAS_LAYER]
    return embeddings.detach().numpy(), tokenized_input["input_ids"].detach().numpy()

def extract_token_embeddings(embeddings, input_ids):
    """Mean-pool the token embeddings of every sequence into a single vector.

    For each row, positions from index 1 up to (not including) the end marker
    are averaged. The end marker is the position just before the first padding
    id (0) when the row is padded, otherwise the position of id 102
    (presumably the [SEP] token of bert-base-uncased -- confirm).

    Args:
        embeddings: array indexed as [sequence][token] -> vector.
        input_ids: array of token ids aligned with `embeddings`.

    Returns:
        NumPy array with one pooled vector per sequence.
    """
    pooled_rows = []
    for row, sequence_embeddings in enumerate(embeddings):
        token_ids = list(input_ids[row])
        # Padded rows: the end marker sits right before the first pad id.
        end = token_ids.index(0) - 1 if 0 in token_ids else token_ids.index(102)
        pooled_rows.append(sequence_embeddings[1:end].mean(axis=0))
    return np.array(pooled_rows)

def guard_vector(layer):
    """Multiply `layer` by the notebook-level NULL_PROJECTION matrix.

    The input is transposed before the product and the result is transposed
    back, so the output has the same orientation as the input.
    """
    transposed_input = layer.T
    projected = NULL_PROJECTION.dot(transposed_input)
    return projected.T

def guard_embedding(hidden_state, tokenized_input):
    """Apply `guard_vector` to every non-special token position of the first sequence.

    Args:
        hidden_state: tensor indexed as [batch][token] -> vector; only batch
            element 0 is modified.
        tokenized_input: mapping with an "input_ids" tensor aligned with
            `hidden_state`.

    Returns:
        A new `torch.Tensor` holding the guarded hidden state. The input tensor
        is left untouched.
    """
    # Token ids that are never guarded (presumably [CLS], [MASK], [SEP] and
    # [PAD] in the bert-base-uncased vocabulary -- confirm).
    special_token_ids = (101, 103, 102, 0)
    input_ids = tokenized_input["input_ids"].detach().numpy()[0]
    # Enumerate positions directly: looking positions up with list.index()
    # returns the first occurrence only, so repeated tokens would be skipped.
    word_indexes = [
        position
        for position, token_id in enumerate(input_ids)
        if token_id not in special_token_ids
    ]
    # .numpy() shares memory with the tensor; copy so the caller's hidden
    # state is not modified in place.
    guarded = hidden_state.detach().numpy().copy()
    for idx in word_indexes:
        guarded[0][idx] = guard_vector(guarded[0][idx])
    return torch.Tensor(guarded)

def run_post_bias_encoder_layers(encoder_layers_list, previous_hidden_state):
    """Feed a hidden state through the given encoder layers in order.

    Each layer's `forward` is called with `hidden_states=<current state>` and
    the first element of its return value becomes the input of the next layer.

    Returns:
        The hidden state produced by the last layer, or the input unchanged
        when `encoder_layers_list` is empty.
    """
    current_state = previous_hidden_state
    for encoder_layer in encoder_layers_list:
        layer_outputs = encoder_layer.forward(hidden_states=current_state)
        current_state = layer_outputs[0]
    return current_state

def get_next_word(logits, tokenizer, mask_index):
    """Decode the highest-probability token at the masked position.

    Args:
        logits: vocabulary scores indexed as [batch, token, vocab]; only batch
            element 0 is used.
        tokenizer: object providing `decode`.
        mask_index: index of the masked position, as returned by `torch.where`.

    Returns:
        Whatever `tokenizer.decode` returns for the arg-max token id(s).
    """
    probabilities = logits.softmax(dim=-1)
    mask_probabilities = probabilities[0, mask_index, :]
    best_token_ids = mask_probabilities.argmax(dim=1)
    return tokenizer.decode(best_token_ids)

# BERT Vector Generation

![title](images/bert_layers_gender_bias.png)

Source: [**Investigating Gender Bias in BERT**: Rishabh Bhardwaj, Navonil Majumder, Soujanya Poria](https://arxiv.org/abs/2009.05021)

In [12]:
# Index into the model's `hidden_states` output that get_embeddings() reads.
BERT_BIAS_LAYER = 1
# Vocabulary entries: column 0 of the header-less file, as a plain Python list.
VOCABULARY = pd.read_csv("data/vocabulary.txt", header=None)[0].values.tolist()

In [13]:
# Accumulator with zero rows and 768 columns (presumably the hidden size of
# bert-base-uncased -- confirm); one row is appended per vocabulary entry.
bert_vocab_embedding_list = np.empty((0, 768))
# Process the vocabulary in batches of 1000 entries.
for chunk in tqdm(chunker(VOCABULARY, 1000)):
    embeddings, input_ids = get_embeddings(chunk, bert_mlm, bert_tokenizer)
    # Collapse the per-token embeddings to one mean vector per entry.
    embeddings = extract_token_embeddings(embeddings, input_ids)
    bert_vocab_embedding_list = np.concatenate((bert_vocab_embedding_list, embeddings), axis=0)

8it [02:32, 19.07s/it]


In [14]:
# Write the embeddings as text: a header line "<rows> <cols> " followed by one
# line per vocabulary entry of the form "<word> <v1> <v2> ...".
# NOTE(review): layout resembles the word2vec text format -- confirm against
# the script that consumes this file.
embedding_shape = bert_vocab_embedding_list.shape
with open("data/embeddings/BERTLM_ENCODER_LAYER_ONE/bert-base-uncased-embeddings.txt", "w") as bert_file:
    bert_file.write(f"{embedding_shape[0]} {embedding_shape[1]} \n")
    # VOCABULARY and the embedding rows are paired by position.
    for word, embedding in zip(VOCABULARY, bert_vocab_embedding_list):
        bert_file.write(f"{word} {' '.join(map(str, list(embedding)))}\n")

In [18]:
# Run the project script in a subprocess; its output below reports PCA
# explained-variance ratios and the top male-sensitive tokens.
# NOTE(review): presumably reads the embeddings file written above -- confirm.
!python get_bias_sensitive_tokens.py

pca explained variance ratio: [0.32702196 0.20407492 0.15900126 0.08174839 0.05758673 0.05069628
 0.04543994 0.03472261 0.02933717 0.01037071] 

TOP 100 MALE SENSITIVE TOKENS 
 ('man', 'john', 'he', 'boy', 'guy', 'son', 'his', 'manhunt', 'him', 'housman', 'himself', 'heisman', 'manu', 'johny', 'heyman', 'sons', 'hes', 'guymon', 'sonnen', 'boye', 'brothers', 'rockman', 'mangini', 'father', 'brother', 'mike', 'sandman', 'bossman', 'heder', 'jono', 'helt', 'hedi', 'hegel', 'dude', 'bluesman', 'jason', 'mr', 'sonali', 'redman', 'sonographer', 'linesman', 'charles', 'cashman', 'handsome', 'boyhood', 'ferdinando', 'marksman', 'darkman', 'timo', 'kingdon', 'cocky', 'walter', 'sonja', 'james', 'robert', 'heba', 'matthew', 'irishman', 'pitman', 'martino', 'paulus', 'mikel', 'juwan', 'tradesman', 'ockham', 'welshman', 'bagman', 'edgar', 'hegelian', 'vorderman', 'richard', 'william', 'david', 'countryman', 'bouchon', 'kings', 'waltrip', 'rotman', 'samo', 'heliotrope', 'wilfredo', 'cheeseman', 'th

# Null-Space Projection

In [23]:
# Run the project script in a subprocess; its output below reports the data
# split sizes, per-iteration accuracy and V-measure before/after projection.
# NOTE(review): presumably writes data/nullspace_vector.npy, which is loaded
# further down -- verify.
!python context_nullspace_projection.py

Train size: 147; Dev size: 63; Test size: 90
iteration: 24, accuracy: 0.36507936507936506: 100%|█| 25/25 [00:07<00:00,  3.25i
Figure(600x500)
Figure(600x500)
V-measure-before (TSNE space): 0.778190793392485
V-measure-after (TSNE space): 0.0011550932483761207
V-measure-before (original space): 1.0
V-measure-after (original space): 0.0007205831499929152


![title](images/tsne_projections.png)

# Transformer Encoder / Decoder Generation

In [24]:
# Projection matrix read by guard_vector(), which left-multiplies hidden-state
# vectors by it.
NULL_PROJECTION = np.load("data/nullspace_vector.npy")

In [25]:
def generate_next_word(input_sequence, model, tokenizer, guard_flag=False, biased_layer_index=1):
    """Predict the token at the mask position, optionally guarding an intermediate hidden state.

    The input is run through `model` to obtain `hidden_states[biased_layer_index]`.
    That state is optionally passed through `guard_embedding`, then through the
    remaining encoder layers and the masked-LM head to produce logits.

    Args:
        input_sequence: text containing the tokenizer's mask token.
        model: a `BertForMaskedLM`-style model exposing `bert.encoder.layer`
            and `cls`.
        tokenizer: tokenizer providing `encode_plus` and `mask_token_id`.
        guard_flag: when True, apply `guard_embedding` to the selected state.
        biased_layer_index: index into the model's `hidden_states` tuple at
            which the (optional) guarding happens.

    Returns:
        The decoded highest-likelihood token for the mask position.
    """
    # Address the sub-modules by attribute instead of by position in
    # `model.modules()`: positional indices depend on the library version and
    # would always select the same layers regardless of `biased_layer_index`.
    all_encoder_layers = model.bert.encoder.layer
    bert_mlm_head = model.cls

    # `hidden_states` holds the embedding output followed by one entry per
    # encoder layer, so entry k still has to pass through layers k, k+1, ...
    if biased_layer_index < 0:
        biased_layer_index += len(all_encoder_layers) + 1
    encoder_layers_list = list(all_encoder_layers[biased_layer_index:])

    # tokenize input sequence and locate the mask token
    tokenized_input = tokenizer.encode_plus(input_sequence, return_tensors="pt")
    mask_index = torch.where(tokenized_input["input_ids"][0] == tokenizer.mask_token_id)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        # extract the intermediate encoding to feed back into the model
        model_output = model(**tokenized_input, output_hidden_states=True)
        hidden_state = model_output.hidden_states[biased_layer_index]

        # apply guarding function to hidden state
        if guard_flag:
            hidden_state = guard_embedding(hidden_state, tokenized_input)

        # run (guarded) hidden state through remaining encoder layers
        encoder_output = run_post_bias_encoder_layers(encoder_layers_list, hidden_state)

        # pass encoder output into LM head to generate logits
        output_logits = bert_mlm_head(encoder_output)

    # generate the highest likelihood word
    return get_next_word(output_logits, tokenizer, mask_index)

def complete_the_sentence(female_variant, male_variant):
    """Print guarded and unguarded mask completions for a sentence pair.

    Each variant is completed twice with `generate_next_word` (guarded first,
    then unguarded) using the notebook-level `bert_mlm` and `bert_tokenizer`.
    The predicted word is upper-cased and substituted for "[MASK]".
    """
    def fill(template, guard):
        prediction = generate_next_word(template, bert_mlm, bert_tokenizer, guard_flag=guard)
        return template.replace("[MASK]", prediction.upper())

    female_guarded = fill(female_variant, True)
    female_plain = fill(female_variant, False)
    male_guarded = fill(male_variant, True)
    male_plain = fill(male_variant, False)

    unguarded_part = f"unguarded outputs: \n \t{female_plain} \n \t{male_plain} \n\n"
    guarded_part = f"guarded outputs: \n \t{female_guarded} \n \t{male_guarded} \n"
    print(unguarded_part + guarded_part)



In [26]:
# Compare guarded vs. unguarded completions on three she/he sentence pairs.
# Each pair differs only in the pronoun; the mask token is taken from the tokenizer.
print("\n-------------- <she/he> wanted a [MASK]. -----------------\n")
complete_the_sentence("she wanted a " + bert_tokenizer.mask_token + ".",
                      "he wanted a " + bert_tokenizer.mask_token + ".")
print("\n-------------- <she/he> works as a [MASK]. -----------------\n")
complete_the_sentence("she works as a " + bert_tokenizer.mask_token + ".",
                      "he works as a " + bert_tokenizer.mask_token + ".")
print("\n-------------- people thought <she/he> was very [MASK] during the meeting. -----------------\n")
complete_the_sentence("people thought she was very " + bert_tokenizer.mask_token + " during the meeting.",
                      "people thought he was very " + bert_tokenizer.mask_token + " during the meeting.")
print("\n--------------------------------------------------\n")


-------------- <she/he> wanted a [MASK]. -----------------

unguarded outputs: 
 	she wanted a MAN. 
 	he wanted a DRINK. 

guarded outputs: 
 	she wanted a MAN. 
 	he wanted a DRINK. 


-------------- <she/he> works as a [MASK]. -----------------

unguarded outputs: 
 	she works as a TEACHER. 
 	he works as a LAWYER. 

guarded outputs: 
 	she works as a TEACHER. 
 	he works as a LAWYER. 


-------------- people thought <she/he> was very [MASK] during the meeting. -----------------

unguarded outputs: 
 	people thought she was very BEAUTIFUL during the meeting. 
 	people thought he was very POPULAR during the meeting. 

guarded outputs: 
 	people thought she was very QUIET during the meeting. 
 	people thought he was very QUIET during the meeting. 


--------------------------------------------------

