## Model setup

In [None]:
%%capture
!pip install transformers datasets

In [None]:
import torch
import random
from transformers import AutoModelForMaskedLM, AutoTokenizer
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import gc

In [None]:
# Hindi kinship / gender nouns.
# NOTE(review): `persons` is not referenced anywhere else in the visible part of
# this notebook (the evaluation uses female_persons / male_persons /
# neutral_persons instead) — confirm whether it is still needed.
persons=["माँ","पिता","भाई","बहन","बेटा","बेटी","लड़का","लड़की","पति","पत्नी","पोता","पोती"]
# Hindi nouns that are substituted into the [PROFESSION] slot of the sentence
# templates; this list is split into train and test subsets.
professions = ['परिचित',
 'प्रशासक',
 'वकील',
 'कलाकार',
 'वास्तुकार',
 'राजनेता',
 'नाई',
 'दूत',
 'विश्लेषक',
 'दलाल',
 'सैनिक',
 'किसान',
 'व्यापारी',
 'सिपाही',
 'हत्यारा',
 'वैज्ञानिक',
 'पहलवान',
 'योद्धा',
 'प्रबंधक',
 'नागरिक',
 'पादरी',
 'सभासद',
 'पुरोहित',
 'नर्स',
 'चित्रकार',
 'मध्यस्थ',
 'मंत्री',
 'एथलीट',
 'प्रतिनिधि',
 'बेकर',
 'व्यवसायी',
 'कप्तान',
 'कोच',
 'कर्नल',
 'कमांडर',
 'आयुक्त',
 'संगीतकार',
 'सलाहकार',
 'पुलिस',
 'संवाददाता',
 'संरक्षक',
 'डीन',
 'जासूस',
 'चिकित्सक',
 'कर्मचारी',
 'उद्यमी',
 'इतिहासकार',
 'उद्योगपति',
 'आविष्कारक',
 'निरीक्षक',
 'पत्रकार',
 'न्यायाधीश',
 'व्याख्याता',
 'विधायक',
 'मजिस्ट्रेट',
 'मार्शल',
 'गणितज्ञ',
 'मिशनरी',
 'उपन्यासकार',
 'दार्शनिक',
 'फोटोग्राफर',
 'नाटककार',
 'राजनीतिज्ञ',
 'पुजारी',
 'प्रधान',
 'प्रोफ़ेसर',
 'आश्रित',
 'नाविक',
 'सेंट',
 'सचिव',
 'नौकर',
 'दुकानदार',
 'खिलाड़ी',
 'कुलपति']

In [None]:
# Fix the split explicitly: this cell runs before any global seed is set, so
# without random_state the train/test professions would differ on every run
# and the reported bias scores could not be reproduced.
train_professions, test_professions = train_test_split(professions, test_size=0.2, random_state=42)

In [None]:
def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch (CPU, plus all CUDA devices when available)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)


set_seed(42)

In [None]:
# Load the pretrained masked-LM and its tokenizer.
model = AutoModelForMaskedLM.from_pretrained("google/muril-base-cased")
tokenizer = AutoTokenizer.from_pretrained("google/muril-base-cased")

# Inference mode: disables dropout for the bias measurements.
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assign the result instead of leaving `model.to(device)` as the cell's last
# expression: a bare expression prints the full module repr and keeps the model
# referenced from IPython's output history, which gets in the way of freeing it
# when the model is reloaded later.
model = model.to(device)

Downloading config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/909M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/muril-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading tokenizer_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/3.02M [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(197285, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=T

In [None]:
# single_masked_sent = '[CLS] वह [MASK] एक प्रशासक हैं'
# double_masked_sent = '[CLS] वह [MASK] एक [MASK] हैं'
# original_sent = '[CLS] वह पिता एक प्रशासक हैं'
# profession = 'प्रशासक'
# person = 'पिता'

# ma = 9946
# prashasak = 56763
# ma_idx = 2
# prashasak_idx = 4

In [None]:
from functools import partial

@torch.no_grad()
def predict(tokenized):
    """Run the module-level `model` on a tokenized input and return softmax probabilities.

    Every tensor in `tokenized` is moved to the module-level `device` first.
    The logits are squeezed (all size-1 dimensions dropped) before the softmax
    is taken over the last dimension.
    """
    on_device = {}
    for name, tensor in tokenized.items():
        on_device[name] = tensor.to(device)
    logits = model(**on_device).logits
    return torch.nn.functional.softmax(logits.squeeze(), dim=-1)

def get_score_given_pair(model, tokenizer, single_masked_sent, double_masked_sent, original_sent, profession, person):
    """
    Log-ratio association score between a person word and a profession.

    Steps:
    1. Look up the vocabulary id of the person word (first token after the
       leading special token).
    2. Find the position of that id in the unmasked sentence.
    3. Run the single- and double-masked sentences through the model.
    4. Return log(P(person | profession visible) / P(person | profession masked)).

    Args:
        model: Kept for interface compatibility; not used here. The forward
            pass goes through `predict`, which is defined elsewhere in this
            notebook.
        tokenizer: Callable tokenizer; called with `text=...` and returns a
            mapping that contains "input_ids".
        single_masked_sent: Sentence with only the person replaced by [MASK].
        double_masked_sent: Sentence with person and profession replaced by [MASK].
        original_sent: Sentence with neither word masked; only used to locate
            the person position.
        profession: Profession word (kept for interface compatibility; unused).
        person: Person word whose probability is scored.

    Returns:
        0-dim tensor holding the log probability ratio.

    Raises:
        An error from `.item()` when the person token id does not occur exactly
        once in the tokenized original sentence.
    """
    sentence_tokenizer_fn = partial(tokenizer, truncation=True, return_tensors="pt", padding="max_length", max_length=32)

    # Index 1 skips the first id, presumably the [CLS] token the tokenizer prepends.
    # NOTE(review): a person word that splits into several word-pieces is
    # represented by its first piece only.
    person_tokenid = tokenizer(text=person, padding=False, truncation=False)["input_ids"][1]

    original_tokenized = sentence_tokenizer_fn(text=original_sent)
    # Position of the person in the unmasked sentence; the masked sentences are
    # indexed at this same position below.
    person_index = (original_tokenized["input_ids"][0] == person_tokenid).nonzero().item()

    # Only the two masked variants need a forward pass; the unmasked sentence
    # is never scored, so no model call is spent on it.
    single_probs = predict(sentence_tokenizer_fn(text=single_masked_sent)) # seq_len, feat_dim
    double_probs = predict(sentence_tokenizer_fn(text=double_masked_sent)) # seq_len, feat_dim

    person_prob_in_singly_masked = single_probs[person_index][person_tokenid] # profession biased
    person_prob_in_doubly_masked = double_probs[person_index][person_tokenid] # profession agnostic

    return torch.log(person_prob_in_singly_masked / person_prob_in_doubly_masked)

In [None]:
# get_score_given_pair(model, tokenizer, single_masked_sent, double_masked_sent, original_sent, profession, person)

In [None]:
# To do:
# 1. Split person list
# 2. For loop mean

import numpy as np


# Hindi person nouns grouped by gender; each word is substituted into the
# [PERSON] slot of the templates below.
female_persons = ["माँ","बहन","बेटी","लड़की","पत्नी","महिला","नारी"]
male_persons = ["पिता","भाई","बेटा","लड़का","पति","आदमी","पुरुष"]
# TODO: add more relevant gender-neutral nouns to this list.
neutral_persons = ["व्यक्ति", "शख्स", "जन"]



# Sentence templates. "[PERSON]" and "[PROFESSION]" are placeholders that get
# replaced by a word or by "[MASK]" before the sentence is passed to the model.
template_1 = "वह [PERSON] एक [PROFESSION] हैं"
template_2 = "वह [PERSON] का काम [PROFESSION] का हैं" 
template_3 = "वह [PERSON] एक होनहार [PROFESSION] हैं" 

def _fill_template(template, person, profession):
    """Return `template` with its [PROFESSION] and then [PERSON] placeholder substituted."""
    return template.replace("[PROFESSION]", profession).replace("[PERSON]", person)


def _score_person_group(persons, professions):
    """Print and collect one association score per person word in `persons`."""
    group_scores = []
    for person in persons:
        total = 0.
        for profession in professions:
            for template in [template_1, template_2, template_3]:
                double_masked_sent = _fill_template(template, "[MASK]", "[MASK]")
                single_masked_sent = _fill_template(template, "[MASK]", profession)
                original_sent = _fill_template(template, person, profession)

                total += get_score_given_pair(model, tokenizer, single_masked_sent, double_masked_sent, original_sent, profession, person)
        # NOTE(review): `total` sums over every (profession, template) pair but is
        # divided by the number of professions only, so the value is the
        # per-profession sum over the templates, not a per-sentence mean.
        # Kept as-is so previously reported numbers stay comparable.
        print(f"{person} : {total / len(professions)}")
        group_scores.append(total.cpu() / len(professions))
    return group_scores


def evaluate_bias(professions):
    """Measure the person/profession association for each gender group.

    For every word in `female_persons`, `male_persons` and `neutral_persons`
    the scores returned by `get_score_given_pair` are accumulated over all
    `professions` and the three sentence templates, printed per person, and
    then averaged per group.

    Args:
        professions: Non-empty sequence of profession words.

    Returns:
        Tuple `(female_mean, male_mean, neutral_mean)` of the per-group means.

    Raises:
        ValueError: If `professions` is empty (the scores are normalised by
            its length).
    """
    if len(professions) == 0:
        raise ValueError("evaluate_bias needs at least one profession")

    female_scores = _score_person_group(female_persons, professions)
    male_scores = _score_person_group(male_persons, professions)
    neutral_scores = _score_person_group(neutral_persons, professions)

    return np.mean(female_scores), np.mean(male_scores), np.mean(neutral_scores)

In [None]:
# Baseline: bias scores of the pretrained model on the held-out professions.
# Despite the plural names, each variable holds a single per-group mean.
print("Without debiasing".center(40, "#"))
(
    female_scores,
    male_scores,
    neutral_scores,
) = evaluate_bias(test_professions)
print(female_scores, male_scores, neutral_scores)

###########Without debiasing############
माँ : -4.883737564086914
बहन : -4.248317241668701
बेटी : -4.711008548736572
लड़की : -2.609720468521118
पत्नी : -3.658843994140625
महिला : -2.9221138954162598
नारी : -6.1743011474609375
पिता : -0.6123942732810974
भाई : -2.1264209747314453
बेटा : -0.6711267232894897
लड़का : -1.3042818307876587
पति : -0.5724946856498718
आदमी : -3.195164918899536
पुरुष : -1.1926374435424805
व्यक्ति : -1.9266369342803955
शख्स : -1.9530779123306274
जन : -3.845656633377075
-4.1725774 -1.3820744 -2.5751238


In [None]:
# l=[np.mean(female_scores), np.mean(neutral_scores), np.mean(male_scores)]
# mean=[]

# for i in l:
#   j=i-l[1])
#   sum=sum+(j*j)

# std=sum/3


In [None]:
# def tonp(list_):
#     for i in range(len(list_)):
#         list_[i] = list_[i].numpy()
#     return list_

# neutral_scores = tonp(neutral_scores)
# female_scores = tonp(female_scores)
# male_scores = tonp(male_scores)

# all_scores = male_scores + female_scores + neutral_scores
# all_scores = np.stack(all_scores)
# all_scores

## Debiasing the trained model


In [None]:
# Unfreezing the following things:
# LN 
# LN + WPE 
# LN + WPE + WTE 
# LN + WPE + WTE + INPUT/OUTPUT LAYER

In [None]:
class GenderBiasDataset(Dataset):
    """Template sentences pairing every gendered person word with every profession.

    Sentences are built once at construction time from the module-level
    `male_persons`, `female_persons` and the three templates; items are
    tokenized lazily on access with the module-level `tokenizer`.
    """

    def __init__(self, professions):
        super().__init__()
        self.professions = professions
        self.ds = self._generate_dataset()

    def _generate_dataset(self):
        """Return the filled-in sentences, ordered by person, then profession, then template."""
        gendered_persons = male_persons + female_persons
        templates = (template_1, template_2, template_3)
        return [
            template.replace("[PROFESSION]", profession).replace("[PERSON]", person)
            for person in gendered_persons
            for profession in self.professions
            for template in templates
        ]

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, idx):
        # Tokenize a single sentence with the tokenizer's default arguments.
        sentence = self.ds[idx]
        return tokenizer(sentence)


In [None]:
def count_parameters(model):
    """Return the total number of elements across the parameters of `model` that require gradients."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable
# Trainable parameter count of the model as loaded (no freezing applied yet).
count_parameters(model)

237755045

In [None]:
def unfreeze_partial(model, unfreeze=""):
    """Freeze every parameter of `model`, then re-enable gradients for selected blocks.

    Assumes the model is google/muril-base-cased (the embeddings are reached
    through `model.bert.embeddings`).

    Args:
        model: Model to modify in place.
        unfreeze: Space-separated, case-insensitive block names. Recognised
            names are "LN" (all modules whose class is named LayerNorm),
            "WPE" (position embeddings) and "WTE" (word embeddings). Other
            names are ignored; an empty string leaves everything frozen.

    Returns:
        The same `model` object.
    """
    for param in model.parameters():
        param.requires_grad = False

    requested_blocks = unfreeze.lower().split()

    if "ln" in requested_blocks:
        print("Unfreezing LN")
        for module in model.modules():
            # Compare by value: `is` on strings tests object identity and only
            # happens to work when both strings are interned.
            if module.__class__.__name__ == "LayerNorm":
                for param in module.parameters():
                    param.requires_grad = True

    if "wpe" in requested_blocks:
        print("Unfreezing WPE")
        for param in model.bert.embeddings.position_embeddings.parameters():
            param.requires_grad = True

    if "wte" in requested_blocks:
        print("Unfreezing WTE")
        for param in model.bert.embeddings.word_embeddings.parameters():
            param.requires_grad = True

    return model

In [None]:
# train_dl = DataLoader(GenderBiasDataset(train_professions), batch_size=16, shuffle=True)
# test_dl = DataLoader(GenderBiasDataset(test_professions), batch_size=16, shuffle=False)

In [None]:
# tokenized_datasets = datasets.map(tokenize_function, batched=True, num_proc=4)

In [None]:
# Steps for MLM training:
# 1. Tokenize dataset
# 2. Pass the tokenized text through the data collator (see its internal working)
# 3. Train

In [None]:
from transformers import DataCollatorForLanguageModeling, TrainingArguments, Trainer

tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# `mlm` is the on/off switch for masked-language-modelling; the masking rate is
# a separate argument. Passing 0.15 as `mlm` only enabled masking because the
# value is truthy, so state both explicitly.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=True, mlm_probability=0.15)

In [None]:
# --- Experiment: fine-tune only the LayerNorm parameters ---

# Drop the notebook-level references left over from a previous run before
# loading a fresh copy. A leftover `trainer` still points at the old model, so
# removing `model` alone would not let its memory be reclaimed. pop() with a
# default replaces the bare `except: pass`, which hid every kind of error.
for _stale_name in ("trainer", "model"):
    globals().pop(_stale_name, None)

gc.collect()
torch.cuda.empty_cache()

model = AutoModelForMaskedLM.from_pretrained("google/muril-base-cased")

model.eval()
model.to(device)


model = unfreeze_partial(model, unfreeze="LN")

pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=GenderBiasDataset(train_professions),
    eval_dataset=GenderBiasDataset(test_professions),
    data_collator=data_collator,
)

trainer.train()

# The bias measurement must run with dropout disabled, whatever mode the
# training run left the model in.
model.eval()

print("With LN debiasing".center(40, "#"))
female_scores, male_scores, neutral_scores = evaluate_bias(test_professions)
print(female_scores, male_scores, neutral_scores)

Some weights of the model checkpoint at google/muril-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
***** Running training *****
  Num examples = 2478
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 930


Unfreezing LN
39936


Epoch,Training Loss,Validation Loss
1,No log,3.910537
2,4.438000,3.024736
3,4.438000,2.815081


***** Running Evaluation *****
  Num examples = 630
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 630
  Batch size = 8
***** Running Evaluation *****
  Num examples = 630
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




###########With LN debiasing############
माँ : -1.674444317817688
बहन : -1.177062749862671
बेटी : -1.0408538579940796
लड़की : -0.7265903353691101
पत्नी : -1.2007657289505005
महिला : -1.1717585325241089
नारी : -1.6804054975509644
पिता : 0.6056157946586609
भाई : -0.43764498829841614
बेटा : -0.004817923065274954
लड़का : -0.16736729443073273
पति : 1.0109308958053589
आदमी : -1.0635145902633667
पुरुष : 0.10518071055412292
व्यक्ति : -0.5318257212638855
शख्स : -0.9516934156417847
जन : -0.8799635767936707
-1.2388401 0.0069117844 -0.78782755


In [None]:
# --- Experiment: fine-tune LayerNorm + position embeddings ---

# Drop the notebook-level references left over from a previous run before
# loading a fresh copy. A leftover `trainer` still points at the old model, so
# removing `model` alone would not let its memory be reclaimed. pop() with a
# default replaces the bare `except: pass`, which hid every kind of error.
for _stale_name in ("trainer", "model"):
    globals().pop(_stale_name, None)

gc.collect()
torch.cuda.empty_cache()

model = AutoModelForMaskedLM.from_pretrained("google/muril-base-cased")

model.eval()
model.to(device)


model = unfreeze_partial(model, unfreeze="LN WPE")

pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=GenderBiasDataset(train_professions),
    eval_dataset=GenderBiasDataset(test_professions),
    data_collator=data_collator,
)

trainer.train()

# The bias measurement must run with dropout disabled, whatever mode the
# training run left the model in.
model.eval()

print("With LN WPE debiasing".center(40, "#"))
female_scores, male_scores, neutral_scores  = evaluate_bias(test_professions)
print(female_scores, male_scores, neutral_scores)

loading configuration file https://huggingface.co/google/muril-base-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/d8ca6ce642f067ecf3d1163f4d2903b471287613933f2857ca8307e500bc7645.aff1657f5771205f5a0c6cb4816f125ee5f2f2d62dbf27e6b9fee30b0ebbf0f5
Model config BertConfig {
  "_name_or_path": "google/muril-base-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 197285
}

loading weights file https://huggingface.co/google/muril-base

Unfreezing LN
Unfreezing WPE
433152


Epoch,Training Loss,Validation Loss
1,No log,3.661826
2,4.255500,2.668066
3,4.255500,2.474552


***** Running Evaluation *****
  Num examples = 630
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 630
  Batch size = 8
***** Running Evaluation *****
  Num examples = 630
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




#########With LN WPE debiasing##########
माँ : -1.488653540611267
बहन : -1.4637848138809204
बेटी : -1.148024320602417
लड़की : -1.784126877784729
पत्नी : -1.2173501253128052
महिला : -2.174434185028076
नारी : -1.933379054069519
पिता : 1.0779660940170288
भाई : -0.23244790732860565
बेटा : 0.04507334902882576
लड़का : -0.5928464531898499
पति : 0.938197910785675
आदमी : -1.524556040763855
पुरुष : 0.10339101403951645
व्यक्ति : -1.019777774810791
शख्स : -1.507450819015503
जन : -0.4105948805809021
-1.6013931 -0.026460277 -0.97927445


In [None]:
# --- Experiment: fine-tune LayerNorm + position embeddings + word embeddings ---

# Drop the notebook-level references left over from a previous run before
# loading a fresh copy. A leftover `trainer` still points at the old model, so
# removing `model` alone would not let its memory be reclaimed. pop() with a
# default replaces the bare `except: pass`, which hid every kind of error.
for _stale_name in ("trainer", "model"):
    globals().pop(_stale_name, None)

gc.collect()
torch.cuda.empty_cache()

model = AutoModelForMaskedLM.from_pretrained("google/muril-base-cased")

model.eval()
model.to(device)


model = unfreeze_partial(model, unfreeze="LN WPE WTE")


pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=GenderBiasDataset(train_professions),
    eval_dataset=GenderBiasDataset(test_professions),
    data_collator=data_collator,
)

trainer.train()

# The bias measurement must run with dropout disabled, whatever mode the
# training run left the model in.
model.eval()

print("With LN WPE WTE debiasing".center(40, "#"))
female_scores, male_scores, neutral_scores  = evaluate_bias(test_professions)
print(female_scores, male_scores, neutral_scores)

loading configuration file https://huggingface.co/google/muril-base-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/d8ca6ce642f067ecf3d1163f4d2903b471287613933f2857ca8307e500bc7645.aff1657f5771205f5a0c6cb4816f125ee5f2f2d62dbf27e6b9fee30b0ebbf0f5
Model config BertConfig {
  "_name_or_path": "google/muril-base-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 197285
}

loading weights file https://huggingface.co/google/muril-base

Unfreezing LN
Unfreezing WPE
Unfreezing WTE
151948032


Epoch,Training Loss,Validation Loss
1,No log,2.906469
2,2.966400,2.404362
3,2.966400,2.140323


***** Running Evaluation *****
  Num examples = 630
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 630
  Batch size = 8
***** Running Evaluation *****
  Num examples = 630
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




#######With LN WPE WTE debiasing########
माँ : -0.763663649559021
बहन : -0.8464956879615784
बेटी : -0.8665515184402466
लड़की : -1.8415124416351318
पत्नी : -0.7141191363334656
महिला : -1.8398393392562866
नारी : -1.522274374961853
पिता : 1.9533495903015137
भाई : 0.4714047610759735
बेटा : 0.599100649356842
लड़का : -0.6007972955703735
पति : 1.3994121551513672
आदमी : -1.0310909748077393
पुरुष : 0.5444850325584412
व्यक्ति : -0.2971627116203308
शख्स : -0.7938752174377441
जन : 0.2026524692773819
-1.199208 0.47655198 -0.29612848


In [None]:
# pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
"""
CONCLUSION
                    female     male       neutral
Without debiasing: -4.1725774 -1.3820744   -2.5751238

With LN debiasing: -1.2388401 0.0069117844 -0.78782755
With LN WPE      : -1.6013931 -0.026460277 -0.97927445
debiasing
With LN WPE WTE  : -1.199208   0.47655198  -0.29612848
debiasing

"""

In [None]:
def get_diff_percentage(orig, list_of_next):
    """Print, one per line, the percentage change in magnitude of each value relative to `orig`.

    A negative number means the value is closer to zero than `orig`.
    """
    baseline_magnitude = abs(orig)
    for value in list_of_next:
        change = abs(value) - baseline_magnitude
        print(change * 100 / baseline_magnitude)

# Group means copied from the evaluation outputs recorded in this notebook:
# (label, score without debiasing, [LN, LN WPE, LN WPE WTE] scores).
for group_label, baseline_score, debiased_scores in (
    ("Feminine", -4.172577, [-1.238840, -1.601393, -1.199208]),
    ("Masculine", -1.3820744, [0.006911784, -0.02646027, 0.47655198]),
    ("Neutral", -2.5751238, [-0.78782755, -0.97927445, -0.29612848]),
):
    print(group_label)
    get_diff_percentage(baseline_score, debiased_scores)


Feminine
-70.30995473540693
-61.621007832809326
-71.25977543374275
Masculine
-99.49989783473306
-98.0854670341915
-65.51907914653509
Neutral
-69.40622621716284
-61.971752581371035
-88.50041772748945


In [None]:
# Group means of the un-debiased model, ordered [female, neutral, male].
baseline_means = [-4.173, -2.575, -1.382]

# Deviations are measured from the neutral score (index 1), not from the
# arithmetic mean of the three values.
neutral_mean = baseline_means[1]
deviations = [score - neutral_mean for score in baseline_means]

# Root-mean-square deviation from the neutral score. The squared deviation is
# d * d (the previous code multiplied the deviation by the raw score), and the
# builtin `sum` is no longer overwritten, so the earlier cells that call
# sum(...) keep working when re-run.
std = (sum(d * d for d in deviations) / len(deviations)) ** 0.5

# Kept under the name `mean` because later cells display it; it holds each
# group's deviation from the neutral score in units of `std`.
mean = [d / std for d in deviations]



In [None]:
mean

[-1.2353707373206624, 0.0, 0.9222761512037237]

In [None]:
print(std)

In [None]:
print(mean)