<a href="https://colab.research.google.com/github/SeongUgKim/gender_bias_in_nlp/blob/main/Jeongrok's_of_MBE_Calcuation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import torch
import difflib
import nltk
import regex as re
import numpy as np
import pickle

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the sentence files read by later cells
# live under /content/drive/MyDrive/Sentences.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m63.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.0 tokenizers-0.13.2 transformers-4.26.1


In [None]:
from transformers import AutoModelForMaskedLM, AutoTokenizer

In [None]:
def load_tokenizer_and_model(lang):
    """Load the masked-language model and tokenizer used for a language.

    Args:
        lang: One of 'de', 'es', 'pt', 'en' or 'zh'.

    Returns:
        A ``(tokenizer, model)`` tuple. The model is put in eval mode, is
        configured to return hidden states and attentions, and is moved to
        CUDA when a GPU is available.

    Raises:
        ValueError: If ``lang`` is not one of the supported codes.
    """
    model_names = {
        'de': 'deepset/gbert-base',
        'es': 'dccuchile/bert-base-spanish-wwm-uncased',
        'pt': 'neuralmind/bert-base-portuguese-cased',
        'en': 'bert-base-cased',
        'zh': 'hfl/chinese-bert-wwm-ext',
    }
    # Fail with a clear message instead of hitting an unbound `model_name` below.
    if lang not in model_names:
        raise ValueError(
            'Unsupported language %r; expected one of %s' % (lang, sorted(model_names))
        )
    model_name = model_names[lang]

    model = AutoModelForMaskedLM.from_pretrained(model_name, output_hidden_states=True, output_attentions=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = model.eval()
    if torch.cuda.is_available():
        model.to('cuda')
    return tokenizer, model

In [None]:
def read_list(filename):
    """Return the object stored in the pickle file ``filename``.

    The file is read in binary mode and unpickled with ``encoding='utf8'``.
    """
    # NOTE: unpickling can execute arbitrary code; only use this on trusted files.
    with open(filename, 'rb') as handle:
        payload = handle.read()
    return pickle.loads(payload, encoding='utf8')

In [None]:
def calculate_aul(model, token_ids, log_softmax, attention):
    """Score one tokenized sentence by the log-probability of its own tokens.

    Args:
        model: Callable returning an object with ``logits`` and ``attentions``.
        token_ids: Token id tensor for one sentence (assumes a batch of one,
            as implied by ``squeeze(0)`` / ``view(-1, 1)`` — TODO confirm).
        log_softmax: Callable applied to the squeezed logits (callers pass
            ``torch.nn.LogSoftmax(dim=1)``).
        attention: When truthy, weight each token's log-probability by the
            attention averaged over the three leading axes of the stacked
            attention tensors.

    Returns:
        The mean (optionally attention-weighted) token log-probability as a
        Python float. The first and last positions are excluded.
    """
    outputs = model(token_ids)
    position_log_probs = log_softmax(outputs.logits.squeeze(0))
    gather_index = token_ids.view(-1, 1).detach()
    # Pick each position's log-probability for the token actually present,
    # then drop the first and last positions.
    observed = position_log_probs.gather(1, gather_index)[1:-1]
    if attention:
        stacked = torch.cat(outputs.attentions, 0)
        per_token_weight = stacked.mean(0).mean(0).mean(0)
        observed = observed.squeeze(1) * per_token_weight[1:-1]
    return torch.mean(observed).item()

In [None]:
def calculate_mbe(lang, male_filepath, female_filepath, male_list, female_list):
    """Return the percentage of sentence pairs scored higher for the male variant.

    Sentences are compared position-wise (``male[i]`` against ``female[i]``),
    each scored by ``calculate_aul`` with attention weighting enabled.

    Args:
        lang: Language code forwarded to ``load_tokenizer_and_model``.
        male_filepath: Pickle file holding the male sentences, or None to use
            ``male_list``.
        female_filepath: Pickle file holding the female sentences, or None to
            use ``female_list``.
        male_list: Male sentences, used when ``male_filepath`` is None.
        female_list: Female sentences, used when ``female_filepath`` is None.

    Returns:
        ``100 * (pairs with male score > female score) / (number of pairs)``,
        rounded to two decimals.

    Raises:
        ValueError: If a sentence collection is missing or empty, or the two
            collections differ in length.
    """
    male = read_list(male_filepath) if male_filepath is not None else male_list
    female = read_list(female_filepath) if female_filepath is not None else female_list

    # Validate before the expensive model load. A length mismatch would
    # otherwise fail late (or silently broadcast when one side has a single
    # sentence), and empty input would divide by zero.
    if male is None or female is None:
        raise ValueError('calculate_mbe needs both male and female sentences')
    if len(male) == 0 or len(female) == 0:
        raise ValueError('calculate_mbe needs at least one sentence pair')
    if len(male) != len(female):
        raise ValueError(
            'male and female sentence lists must be paired: got %d and %d'
            % (len(male), len(female))
        )

    tokenizer, model = load_tokenizer_and_model(lang)
    # Inputs are moved to the model's device explicitly rather than changing
    # torch's process-wide default tensor type.
    device = next(model.parameters()).device
    log_softmax = torch.nn.LogSoftmax(dim=1)
    attention = True

    def score_sentences(sentences):
        """Return an array with one calculate_aul score per sentence."""
        scores = []
        with torch.no_grad():
            for sentence in sentences:
                token_ids = tokenizer.encode(sentence, return_tensors='pt').to(device)
                scores.append(calculate_aul(model, token_ids, log_softmax, attention))
        return np.array(scores)

    female_scores = score_sentences(female)
    male_scores = score_sentences(male)

    bias_scores = male_scores > female_scores
    biasRating = np.sum(bias_scores).item()
    total_sentences = bias_scores.shape[0]
    MBE = biasRating / total_sentences
    return round(MBE * 100, 2)

In [None]:
def cos_sim(v1, v2):
    """Return ``np.dot(v1, v2)`` divided by the product of the inputs' norms.

    Norms are taken with ``np.linalg.norm``'s default settings over each whole
    input, so the divisor is always a single scalar.
    """
    scale = np.linalg.norm(v1) * np.linalg.norm(v2)
    return np.dot(v1, v2) / scale

In [None]:
def calculate_aul_kaneko(model, token_ids, log_softmax, attention):
    """Score one tokenized sentence and return its averaged last-layer embedding.

    Args:
        model: Callable returning an object with ``logits``, ``attentions``
            and ``hidden_states``.
        token_ids: Token id tensor for one sentence (assumes a batch of one,
            as implied by ``squeeze(0)`` / ``view(-1, 1)`` — TODO confirm).
        log_softmax: Callable applied to the squeezed logits.
        attention: When truthy, weight token log-probabilities by the averaged
            attention.

    Returns:
        ``(score, hidden_state)``: the mean (optionally attention-weighted)
        log-probability of the sentence's own tokens as a float, and a NumPy
        array holding the last hidden-state layer averaged over positions.
        Both exclude the first and last positions.
    """
    outputs = model(token_ids)
    position_log_probs = log_softmax(outputs.logits.squeeze(0))
    gather_index = token_ids.view(-1, 1).detach()
    observed = position_log_probs.gather(1, gather_index)[1:-1]
    if attention:
        stacked = torch.cat(outputs.attentions, 0)
        per_token_weight = stacked.mean(0).mean(0).mean(0)
        observed = observed.squeeze(1) * per_token_weight[1:-1]
    score = torch.mean(observed).item()

    # Average the final layer over the inner positions to get one vector.
    inner_states = outputs.hidden_states[-1][:, 1:-1]
    hidden_state = torch.mean(inner_states, 1).detach().cpu().numpy()
    return score, hidden_state


In [None]:
def calculate_mbe_kaneko(lang, male_filepath, female_filepath, male_list, female_list):
    """Return a similarity-weighted percentage of male-over-female score wins.

    Each sentence is scored and embedded with ``calculate_aul_kaneko``.
    Sentences whose encoded length exceeds the model's position limit are
    skipped. Scores are compared position-wise after truncating both score
    lists to the shorter one, and each comparison is weighted by
    ``cos_sim(female_embeddings, male_embeddings.T)``.

    NOTE(review): the scores are compared pair-by-pair but the weights form a
    female-by-male matrix, so row ``i`` of the weights multiplies comparison
    ``i``. Confirm this matches the intended metric.

    Args:
        lang: Language code forwarded to ``load_tokenizer_and_model``.
        male_filepath: Pickle file holding the male sentences, or None to use
            ``male_list``.
        female_filepath: Pickle file holding the female sentences, or None to
            use ``female_list``.
        male_list: Male sentences, used when ``male_filepath`` is None.
        female_list: Female sentences, used when ``female_filepath`` is None.

    Returns:
        ``100 * sum(bias * weights) / sum(weights)``, rounded to two decimals.

    Raises:
        ValueError: If either sentence collection is missing or empty, or no
            sentence survives the length filter.
    """
    male = read_list(male_filepath) if male_filepath is not None else male_list
    female = read_list(female_filepath) if female_filepath is not None else female_list
    # Check cheap preconditions before loading the model.
    if male is None or female is None or len(male) == 0 or len(female) == 0:
        raise ValueError('calculate_mbe_kaneko needs non-empty male and female sentence lists')

    tokenizer, model = load_tokenizer_and_model(lang)
    # Inputs are moved to the model's device explicitly rather than changing
    # torch's process-wide default tensor type.
    device = next(model.parameters()).device
    # The encoded sequence, special tokens included, must fit the position
    # table. Counting only word pieces let one over-long length through.
    max_tokens = getattr(getattr(model, 'config', None), 'max_position_embeddings', 512)
    log_softmax = torch.nn.LogSoftmax(dim=1)
    attention = True

    def score_sentences(sentences):
        """Return (scores, embeddings) for every sentence that fits the model."""
        scores, embeddings = [], []
        with torch.no_grad():
            for sentence in sentences:
                token_ids = tokenizer.encode(sentence, return_tensors='pt')
                if token_ids.shape[-1] > max_tokens:
                    continue
                score, hidden_state = calculate_aul_kaneko(
                    model, token_ids.to(device), log_softmax, attention
                )
                scores.append(score)
                embeddings.append(hidden_state)
        return scores, embeddings

    female_scores, female_embes = score_sentences(female)
    male_scores, male_embes = score_sentences(male)
    if not female_scores or not male_scores:
        raise ValueError('no sentences left after dropping those longer than the model limit')

    common = min(len(male_scores), len(female_scores))
    female_scores = np.array(female_scores).reshape([-1, 1])[:common]
    male_scores = np.array(male_scores).reshape([-1, 1])[:common]
    bias_scores = male_scores > female_scores

    # Keep the weight rows aligned with the truncated scores. When there are
    # more female than male sentences the unsliced embeddings used to make the
    # multiplication below fail; when there are not, this slice changes nothing.
    female_embes = np.concatenate(female_embes[:common])
    male_embes = np.concatenate(male_embes)
    weights = cos_sim(female_embes, male_embes.T)
    weighted_bias_scores = bias_scores * weights
    MBE = np.sum(weighted_bias_scores) / np.sum(weights)
    return round(MBE * 100, 2)

In [None]:
from transformers import BertTokenizer

In [None]:
def preprocess(lang, filepath, max_length=512):
    """Load a pickled sentence list and drop sentences too long for the model.

    A sentence is kept when its word pieces plus the special tokens the
    tokenizer adds to a single sequence fit within ``max_length``.

    NOTE: each call filters one list on its own, so filtering a male and a
    female list separately can leave them with different lengths.

    Args:
        lang: One of 'de', 'es', 'pt', 'en' or 'zh'.
        filepath: Pickle file read with ``read_list``.
        max_length: Maximum encoded sequence length, special tokens included.

    Returns:
        The sentences that fit, in their original order.

    Raises:
        ValueError: If ``lang`` is not one of the supported codes.
    """
    model_names = {
        'de': 'deepset/gbert-base',
        'es': 'dccuchile/bert-base-spanish-wwm-uncased',
        'pt': 'neuralmind/bert-base-portuguese-cased',
        'en': 'bert-base-cased',
        'zh': 'hfl/chinese-bert-wwm-ext',
    }
    # Fail with a clear message instead of hitting an unbound name below.
    if lang not in model_names:
        raise ValueError(
            'Unsupported language %r; expected one of %s' % (lang, sorted(model_names))
        )

    original_list = read_list(filepath)
    tokenizer = BertTokenizer.from_pretrained(model_names[lang])
    # tokenize() does not count the special tokens that encoding adds, so
    # reserve room for them; the old `< 512` check let one over-long length through.
    token_budget = max_length - tokenizer.num_special_tokens_to_add(pair=False)
    return [
        sentence for sentence in original_list
        if len(tokenizer.tokenize(sentence)) <= token_budget
    ]

In [None]:
# German rule-based sentence lists, filtered by preprocess().
rule_male_de = preprocess(lang='de', filepath='/content/drive/MyDrive/Sentences/adj_rule_based_male_sentences_de0 (1)')
rule_female_de = preprocess(lang='de', filepath='/content/drive/MyDrive/Sentences/adj_rule_based_female_sentences_de0 (1)')
print(len(rule_male_de), len(rule_female_de), sep='\n')

18367
18367


In [None]:
# Score the German rule-based lists already loaded in memory.
calculate_mbe(
    'de',
    male_filepath=None,
    female_filepath=None,
    male_list=rule_male_de,
    female_list=rule_female_de,
)

Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


48.64

In [None]:
# NOTE(review): rule_male_list / rule_female_list are not assigned in any cell
# visible in this notebook — confirm where they are loaded before running.
calculate_mbe(
    'zh',
    male_filepath=None,
    female_filepath=None,
    male_list=rule_male_list,
    female_list=rule_female_list,
)

Some weights of the model checkpoint at hfl/chinese-bert-wwm-ext were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


47.12

In [None]:
import random

# Ten MBE estimates for 'pt', each on 3496 randomly chosen sentence pairs.
# NOTE(review): rule_male_list / rule_female_list are not assigned in any cell
# visible in this notebook — confirm where they are loaded before running.
pt_score = []
for trial in range(10):
  population = list(range(len(rule_male_list)))
  chosen = random.sample(population, 3496)
  # The same positions are taken from both lists.
  new_male_list = [rule_male_list[pos] for pos in chosen]
  new_female_list = [rule_female_list[pos] for pos in chosen]
  pt_score.append(calculate_mbe('pt', None, None, new_male_list, new_female_list))

print(pt_score)

In [None]:
# English Kaneko sentence lists, filtered by preprocess().
kaneko_en_m = preprocess(lang='en', filepath='/content/drive/MyDrive/Sentences/kaneko/male_sentences_en')
kaneko_en_f = preprocess(lang='en', filepath='/content/drive/MyDrive/Sentences/kaneko/female_sentences_en')

In [None]:
# Sizes of the filtered lists: female first, then male.
print(len(kaneko_en_f), len(kaneko_en_m), sep='\n')

13277
25723


In [None]:
import random

# Five MBE estimates, each on a random fifth of the index range of kaneko_en_f.
k_en_score = []
for trial in range(5):
  population = list(range(len(kaneko_en_f)))
  chosen = random.sample(population, len(kaneko_en_f) // 5)
  # The same positions are taken from both lists.
  new_m = [kaneko_en_m[pos] for pos in chosen]
  new_f = [kaneko_en_f[pos] for pos in chosen]
  k_en_score.append(calculate_mbe_kaneko('en', None, None, new_m, new_f))

print(k_en_score)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model fro

[51.71, 51.84, 52.76, 53.77, 50.29]


In [None]:
# Spanish Kaneko sentence lists, filtered by preprocess().
kaneko_es_m = preprocess(lang='es', filepath='/content/drive/MyDrive/Sentences/kaneko/male_sentences_es')
kaneko_es_f = preprocess(lang='es', filepath='/content/drive/MyDrive/Sentences/kaneko/female_sentences_es')
print(len(kaneko_es_m), len(kaneko_es_f), sep='\n')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/248k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/310 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

24912
12856


In [None]:
import random

# Five MBE estimates, each on a random fifth of the index range of kaneko_es_f.
k_es_score = []
for trial in range(5):
  population = list(range(len(kaneko_es_f)))
  chosen = random.sample(population, len(kaneko_es_f) // 5)
  # The same positions are taken from both lists.
  new_m = [kaneko_es_m[pos] for pos in chosen]
  new_f = [kaneko_es_f[pos] for pos in chosen]
  k_es_score.append(calculate_mbe_kaneko('es', None, None, new_m, new_f))

print(k_es_score)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/440M [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/486k [00:00<?, ?B/s]

[47.43, 48.09, 47.91, 49.8, 49.37]


In [None]:
# Portuguese Kaneko sentence lists, filtered by preprocess().
kaneko_pt_m = preprocess(lang='pt', filepath='/content/drive/MyDrive/Sentences/kaneko/male_sentences_pt')
kaneko_pt_f = preprocess(lang='pt', filepath='/content/drive/MyDrive/Sentences/kaneko/female_sentences_pt')
print(len(kaneko_pt_m), len(kaneko_pt_f), sep='\n')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/210k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/43.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/647 [00:00<?, ?B/s]

19589
10357


In [None]:
# Five MBE estimates, each on a random fifth of the index range of kaneko_pt_f.
k_pt_score = []
for trial in range(5):
  print(trial)  # progress marker
  population = list(range(len(kaneko_pt_f)))
  chosen = random.sample(population, len(kaneko_pt_f) // 5)
  # The same positions are taken from both lists.
  new_m = [kaneko_pt_m[pos] for pos in chosen]
  new_f = [kaneko_pt_f[pos] for pos in chosen]
  k_pt_score.append(calculate_mbe_kaneko('pt', None, None, new_m, new_f))

print(k_pt_score)

0


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


1


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


2


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


3


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


4


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[45.79, 46.23, 47.87, 46.77, 46.85]


In [None]:
# Chinese Kaneko sentence lists, filtered by preprocess().
kaneko_chn_m = preprocess(lang='zh', filepath='/content/drive/MyDrive/Sentences/kaneko/kaneko_male_sentences_chn')
kaneko_chn_f = preprocess(lang='zh', filepath='/content/drive/MyDrive/Sentences/kaneko/kaneko_female_sentences_chn')
print(len(kaneko_chn_m), len(kaneko_chn_f), sep='\n')


Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/19.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/647 [00:00<?, ?B/s]

23960
12308


In [None]:
# Five MBE estimates, each on a random fifth of the index range of kaneko_chn_f.
k_c_score = []
for trial in range(5):
  population = list(range(len(kaneko_chn_f)))
  chosen = random.sample(population, len(kaneko_chn_f) // 5)
  # The same positions are taken from both lists.
  new_m = [kaneko_chn_m[pos] for pos in chosen]
  new_f = [kaneko_chn_f[pos] for pos in chosen]
  k_c_score.append(calculate_mbe_kaneko('zh', None, None, new_m, new_f))

print(k_c_score)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/412M [00:00<?, ?B/s]

Some weights of the model checkpoint at hfl/chinese-bert-wwm-ext were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)/main/tokenizer.json:   0%|          | 0.00/269k [00:00<?, ?B/s]

Some weights of the model checkpoint at hfl/chinese-bert-wwm-ext were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at hfl/chinese-bert-wwm-ext were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassi

[46.17, 47.51, 46.32, 46.52, 46.82]


In [None]:
# German Kaneko sentence lists, filtered by preprocess().
kaneko_de_m = preprocess(lang='de', filepath='/content/drive/MyDrive/Sentences/kaneko/male_sentences_de')
kaneko_de_f = preprocess(lang='de', filepath='/content/drive/MyDrive/Sentences/kaneko/female_sentences_de')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/240k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/83.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/362 [00:00<?, ?B/s]

In [None]:
# Sizes of the filtered lists: female first, then male.
print(len(kaneko_de_f), len(kaneko_de_m), sep='\n')

8611
18008


In [None]:
# Five MBE estimates, each on a random fifth of the index range of kaneko_de_f.
k_de_score = []
for trial in range(5):
  population = list(range(len(kaneko_de_f)))
  chosen = random.sample(population, len(kaneko_de_f) // 5)
  # The same positions are taken from both lists.
  new_m = [kaneko_de_m[pos] for pos in chosen]
  new_f = [kaneko_de_f[pos] for pos in chosen]
  k_de_score.append(calculate_mbe_kaneko('de', None, None, new_m, new_f))

print(k_de_score)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/442M [00:00<?, ?B/s]

Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification mod

[43.99, 45.08, 45.77, 45.54, 48.5]


In [None]:
# NOTE(review): hard-coded scores; this notebook does not record which run produced them.
english = [
    45.71, 45.31, 45.02, 45.31, 45.59,
    45.05, 44.91, 44.94, 45.37, 45.48,
]

In [None]:
from statistics import mean

# NOTE(review): hard-coded scores; this notebook does not record which run produced them.
pt = [
    51.69, 51.06, 52.0, 52.49, 52.23,
    52.37, 52.03, 51.69, 51.4, 50.72,
]
mean(pt)

51.768

In [None]:
# Score the complete English Kaneko files (no subsampling).
calculate_mbe_kaneko(
    'en',
    male_filepath='/content/drive/MyDrive/Sentences/kaneko/male_sentences_en',
    female_filepath='/content/drive/MyDrive/Sentences/kaneko/female_sentences_en',
    male_list=None,
    female_list=None,
)

In [None]:
# Score the complete Spanish Kaneko files (no subsampling).
calculate_mbe_kaneko(
    'es',
    male_filepath='/content/drive/MyDrive/Sentences/kaneko/male_sentences_es',
    female_filepath='/content/drive/MyDrive/Sentences/kaneko/female_sentences_es',
    male_list=None,
    female_list=None,
)

In [None]:
# Score the complete Portuguese Kaneko files (no subsampling).
calculate_mbe_kaneko(
    'pt',
    male_filepath='/content/drive/MyDrive/Sentences/kaneko/male_sentences_pt',
    female_filepath='/content/drive/MyDrive/Sentences/kaneko/female_sentences_pt',
    male_list=None,
    female_list=None,
)

In [None]:
# Score the complete German Kaneko files (no subsampling).
calculate_mbe_kaneko(
    'de',
    male_filepath='/content/drive/MyDrive/Sentences/kaneko/male_sentences_de',
    female_filepath='/content/drive/MyDrive/Sentences/kaneko/female_sentences_de',
    male_list=None,
    female_list=None,
)

In [None]:
# The sentence files are Portuguese, so score them with the Portuguese model
# ('pt'); the previous 'es' argument loaded the Spanish model for them.
mbe_pt = calculate_mbe_kaneko('pt', '/content/drive/MyDrive/Sentences/kaneko/male_sentences_pt', '/content/drive/MyDrive/Sentences/kaneko/female_sentences_pt', None, None)

In [None]:
# German rule-based sentence lists from the 'l' folder, filtered by preprocess().
l_de_m = preprocess(lang='de', filepath='/content/drive/MyDrive/Sentences/l/even_rand_rule_based_male_sentences_de0')
l_de_f = preprocess(lang='de', filepath='/content/drive/MyDrive/Sentences/l/even_rand_rule_based_female_sentences_de0')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/240k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/83.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/362 [00:00<?, ?B/s]

In [None]:
# Sizes of the filtered lists: female first, then male.
print(len(l_de_f), len(l_de_m), sep='\n')

19321
19321


In [None]:
import random

# Five MBE estimates, each on a random fifth of the German sentence pairs.
l_de = []
for trial in range(5):
  population = list(range(len(l_de_f)))
  chosen = random.sample(population, len(l_de_f) // 5)
  # The same positions are taken from both lists.
  new_m = [l_de_m[pos] for pos in chosen]
  new_f = [l_de_f[pos] for pos in chosen]
  l_de.append(calculate_mbe('de', None, None, new_m, new_f))

print(l_de)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/442M [00:00<?, ?B/s]

Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification mod

[52.23, 51.42, 53.08, 52.17, 52.67]


In [None]:
# English rule-based sentence lists from the 'l' folder, filtered by preprocess().
l_en_m = preprocess(lang='en', filepath='/content/drive/MyDrive/Sentences/l/even_random_rule_based_male_sentences_en0')
l_en_f = preprocess(lang='en', filepath='/content/drive/MyDrive/Sentences/l/even_random_rule_based_female_sentences_en0')
print(len(l_en_m), len(l_en_f), sep='\n')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

25993
25993


In [None]:
# Five MBE estimates, each on a random fifth of the English sentence pairs.
l_en = []
for trial in range(5):
  population = list(range(len(l_en_f)))
  chosen = random.sample(population, len(l_en_f) // 5)
  # The same positions are taken from both lists.
  new_m = [l_en_m[pos] for pos in chosen]
  new_f = [l_en_f[pos] for pos in chosen]
  l_en.append(calculate_mbe('en', None, None, new_m, new_f))

print(l_en)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model fro

[43.71, 44.23, 43.9, 44.71, 43.0]


In [None]:
# The male list must come from the male sentence file. Loading the female file
# for both lists makes every pair identical, so the male score can never exceed
# the female score and calculate_mbe reports 0.0.
# NOTE(review): the male filename is inferred from the female one — confirm it exists on Drive.
l_es_m = preprocess('es', '/content/drive/MyDrive/Sentences/l/modified_rule_based_male_sentences_es')
l_es_f = preprocess('es', '/content/drive/MyDrive/Sentences/l/modified_rule_based_female_sentences_es')
print(len(l_es_m))
print(len(l_es_f))

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/248k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/310 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

76972
76972


In [None]:
import random

# Five MBE estimates, each on a random tenth of the Spanish sentence pairs.
l_es = []
for trial in range(5):
  population = list(range(len(l_es_f)))
  chosen = random.sample(population, len(l_es_f) // 10)
  # The same positions are taken from both lists.
  new_m = [l_es_m[pos] for pos in chosen]
  new_f = [l_es_f[pos] for pos in chosen]
  l_es.append(calculate_mbe('es', None, None, new_m, new_f))

print(l_es)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/440M [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/486k [00:00<?, ?B/s]

[0.0, 0.0, 0.0, 0.0, 0.0]


In [None]:
# The male list must come from the male sentence file. Loading the female file
# for both lists makes every pair identical, so the male score can never exceed
# the female score.
# NOTE(review): the male filename is inferred from the female one — confirm it exists on Drive.
l_chn_m = preprocess('zh', '/content/drive/MyDrive/Sentences/l/modified_rule_based_male_sentences_cn')
l_chn_f = preprocess('zh', '/content/drive/MyDrive/Sentences/l/modified_rule_based_female_sentences_cn')
print(len(l_chn_m))
print(len(l_chn_f))

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/19.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/647 [00:00<?, ?B/s]

22194
22194


In [None]:
# Five MBE estimates, each on a random tenth of the Chinese sentence pairs.
l_c = []
for trial in range(5):
  population = list(range(len(l_chn_f)))
  chosen = random.sample(population, len(l_chn_f) // 10)
  # The same positions are taken from both lists.
  new_m = [l_chn_m[pos] for pos in chosen]
  new_f = [l_chn_f[pos] for pos in chosen]
  l_c.append(calculate_mbe('zh', None, None, new_m, new_f))

print(l_c)

In [None]:
# NOTE(review): both file paths are empty strings — fill in the Portuguese
# male/female sentence files before running this cell.
lptm = preprocess(lang='pt', filepath='')
lptf = preprocess(lang='pt', filepath='')

print(len(lptm), len(lptf), sep='\n')

In [None]:
# Five MBE estimates, each on a random fifth of the Portuguese sentence pairs.
l_pt = []
for trial in range(5):
  population = list(range(len(lptf)))
  chosen = random.sample(population, len(lptf) // 5)
  # The same positions are taken from both lists.
  new_m = [lptm[pos] for pos in chosen]
  new_f = [lptf[pos] for pos in chosen]
  l_pt.append(calculate_mbe('pt', None, None, new_m, new_f))

print(l_pt)