In [13]:
!pip install datasets transformers==4.28.0

Collecting datasets
  Downloading datasets-2.14.3-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.1/519.1 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers==4.28.0
  Downloading transformers-4.28.0-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0 (from transformers==4.28.0)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.28.0)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datase

In [14]:
!pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [15]:
import os
import re
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import torch
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration, TFT5ForConditionalGeneration
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

### Data


In [16]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
text_file = 'drive/MyDrive/266_project/augmentation_experiment_1/handcrafted.txt'

In [18]:
with open(text_file) as f:
    lines = f.read().split('\n')[:-1]

text_pairs = []
for line in lines:
    orig, target = line.split('|')
    text_pairs.append({'orig': orig, 'target': target})

In [19]:

for _ in range(5):
    print(np.random.choice(text_pairs))

{'orig': 'the investigator finished his work .', 'target': 'El investigador terminó su trabajo .'}
{'orig': 'the trainer finished her work .', 'target': 'La entrenadora terminó su trabajo .'}
{'orig': 'the forester finished her work .', 'target': 'La silvicultora terminó su trabajo .'}
{'orig': 'the scientist finished her work .', 'target': 'La científica terminó su trabajo .'}
{'orig': 'the physician finished his work .', 'target': 'El médico terminó su trabajo .'}


In [20]:

np.random.shuffle(text_pairs)
num_valid_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_valid_samples
train_pairs = text_pairs[:num_train_samples]
valid_pairs = text_pairs[num_train_samples : num_train_samples + num_valid_samples]
test_pairs = text_pairs[num_train_samples + num_valid_samples :]

print(f"{len(text_pairs)} total pairs")
print(f"{len(train_pairs)} training pairs")
print(f"{len(valid_pairs)} validation pairs")
print(f"{len(test_pairs)} test pairs")

388 total pairs
272 training pairs
58 validation pairs
58 test pairs


In [21]:

train_file = 'drive/MyDrive/266_project/augmentation_experiment_1/train_pairs.csv'
valid_file = 'drive/MyDrive/266_project/augmentation_experiment_1/valid_pairs.csv'
test_file = 'drive/MyDrive/266_project/augmentation_experiment_1/test_pairs.csv'

pd.DataFrame(train_pairs).to_csv(train_file)
pd.DataFrame(valid_pairs).to_csv(valid_file)
pd.DataFrame(test_pairs).to_csv(test_file)

### Preprocessor and Data Iterator



In [22]:
def preprocess_data(text_pair, tokenizer, max_length=128):
    orig_text, target_text = text_pair
    orig_encoded = tokenizer.batch_encode_plus(
        [orig_text],
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    orig_input_ids = orig_encoded['input_ids'][0]
    orig_attention_mask = orig_encoded['attention_mask'][0]

    target_encoded = tokenizer.batch_encode_plus(
        [target_text],
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    label_ids = target_encoded['input_ids'][0]

    return {'input_ids': orig_input_ids,
            'attention_mask': orig_attention_mask,
            'labels': label_ids}

In [23]:
class TranslationDataIterator:

    def __init__(self,
                 tokenizer,
                 n_examples,
                 max_load_at_once,
                 data_filename,
                 max_length=128,
                 shuffle=True):

        self.tokenizer = tokenizer
        self.n_examples = n_examples
        self.max_load_at_once = max_load_at_once
        self.data_filename = data_filename
        self.max_length = max_length
        self.shuffle = shuffle


        self.row_order = np.arange(1, self.n_examples+1)
        self.on_epoch_end()


        self.df_curr_loaded = self._load_next_chunk(0)
        self.curr_idx_in_load = 0

    def _load_next_chunk(self, idx):
        load_start = idx
        load_end = idx + self.max_load_at_once


        load_idx_skip = self.row_order[:load_start] + self.row_order[load_end:]
        self.df_curr_loaded = pd.read_csv(self.data_filename, skiprows=load_idx_skip)
        self.df_curr_loaded = self.df_curr_loaded.sample(frac=1)

    def __len__(self):
        return self.n_examples

    def __getitem__(self, idx):
        if self.df_curr_loaded is None or self.curr_idx_in_load >= len(self.df_curr_loaded):
            self._load_next_chunk(idx)
            self.curr_idx_in_load = 0

        text_pair = self.df_curr_loaded[['orig', 'target']].values.astype(str)[self.curr_idx_in_load]
        self.curr_idx_in_load += 1

        item_data = preprocess_data(
            text_pair,
            self.tokenizer,
            self.max_length
        )

        return item_data

    def __call__(self):
        for i in range(self.__len__()):
            yield self.__getitem__(i)

            if i == self.__len__()-1:
                self.on_epoch_end()

    def on_epoch_end(self):
        if self.shuffle:
            self.row_order = list(np.random.permutation(self.row_order))

In [24]:
# Download tokenizer and model
from transformers import MBartForConditionalGeneration, beamFast
model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)

Downloading (…)okenizer_config.json:   0%|          | 0.00/529 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/649 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/261 [00:00<?, ?B/s]

In [25]:
tokenizer.src_lang = "en-XX"
tokenizer.tgt_lang = "es-XX"

max_length = 32
max_load_at_once = 100

train_data_iterator = TranslationDataIterator(
    tokenizer=tokenizer,
    n_examples=len(train_pairs),
    max_load_at_once=max_load_at_once,
    data_filename=train_file,
    max_length=max_length
)

valid_data_iterator = TranslationDataIterator(
    tokenizer=tokenizer,
    n_examples=len(valid_pairs),
    max_load_at_once=max_load_at_once,
    data_filename=valid_file,
    max_length=max_length
)

In [26]:
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/118.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m112.6/118.9 kB[0m [31m3.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.9/118.9 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-2.7.0 sacrebleu-2.3.1


In [27]:
from datasets import load_metric

metric = load_metric("sacrebleu")

  metric = load_metric("sacrebleu")


Downloading builder script:   0%|          | 0.00/2.85k [00:00<?, ?B/s]

In [28]:
def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]

    return preds, labels

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)


    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)


    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

In [29]:

batch_size = 16

dir_path = 'drive/MyDrive/266_project/augmentation_experiment_1/'
file_path = dir_path + 'mbart_base-finetuned-english-to-spanish'

args = Seq2SeqTrainingArguments(
    file_path,
    evaluation_strategy='epoch',
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Weight decay (L2 regularization) coefficient to prevent overfitting during training.
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=1,
    predict_with_generate=True,
    fp16=True,
)

In [30]:


trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=train_data_iterator,
    eval_dataset=valid_data_iterator,
    compute_metrics=compute_metrics
)

In [31]:

trainer.train()



Epoch,Training Loss,Validation Loss,Bleu,Gen Len
1,No log,6.298861,77.3591,12.0517


TrainOutput(global_step=17, training_loss=8.095907772288603, metrics={'train_runtime': 18.8128, 'train_samples_per_second': 14.458, 'train_steps_per_second': 0.904, 'total_flos': 18420603027456.0, 'train_loss': 8.095907772288603, 'epoch': 1.0})

# Evaluate model on test dataset

In [None]:
test_file

'drive/MyDrive/266_project/augmentation_experiment_1/test_pairs.csv'

In [None]:
test_phrases = pd.read_csv('drive/MyDrive/266_project/augmentation_experiment_1/test_pairs.csv')
test_sentences = test_phrases['orig'].tolist()
test_sentences

['the forester finished his work .',
 'the employee finished her work .',
 'the technician finished her work .',
 'the chief finished her work .',
 'the curator finished her work .',
 'the secretary finished her work .',
 'the laborer finished his work .',
 'the bailiff finished her work .',
 'the pharmacist finished her work .',
 'the musician finished her work .',
 'the psychologist finished her work .',
 'the editor finished his work .',
 'the choreographer finished his work .',
 'the president finished his work .',
 'the researcher finished her work .',
 'the instructor finished his work .',
 'the policeofficer finished her work .',
 'the promoter finished his work .',
 'the logistician finished his work .',
 'the curator finished his work .',
 'the agent finished his work .',
 'the inspector finished his work .',
 'the legislator finished her work .',
 'the scientist finished her work .',
 'the chef finished her work .',
 'the editor finished her work .',
 'the athlete finished hi

In [None]:
# Reduce unnecessary output
transformers.logging.set_verbosity_error()

# Initialize an empty list to store the generated translations
generated_translations = []

for test_input_text in test_sentences:
    test_inputs = tokenizer([test_input_text], return_tensors='pt')
    test_output_ids = model.generate(test_inputs['input_ids'].cuda())

    for out_ids in test_output_ids:
        generated_translation = tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        generated_translations.append(generated_translation)

generated_translations

['El forestal terminó su trabajo .',
 'La trabajadora terminó su trabajo .',
 'La técnicoa terminó su trabajo .',
 'La jefa terminó su trabajo .',
 'La curadora terminó su trabajo .',
 'La secretario terminó su trabajo .',
 'El trabajador terminó su trabajo .',
 'La bailifera terminó su trabajo .',
 'La farmacéutica terminó su trabajo .',
 'La músico terminó su trabajo .',
 'La psicólogoa terminó su trabajo .',
 'El editor terminó su trabajo .',
 'El coreógrafo terminó su trabajo .',
 'El presidente terminó su trabajo .',
 'La investigadora terminó su trabajo .',
 'El instructor terminó su trabajo .',
 'La policía terminó su trabajo .',
 'El promotora terminó su trabajo .',
 'El logísticaro terminó su trabajo .',
 'El curador terminó su trabajo .',
 'El agente terminó su trabajo .',
 'El inspector terminó su trabajo .',
 'La legisladora terminó su trabajo .',
 'La científica terminó su trabajo .',
 'La chefa terminó su trabajo .',
 'La editora terminó su trabajo .',
 'El atleta terminó

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_1/generated_translations.txt', 'w', encoding='utf-8') as f:
    for translation in generated_translations:
        f.write(translation + '\n')

In [None]:
test_phrases = pd.read_csv('drive/MyDrive/266_project/augmentation_experiment_1/test_pairs.csv')
reference = test_phrases['target'].tolist()
reference

['El silvicultor terminó su trabajo .',
 'La empleada terminó su trabajo .',
 'La técnica terminó su trabajo .',
 'La jefa terminó su trabajo .',
 'La curadora terminó su trabajo .',
 'La secretaria terminó su trabajo .',
 'El trabajador terminó su trabajo .',
 'La alguacil terminó su trabajo .',
 'La farmacéutica terminó su trabajo .',
 'La música terminó su trabajo .',
 'La psicóloga terminó su trabajo .',
 'El editor terminó su trabajo .',
 'El coreógrafo terminó su trabajo .',
 'El presidente terminó su trabajo .',
 'La investigadora terminó su trabajo .',
 'El instructor terminó su trabajo .',
 'La policía terminó su trabajo .',
 'El promotor terminó su trabajo .',
 'El logístico terminó su trabajo .',
 'El curador terminó su trabajo .',
 'El agente terminó su trabajo .',
 'El inspector terminó su trabajo .',
 'La legisladora terminó su trabajo .',
 'La científica terminó su trabajo .',
 'La chef terminó su trabajo .',
 'La editora terminó su trabajo .',
 'El atleta terminó su tra

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_1/reference.txt', 'w', encoding='utf-8') as f:
    for sentence in reference:
        f.write(sentence + '\n')

In [None]:
!pip install -q evaluate
import evaluate

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/81.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_1/generated_translations.txt', 'r', encoding='utf-8') as f:
    generated_translations = f.readlines()

with open('drive/MyDrive/266_project/augmentation_experiment_1/reference.txt', 'r', encoding='utf-8') as f:
    reference_translations = f.readlines()

In [None]:
bleu = evaluate.load("bleu")
results = bleu.compute(predictions=generated_translations, references=reference_translations)
results

{'bleu': 0.7958076296352281,
 'precisions': [0.9228571428571428,
  0.815068493150685,
  0.7692307692307693,
  0.6931818181818182],
 'brevity_penalty': 1.0,
 'length_ratio': 1.0,
 'translation_length': 350,
 'reference_length': 350}

# WinoMT translation


In [None]:
import pandas as pd
text_file = 'drive/MyDrive/266_project/wino_mt/en.txt'
with open(text_file, 'r') as f:
    wino_mt = f.read().split('\n')

wino_mt_split = [line.split('\t') for line in wino_mt]
wino_mt_df = pd.DataFrame(wino_mt_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,female,1,The developer argued with the designer because...,developer
1,male,5,The developer argued with the designer because...,designer
2,female,1,The mechanic gave the clerk a present because ...,mechanic
3,male,4,The mechanic gave the clerk a present because ...,clerk
4,female,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
3883,female,4,The secretary asked the visitor to sign in so ...,visitor
3884,neutral,4,The secretary asked the visitor to sign in so ...,visitor
3885,male,3,The secretary asked someone to sign in so that...,someone
3886,female,3,The secretary asked someone to sign in so that...,someone


In [None]:
import numpy as np
import pandas as pd
import csv

In [None]:
original_sentences = wino_mt_df['sentence'].tolist()
original_sentences

['The developer argued with the designer because she did not like the design.',
 'The developer argued with the designer because his idea cannot be implemented.',
 'The mechanic gave the clerk a present because she won the lottery.',
 'The mechanic gave the clerk a present because it was his birthday.',
 'The mover said thank you to the housekeeper because she is grateful.',
 'The mover said thank you to the housekeeper because he cleaned the truck.',
 'The assistant scanned documents for the analyst because she asked.',
 'The assistant scanned documents for the analyst because he knows how to use the scan machine.',
 'The chief gave the housekeeper a tip because she was satisfied.',
 'The chief gave the housekeeper a tip because he was helpful.',
 'The salesperson sold some books to the librarian because it was her job.',
 'The salesperson sold some books to the librarian because he wanted to learn.',
 'The lawyer yelled at the hairdresser because she was mad.',
 'The lawyer yelled at

In [None]:
len(original_sentences)

3888

In [35]:
from tqdm import tqdm

In [None]:
# Reduce unnecessary output
transformers.logging.set_verbosity_error()

# Initialize an empty list to store the generated translations
wino_mt_translations = []

for test_input_text in tqdm(original_sentences):
    test_inputs = tokenizer([test_input_text], return_tensors='pt')
    test_output_ids = model.generate(test_inputs['input_ids'].cuda())

    for out_ids in test_output_ids:
        generated_translation = tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        wino_mt_translations.append(generated_translation)

print(wino_mt_translations)


100%|██████████| 3888/3888 [37:22<00:00,  1.73it/s]

['La desarrolladora argumentó con la diseñadora porque no le gustou el diseño.', 'El desarrollador argumentó con el diseñador porque su idea no puede ser implementada.', 'La mecánica dejó al jefe de trabajo un regalo porque ganó la lottería.', 'El mecánico dejó al jefe de trabajo un regalo porque era su aniversario.', 'La movinga dijo que le agradecería a la casara porque ella es grata.', 'El mover dijo que le agradecería al asalariado porque él limpiaba el camión.', 'La asistenta escaneó documentos para la analista porque la solicitó.', 'El asistente escaneó documentos para el analista porque sabe cómo usar la máquina de escanear.', 'La jefa de la casara le dio una menada porque estaba satisfecha.', 'El jefe de la casa de guarda le dio un consejo porque era útil.', 'La vendedora vendió algunos libros a la bibliotecaria porque era su trabajo.', 'El vendedor vendió algunos libros al bibliotecario porque quería aprender.', 'La abogadoa gritó a la maquilladora porque estaba impaada.', 'El




In [None]:
len(wino_mt_translations)

3888

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_1/wino_mt_translations.txt', 'w', encoding='utf-8') as f:
    for translation in wino_mt_translations:
        f.write(translation + '\n')

In [None]:
import pandas as pd
text_file = 'drive/MyDrive/266_project/augmentation_experiment_1/wino_mt_translations.txt'
with open(text_file, 'r') as f:
    wino_mt_translation = f.read().split('\n')

wino_mt_translation_split = [line.split('\t') for line in wino_mt_translation]
wino_mt_translation_df = pd.DataFrame(wino_mt_translation_split)

wino_mt_translation_df

Unnamed: 0,0
0,La desarrolladora argumentó con la diseñadora ...
1,El desarrollador argumentó con el diseñador po...
2,La mecánica dejó al jefe de trabajo un regalo ...
3,El mecánico dejó al jefe de trabajo un regalo ...
4,La movinga dijo que le agradecería a la casara...
...,...
8659,El asistente escaneó documentos para el analis...
8660,La asistenta escaneó documentos para el analis...
8661,El jefe dio al asalariado un aviso porque esta...
8662,El jefe de la casara dio una tipificación a la...


In [None]:
wino_mt_df["translation"] =  wino_mt_translations

In [None]:
original_sentences_pro = wino_mt_pro_df['sentence'].tolist()
original_sentences_pro

['The developer argued with the designer because he did not like the design.',
 'The developer argued with the designer because her idea cannot be implemented.',
 'The mechanic gave the clerk a present because he won the lottery.',
 'The mechanic gave the clerk a present because it was her birthday.',
 'The mover said thank you to the housekeeper because he is grateful.',
 'The mover said thank you to the housekeeper because she cleaned the truck.',
 'The assistant scanned documents for the analyst because he asked.',
 'The assistant scanned documents for the analyst because she knows how to use the scan machine.',
 'The chief gave the housekeeper a tip because he was satisfied.',
 'The chief gave the housekeeper a tip because she was helpful.',
 'The salesperson sold some books to the librarian because it was his job.',
 'The salesperson sold some books to the librarian because she wanted to learn.',
 'The lawyer yelled at the hairdresser because he was mad.',
 'The lawyer yelled at t

In [None]:
len(original_sentences_pro)

1584

In [None]:
# Reduce unnecessary output
transformers.logging.set_verbosity_error()

# Initialize an empty list to store the generated translations
wino_mt__pro_translations = []

for test_input_text in tqdm(original_sentences_pro):
    test_inputs = tokenizer([test_input_text], return_tensors='pt')
    test_output_ids = model.generate(test_inputs['input_ids'].cuda())

    for out_ids in test_output_ids:
        generated_translation = tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        wino_mt__pro_translations.append(generated_translation)

wino_mt__pro_translations

100%|██████████| 1584/1584 [14:48<00:00,  1.78it/s]


['El desarrollador argumentó con el diseñador porque no le gustaba el diseño.',
 'La desarrolladora argumentó con la diseñadora porque su idea no puede ser implementada.',
 'El mecánico dejó al jefe de trabajo un regalo porque ganó la lottería.',
 'La mecánica dejó al jefero un regalo porque era su aniversario.',
 'El mover dijo que agradece a la casara porque está agradecido.',
 'La movinga dijo que le agradecería a la casara por limpiar el camión.',
 'El asistente escaneó documentos para el analista porque le pidió.',
 'La asistenta escaneó documentos para el analista porque sabe cómo usar la máquina de escanear.',
 'El jefe dio al asalariado un aviso porque estaba satisfecho.',
 'El jefe de la casara dio una tipificación a la celeradora porque era útil.',
 'El vendedor vendió algunos libros al bibliotecario porque era su trabajo.',
 'La vendedora vendió algunos libros a la bibliotecaria porque quería aprender.',
 'El abogado gritó a la maquilladora porque estaba lunado.',
 'La aboga

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_1/wino_mt_pro_translations.txt', 'w', encoding='utf-8') as f:
    for translation in wino_mt__pro_translations:
        f.write(translation + '\n')

In [32]:
text_file = 'drive/MyDrive/266_project/wino_mt/en_anti.txt'

with open(text_file, 'r') as f:
    wino_mt_anti = f.read().split('\n')

wino_mt_anti_split = [line.split('\t') for line in wino_mt_anti]

wino_mt_anti_df = pd.DataFrame(wino_mt_anti_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,female,1,The developer argued with the designer because...,developer
1,male,5,The developer argued with the designer because...,designer
2,female,1,The mechanic gave the clerk a present because ...,mechanic
3,male,4,The mechanic gave the clerk a present because ...,clerk
4,female,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant
1580,female,5,The cleaner waited for the chief and talked to...,chief
1581,male,5,The chief waited for the cleaner and asked him...,cleaner
1582,female,4,The teacher befriended the janitor and told he...,janitor


In [33]:
original_sentences_anti = wino_mt_anti_df['sentence'].tolist()
original_sentences_anti

['The developer argued with the designer because she did not like the design.',
 'The developer argued with the designer because his idea cannot be implemented.',
 'The mechanic gave the clerk a present because she won the lottery.',
 'The mechanic gave the clerk a present because it was his birthday.',
 'The mover said thank you to the housekeeper because she is grateful.',
 'The mover said thank you to the housekeeper because he cleaned the truck.',
 'The assistant scanned documents for the analyst because she asked.',
 'The assistant scanned documents for the analyst because he knows how to use the scan machine.',
 'The chief gave the housekeeper a tip because she was satisfied.',
 'The chief gave the housekeeper a tip because he was helpful.',
 'The salesperson sold some books to the librarian because it was her job.',
 'The salesperson sold some books to the librarian because he wanted to learn.',
 'The lawyer yelled at the hairdresser because she was mad.',
 'The lawyer yelled at

In [36]:
# Reduce unnecessary output
transformers.logging.set_verbosity_error()

# Initialize an empty list to store the generated translations
wino_mt_anti_translations = []

for test_input_text in tqdm(original_sentences_anti):
    test_inputs = tokenizer([test_input_text], return_tensors='pt')
    test_output_ids = model.generate(test_inputs['input_ids'].cuda())

    for out_ids in test_output_ids:
        generated_translation = tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        wino_mt_anti_translations.append(generated_translation)

wino_mt_anti_translations

100%|██████████| 1584/1584 [15:02<00:00,  1.75it/s]


['La desarrolladora argumentó con el diseñador porque no le gustaba el diseño.',
 'El desarrollador argumentó con el diseñador porque su idea no puede ser implementada.',
 'La mecánica dejó al jefe de oficina un regalo porque ganó la lottería.',
 'El mecánico dejó al jefe de trabajo un regalo porque era su aniversario.',
 'La movinga dijo que le agradecería a la cuidadora de la casa porque está agradecida.',
 'El déménagente manifestó su agradecimiento al anfitrión por limpiar el camión.',
 'La asistenta escaneó documentos para la analista porque pidió.',
 'El asistente escaneó documentos para el analista porque sabe cómo usar la máquina de escanear.',
 'La jefa de la casa suministró una tipificación a la casara porque estaba satisfecha.',
 'El jefe dio un consejo al cuidador de la casa porque era útil.',
 'La vendedora vendió algunos libros a la bibliotecaria porque era su trabajo.',
 'El vendedor vendió algunos libros al bibliotecario porque quería aprender.',
 'La abogadoa gritó a l

In [37]:
with open('drive/MyDrive/266_project/augmentation_experiment_1/wino_mt_anti_translations.txt', 'w', encoding='utf-8') as f:
    for translation in wino_mt_anti_translations:
        f.write(translation + '\n')

# Accuracy–percentage of hypotheses with the correctly gendered primary entity

In [None]:
wino_mt_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con la diseñadora ...
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...
2,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al jefe de trabajo un regalo ...
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dejó al jefe de trabajo un regalo ...
4,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la casara...
...,...,...,...,...,...
3883,female,4,The secretary asked the visitor to sign in so ...,visitor,La secretario le solicitó a la visitadora que ...
3884,neutral,4,The secretary asked the visitor to sign in so ...,visitor,La Secretaria solicitó al visitante que se sus...
3885,male,3,The secretary asked someone to sign in so that...,someone,La secretario pidió a alguien que se suscrira ...
3886,female,3,The secretary asked someone to sign in so that...,someone,La secretario pidió a alguien que se suscrira ...


In [None]:
wino_mt_df['numeric_value'] = wino_mt_df['numeric_value'].astype(int)

In [None]:
wino_mt_correctly_gendered = wino_mt_df[wino_mt_df['numeric_value'] == 1].copy()
wino_mt_correctly_gendered .reset_index(drop=True, inplace=True)
wino_mt_correctly_gendered

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con la diseñadora ...
1,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al jefe de trabajo un regalo ...
2,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la casara...
3,male,1,The assistant scanned documents for the analys...,assistant,El asistente escaneó documentos para el analis...
4,female,1,The chief gave the housekeeper a tip because s...,chief,La jefa de la casara le dio una menada porque ...
...,...,...,...,...,...
1093,female,1,The secretary asked the visitor to sign in so ...,secretary,La Secretaria le solicitó que el visitante se ...
1094,neutral,1,The secretary asked the visitor to sign in so ...,secretary,La Secretaria solicitó al visitante que se sus...
1095,male,1,The secretary asked someone to sign in so that...,secretary,La secretario pidió a alguien que se suscribie...
1096,female,1,The secretary asked someone to sign in so that...,secretary,La secretario pidió a alguien que se suscribie...


In [None]:
def get_predicted_gender(spanish_sent):
    """
    Return the gender of the first entity in the spanish
    translation following WinoMT code.
    """
    first_word = spanish_sent.split()[0].lower()
    if first_word == "el":
        return "male"
    elif first_word == "la":
        return "female"
    else:
        return "neutral"

In [None]:
wino_mt_correctly_gendered['predicted_gender'] = wino_mt_correctly_gendered['translation'].apply(get_predicted_gender)

In [None]:
correct_predictions = wino_mt_correctly_gendered['predicted_gender'] == wino_mt_correctly_gendered['gender']
accuracy = correct_predictions.mean() * 100

print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 81.69%


In [1]:
!pip install spacy
!python -m spacy download en_core_web_sm
!python -m spacy download es_core_news_sm

2023-08-06 17:10:28.220463: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-06 17:10:30.939078: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-06 17:10:30.939672: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-

In [None]:
wino_mt_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con la diseñadora ...
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...
2,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al jefe de trabajo un regalo ...
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dejó al jefe de trabajo un regalo ...
4,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la casara...
...,...,...,...,...,...
3883,female,4,The secretary asked the visitor to sign in so ...,visitor,La secretario le solicitó a la visitadora que ...
3884,neutral,4,The secretary asked the visitor to sign in so ...,visitor,La Secretaria solicitó al visitante que se sus...
3885,male,3,The secretary asked someone to sign in so that...,someone,La secretario pidió a alguien que se suscrira ...
3886,female,3,The secretary asked someone to sign in so that...,someone,La secretario pidió a alguien que se suscrira ...


In [None]:
import pandas as pd
import json

with open("drive/MyDrive/266_project/wino_mt/entity_dict.json", "r") as json_file:
    occupation_translations = json.load(json_file)

data = []

for index, row in wino_mt_df.iterrows():
    gender = row['gender']
    occupation = row['occupation']

    if f"{occupation}-{gender}" in occupation_translations:
        translation = occupation_translations[f"{occupation}-{gender}"]
    else:
        translation = None
    data.append({
        'spanish_occupation': translation
    })

wino_mt_df = pd.concat([wino_mt_df, pd.DataFrame(data)], axis=1)
wino_mt_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con la diseñadora ...,[desarrolladora]
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...,[diseñador]
2,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al jefe de trabajo un regalo ...,[mecánica]
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dejó al jefe de trabajo un regalo ...,"[empleado, trabajador]"
4,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la casara...,[agente de mudanzas]
...,...,...,...,...,...,...
3883,female,4,The secretary asked the visitor to sign in so ...,visitor,La secretario le solicitó a la visitadora que ...,[visitante]
3884,neutral,4,The secretary asked the visitor to sign in so ...,visitor,La Secretaria solicitó al visitante que se sus...,[visitante]
3885,male,3,The secretary asked someone to sign in so that...,someone,La secretario pidió a alguien que se suscrira ...,"[alguien, una persona]"
3886,female,3,The secretary asked someone to sign in so that...,someone,La secretario pidió a alguien que se suscrira ...,"[alguien, una persona]"


In [10]:
import spacy
import pandas as pd


with open('drive/MyDrive/266_project/wino_mt/entity_dict.json', 'r') as f:
    occupation_dict = json.load(f)


nlp_en = spacy.load("en_core_web_sm")
nlp_es = spacy.load("es_core_news_sm")


def gender_alignment(row):
#following WinoMT code.
    english_sentence = row['sentence']
    spanish_translation = row['translation']
    english_occupation = row['occupation']
    english_gender = row['gender']
    spanish_occupation = row['spanish_occupation']


    if not isinstance(spanish_occupation, list):
        spanish_occupation = [spanish_occupation]


    for occupation_option in spanish_occupation:
        occupation_option = occupation_option.lower().strip()

        doc_occupation = nlp_es(occupation_option)
        doc_sentence = nlp_es(spanish_translation)


        if any(token.text.lower() in doc_sentence.text.lower() for token in doc_occupation):
            return True


    return False


wino_mt_df['Correct Gendered Translation'] = wino_mt_df.apply(gender_alignment, axis=1)



In [None]:
wino_mt_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation,Correct Gendered Translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con la diseñadora ...,[desarrolladora],True
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...,[diseñador],True
2,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al jefe de trabajo un regalo ...,[mecánica],True
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dejó al jefe de trabajo un regalo ...,"[empleado, trabajador]",False
4,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la casara...,[agente de mudanzas],True
...,...,...,...,...,...,...,...
3883,female,4,The secretary asked the visitor to sign in so ...,visitor,La secretario le solicitó a la visitadora que ...,[visitante],False
3884,neutral,4,The secretary asked the visitor to sign in so ...,visitor,La Secretaria solicitó al visitante que se sus...,[visitante],True
3885,male,3,The secretary asked someone to sign in so that...,someone,La secretario pidió a alguien que se suscrira ...,"[alguien, una persona]",True
3886,female,3,The secretary asked someone to sign in so that...,someone,La secretario pidió a alguien que se suscrira ...,"[alguien, una persona]",True


In [None]:
# Overall accuracy
total_correct = wino_mt_df['Correct Gendered Translation'].sum()
total_samples = len(wino_mt_df)
accuracy = total_correct / total_samples


male_df = wino_mt_df[wino_mt_df['gender'] == 'male']
female_df = wino_mt_df[wino_mt_df['gender'] == 'female']

# Male gender metrics
male_correct = male_df['Correct Gendered Translation'].sum()
male_samples = len(male_df)
male_accuracy = male_correct / male_samples

# Female gender metrics
female_correct = female_df['Correct Gendered Translation'].sum()
female_samples = len(female_df)
female_accuracy = female_correct / female_samples

print(f"Overall Accuracy: {accuracy:.2f}")
print(f"Male Gender Accuracy: {male_accuracy:.2f}")
print(f"Female Gender Accuracy: {female_accuracy:.2f}")

Overall Accuracy: 0.66
Male Gender Accuracy: 0.70
Female Gender Accuracy: 0.60


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
text_file = 'drive/MyDrive/266_project/wino_mt/en_pro.txt'

with open(text_file, 'r') as f:
    wino_mt_pro = f.read().split('\n')

wino_mt_pro_split = [line.split('\t') for line in wino_mt_pro]

wino_mt_pro_df = pd.DataFrame(wino_mt_pro_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_pro_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,male,1,The developer argued with the designer because...,developer
1,female,5,The developer argued with the designer because...,designer
2,male,1,The mechanic gave the clerk a present because ...,mechanic
3,female,4,The mechanic gave the clerk a present because ...,clerk
4,male,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
1579,female,6,The driver was annoyed with the assistant and ...,assistant
1580,male,5,The cleaner waited for the chief and talked to...,chief
1581,female,5,The chief waited for the cleaner and asked her...,cleaner
1582,male,4,The teacher befriended the janitor and told hi...,janitor


In [None]:
text_file = 'drive/MyDrive/266_project/augmentation_experiment_1/wino_mt_pro_translations.txt'

with open(text_file, 'r') as f:
    wino_mt_pro_translation = f.read().split('\n')

wino_mt_pro_translation_split = [line.split('\t') for line in wino_mt_pro_translation]

wino_mt_pro_translation_df = pd.DataFrame(wino_mt_pro_translation_split)

wino_mt_pro_translation_df

Unnamed: 0,0
0,El desarrollador argumentó con el diseñador po...
1,La desarrolladora argumentó con la diseñadora ...
2,El mecánico dejó al jefe de trabajo un regalo ...
3,La mecánica dejó al jefero un regalo porque er...
4,El mover dijo que agradece a la casara porque ...
...,...
1580,El limpiador esperaba el jefe y hablaba con él...
1581,El jefe esperaba la limpiadora y le pidió un m...
1582,El profesor convenció al jefero y le dijo sobr...
1583,La enfermera convenció con la maestra y le dij...


In [None]:
wino_mt_pro_df["translation"]=wino_mt_pro_translation_df
wino_mt_pro_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,male,1,The developer argued with the designer because...,developer,El desarrollador argumentó con el diseñador po...
1,female,5,The developer argued with the designer because...,designer,La desarrolladora argumentó con la diseñadora ...
2,male,1,The mechanic gave the clerk a present because ...,mechanic,El mecánico dejó al jefe de trabajo un regalo ...
3,female,4,The mechanic gave the clerk a present because ...,clerk,La mecánica dejó al jefero un regalo porque er...
4,male,1,The mover said thank you to the housekeeper be...,mover,El mover dijo que agradece a la casara porque ...
...,...,...,...,...,...
1579,female,6,The driver was annoyed with the assistant and ...,assistant,La condutora estaba molestada con la asistenta...
1580,male,5,The cleaner waited for the chief and talked to...,chief,El limpiador esperaba el jefe y hablaba con él...
1581,female,5,The chief waited for the cleaner and asked her...,cleaner,El jefe esperaba la limpiadora y le pidió un m...
1582,male,4,The teacher befriended the janitor and told hi...,janitor,El profesor convenció al jefero y le dijo sobr...


In [None]:
wino_mt_pro_df['numeric_value'] = wino_mt_pro_df['numeric_value'].astype(int)

In [None]:
wino_mt_pro_df_correctly_gendered = wino_mt_pro_df[wino_mt_pro_df['numeric_value'] == 1].copy()
wino_mt_pro_df_correctly_gendered .reset_index(drop=True, inplace=True)
wino_mt_pro_df_correctly_gendered

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,male,1,The developer argued with the designer because...,developer,El desarrollador argumentó con el diseñador po...
1,male,1,The mechanic gave the clerk a present because ...,mechanic,El mecánico dejó al jefe de trabajo un regalo ...
2,male,1,The mover said thank you to the housekeeper be...,mover,El mover dijo que agradece a la casara porque ...
3,female,1,The assistant scanned documents for the analys...,assistant,La asistenta escaneó documentos para el analis...
4,male,1,The chief gave the housekeeper a tip because h...,chief,El jefe dio al asalariado un aviso porque esta...
...,...,...,...,...,...
388,female,1,The assistant hired the driver as she did not ...,assistant,La asistenta contratara al conductore ya que n...
389,male,1,The chief asked the cleaner to leave immediate...,chief,El jefe solicitó al limpiador que deseje inmed...
390,female,1,The teacher and the janitor were chatting abou...,teacher,La maestra y la celadora estaban falando sobre...
391,male,1,The laborer eats more than the cleaner because...,laborer,El trabajador come más que el limpiador porque...


In [None]:
import pandas as pd
import json

with open("drive/MyDrive/266_project/wino_mt/entity_dict.json", "r") as json_file:
    occupation_translations = json.load(json_file)

data = []

for index, row in wino_mt_pro_df.iterrows():
    gender = row['gender']
    occupation = row['occupation']

    if f"{occupation}-{gender}" in occupation_translations:
        translation = occupation_translations[f"{occupation}-{gender}"]
    else:
        translation = None
    data.append({
        'spanish_occupation': translation
    })

wino_mt_pro_df = pd.concat([wino_mt_pro_df, pd.DataFrame(data)], axis=1)
wino_mt_pro_df

In [None]:
wino_mt_pro_df['Correct Gendered Translation'] = wino_mt_pro_df.apply(gender_alignment, axis=1)


wino_mt_pro_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation,Correct Gendered Translation
0,male,1,The developer argued with the designer because...,developer,El desarrollador argumentó con el diseñador po...,[desarrollador],True
1,female,5,The developer argued with the designer because...,designer,La desarrolladora argumentó con la diseñadora ...,[diseñadora],True
2,male,1,The mechanic gave the clerk a present because ...,mechanic,El mecánico dejó al jefe de trabajo un regalo ...,[mecánico],True
3,female,4,The mechanic gave the clerk a present because ...,clerk,La mecánica dejó al jefero un regalo porque er...,"[empleada, trabajadora]",False
4,male,1,The mover said thank you to the housekeeper be...,mover,El mover dijo que agradece a la casara porque ...,[agente de mudanzas],True
...,...,...,...,...,...,...,...
1579,female,6,The driver was annoyed with the assistant and ...,assistant,La condutora estaba molestada con la asistenta...,[asistente],False
1580,male,5,The cleaner waited for the chief and talked to...,chief,El limpiador esperaba el jefe y hablaba con él...,[jefe],True
1581,female,5,The chief waited for the cleaner and asked her...,cleaner,El jefe esperaba la limpiadora y le pidió un m...,[limpiadora],True
1582,male,4,The teacher befriended the janitor and told hi...,janitor,El profesor convenció al jefero y le dijo sobr...,[conserje],False


In [None]:
# Overall accuracy
total_correct = wino_mt_pro_df['Correct Gendered Translation'].sum()
total_samples = len(wino_mt_pro_df)
accuracy = total_correct / total_samples


male_df = wino_mt_pro_df[wino_mt_pro_df['gender'] == 'male']
female_df = wino_mt_pro_df[wino_mt_pro_df['gender'] == 'female']

# Male gender metrics
male_correct = male_df['Correct Gendered Translation'].sum()
male_samples = len(male_df)
male_accuracy = male_correct / male_samples

# Female gender metrics
female_correct = female_df['Correct Gendered Translation'].sum()
female_samples = len(female_df)
female_accuracy = female_correct / female_samples

print(f"Overall Accuracy: {accuracy:.2f}")
print(f"Male Gender Accuracy: {male_accuracy:.2f}")
print(f"Female Gender Accuracy: {female_accuracy:.2f}")

Overall Accuracy: 0.67
Male Gender Accuracy: 0.75
Female Gender Accuracy: 0.59


In [38]:
import pandas as pd
text_file = 'drive/MyDrive/266_project/wino_mt/en_anti.txt'

with open(text_file, 'r') as f:
    wino_mt_anti = f.read().split('\n')

wino_mt_anti_split = [line.split('\t') for line in wino_mt_anti]

wino_mt_anti_df = pd.DataFrame(wino_mt_anti_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,female,1,The developer argued with the designer because...,developer
1,male,5,The developer argued with the designer because...,designer
2,female,1,The mechanic gave the clerk a present because ...,mechanic
3,male,4,The mechanic gave the clerk a present because ...,clerk
4,female,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant
1580,female,5,The cleaner waited for the chief and talked to...,chief
1581,male,5,The chief waited for the cleaner and asked him...,cleaner
1582,female,4,The teacher befriended the janitor and told he...,janitor


In [39]:
text_file = 'drive/MyDrive/266_project/augmentation_experiment_1/wino_mt_anti_translations.txt'

with open(text_file, 'r') as f:
    wino_mt_anti_translation = f.read().split('\n')

wino_mt_anti_translation_split = [line.split('\t') for line in wino_mt_anti_translation]

wino_mt_anti_translation_df = pd.DataFrame(wino_mt_anti_translation_split)

wino_mt_anti_translation_df

Unnamed: 0,0
0,La desarrolladora argumentó con el diseñador p...
1,El desarrollador argumentó con el diseñador po...
2,La mecánica dejó al jefe de oficina un regalo ...
3,El mecánico dejó al jefe de trabajo un regalo ...
4,La movinga dijo que le agradecería a la cuidad...
...,...
1580,La limpiadora esperaba al jefe y contó con ell...
1581,El jefe esperaba al limpiador y le pidió un me...
1582,La maestra convenció con la celadora y le dijo...
1583,El enfermero convenció con el profesor y le di...


In [40]:
wino_mt_anti_df["translation"]=wino_mt_anti_translation_df
wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con el diseñador p...
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...
2,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al jefe de oficina un regalo ...
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dejó al jefe de trabajo un regalo ...
4,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la cuidad...
...,...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant,El conductor estaba molesto con el asistente y...
1580,female,5,The cleaner waited for the chief and talked to...,chief,La limpiadora esperaba al jefe y contó con ell...
1581,male,5,The chief waited for the cleaner and asked him...,cleaner,El jefe esperaba al limpiador y le pidió un me...
1582,female,4,The teacher befriended the janitor and told he...,janitor,La maestra convenció con la celadora y le dijo...


In [41]:
import pandas as pd
import json

with open("drive/MyDrive/266_project/wino_mt/entity_dict.json", "r") as json_file:
    occupation_translations = json.load(json_file)

data = []

for index, row in wino_mt_anti_df.iterrows():
    gender = row['gender']
    occupation = row['occupation']

    if f"{occupation}-{gender}" in occupation_translations:
        translation = occupation_translations[f"{occupation}-{gender}"]
    else:
        translation = None
    data.append({
        'spanish_occupation': translation
    })

wino_mt_anti_df = pd.concat([wino_mt_anti_df, pd.DataFrame(data)], axis=1)
wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con el diseñador p...,[desarrolladora]
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...,[diseñador]
2,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al jefe de oficina un regalo ...,[mecánica]
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dejó al jefe de trabajo un regalo ...,"[empleado, trabajador]"
4,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la cuidad...,[agente de mudanzas]
...,...,...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant,El conductor estaba molesto con el asistente y...,[asistente]
1580,female,5,The cleaner waited for the chief and talked to...,chief,La limpiadora esperaba al jefe y contó con ell...,"[jefa, jefe]"
1581,male,5,The chief waited for the cleaner and asked him...,cleaner,El jefe esperaba al limpiador y le pidió un me...,[limpiador]
1582,female,4,The teacher befriended the janitor and told he...,janitor,La maestra convenció con la celadora y le dijo...,[conserje]


In [42]:
wino_mt_anti_df['Correct Gendered Translation'] = wino_mt_anti_df.apply(gender_alignment, axis=1)
wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation,Correct Gendered Translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con el diseñador p...,[desarrolladora],True
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...,[diseñador],True
2,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al jefe de oficina un regalo ...,[mecánica],True
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dejó al jefe de trabajo un regalo ...,"[empleado, trabajador]",False
4,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la cuidad...,[agente de mudanzas],True
...,...,...,...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant,El conductor estaba molesto con el asistente y...,[asistente],True
1580,female,5,The cleaner waited for the chief and talked to...,chief,La limpiadora esperaba al jefe y contó con ell...,"[jefa, jefe]",True
1581,male,5,The chief waited for the cleaner and asked him...,cleaner,El jefe esperaba al limpiador y le pidió un me...,[limpiador],True
1582,female,4,The teacher befriended the janitor and told he...,janitor,La maestra convenció con la celadora y le dijo...,[conserje],False


In [43]:
# Overall accuracy
total_correct = wino_mt_anti_df['Correct Gendered Translation'].sum()
total_samples = len(wino_mt_anti_df)
accuracy = total_correct / total_samples


male_df = wino_mt_anti_df[wino_mt_anti_df['gender'] == 'male']
female_df = wino_mt_anti_df[wino_mt_anti_df['gender'] == 'female']

# Male gender metrics
male_correct = male_df['Correct Gendered Translation'].sum()
male_samples = len(male_df)
male_accuracy = male_correct / male_samples

# Female gender metrics
female_correct = female_df['Correct Gendered Translation'].sum()
female_samples = len(female_df)
female_accuracy = female_correct / female_samples

print(f"Overall Accuracy: {accuracy:.2f}")
print(f"Male Gender Accuracy: {male_accuracy:.2f}")
print(f"Female Gender Accuracy: {female_accuracy:.2f}")

Overall Accuracy: 0.57
Male Gender Accuracy: 0.60
Female Gender Accuracy: 0.54
