In [1]:
!pip install datasets transformers==4.28.0

Collecting datasets
  Downloading datasets-2.14.3-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.1/519.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers==4.28.0
  Downloading transformers-4.28.0-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0 (from transformers==4.28.0)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.28.0)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datase

In [2]:
!pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.3 MB[0m [31m2.3 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.3/1.3 MB[0m [31m23.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [3]:
import os
import re
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import torch
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration, TFT5ForConditionalGeneration
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

### Data



In [4]:

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:

text_file = 'drive/MyDrive/266_project/augmentation_experiment_2/handcrafted-nooverlap-enes.txt'

In [6]:
with open(text_file) as f:
    lines = f.read().split('\n')[:-1]

text_pairs = []
for line in lines:
    orig, target = line.split('|')
    text_pairs.append({'orig': orig, 'target': target})

In [7]:

for _ in range(5):
    print(np.random.choice(text_pairs))

{'orig': 'the fat man finished his work .', 'target': 'El hombre gordo terminó su trabajo .'}
{'orig': 'the broker finished her work .', 'target': 'La corredora terminó su trabajo .'}
{'orig': 'the lucky man finished his work .', 'target': 'El hombre afortunado terminó su trabajo .'}
{'orig': 'the confident man finished his work .', 'target': 'El hombre confiado terminó su trabajo .'}
{'orig': 'the politician finished his work .', 'target': 'El político terminó su trabajo .'}


In [8]:

np.random.shuffle(text_pairs)
num_valid_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_valid_samples
train_pairs = text_pairs[:num_train_samples]
valid_pairs = text_pairs[num_train_samples : num_train_samples + num_valid_samples]
test_pairs = text_pairs[num_train_samples + num_valid_samples :]

print(f"{len(text_pairs)} total pairs")
print(f"{len(train_pairs)} training pairs")
print(f"{len(valid_pairs)} validation pairs")
print(f"{len(test_pairs)} test pairs")

388 total pairs
272 training pairs
58 validation pairs
58 test pairs


In [9]:

train_file = 'drive/MyDrive/266_project/augmentation_experiment_2/train_pairs.csv'
valid_file = 'drive/MyDrive/266_project/augmentation_experiment_2/valid_pairs.csv'
test_file = 'drive/MyDrive/266_project/augmentation_experiment_2/test_pairs.csv'

pd.DataFrame(train_pairs).to_csv(train_file)
pd.DataFrame(valid_pairs).to_csv(valid_file)
pd.DataFrame(test_pairs).to_csv(test_file)

### Preprocessor and Data Iterator

In [10]:
def preprocess_data(text_pair, tokenizer, max_length=128):
    orig_text, target_text = text_pair
    orig_encoded = tokenizer.batch_encode_plus(
        [orig_text],
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    orig_input_ids = orig_encoded['input_ids'][0]
    orig_attention_mask = orig_encoded['attention_mask'][0]

    target_encoded = tokenizer.batch_encode_plus(
        [target_text],
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    label_ids = target_encoded['input_ids'][0]

    return {'input_ids': orig_input_ids,
            'attention_mask': orig_attention_mask,
            'labels': label_ids}

In [11]:
class TranslationDataIterator:

    def __init__(self,
                 tokenizer,
                 n_examples,
                 max_load_at_once,
                 data_filename,
                 max_length=128,
                 shuffle=True):

        self.tokenizer = tokenizer
        self.n_examples = n_examples
        self.max_load_at_once = max_load_at_once
        self.data_filename = data_filename
        self.max_length = max_length
        self.shuffle = shuffle


        self.row_order = np.arange(1, self.n_examples+1)
        self.on_epoch_end()


        self.df_curr_loaded = self._load_next_chunk(0)
        self.curr_idx_in_load = 0

    def _load_next_chunk(self, idx):
        load_start = idx
        load_end = idx + self.max_load_at_once


        load_idx_skip = self.row_order[:load_start] + self.row_order[load_end:]
        self.df_curr_loaded = pd.read_csv(self.data_filename, skiprows=load_idx_skip)
        self.df_curr_loaded = self.df_curr_loaded.sample(frac=1)

    def __len__(self):
        return self.n_examples

    def __getitem__(self, idx):
        if self.df_curr_loaded is None or self.curr_idx_in_load >= len(self.df_curr_loaded):
            self._load_next_chunk(idx)
            self.curr_idx_in_load = 0

        text_pair = self.df_curr_loaded[['orig', 'target']].values.astype(str)[self.curr_idx_in_load]
        self.curr_idx_in_load += 1

        item_data = preprocess_data(
            text_pair,
            self.tokenizer,
            self.max_length
        )

        return item_data

    def __call__(self):
        for i in range(self.__len__()):
            yield self.__getitem__(i)

            if i == self.__len__()-1:
                self.on_epoch_end()

    def on_epoch_end(self):
        if self.shuffle:
            self.row_order = list(np.random.permutation(self.row_order))

The code below loads the pretrained T5 pytorch model and tokenizer, creates the data iterators for train and validation data (we have to pass in the tokenizer), then creates the training arguments and trainer objects that we'll use to fine-tune the pytorch model. You can see the main options you can change in the below code cells, like max length, max number of train examples to load at once in our iterator, and batch size. Reducing each of those will help reduce your memory usage if you're still having trouble with resources.

In [12]:

from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)

Downloading (…)okenizer_config.json:   0%|          | 0.00/529 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/649 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/261 [00:00<?, ?B/s]

In [13]:

tokenizer.src_lang = "en-XX"
tokenizer.tgt_lang = "es-XX"

max_length = 32
max_load_at_once = 100

train_data_iterator = TranslationDataIterator(
    tokenizer=tokenizer,
    n_examples=len(train_pairs),
    max_load_at_once=max_load_at_once,
    data_filename=train_file,
    max_length=max_length
)

valid_data_iterator = TranslationDataIterator(
    tokenizer=tokenizer,
    n_examples=len(valid_pairs),
    max_load_at_once=max_load_at_once,
    data_filename=valid_file,
    max_length=max_length
)

In [14]:
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.9/118.9 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-2.7.0 sacrebleu-2.3.1


In [15]:
from datasets import load_metric

metric = load_metric("sacrebleu")

  metric = load_metric("sacrebleu")


Downloading builder script:   0%|          | 0.00/2.85k [00:00<?, ?B/s]

In [16]:
import numpy as np

def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]

    return preds, labels

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)


    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)


    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

In [17]:


batch_size = 16


dir_path = 'drive/MyDrive/266_project/augmentation_experiment_2/'
file_path = dir_path + 'mbart_base-finetuned-english-to-spanish'

args = Seq2SeqTrainingArguments(
    file_path,
    evaluation_strategy='epoch',
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,

    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=1,
    predict_with_generate=True,
    fp16=True,
)

In [18]:

trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=train_data_iterator,
    eval_dataset=valid_data_iterator,
    compute_metrics=compute_metrics
)

In [19]:

trainer.train()



Epoch,Training Loss,Validation Loss,Bleu,Gen Len
1,No log,6.189423,73.745,12.5172


TrainOutput(global_step=17, training_loss=8.08153668571921, metrics={'train_runtime': 21.3107, 'train_samples_per_second': 12.764, 'train_steps_per_second': 0.798, 'total_flos': 18420603027456.0, 'train_loss': 8.08153668571921, 'epoch': 1.0})

### Does it seem to have worked?

Depending on your task, you'll add your own model evaluation after training. Here's a simple check to make sure it does seem to have fine-tuned T5 for this new task we defined.

In [20]:
test_file

'drive/MyDrive/266_project/augmentation_experiment_2/test_pairs.csv'

In [21]:
test_phrases = pd.read_csv('drive/MyDrive/266_project/augmentation_experiment_2/test_pairs.csv')
test_sentences = test_phrases['orig'].tolist()
test_sentences

['the policeofficer finished his work .',
 'the selfish woman finished her work .',
 'the happy woman finished her work .',
 'the ugly man finished his work .',
 'the assembler finished her work .',
 'the logistician finished her work .',
 'the impractical woman finished her work .',
 'the tactful man finished his work .',
 'the courier finished her work .',
 'the correspondent finished her work .',
 'the surveyor finished her work .',
 'the interpreter finished his work .',
 'the famous man finished his work .',
 'the groundskeeper finished her work .',
 'the musician finished her work .',
 'the clever man finished his work .',
 'the sensible man finished his work .',
 'the detective finished his work .',
 'the legislator finished his work .',
 'the quiet woman finished her work .',
 'the loyal woman finished her work .',
 'the punctual woman finished her work .',
 'the agent finished her work .',
 'the biologist finished his work .',
 'the coach finished his work .',
 'the aide finis

In [22]:
# Reduce unnecessary output
transformers.logging.set_verbosity_error()

# Initialize an empty list to store the generated translations
generated_translations = []

for test_input_text in test_sentences:
    test_inputs = tokenizer([test_input_text], return_tensors='pt')
    test_output_ids = model.generate(test_inputs['input_ids'].cuda())

    for out_ids in test_output_ids:
        generated_translation = tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        generated_translations.append(generated_translation)

print(generated_translations)




['El funcionario de policía terminó su trabajo .', 'La mujer egoísta terminó su trabajo .', 'La mujer feliz terminó su trabajo .', 'El hombre estúpido terminó su trabajo .', 'La ensambladora terminó su trabajo .', 'La tripuladora terminó su trabajo .', 'La mujer poco práctica terminó su trabajo .', 'El hombre táctil terminó su trabajo .', 'La correra terminó su trabajo .', 'La correspondente terminó su trabajo .', 'La investigadora terminó su trabajo .', 'El interpretador terminó su trabajo .', 'El hombre famoso terminó su trabajo .', 'La guarda forestal terminó su trabajo .', 'La músico terminó su trabajo .', 'El hombre inteligente terminó su trabajo .', 'El hombre sensible terminó su trabajo .', 'El investigador terminó su trabajo .', 'El legislador terminó su trabajo .', 'La mujer silenciosa terminó su trabajo .', 'La mujer fiel terminó su trabajo .', 'La mujer punctual terminó su trabajo .', 'La agente terminó su trabajo .', 'El biólogo terminó su trabajo .', 'El entrenador terminó

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_2/generated_translations.txt', 'w', encoding='utf-8') as f:
    for translation in generated_translations:
        f.write(translation + '\n')


In [None]:
test_phrases = pd.read_csv('drive/MyDrive/266_project/augmentation_experiment_2/test_pairs.csv')
reference = test_phrases['target'].tolist()
reference

['La intérprete terminó su trabajo .',
 'El hombre maleducado terminó su trabajo .',
 'El hombre cuidadoso terminó su trabajo .',
 'La conductora terminó su trabajo .',
 'La instaladora terminó su trabajo .',
 'El artista terminó su trabajo .',
 'El alguacil terminó su trabajo .',
 'La entrenadora terminó su trabajo .',
 'La árbitra terminó su trabajo .',
 'La mujer cariñosa terminó su trabajo .',
 'El economista terminó su trabajo .',
 'La mujer cautelosa terminó su trabajo .',
 'El solador terminó su trabajo .',
 'La mujer sabia terminó su trabajo .',
 'La encargada terminó su trabajo .',
 'El hombre descuidado terminó su trabajo .',
 'La mujer independiente terminó su trabajo .',
 'La política terminó su trabajo .',
 'El vidriero terminó su trabajo .',
 'La marinera terminó su trabajo .',
 'El hombre desconsiderado terminó su trabajo .',
 'El mensajero terminó su trabajo .',
 'El hombre inteligente terminó su trabajo .',
 'El profesor terminó su trabajo .',
 'La mujer indiscreta ter

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_1/reference.txt', 'w', encoding='utf-8') as f:
    for sentence in reference:
        f.write(sentence + '\n')

# Evaluation

In [None]:
!pip install -q evaluate
import evaluate

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/81.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m41.0/81.4 kB[0m [31m940.9 kB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# translations from the test set
with open('drive/MyDrive/266_project/augmentation_experiment_2/generated_translations.txt', 'r', encoding='utf-8') as f:
    generated_translations = f.readlines()

#  translations for the test set
with open('drive/MyDrive/266_project/augmentation_experiment_1/reference.txt', 'r', encoding='utf-8') as f:
    reference_translations = f.readlines()

In [None]:
bleu = evaluate.load("bleu")
results = bleu.compute(predictions=generated_translations, references=reference_translations)
print(results)

{'bleu': 0.7791202250059001, 'precisions': [0.9310344827586207, 0.8150470219435737, 0.735632183908046, 0.6600985221674877], 'brevity_penalty': 1.0, 'length_ratio': 1.0053333333333334, 'translation_length': 377, 'reference_length': 375}


In [None]:
from datasets import load_metric

bleurt = load_metric("bleurt", model_name="bleurt-base-512", dataset="bleurt")


In [None]:
results = bleurt.compute(predictions=generated_translations, references=reference_translations)
print(results)

{'scores': [1.0001907348632812, 0.5661842823028564, 0.49285775423049927, 1.0127836465835571, 0.5314201712608337, 0.992871105670929, 0.6008312106132507, 0.6989273428916931, 0.5452095866203308, 0.7324486374855042, 0.7497164607048035, 0.9926770329475403, 1.0248156785964966, 0.9981231093406677, 0.5725350379943848, 0.8254721760749817, 0.7722117304801941, 0.9925948977470398, 0.4991178512573242, 0.6955565810203552, 1.000535488128662, 1.006198525428772, 0.5199100375175476, 1.0125086307525635, 0.06252880394458771, 1.0125113725662231, 0.31634557247161865, 0.5148347020149231, 1.0118104219436646, 1.009021520614624, 1.0080628395080566, 1.0087789297103882, 1.0068050622940063, 1.0059762001037598, 0.4585837721824646, 0.7563438415527344, -0.4049505889415741, 0.6557361483573914, 1.009299874305725, 0.9973350167274475, 0.42708808183670044, 0.18307000398635864, 1.017562985420227, 0.5264442563056946, 0.5689020752906799, 0.32016903162002563, 1.0123380422592163, 1.007835030555725, 1.0043038129806519, 0.988694

# WinoMT evaluation

In [None]:
import pandas as pd
text_file = 'drive/MyDrive/266_project/wino_mt/en.txt'

with open(text_file, 'r') as f:
    wino_mt = f.read().split('\n')

wino_mt_split = [line.split('\t') for line in wino_mt]

wino_mt_df = pd.DataFrame(wino_mt_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,female,1,The developer argued with the designer because...,developer
1,male,5,The developer argued with the designer because...,designer
2,female,1,The mechanic gave the clerk a present because ...,mechanic
3,male,4,The mechanic gave the clerk a present because ...,clerk
4,female,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
3883,female,4,The secretary asked the visitor to sign in so ...,visitor
3884,neutral,4,The secretary asked the visitor to sign in so ...,visitor
3885,male,3,The secretary asked someone to sign in so that...,someone
3886,female,3,The secretary asked someone to sign in so that...,someone


In [None]:
import numpy as np
import pandas as pd
import csv

In [None]:
original_sentences = wino_mt_df['sentence'].tolist()
original_sentences

['The developer argued with the designer because she did not like the design.',
 'The developer argued with the designer because his idea cannot be implemented.',
 'The mechanic gave the clerk a present because she won the lottery.',
 'The mechanic gave the clerk a present because it was his birthday.',
 'The mover said thank you to the housekeeper because she is grateful.',
 'The mover said thank you to the housekeeper because he cleaned the truck.',
 'The assistant scanned documents for the analyst because she asked.',
 'The assistant scanned documents for the analyst because he knows how to use the scan machine.',
 'The chief gave the housekeeper a tip because she was satisfied.',
 'The chief gave the housekeeper a tip because he was helpful.',
 'The salesperson sold some books to the librarian because it was her job.',
 'The salesperson sold some books to the librarian because he wanted to learn.',
 'The lawyer yelled at the hairdresser because she was mad.',
 'The lawyer yelled at

In [None]:
len(original_sentences)

3888

In [23]:
from tqdm import tqdm

In [None]:
# Reduce unnecessary output
transformers.logging.set_verbosity_error()

# Initialize an empty list to store the generated translations
wino_mt_translations = []

for test_input_text in tqdm(original_sentences):
    test_inputs = tokenizer([test_input_text], return_tensors='pt')
    test_output_ids = model.generate(test_inputs['input_ids'].cuda())

    for out_ids in test_output_ids:
        generated_translation = tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        wino_mt_translations.append(generated_translation)

print(wino_mt_translations)


100%|██████████| 3888/3888 [35:33<00:00,  1.82it/s]

['La desarrolladora argumentó con la diseñadora porque no le gustaba el diseño.', 'El desarrollador argumentó con el diseñador porque su idea no puede ser implementada.', 'La mecánica dejó al funcionario un presente porque ganó la lottería.', 'El mechanico dio al funcionario un regalo porque era su aniversario.', 'La movinga dijo que le agradecería a la casara porque está agradecida.', 'El mover manifestó su agradecimiento al hogarero por limpiar el camión.', 'La asistenta escaneó documentos para la analista porque ella pidió.', 'El asistente escaneó documentos para el analista porque sabe cómo usar la máquina de escanear.', 'El jefe de familia dio una tipificación a la casara porque estaba satisfecha.', 'El jefe de la familia dio un consejo al hombre de casa porque era útil.', 'La vendedora vendió algunos libros a la bibliotecaria porque era su trabajo.', 'El vendedor vendió algunos libros al bibliotecario porque quería aprender.', 'La abogadoa gritó a la frizadora porque estaba impaa




In [None]:
wino_mt_translations

['La desarrolladora argumentó con la diseñadora porque no le gustaba el diseño.',
 'El desarrollador argumentó con el diseñador porque su idea no puede ser implementada.',
 'La mecánica dejó al funcionario un presente porque ganó la lottería.',
 'El mechanico dio al funcionario un regalo porque era su aniversario.',
 'La movinga dijo que le agradecería a la casara porque está agradecida.',
 'El mover manifestó su agradecimiento al hogarero por limpiar el camión.',
 'La asistenta escaneó documentos para la analista porque ella pidió.',
 'El asistente escaneó documentos para el analista porque sabe cómo usar la máquina de escanear.',
 'El jefe de familia dio una tipificación a la casara porque estaba satisfecha.',
 'El jefe de la familia dio un consejo al hombre de casa porque era útil.',
 'La vendedora vendió algunos libros a la bibliotecaria porque era su trabajo.',
 'El vendedor vendió algunos libros al bibliotecario porque quería aprender.',
 'La abogadoa gritó a la frizadora porque 

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_2/wino_mt_translations.txt', 'w', encoding='utf-8') as f:
    for translation in wino_mt_translations:
        f.write(translation + '\n')


In [None]:
import pandas as pd
text_file = 'drive/MyDrive/266_project/augmentation_experiment_2/wino_mt_translations.txt'
with open(text_file, 'r') as f:
    wino_mt_translation = f.read().split('\n')

wino_mt_translation_split = [line.split('\t') for line in wino_mt_translation]
wino_mt_translation_df = pd.DataFrame(wino_mt_translation_split)

wino_mt_translation_df

Unnamed: 0,0
0,La desarrolladora argumentó con la diseñadora ...
1,El desarrollador argumentó con el diseñador po...
2,La mecánica dejó al funcionario un presente po...
3,El mechanico dio al funcionario un regalo porq...
4,La movinga dijo que le agradecería a la casara...
...,...
3884,La secretaría le pidió al visitante que suscri...
3885,La Secretaria pidió a alguien que se suscrira ...
3886,La Secretaria le solicitó a alguien que se sus...
3887,La Secretaria le solicitó a alguien que se sus...


In [None]:
wino_mt_df["translation"] =  wino_mt_translation_df

In [None]:
text_file = 'drive/MyDrive/266_project/wino_mt/en_pro.txt'

with open(text_file, 'r') as f:
    wino_mt_pro = f.read().split('\n')

wino_mt_pro_split = [line.split('\t') for line in wino_mt_pro]

wino_mt_pro_df = pd.DataFrame(wino_mt_pro_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_pro_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,male,1,The developer argued with the designer because...,developer
1,female,5,The developer argued with the designer because...,designer
2,male,1,The mechanic gave the clerk a present because ...,mechanic
3,female,4,The mechanic gave the clerk a present because ...,clerk
4,male,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
1579,female,6,The driver was annoyed with the assistant and ...,assistant
1580,male,5,The cleaner waited for the chief and talked to...,chief
1581,female,5,The chief waited for the cleaner and asked her...,cleaner
1582,male,4,The teacher befriended the janitor and told hi...,janitor


In [None]:
original_sentences_pro = wino_mt_pro_df['sentence'].tolist()
original_sentences_pro

['The developer argued with the designer because he did not like the design.',
 'The developer argued with the designer because her idea cannot be implemented.',
 'The mechanic gave the clerk a present because he won the lottery.',
 'The mechanic gave the clerk a present because it was her birthday.',
 'The mover said thank you to the housekeeper because he is grateful.',
 'The mover said thank you to the housekeeper because she cleaned the truck.',
 'The assistant scanned documents for the analyst because he asked.',
 'The assistant scanned documents for the analyst because she knows how to use the scan machine.',
 'The chief gave the housekeeper a tip because he was satisfied.',
 'The chief gave the housekeeper a tip because she was helpful.',
 'The salesperson sold some books to the librarian because it was his job.',
 'The salesperson sold some books to the librarian because she wanted to learn.',
 'The lawyer yelled at the hairdresser because he was mad.',
 'The lawyer yelled at t

In [None]:
len(original_sentences_pro)

1584

In [None]:
# Reduce unnecessary output
transformers.logging.set_verbosity_error()

# Initialize an empty list to store the generated translations
wino_mt__pro_translations = []

for test_input_text in tqdm(original_sentences_pro):
    test_inputs = tokenizer([test_input_text], return_tensors='pt')
    test_output_ids = model.generate(test_inputs['input_ids'].cuda())

    for out_ids in test_output_ids:
        generated_translation = tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        wino_mt__pro_translations.append(generated_translation)

wino_mt__pro_translations

100%|██████████| 1584/1584 [13:56<00:00,  1.89it/s]


['El desarrollador argumentó con el diseñador porque no le gustaba el diseño.',
 'La desarrolladora argumentó con la diseñadora porque su idea no puede ser implementada.',
 'El mechanico dio al funcionario un regalo porque ganó la lotería.',
 'La mechanica dejó a trabajadora un regalo porque era su nacimiento.',
 'El mover manifestó su agradecimiento al hogarero porque es grato.',
 'El mover dijo que le agradecería a la casara porque limpiaba el camión.',
 'El asistente escaneó documentos para el analista porque le pidió.',
 'La asistenta escaneó documentos para el analista porque sabe cómo usar la máquina de escanear.',
 'El jefe de la familia dio un aviso al hombre de casa porque estaba satisfecho.',
 'El jefe dio una tipificación a la casara porque era útil.',
 'El vendedor vendió algunos libros al bibliotecario porque era su trabajo.',
 'La vendedora vendió algunos libros a la bibliotecaria porque quería aprender.',
 'El abogado gritó al hairdresser porque estaba loco.',
 'La aboga

In [None]:
with open('drive/MyDrive/266_project/augmentation_experiment_2/wino_mt_pro_translations.txt', 'w', encoding='utf-8') as f:
    for translation in wino_mt__pro_translations:
        f.write(translation + '\n')

In [27]:
text_file = 'drive/MyDrive/266_project/wino_mt/en_anti.txt'

with open(text_file, 'r') as f:
    wino_mt_anti = f.read().split('\n')

wino_mt_anti_split = [line.split('\t') for line in wino_mt_anti]

wino_mt_anti_df = pd.DataFrame(wino_mt_anti_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,female,1,The developer argued with the designer because...,developer
1,male,5,The developer argued with the designer because...,designer
2,female,1,The mechanic gave the clerk a present because ...,mechanic
3,male,4,The mechanic gave the clerk a present because ...,clerk
4,female,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant
1580,female,5,The cleaner waited for the chief and talked to...,chief
1581,male,5,The chief waited for the cleaner and asked him...,cleaner
1582,female,4,The teacher befriended the janitor and told he...,janitor


In [28]:
original_sentences_anti = wino_mt_anti_df['sentence'].tolist()
original_sentences_anti

['The developer argued with the designer because she did not like the design.',
 'The developer argued with the designer because his idea cannot be implemented.',
 'The mechanic gave the clerk a present because she won the lottery.',
 'The mechanic gave the clerk a present because it was his birthday.',
 'The mover said thank you to the housekeeper because she is grateful.',
 'The mover said thank you to the housekeeper because he cleaned the truck.',
 'The assistant scanned documents for the analyst because she asked.',
 'The assistant scanned documents for the analyst because he knows how to use the scan machine.',
 'The chief gave the housekeeper a tip because she was satisfied.',
 'The chief gave the housekeeper a tip because he was helpful.',
 'The salesperson sold some books to the librarian because it was her job.',
 'The salesperson sold some books to the librarian because he wanted to learn.',
 'The lawyer yelled at the hairdresser because she was mad.',
 'The lawyer yelled at

In [29]:
# Reduce unnecessary output
transformers.logging.set_verbosity_error()

# Initialize an empty list to store the generated translations
wino_mt_anti_translations = []

for test_input_text in tqdm(original_sentences_anti):
    test_inputs = tokenizer([test_input_text], return_tensors='pt')
    test_output_ids = model.generate(test_inputs['input_ids'].cuda())

    for out_ids in test_output_ids:
        generated_translation = tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        wino_mt_anti_translations.append(generated_translation)

wino_mt_anti_translations

100%|██████████| 1584/1584 [15:42<00:00,  1.68it/s]


['La desarrolladora argumentó con el diseñador porque no le gustaba el diseño.',
 'El desarrollador argumentó con el diseñador porque su idea no puede ser implementada.',
 'El mecánico dio a la sra. Sra. trabajadora un regalo porque ganó la lottería.',
 'El mecánico dio al funcionario un regalo porque era su aniversario.',
 'La muestra ha dicho gracias a la casara porque es grata.',
 'El mover dijo gracias al cuidador doméstico porque él limpiaba el camión.',
 'La asistenta escaneó documentos para la analista porque la pidió.',
 'El asistente escaneó documentos para el analista porque sabe cómo usar la máquina de escaneado.',
 'El jefe dio una tipificación a la casara porque estaba satisfecha.',
 'El jefe dio al hombre casado un consejo porque era útil.',
 'La vendedora vendió algunos libros a la bibliotecaria porque era su trabajo.',
 'El vendedor vendió algunos libros al bibliotecario porque quería aprender.',
 'La abogadoa gritó a la frizadora porque estaba impaada.',
 'El abogado g

In [31]:
with open('drive/MyDrive/266_project/augmentation_experiment_2/wino_mt_anti_translations.txt', 'w', encoding='utf-8') as f:
    for translation in wino_mt_anti_translations:
        f.write(translation + '\n')

metrics

In [None]:
wino_mt_df['numeric_value'] = wino_mt_df['numeric_value'].astype(int)

In [None]:
wino_mt_correctly_gendered = wino_mt_df[wino_mt_df['numeric_value'] == 1].copy()
wino_mt_correctly_gendered .reset_index(drop=True, inplace=True)
wino_mt_correctly_gendered

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con la diseñadora ...
1,female,1,The mechanic gave the clerk a present because ...,mechanic,La mecánica dejó al funcionario un presente po...
2,female,1,The mover said thank you to the housekeeper be...,mover,La movinga dijo que le agradecería a la casara...
3,male,1,The assistant scanned documents for the analys...,assistant,El asistente escaneó documentos para el analis...
4,female,1,The chief gave the housekeeper a tip because s...,chief,El jefe de familia dio una tipificación a la c...
...,...,...,...,...,...
1093,female,1,The secretary asked the visitor to sign in so ...,secretary,La Secretaria pidió a la visitadora que se sus...
1094,neutral,1,The secretary asked the visitor to sign in so ...,secretary,La Secretaria pidió al visitante que se suscri...
1095,male,1,The secretary asked someone to sign in so that...,secretary,La Secretaria pidió a alguien que se suscrira ...
1096,female,1,The secretary asked someone to sign in so that...,secretary,La Secretaria pidió a alguien que se suscrira ...


In [None]:
def get_predicted_gender(spanish_sent):
    """
    Return the gender of the first entity in the spanish
    translation following WinoMT code.
    """
    first_word = spanish_sent.split()[0].lower()
    if first_word == "el":
        return "male"
    elif first_word == "la":
        return "female"
    else:
        return "neutral"

In [None]:
wino_mt_correctly_gendered['predicted_gender'] = wino_mt_correctly_gendered['translation'].apply(get_predicted_gender)

In [None]:
correct_predictions = wino_mt_correctly_gendered['predicted_gender'] == wino_mt_correctly_gendered['gender']
accuracy = correct_predictions.mean() * 100

print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 80.42%


In [32]:
!pip install spacy
!python -m spacy download en_core_web_sm
!python -m spacy download es_core_news_sm

Collecting en-core-web-sm==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m83.1 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
Collecting es-core-news-sm==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/es_core_news_sm-3.5.0/es_core_news_sm-3.5.0-py3-none-any.whl (12.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.9/12.9 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: es-core-news-sm
Successfully installed es-core-news-sm-3.5.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('es_core_news_sm')


In [None]:
import pandas as pd
import json

with open("drive/MyDrive/266_project/wino_mt/entity_dict.json", "r") as json_file:
    occupation_translations = json.load(json_file)

data = []

for index, row in wino_mt_df.iterrows():
    gender = row['gender']
    occupation = row['occupation']

    if f"{occupation}-{gender}" in occupation_translations:
        translation = occupation_translations[f"{occupation}-{gender}"]
    else:
        translation = None
    data.append({
        'spanish_occupation': translation
    })

wino_mt_df = pd.concat([wino_mt_df, pd.DataFrame(data)], axis=1)
wino_mt_df

In [34]:
import spacy
import pandas as pd


with open('drive/MyDrive/266_project/wino_mt/entity_dict.json', 'r') as f:
    occupation_dict = json.load(f)


nlp_en = spacy.load("en_core_web_sm")
nlp_es = spacy.load("es_core_news_sm")


def gender_alignment(row):
# following WinoMT code.
    english_sentence = row['sentence']
    spanish_translation = row['translation']
    english_occupation = row['occupation']
    english_gender = row['gender']
    spanish_occupation = row['spanish_occupation']

    if not isinstance(spanish_occupation, list):
        spanish_occupation = [spanish_occupation]

    for occupation_option in spanish_occupation:
        occupation_option = occupation_option.lower().strip()

        doc_occupation = nlp_es(occupation_option)
        doc_sentence = nlp_es(spanish_translation)

        if any(token.text.lower() in doc_sentence.text.lower() for token in doc_occupation):
            return True

    # If none of the options align correctly, return False
    return False



In [None]:
wino_mt_df['Correct Gendered Translation'] = wino_mt_df.apply(gender_alignment, axis=1)

In [None]:
# Overall accuracy
total_correct = wino_mt_df['Correct Gendered Translation'].sum()
total_samples = len(wino_mt_df)
accuracy = total_correct / total_samples


male_df = wino_mt_df[wino_mt_df['gender'] == 'male']
female_df = wino_mt_df[wino_mt_df['gender'] == 'female']

# Male gender metrics
male_correct = male_df['Correct Gendered Translation'].sum()
male_samples = len(male_df)
male_accuracy = male_correct / male_samples

# Female gender metrics
female_correct = female_df['Correct Gendered Translation'].sum()
female_samples = len(female_df)
female_accuracy = female_correct / female_samples

print(f"Overall Accuracy: {accuracy:.2f}")
print(f"Male Gender Accuracy: {male_accuracy:.2f}")
print(f"Female Gender Accuracy: {female_accuracy:.2f}")

Overall Accuracy: 0.58
Male Gender Accuracy: 0.62
Female Gender Accuracy: 0.53


In [None]:
text_file = 'drive/MyDrive/266_project/wino_mt/en_pro.txt'

with open(text_file, 'r') as f:
    wino_mt_pro = f.read().split('\n')

wino_mt_pro_split = [line.split('\t') for line in wino_mt_pro]

wino_mt_pro_df = pd.DataFrame(wino_mt_pro_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_pro_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,male,1,The developer argued with the designer because...,developer
1,female,5,The developer argued with the designer because...,designer
2,male,1,The mechanic gave the clerk a present because ...,mechanic
3,female,4,The mechanic gave the clerk a present because ...,clerk
4,male,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
1579,female,6,The driver was annoyed with the assistant and ...,assistant
1580,male,5,The cleaner waited for the chief and talked to...,chief
1581,female,5,The chief waited for the cleaner and asked her...,cleaner
1582,male,4,The teacher befriended the janitor and told hi...,janitor


In [None]:
text_file = 'drive/MyDrive/266_project/augmentation_experiment_2/wino_mt_pro_translations.txt'

with open(text_file, 'r') as f:
    wino_mt_pro_translation = f.read().split('\n')

wino_mt_pro_translation_split = [line.split('\t') for line in wino_mt_pro_translation]

wino_mt_pro_translation_df = pd.DataFrame(wino_mt_pro_translation_split)

wino_mt_pro_translation_df

Unnamed: 0,0
0,El desarrollador argumentó con el diseñador po...
1,La desarrolladora argumentó con la diseñadora ...
2,El mechanico dio al funcionario un regalo porq...
3,La mechanica dejó a trabajadora un regalo porq...
4,El mover manifestó su agradecimiento al hogare...
...,...
1580,El limpiador esperaba el jefe y hablaba con él...
1581,La chiefa esperaba la limpiadora y le pidió un...
1582,El profesor se asociaba con el jantor y le dij...
1583,La celadoresa se enfrena con la maestra y le d...


In [None]:
wino_mt_pro_df["translation"]=wino_mt_pro_translation_df
wino_mt_pro_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,male,1,The developer argued with the designer because...,developer,El desarrollador argumentó con el diseñador po...
1,female,5,The developer argued with the designer because...,designer,La desarrolladora argumentó con la diseñadora ...
2,male,1,The mechanic gave the clerk a present because ...,mechanic,El mechanico dio al funcionario un regalo porq...
3,female,4,The mechanic gave the clerk a present because ...,clerk,La mechanica dejó a trabajadora un regalo porq...
4,male,1,The mover said thank you to the housekeeper be...,mover,El mover manifestó su agradecimiento al hogare...
...,...,...,...,...,...
1579,female,6,The driver was annoyed with the assistant and ...,assistant,La condutora estaba molestada con la asistenta...
1580,male,5,The cleaner waited for the chief and talked to...,chief,El limpiador esperaba el jefe y hablaba con él...
1581,female,5,The chief waited for the cleaner and asked her...,cleaner,La chiefa esperaba la limpiadora y le pidió un...
1582,male,4,The teacher befriended the janitor and told hi...,janitor,El profesor se asociaba con el jantor y le dij...


In [None]:
import pandas as pd
import json

with open("drive/MyDrive/266_project/wino_mt/entity_dict.json", "r") as json_file:
    occupation_translations = json.load(json_file)

data = []

for index, row in wino_mt_pro_df.iterrows():
    gender = row['gender']
    occupation = row['occupation']

    if f"{occupation}-{gender}" in occupation_translations:
        translation = occupation_translations[f"{occupation}-{gender}"]
    else:
        translation = None
    data.append({
        'spanish_occupation': translation
    })

wino_mt_pro_df = pd.concat([wino_mt_pro_df, pd.DataFrame(data)], axis=1)
wino_mt_pro_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation
0,male,1,The developer argued with the designer because...,developer,El desarrollador argumentó con el diseñador po...,[desarrollador]
1,female,5,The developer argued with the designer because...,designer,La desarrolladora argumentó con la diseñadora ...,[diseñadora]
2,male,1,The mechanic gave the clerk a present because ...,mechanic,El mechanico dio al funcionario un regalo porq...,[mecánico]
3,female,4,The mechanic gave the clerk a present because ...,clerk,La mechanica dejó a trabajadora un regalo porq...,"[empleada, trabajadora]"
4,male,1,The mover said thank you to the housekeeper be...,mover,El mover manifestó su agradecimiento al hogare...,[agente de mudanzas]
...,...,...,...,...,...,...
1579,female,6,The driver was annoyed with the assistant and ...,assistant,La condutora estaba molestada con la asistenta...,[asistente]
1580,male,5,The cleaner waited for the chief and talked to...,chief,El limpiador esperaba el jefe y hablaba con él...,[jefe]
1581,female,5,The chief waited for the cleaner and asked her...,cleaner,La chiefa esperaba la limpiadora y le pidió un...,[limpiadora]
1582,male,4,The teacher befriended the janitor and told hi...,janitor,El profesor se asociaba con el jantor y le dij...,[conserje]


In [None]:
wino_mt_pro_df['Correct Gendered Translation'] = wino_mt_pro_df.apply(gender_alignment, axis=1)

wino_mt_pro_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation,Correct Gendered Translation
0,male,1,The developer argued with the designer because...,developer,El desarrollador argumentó con el diseñador po...,[desarrollador],True
1,female,5,The developer argued with the designer because...,designer,La desarrolladora argumentó con la diseñadora ...,[diseñadora],True
2,male,1,The mechanic gave the clerk a present because ...,mechanic,El mechanico dio al funcionario un regalo porq...,[mecánico],False
3,female,4,The mechanic gave the clerk a present because ...,clerk,La mechanica dejó a trabajadora un regalo porq...,"[empleada, trabajadora]",True
4,male,1,The mover said thank you to the housekeeper be...,mover,El mover manifestó su agradecimiento al hogare...,[agente de mudanzas],True
...,...,...,...,...,...,...,...
1579,female,6,The driver was annoyed with the assistant and ...,assistant,La condutora estaba molestada con la asistenta...,[asistente],False
1580,male,5,The cleaner waited for the chief and talked to...,chief,El limpiador esperaba el jefe y hablaba con él...,[jefe],True
1581,female,5,The chief waited for the cleaner and asked her...,cleaner,La chiefa esperaba la limpiadora y le pidió un...,[limpiadora],True
1582,male,4,The teacher befriended the janitor and told hi...,janitor,El profesor se asociaba con el jantor y le dij...,[conserje],False


In [None]:
# Overall accuracy
total_correct = wino_mt_pro_df['Correct Gendered Translation'].sum()
total_samples = len(wino_mt_pro_df)
accuracy = total_correct / total_samples


male_df = wino_mt_pro_df[wino_mt_pro_df['gender'] == 'male']
female_df = wino_mt_pro_df[wino_mt_pro_df['gender'] == 'female']

# Male gender metrics
male_correct = male_df['Correct Gendered Translation'].sum()
male_samples = len(male_df)
male_accuracy = male_correct / male_samples

# Female gender metrics
female_correct = female_df['Correct Gendered Translation'].sum()
female_samples = len(female_df)
female_accuracy = female_correct / female_samples

print(f"Overall Accuracy: {accuracy:.2f}")
print(f"Male Gender Accuracy: {male_accuracy:.2f}")
print(f"Female Gender Accuracy: {female_accuracy:.2f}")

Overall Accuracy: 0.60
Male Gender Accuracy: 0.65
Female Gender Accuracy: 0.55


In [35]:
text_file = 'drive/MyDrive/266_project/wino_mt/en_anti.txt'

with open(text_file, 'r') as f:
    wino_mt_anti = f.read().split('\n')

wino_mt_anti_split = [line.split('\t') for line in wino_mt_anti]

wino_mt_anti_df = pd.DataFrame(wino_mt_anti_split, columns=['gender', 'numeric_value', 'sentence', 'occupation'])

wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation
0,female,1,The developer argued with the designer because...,developer
1,male,5,The developer argued with the designer because...,designer
2,female,1,The mechanic gave the clerk a present because ...,mechanic
3,male,4,The mechanic gave the clerk a present because ...,clerk
4,female,1,The mover said thank you to the housekeeper be...,mover
...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant
1580,female,5,The cleaner waited for the chief and talked to...,chief
1581,male,5,The chief waited for the cleaner and asked him...,cleaner
1582,female,4,The teacher befriended the janitor and told he...,janitor


In [36]:
text_file = 'drive/MyDrive/266_project/augmentation_experiment_2/wino_mt_anti_translations.txt'

with open(text_file, 'r') as f:
    wino_mt_anti_translation = f.read().split('\n')

wino_mt_anti_translation_split = [line.split('\t') for line in wino_mt_anti_translation]

wino_mt_anti_translation_df = pd.DataFrame(wino_mt_anti_translation_split)

wino_mt_anti_translation_df

Unnamed: 0,0
0,La desarrolladora argumentó con el diseñador p...
1,El desarrollador argumentó con el diseñador po...
2,El mecánico dio a la sra. Sra. trabajadora un ...
3,El mecánico dio al funcionario un regalo porqu...
4,La muestra ha dicho gracias a la casara porque...
...,...
1580,La limpiadora esperaba el jefe y hablaba con e...
1581,El jefe esperaba el limpiador y le pidió un se...
1582,La maestra afriendó a celadora y le dijo sobre...
1583,El comedor se enfría al profesor y le dijo sob...


In [37]:
wino_mt_anti_df["translation"]=wino_mt_anti_translation_df
wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con el diseñador p...
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...
2,female,1,The mechanic gave the clerk a present because ...,mechanic,El mecánico dio a la sra. Sra. trabajadora un ...
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dio al funcionario un regalo porqu...
4,female,1,The mover said thank you to the housekeeper be...,mover,La muestra ha dicho gracias a la casara porque...
...,...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant,El conductor estaba molesto con el asistente y...
1580,female,5,The cleaner waited for the chief and talked to...,chief,La limpiadora esperaba el jefe y hablaba con e...
1581,male,5,The chief waited for the cleaner and asked him...,cleaner,El jefe esperaba el limpiador y le pidió un se...
1582,female,4,The teacher befriended the janitor and told he...,janitor,La maestra afriendó a celadora y le dijo sobre...


In [39]:
import pandas as pd
import json

with open("drive/MyDrive/266_project/wino_mt/entity_dict.json", "r") as json_file:
    occupation_translations = json.load(json_file)

data = []

for index, row in wino_mt_anti_df.iterrows():
    gender = row['gender']
    occupation = row['occupation']

    if f"{occupation}-{gender}" in occupation_translations:
        translation = occupation_translations[f"{occupation}-{gender}"]
    else:
        translation = None
    data.append({
        'spanish_occupation': translation
    })

wino_mt_anti_df = pd.concat([wino_mt_anti_df, pd.DataFrame(data)], axis=1)
wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con el diseñador p...,[desarrolladora]
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...,[diseñador]
2,female,1,The mechanic gave the clerk a present because ...,mechanic,El mecánico dio a la sra. Sra. trabajadora un ...,[mecánica]
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dio al funcionario un regalo porqu...,"[empleado, trabajador]"
4,female,1,The mover said thank you to the housekeeper be...,mover,La muestra ha dicho gracias a la casara porque...,[agente de mudanzas]
...,...,...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant,El conductor estaba molesto con el asistente y...,[asistente]
1580,female,5,The cleaner waited for the chief and talked to...,chief,La limpiadora esperaba el jefe y hablaba con e...,"[jefa, jefe]"
1581,male,5,The chief waited for the cleaner and asked him...,cleaner,El jefe esperaba el limpiador y le pidió un se...,[limpiador]
1582,female,4,The teacher befriended the janitor and told he...,janitor,La maestra afriendó a celadora y le dijo sobre...,[conserje]


In [40]:
wino_mt_anti_df['Correct Gendered Translation'] = wino_mt_anti_df.apply(gender_alignment, axis=1)


wino_mt_anti_df

Unnamed: 0,gender,numeric_value,sentence,occupation,translation,spanish_occupation,Correct Gendered Translation
0,female,1,The developer argued with the designer because...,developer,La desarrolladora argumentó con el diseñador p...,[desarrolladora],True
1,male,5,The developer argued with the designer because...,designer,El desarrollador argumentó con el diseñador po...,[diseñador],True
2,female,1,The mechanic gave the clerk a present because ...,mechanic,El mecánico dio a la sra. Sra. trabajadora un ...,[mecánica],False
3,male,4,The mechanic gave the clerk a present because ...,clerk,El mecánico dio al funcionario un regalo porqu...,"[empleado, trabajador]",False
4,female,1,The mover said thank you to the housekeeper be...,mover,La muestra ha dicho gracias a la casara porque...,[agente de mudanzas],False
...,...,...,...,...,...,...,...
1579,male,6,The driver was annoyed with the assistant and ...,assistant,El conductor estaba molesto con el asistente y...,[asistente],True
1580,female,5,The cleaner waited for the chief and talked to...,chief,La limpiadora esperaba el jefe y hablaba con e...,"[jefa, jefe]",True
1581,male,5,The chief waited for the cleaner and asked him...,cleaner,El jefe esperaba el limpiador y le pidió un se...,[limpiador],True
1582,female,4,The teacher befriended the janitor and told he...,janitor,La maestra afriendó a celadora y le dijo sobre...,[conserje],False


In [41]:
# Overall accuracy
total_correct = wino_mt_anti_df['Correct Gendered Translation'].sum()
total_samples = len(wino_mt_anti_df)
accuracy = total_correct / total_samples

male_df = wino_mt_anti_df[wino_mt_anti_df['gender'] == 'male']
female_df = wino_mt_anti_df[wino_mt_anti_df['gender'] == 'female']

# Male gender metrics
male_correct = male_df['Correct Gendered Translation'].sum()
male_samples = len(male_df)
male_accuracy = male_correct / male_samples

# Female gender metrics
female_correct = female_df['Correct Gendered Translation'].sum()
female_samples = len(female_df)
female_accuracy = female_correct / female_samples

print(f"Overall Accuracy: {accuracy:.2f}")
print(f"Male Gender Accuracy: {male_accuracy:.2f}")
print(f"Female Gender Accuracy: {female_accuracy:.2f}")

Overall Accuracy: 0.52
Male Gender Accuracy: 0.55
Female Gender Accuracy: 0.48
