In [2]:
import random
from pathlib import Path
import numpy as np
import pytorch_lightning as pl

import pandas as pd
from IPython.display import clear_output


import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import AutoTokenizer
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

from sklearn.metrics import precision_recall_fscore_support, mean_absolute_error

from functools import partial

from aux_relative_text.multilingual_amazon_anchors import MultilingualAmazonAnchors
from typing import *

from modules.stitching_module import StitchingModule

from datasets import load_dataset, ClassLabel

# Tensorboard extension (for visualization purposes later)
%load_ext tensorboard

# Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10)
DATASET_PATH = Path("./data")
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = Path("./saved_models/rel_multi_vanilla")
RESULT_PATH = Path("./results/rel_multi_vanilla")

PROJECT_ROOT = Path("./")

pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Setting the seed
pl.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

Global seed set to 42


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
Device: cuda:0


# Data

In [3]:
fine_grained: bool = True
target_key: str = "class"
data_key: str = "content"
anchor_dataset_name: str = "amazon_translated"  
ALL_LANGS = ("en", "es", "fr")
num_anchors: int = 768
train_perc: float = 0.25

In [4]:
def get_dataset(lang: str, split: str, perc: float, fine_grained: bool):
    pl.seed_everything(42)
    assert 0 < perc <= 1
    dataset = load_dataset("amazon_reviews_multi", lang)[split]

    if not fine_grained:
        dataset = dataset.filter(lambda sample: sample["stars"] != 3)

    # Select a random subset
    indices = list(range(len(dataset)))
    random.shuffle(indices)
    indices = indices[: int(len(indices) * perc)]
    dataset = dataset.select(indices)

    def clean_sample(sample):
        title: str = sample["review_title"].strip('"').strip(".").strip()
        body: str = sample["review_body"].strip('"').strip(".").strip()

        if body.lower().startswith(title.lower()):
            title = ""

        if len(title) > 0 and title[-1].isalpha():
            title = f"{title}."

        sample["content"] = f"{title} {body}".lstrip(".").strip()
        if fine_grained:
            sample[target_key] = str(sample["stars"] - 1)
        else:
            sample[target_key] = sample["stars"] > 3
        return sample

    dataset = dataset.map(clean_sample)
    dataset = dataset.cast_column(
        target_key,
        ClassLabel(num_classes=5 if fine_grained else 2, names=list(map(str, range(1, 6) if fine_grained else (0, 1)))),
    )

    return dataset

def _amazon_translated_get_samples(lang: str, sample_idxs):
    anchor_dataset = MultilingualAmazonAnchors(split="train", language=lang)
    anchors = []
    for anchor_idx in sample_idxs:
        anchor = anchor_dataset[anchor_idx]
        anchor[data_key] = anchor["data"]
        anchors.append(anchor)
    return anchors

In [5]:
train_datasets = {
    lang: get_dataset(lang=lang, split="train", perc=train_perc, fine_grained=fine_grained) for lang in ALL_LANGS
    }

test_datasets = {
    lang: get_dataset(lang=lang, split="test", perc=1, fine_grained=fine_grained) for lang in ALL_LANGS
    }

val_datasets = {
    lang: get_dataset(lang=lang, split="validation", perc=1, fine_grained=fine_grained) for lang in ALL_LANGS
    }

num_labels = list(train_datasets.values())[0].features[target_key].num_classes

Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-ec0ea0aad8f98192.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-115fb520e0899335.arrow
Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-5124f3d24b8cfecb.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-d9c8d8f2f813d97f.arrow
Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-0985b4a32f5feef9.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-39a640cabb5a59c4.arrow
Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-8553f71d56c9ba4c.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-2c8384112752703f.arrow
Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-e3dc951c42308c5b.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-bf13bb2c70209559.arrow
Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-b471ec6ce2ee1b83.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-6f62c30e3bb3f98f.arrow
Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-e7ff0dc70b32da22.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/en/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-586950f56ae31790.arrow
Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-fe159db7bc22043d.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-0c3248cebcb837fb.arrow
Global seed set to 42
Found cached dataset amazon_reviews_multi (/home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-c271de877e412081.arrow
Loading cached processed dataset at /home/thepopi300/.cache/huggingface/datasets/amazon_reviews_multi/fr/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-9106148a706e9181.arrow


In [6]:
train_datasets["es"][5]

{'review_id': 'es_0291786',
 'product_id': 'product_es_0674372',
 'reviewer_id': 'reviewer_es_0553268',
 'stars': 1,
 'review_body': 'Pinzas de malísima calidad. La mayoría vienen astilladas. Resultan hasta peligrosas. A pesar de ser un producto plus, su precio es más caro que las que venden en la calle y de peor calidad. No esperaba esta calidad de un producto vendido por amazon. Totalmente decepcionada.',
 'review_title': 'Malísima calidad',
 'language': 'es',
 'product_category': 'home',
 'content': 'Malísima calidad. Pinzas de malísima calidad. La mayoría vienen astilladas. Resultan hasta peligrosas. A pesar de ser un producto plus, su precio es más caro que las que venden en la calle y de peor calidad. No esperaba esta calidad de un producto vendido por amazon. Totalmente decepcionada',
 'class': 0}

In [7]:
assert len(set(frozenset(train_dataset.features.keys()) for train_dataset in train_datasets.values())) == 1
class2idx = train_datasets["en"].features[target_key].str2int

train_datasets["en"].features

{'review_id': Value(dtype='string', id=None),
 'product_id': Value(dtype='string', id=None),
 'reviewer_id': Value(dtype='string', id=None),
 'stars': Value(dtype='int32', id=None),
 'review_body': Value(dtype='string', id=None),
 'review_title': Value(dtype='string', id=None),
 'language': Value(dtype='string', id=None),
 'product_category': Value(dtype='string', id=None),
 'content': Value(dtype='string', id=None),
 'class': ClassLabel(names=['1', '2', '3', '4', '5'], id=None)}

Get pararel anchors

In [8]:
anchor_dataset2num_samples = 1000
anchor_dataset2first_anchors = [
        776,
        507,
        895,
        922,
        33,
        483,
        85,
        750,
        354,
        523,
        184,
        809,
        418,
        615,
        682,
        501,
        760,
        49,
        732,
        336,
    ]


assert num_anchors <= anchor_dataset2num_samples

pl.seed_everything(42)
anchor_idxs = list(range(anchor_dataset2num_samples))
random.shuffle(anchor_idxs)
anchor_idxs = anchor_idxs[:num_anchors]

assert anchor_idxs[:20] == anchor_dataset2first_anchors  # better safe than sorry
lang2anchors = {
    lang: _amazon_translated_get_samples(lang=lang, sample_idxs=anchor_idxs) for lang in ALL_LANGS
}

Global seed set to 42


This is how we can handdle automatically the tokenizer

In [9]:
def collate_fn(batch, tokenizer, cls=True):
    data = []
    labels = []
    for sample in batch:
        data.append(sample[data_key])
        if cls:
            labels.append(sample[target_key])

    encoding = tokenizer(
        data,
        return_tensors="pt",
        return_special_tokens_mask=True,
        truncation=True,
        max_length=512,
        padding=True,
    )
    del encoding["special_tokens_mask"]

    if cls:
        result = (encoding, torch.tensor(labels))
    else:
        result = encoding

    return  result

# Train

In [10]:
from pl_modules.pl_roberta import LitRelRoberta

In [11]:
lang2transformer_name = {
    "en": "roberta-base",
    "es": "PlanTL-GOB-ES/roberta-base-bne",
    "fr": "ClassCat/roberta-base-french",
    #"ja": "nlp-waseda/roberta-base-japanese",
}
assert set(lang2transformer_name.keys()) == set(ALL_LANGS)

In [12]:
train_lang2dataloader = {}
test_lang2dataloader = {}
val_lang2dataloader = {}
anchors_lang2dataloader = {}

for lang in ALL_LANGS:
    transformer_name = lang2transformer_name[lang]
    print(transformer_name)
    lang_tokenizer = AutoTokenizer.from_pretrained(transformer_name)
    train_lang2dataloader[lang] = DataLoader(train_datasets[lang],
                                       num_workers=4,
                                       collate_fn=partial(collate_fn, tokenizer=lang_tokenizer),
                                       shuffle=True,
                                       pin_memory=True,
                                       drop_last=True,
                                       batch_size=16,
                                       )
    
    test_lang2dataloader[lang] = DataLoader(test_datasets[lang],
                                       num_workers=4,
                                       collate_fn=partial(collate_fn, tokenizer=lang_tokenizer),
                                       batch_size=32,
                                       )
    
    val_lang2dataloader[lang] = DataLoader(val_datasets[lang],
                                       num_workers=4,
                                       collate_fn=partial(collate_fn, tokenizer=lang_tokenizer),
                                       batch_size=32,
                                       )
    
    anchors_lang2dataloader[lang] = DataLoader(lang2anchors[lang],
                                       num_workers=4,
                                       pin_memory=True,
                                       collate_fn=partial(collate_fn, tokenizer=lang_tokenizer, cls=False),
                                       batch_size=48,
                                       )

roberta-base
PlanTL-GOB-ES/roberta-base-bne
ClassCat/roberta-base-french


In [13]:
EPOCHS = 5 if fine_grained else 3


def train_network(lang, mode="relative", seed=24, fine_tune=False):
    
    # Create a PyTorch Lightning trainer with the generation callback
    
    if fine_grained:
        title = CHECKPOINT_PATH / 'fine_grained' 
    else:
        title = CHECKPOINT_PATH / 'coarse_grained' 
    
    if fine_tune:
        title = title / f"finetune_{lang}_{mode}_seed{seed}"
    else:
        title = title / f"full_{lang}_{mode}_seed{seed}"
    
    trainer = pl.Trainer(default_root_dir=title, 
                         accelerator="gpu" if str(device).startswith("cuda") else "cpu",
                         devices=1,
                         accumulate_grad_batches=num_labels,
                         max_epochs=EPOCHS, 
                         callbacks=[ModelCheckpoint(save_weights_only=True),
                                    LearningRateMonitor(logging_interval='step')
                                    ])
    
    trainer.logger._log_graph = True         # If True, we plot the computation graph in tensorboard
    trainer.logger._default_hp_metric = None # Optional logging argument that we don't need
    
    transformer_model = lang2transformer_name[lang]
    
    anchor_loader = None
    if mode == "relative":
        anchor_loader = anchors_lang2dataloader[lang]
    
    
    train_loader = train_lang2dataloader[lang]
    
    if fine_tune:
        freq_anchors = len(train_loader)
    else:
        freq_anchors = 100*num_labels
    
    model = LitRelRoberta(num_labels=num_labels,
                          transformer_model=transformer_model,
                          anchor_dataloader=anchor_loader,
                          hidden_size=num_anchors,
                          normalization_mode="batchnorm",
                          output_normalization_mode=None,
                          dropout_prob=0.1,
                          seed=seed,
                          steps=EPOCHS*len(train_loader),
                          weight_decay=0.01, 
                          head_lr=1e-3/num_labels,
                          encoder_lr=1.75e-4/num_labels,
                          layer_decay=0.65,
                          scheduler_act=True,
                          freq_anchors=freq_anchors,
                          device=device,
                          fine_tune=fine_tune
                          )
    
    val_loader = val_lang2dataloader[lang]
   
    trainer.fit(model, train_loader, val_loader)
    
    model.to("cpu")
    del model
    

In [14]:
SEEDS = [1]
TRAIN = False

if TRAIN:
    for seed in tqdm(SEEDS, leave=False, desc="seed"):
        for fine_tune in tqdm([True, False], leave=False, desc="fine_tune"):
            for embedding_type in tqdm(["absolute", "relative"], leave=False, desc="embedding_type"):
                for train_lang in tqdm(ALL_LANGS, leave=False, desc="lang"):
                    train_network(train_lang, mode=embedding_type, seed=seed, fine_tune=fine_tune)
                    clear_output(wait=True)


# Results

In [15]:
def test_model(model, dataloader, title=""):
    preds = []
    model.to(device)
    model.eval()
    with torch.no_grad():
        batch_idx = 0
        for batch, _ in tqdm(dataloader, position=0, leave=True, desc="Computing"+title):
            batch.to(device)
            batch_latents = model(batch_idx=batch_idx, **batch)["prediction"].argmax(-1)
            preds.append(batch_latents)
            batch_idx = 1

    preds = torch.cat(preds, dim=0).detach().cpu().numpy()
    test_y = np.array(test_datasets["en"][target_key])

    precision, recall, fscore, _ = precision_recall_fscore_support(test_y, preds, average="weighted")
    mae = mean_absolute_error(y_true=test_y, y_pred=preds)
    acc = (preds == test_y).mean()
    return precision, recall, acc, fscore, mae

In [16]:
numeric_results = {
    "finetune": {
        "seed": [],
        "embed_type": [],
        "enc_lang": [],
        "dec_lang": [],
        "precision": [],
        "recall": [],
        "acc": [],
        "fscore": [],
        "mae": [],
        "stitched": []
    },
     "full": {
        "seed": [],
        "embed_type": [],
        "enc_lang": [],
        "dec_lang": [],
        "precision": [],
        "recall": [],
        "acc": [],
        "fscore": [],
        "mae": [],
        "stitched": []
    },
}

for seed in [0]:
    models = {
        train_mode: {
            embedding_type: {
                    train_lang: LitRelRoberta.load_from_checkpoint(
                                  CHECKPOINT_PATH / 
                                  f"{'fine_grained' if fine_grained else 'coarse_grained'}/{train_mode}_{train_lang}_{embedding_type}_seed{seed}" /
                                  f"lightning_logs/version_0/checkpoints/{'epoch=4-step=3125.ckpt' if fine_grained else 'epoch=2-step=3750.ckpt'}" )

                    for train_lang in ALL_LANGS
                }
                for embedding_type in ["absolute", "relative"]
        }
        for train_mode in tqdm(["finetune", "full"], leave=True, desc="mode")
    }
    
    for mode in ["finetune", "full"]:
        for embed_type in ["absolute", "relative"]:
            for enc_lang  in ALL_LANGS:
                for dec_lang  in ALL_LANGS:
                    
                    model = models[mode][embed_type][enc_lang].net
                    if embed_type == "relative":
                        model.anchor_dataloader = anchors_lang2dataloader[enc_lang]
                        
                    if enc_lang != dec_lang:
                        model_dec = models[mode][embed_type][dec_lang].net
                        model = StitchingModule(model, model_dec)
                      
                        
                    # The data is paired with its encoder
                    test_loader = test_lang2dataloader[enc_lang]
                    title = f" {mode}_seed{seed}_{embed_type}_{enc_lang}_{dec_lang}"

                    precision, recall, acc, fscore, mae = test_model(model, test_loader, title)
                    numeric_results[mode]["embed_type"].append(embed_type)
                    numeric_results[mode]["enc_lang"].append(enc_lang)
                    numeric_results[mode]["dec_lang"].append(dec_lang)
                    numeric_results[mode]["precision"].append(precision)
                    numeric_results[mode]["recall"].append(recall)
                    numeric_results[mode]["acc"].append(acc)
                    numeric_results[mode]["fscore"].append(fscore)
                    numeric_results[mode]["stitched"].append(enc_lang != dec_lang)
                    numeric_results[mode]["mae"].append(mae)
                    numeric_results[mode]["seed"].append(seed)
    
    for mode in ["finetune", "full"]:
        for embed_type in ["absolute", "relative"]:
            for enc_lang  in ALL_LANGS:
                   del models[mode][embed_type][enc_lang]
                    


mode:   0%|                                                                              | 0/2 [00:00<?, ?it/s]Global seed set to 0
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['roberta.pooler.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Global seed set to 0
Some weights of the model checkpoint at PlanTL-GOB-ES/roberta-base-bne were not used

Global seed set to 0
Some weights of the model checkpoint at ClassCat/roberta-base-french were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
mode: 100%|██████████████████████████████████████████████████████████████████████| 2/2 [00:55<00:00, 27.75s/it]
Computing finetune_seed0_absolute_en_en: 100%|███████████████████████████████| 157/157 [00:16<00:00,  9.51it/s]
Computing finetune_seed0_absolute_en_es: 100%|████████████████

Computing full_seed0_relative_fr_fr: 100%|███████████████████████████████████| 157/157 [00:15<00:00,  9.87it/s]


In [24]:
for mode in ["full"]:
    df = pd.DataFrame(numeric_results[mode])
    df_2 = pd.read_csv(
        RESULT_PATH / f"nlp_multilingual-stitching-amazon-{'fine_grained' if fine_grained else 'coarse_grained'}-finetune-{train_perc}.tsv",
        sep='\t')
    
    df = pd.concat([df, df_2])
    df.to_csv(
        RESULT_PATH / f"nlp_multilingual-stitching-amazon-{'fine_grained' if fine_grained else 'coarse_grained'}-{mode}-{train_perc}.tsv",
        sep="\t",
        index=False
    )

In [22]:
df = pd.read_csv(
    RESULT_PATH / f"nlp_multilingual-stitching-amazon-{'fine_grained' if fine_grained else 'coarse_grained'}-finetune-{train_perc}.tsv",
    sep='\t')


df = df.drop(columns=["stitched", "seed", "precision", "recall"]).groupby(
    ["embed_type", "enc_lang", "dec_lang"]
).agg([np.mean,
       np.std
      ]).round(3)


df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,acc,fscore,fscore,mae,mae
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
embed_type,enc_lang,dec_lang,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
absolute,en,en,0.659,0.0,0.646,0.009,0.398,0.014
absolute,en,es,0.19,0.012,0.07,0.005,1.834,0.045
absolute,en,fr,0.102,0.139,0.036,0.043,1.988,1.114
absolute,es,en,0.37,0.025,0.238,0.006,1.185,0.016
absolute,es,es,0.646,0.002,0.624,0.001,0.427,0.004
absolute,es,fr,0.251,0.202,0.202,0.088,1.755,0.841
absolute,fr,en,0.224,0.008,0.164,0.019,1.02,0.006
absolute,fr,es,0.205,0.055,0.167,0.02,1.82,0.231
absolute,fr,fr,0.599,0.006,0.558,0.012,0.528,0.015
relative,en,en,0.649,0.005,0.626,0.001,0.426,0.007


In [25]:
df = pd.read_csv(
    RESULT_PATH / f"nlp_multilingual-stitching-amazon-{'fine_grained' if fine_grained else 'coarse_grained'}-full-{train_perc}.tsv",
    sep='\t')


df = df.drop(columns=["stitched", "seed", "precision", "recall"]).groupby(
    ["embed_type", "enc_lang", "dec_lang"]
).agg([np.mean,
       np.std
      ]).round(3)


df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,acc,fscore,fscore,mae,mae
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
embed_type,enc_lang,dec_lang,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
absolute,en,en,0.683,0.041,0.675,0.05,0.362,0.062
absolute,en,es,0.234,0.076,0.149,0.136,1.607,0.396
absolute,en,fr,0.143,0.122,0.088,0.095,1.711,0.922
absolute,es,en,0.301,0.121,0.213,0.043,1.397,0.367
absolute,es,es,0.659,0.023,0.643,0.033,0.398,0.05
absolute,es,fr,0.214,0.157,0.187,0.068,1.861,0.622
absolute,fr,en,0.29,0.115,0.235,0.123,0.954,0.115
absolute,fr,es,0.175,0.065,0.148,0.037,1.928,0.248
absolute,fr,fr,0.621,0.039,0.589,0.056,0.479,0.086
relative,en,en,0.677,0.05,0.66,0.059,0.379,0.081
