# **Machine learning for low-resource NLP**: Advancing AI for Linguistic Inclusion
Cross-lingual transfer learning and pseudo-labeling for multilingual named entity recognition

In [1]:
import os
import random

import numpy as np
import pandas as pd
from tqdm import tqdm
from IPython.display import display, Markdown

import torch
import torch.optim as optim
from datasets import load_dataset
from sklearn.model_selection import train_test_split

In [2]:
from model import BertBilstmCrf
from dataloader import create_dataloaders
from training import train_model, train_pseudo_labeling, evaluate_epoch
from config import BaseConfig, TrainConfig, FineTuneConfig, PseudoLabelingConfig

In [3]:
def set_seed(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and CUDA), then sets the cuDNN flags: ``deterministic`` on,
    ``benchmark`` off.

    Args:
        seed: Seed value handed to each generator.
    """
    # Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU, current CUDA device, all CUDA devices)
    for seed_fn in (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    # cuDNN flags
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Seed all RNGs once, up front, with the seed stored in the shared config.
set_seed(BaseConfig.RANDOM_STATE)

In [4]:
def load_wikiann_datasets(language_codes, cutoff=None):
    """Download WikiANN for each language and re-split it 80/10/10.

    The dataset's own train/validation/test splits are concatenated in that
    order, optionally truncated to the first ``cutoff`` rows, and then split
    again using ``BaseConfig.RANDOM_STATE`` as the random state.

    Args:
        language_codes: Iterable of WikiANN configuration names.
        cutoff: Maximum number of rows kept per language before re-splitting.
            Any falsy value (``None`` or ``0``) keeps every row.

    Returns:
        Dict mapping each language code to
        ``{"train": DataFrame, "val": DataFrame, "test": DataFrame}``.
    """
    seed = BaseConfig.RANDOM_STATE
    datasets_by_lang = {}

    for code in language_codes:
        # Raw dataset from the Hugging Face hub
        raw = load_dataset("unimelb-nlp/wikiann", name=code)

        # Merge the published splits into one frame
        frames = [
            pd.DataFrame(raw[split]) for split in ("train", "validation", "test")
        ]
        merged = pd.concat(frames).reset_index(drop=True)
        if cutoff:
            merged = merged.head(cutoff)

        # 80% train, then the remaining 20% halved into val and test
        train_part, holdout = train_test_split(
            merged, test_size=0.2, random_state=seed
        )
        val_part, test_part = train_test_split(
            holdout, test_size=0.5, random_state=seed
        )

        datasets_by_lang[code] = {
            "train": train_part,
            "val": val_part,
            "test": test_part,
        }

    return datasets_by_lang


# Download WikiANN for every low-resource language and keep the re-split
# train/val/test frames in memory; the experiment cells below read this dict.
low_resource_datasets = load_wikiann_datasets(BaseConfig.low_resource_langs)

In [5]:
def setup_optimizer(model, CONFIG):
    """Build an Adam optimizer with one learning rate per model component.

    A parameter group is created for each of the ``bert``, ``lstm`` and
    ``crf`` attributes that ``model`` actually has, in that order.

    Args:
        model: Model whose ``bert`` / ``lstm`` / ``crf`` sub-modules (if
            present) are optimized.
        CONFIG: Object exposing ``BERT_LEARNING_RATE``, ``LSTM_LEARNING_RATE``,
            ``CRF_LEARNING_RATE`` (each read only when the matching component
            exists) and ``WEIGHT_DECAY``.

    Returns:
        The configured ``torch.optim.Adam`` instance.
    """
    # (model attribute, CONFIG attribute holding its learning rate)
    component_rates = (
        ("bert", "BERT_LEARNING_RATE"),
        ("lstm", "LSTM_LEARNING_RATE"),
        ("crf", "CRF_LEARNING_RATE"),
    )

    # Only components present on the model get a group with the appropriate rate
    groups = [
        {
            "params": getattr(model, component).parameters(),
            "lr": getattr(CONFIG, rate_name),
        }
        for component, rate_name in component_rates
        if hasattr(model, component)
    ]

    return optim.Adam(groups, weight_decay=CONFIG.WEIGHT_DECAY)

### Baseline Experiment
A BERT-BiLSTM-CRF model is trained separately on each low-resource language's NER data; its test F1 serves as the baseline for the experiments below.

In [None]:
baseline_results = []

# Create the output folders before any (slow) training starts, so a missing
# directory cannot discard a finished run when the checkpoint is saved.
os.makedirs("models", exist_ok=True)
os.makedirs("results", exist_ok=True)

# Iterate through low-resource languages
for lang, lang_data in tqdm(low_resource_datasets.items(), ncols=80):

    train_loader, val_loader, test_loader = create_dataloaders(lang_data)

    # ------------------------------------------ TRAINING ------------------------------------------ #

    model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
    optimizer = setup_optimizer(model, TrainConfig)
    best_model_state, train_f1, val_f1 = train_model(
        model, optimizer, train_loader, val_loader, TrainConfig
    )

    # ------------------------------------------ EVALUATION ------------------------------------------ #
    # Evaluate a fresh model loaded with the best weights returned by training.
    eval_model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
    # load_state_dict's second positional parameter is `strict`; the config
    # class must not be passed there (it only worked because a class is truthy).
    eval_model.load_state_dict(best_model_state)
    test_loss, test_f1 = evaluate_epoch(eval_model, test_loader, TrainConfig)

    # ------------------------------------------ RESULTS ------------------------------------------ #
    # This checkpoint is the starting point of the pseudo-labeling experiment.
    torch.save(best_model_state, f"models/{lang}_baseline.pth")

    baseline_results.append(
        {"language": lang, "train_f1": train_f1, "val_f1": val_f1, "test_f1": test_f1}
    )

# Save and display results
baseline = pd.DataFrame(baseline_results)
baseline.to_csv("results/baseline.csv", index=False)

markdown_table = baseline.to_markdown(index=False)
display(Markdown(markdown_table))

100%|█████████████████████████████████████████████| 6/6 [04:31<00:00, 45.20s/it]


| language   |   train_f1 |   val_f1 |   test_f1 |
|:-----------|-----------:|---------:|----------:|
| mg         |   0.993728 | 0.933673 |  0.960352 |
| fo         |   0.974224 | 0.897482 |  0.901099 |
| co         |   0.956204 | 0.852308 |  0.81323  |
| hsb        |   0.951443 | 0.923387 |  0.854578 |
| bh         |   0.981549 | 0.888689 |  0.80212  |
| cv         |   0.977741 | 0.892617 |  0.830443 |

In [6]:
# Reload the baseline scores from disk; the later experiment cells look up
# each language's baseline test F1 in this frame to compute the improvement.
baseline = pd.read_csv("results/baseline.csv")

### Cross-lingual Transfer Learning
A technique where a model trained on one language (usually with more labeled data) is adapted to perform well on another language, leveraging shared linguistic representations.

In [None]:
transfer_results = []

# Create the output folders before the long training sweep starts.
os.makedirs("models", exist_ok=True)
os.makedirs("results", exist_ok=True)

# Each step loads a larger slice of the high-resource data
# (augmentation_factor * 240 rows per language, before re-splitting).
for augmentation_factor in tqdm(range(1, 24), ncols=80):

    high_resource_datasets = load_wikiann_datasets(
        BaseConfig.high_resource_langs, augmentation_factor * 240
    )

    # Iterate through low-resource and adjacent high-resource languages.
    # Pairing is positional: the i-th low-resource language is matched with
    # the i-th high-resource language, and zip stops at the shorter of the two.
    for (low_resource_lang, low_resource_data), (
        high_resource_lang,
        high_resource_data,
    ) in tqdm(
        zip(low_resource_datasets.items(), high_resource_datasets.items()),
        ncols=80,
        leave=False,
    ):

        high_train_loader, high_val_loader, _ = create_dataloaders(high_resource_data)
        low_train_loader, low_val_loader, low_test_loader = create_dataloaders(
            low_resource_data
        )

        # ------------------------------------------ PRE-TRAINING ------------------------------------------ #

        high_resource_model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
        optimizer = setup_optimizer(high_resource_model, TrainConfig)

        # The F1 scores returned here are overwritten by the fine-tuning step
        # below; only the pre-trained weights are carried forward.
        high_resource_model_state, train_f1, val_f1 = train_model(
            high_resource_model,
            optimizer,
            high_train_loader,
            high_val_loader,
            TrainConfig,
        )

        # ------------------------------------------ FINE-TUNING ------------------------------------------ #

        # Start from the pre-trained weights and continue on the low-resource data.
        model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
        model.load_state_dict(high_resource_model_state)
        optimizer = setup_optimizer(model, FineTuneConfig)

        best_model_state, train_f1, val_f1 = train_model(
            model, optimizer, low_train_loader, low_val_loader, FineTuneConfig
        )

        # ------------------------------------------ EVALUATION ------------------------------------------ #

        eval_model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
        eval_model.load_state_dict(best_model_state)
        test_loss, test_f1 = evaluate_epoch(eval_model, low_test_loader)

        # ------------------------------------------ RESULTS ------------------------------------------ #
        # NOTE: the filename does not contain the augmentation factor, so each
        # factor overwrites the checkpoint written by the previous one.
        torch.save(
            best_model_state,
            f"models/{low_resource_lang}_{high_resource_lang}_transfer.pth",
        )

        # Relative change in test F1 versus the baseline, in percent.
        baseline_performance = baseline.loc[
            baseline["language"] == low_resource_lang, "test_f1"
        ].item()
        improvement = (test_f1 - baseline_performance) / baseline_performance * 100

        transfer_results.append(
            {
                "high_resource_language": high_resource_lang,
                "low_resource_lang": low_resource_lang,
                "augmentation_factor": augmentation_factor,
                "train_f1": train_f1,
                "val_f1": val_f1,
                "test_f1": test_f1,
                "improvement": improvement,
            }
        )

        print(
            f"Aug: {augmentation_factor}  {low_resource_lang} Improvement over baseline: {improvement:.5f}"
        )

    # Persist what has been collected so far after every augmentation factor,
    # so an interruption late in the sweep does not lose the earlier results.
    pd.DataFrame(transfer_results).to_csv("results/transfer_learning.csv", index=False)

transfer_data = pd.DataFrame(transfer_results)
transfer_data.to_csv("results/transfer_learning.csv", index=False)

### Iterative Pseudo Labeling
A semi-supervised learning approach where a model generates predictions on unlabeled data, selects confident predictions as pseudo-labels, and retrains iteratively to improve performance.

In [None]:
iterative_pseudo_labeling_results = []

# High-resource data (first 10000 rows per language, before re-splitting);
# its train split is handed to the pseudo-labeling routine as the unlabeled pool.
high_resource_datasets = load_wikiann_datasets(BaseConfig.high_resource_langs, 10000)

# The sweep overrides a class attribute on the shared config, so remember the
# configured value and put it back afterwards; otherwise every later cell
# would silently run with the last swept quantile.
original_confidence_quantile = PseudoLabelingConfig.CONFIDENCE_QUANTILE
try:
    for conf_quantile in [0.88, 0.89, 0.9, 0.91, 0.92, 0.93]:
        PseudoLabelingConfig.CONFIDENCE_QUANTILE = conf_quantile

        # Five repeated runs per quantile
        for repeat_idx in range(5):

            # Iterate through low-resource and adjacent high-resource languages
            # (paired by position; the high-resource language code is unused).
            for (lang, low_resource_data), (
                _high_resource_lang,
                high_resource_data,
            ) in tqdm(
                zip(low_resource_datasets.items(), high_resource_datasets.items()),
                ncols=80,
            ):

                train_loader, val_loader, test_loader = create_dataloaders(
                    low_resource_data
                )
                unlabeled_data = high_resource_data["train"]

                # ------------------------------------------ TRAINING ------------------------------------------ #

                # Start from the saved baseline checkpoint of this language.
                model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
                model.load_state_dict(torch.load(f"models/{lang}_baseline.pth"))
                optimizer = setup_optimizer(model, PseudoLabelingConfig)
                best_model_state, train_f1, val_f1 = train_pseudo_labeling(
                    model,
                    optimizer,
                    train_loader,
                    val_loader,
                    unlabeled_data,
                    PseudoLabelingConfig,
                )

                # ------------------------------------------ EVALUATION ------------------------------------------ #

                eval_model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
                eval_model.load_state_dict(best_model_state)
                test_loss, test_f1 = evaluate_epoch(eval_model, test_loader)

                # ------------------------------------------ RESULTS ------------------------------------------ #
                # NOTE: the filename has neither quantile nor repeat index, so
                # every run overwrites the previous checkpoint for this language.
                torch.save(
                    best_model_state, f"models/{lang}_iterative_pseudo_labeling.pth"
                )

                # Relative change in test F1 versus the baseline, in percent.
                baseline_performance = baseline.loc[
                    baseline["language"] == lang, "test_f1"
                ].item()
                improvement = (
                    (test_f1 - baseline_performance) / baseline_performance * 100
                )

                iterative_pseudo_labeling_results.append(
                    {
                        "language": lang,
                        "train_f1": train_f1,
                        "val_f1": val_f1,
                        "test_f1": test_f1,
                        "improvement": improvement,
                        "confidence_quantile": conf_quantile,
                    }
                )

                print(
                    f"Language: {lang}    Improvement over baseline: {improvement:.5f}"
                )
finally:
    PseudoLabelingConfig.CONFIDENCE_QUANTILE = original_confidence_quantile

# Save results. The path matches the file the inspection cell further down
# reads (the original wrote to "results/1full_iterative_pseudo_labeling.csv",
# which nothing in the notebook loads).
iterative_pseudo_labeling = pd.DataFrame(iterative_pseudo_labeling_results)
iterative_pseudo_labeling.to_csv(
    "results/full_iterative_pseudo_labeling.csv", index=False
)

1it [04:09, 249.24s/it]

Language: mg    Improvement over baseline: 0.00000
Added 504 rows of data
Added 0 rows of data
Added 0 rows of data


2it [09:45, 300.41s/it]

Language: fo    Improvement over baseline: 1.21951
Added 9 rows of data
Added 0 rows of data


3it [14:37, 296.52s/it]

Language: co    Improvement over baseline: 8.61244
Added 29 rows of data
Added 4 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data


4it [21:26, 340.85s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [24:26, 282.87s/it]

Language: bh    Improvement over baseline: 3.08370


6it [28:30, 285.02s/it]


Language: cv    Improvement over baseline: 0.23202


0it [00:00, ?it/s]

Added 341 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [06:04, 364.86s/it]

Language: mg    Improvement over baseline: 0.45872
Added 302 rows of data


2it [10:35, 309.59s/it]

Language: fo    Improvement over baseline: 1.21951
Added 18 rows of data
Added 0 rows of data
Added 0 rows of data
Added 5 rows of data
Added 0 rows of data
Added 0 rows of data


3it [17:09, 347.94s/it]

Language: co    Improvement over baseline: 2.39234
Added 17 rows of data


4it [21:45, 319.48s/it]

Language: hsb    Improvement over baseline: -0.84034
Added 175 rows of data
Added 0 rows of data
Added 0 rows of data


5it [25:41, 289.51s/it]

Language: bh    Improvement over baseline: 0.44053


6it [29:16, 292.71s/it]


Language: cv    Improvement over baseline: 3.24826


0it [00:00, ?it/s]

Added 380 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 54 rows of data
Added 0 rows of data


1it [06:24, 384.36s/it]

Language: mg    Improvement over baseline: -1.60550


2it [10:24, 299.59s/it]

Language: fo    Improvement over baseline: 2.43902


3it [13:35, 249.78s/it]

Language: co    Improvement over baseline: 7.65550
Added 25 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 2 rows of data
Added 0 rows of data
Added 5 rows of data
Added 2 rows of data
Added 0 rows of data
Added 7 rows of data


4it [23:19, 381.76s/it]

Language: hsb    Improvement over baseline: 0.21008


5it [25:16, 286.48s/it]

Language: bh    Improvement over baseline: 3.08370


6it [29:24, 294.16s/it]


Language: cv    Improvement over baseline: 0.23202


0it [00:00, ?it/s]

Added 369 rows of data


1it [04:17, 257.80s/it]

Language: mg    Improvement over baseline: 0.45872
Added 397 rows of data
Added 0 rows of data


2it [09:19, 283.34s/it]

Language: fo    Improvement over baseline: 1.21951
Added 11 rows of data
Added 1 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [16:47, 358.83s/it]

Language: co    Improvement over baseline: 9.56938
Added 19 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data
Added 2 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [27:24, 468.74s/it]

Language: hsb    Improvement over baseline: -3.36134


5it [29:43, 349.76s/it]

Language: bh    Improvement over baseline: 1.32159
Added 123 rows of data


6it [34:16, 342.70s/it]


Language: cv    Improvement over baseline: 2.32019


1it [03:50, 230.22s/it]

Language: mg    Improvement over baseline: 0.45872
Added 275 rows of data
Added 0 rows of data


2it [08:59, 276.56s/it]

Language: fo    Improvement over baseline: 2.78746
Added 11 rows of data
Added 0 rows of data


3it [13:54, 285.18s/it]

Language: co    Improvement over baseline: 10.04785


4it [17:05, 247.82s/it]

Language: hsb    Improvement over baseline: -0.84034


5it [19:09, 203.35s/it]

Language: bh    Improvement over baseline: 0.44053
Added 169 rows of data
Added 0 rows of data


6it [24:16, 242.77s/it]


Language: cv    Improvement over baseline: 1.16009


1it [03:25, 205.80s/it]

Language: mg    Improvement over baseline: 0.22936
Added 455 rows of data
Added 0 rows of data
Added 0 rows of data
Added 13 rows of data
Added 4 rows of data


2it [09:58, 315.52s/it]

Language: fo    Improvement over baseline: 1.21951
Added 15 rows of data


3it [14:19, 290.60s/it]

Language: co    Improvement over baseline: 5.74163
Added 23 rows of data


4it [19:00, 286.97s/it]

Language: hsb    Improvement over baseline: 1.47059


5it [21:04, 228.25s/it]

Language: bh    Improvement over baseline: 1.76211


6it [23:53, 238.96s/it]


Language: cv    Improvement over baseline: 1.62413


0it [00:00, ?it/s]

Added 322 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [06:10, 370.24s/it]

Language: mg    Improvement over baseline: 0.45872


2it [10:13, 295.51s/it]

Language: fo    Improvement over baseline: 1.56794
Added 11 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data


3it [17:43, 366.18s/it]

Language: co    Improvement over baseline: 5.74163


4it [20:04, 277.32s/it]

Language: hsb    Improvement over baseline: -4.83193


5it [21:46, 214.08s/it]

Language: bh    Improvement over baseline: 1.98238
Added 90 rows of data
Added 0 rows of data


6it [26:46, 267.67s/it]


Language: cv    Improvement over baseline: 2.78422


0it [00:00, ?it/s]

Added 302 rows of data
Added 0 rows of data


1it [04:47, 287.71s/it]

Language: mg    Improvement over baseline: 0.45872
Added 249 rows of data
Added 0 rows of data
Added 0 rows of data


2it [10:14, 310.82s/it]

Language: fo    Improvement over baseline: 2.26481


3it [13:46, 265.65s/it]

Language: co    Improvement over baseline: 9.09091
Added 20 rows of data
Added 0 rows of data
Added 6 rows of data
Added 2 rows of data
Added 4 rows of data
Added 20 rows of data


4it [20:45, 326.26s/it]

Language: hsb    Improvement over baseline: -1.47059
Added 239 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


5it [25:26, 309.68s/it]

Language: bh    Improvement over baseline: 3.96476


6it [29:28, 294.81s/it]


Language: cv    Improvement over baseline: 1.39211


1it [03:47, 227.72s/it]

Language: mg    Improvement over baseline: 0.45872


2it [06:30, 189.70s/it]

Language: fo    Improvement over baseline: -0.87108
Added 14 rows of data
Added 0 rows of data


3it [11:18, 234.39s/it]

Language: co    Improvement over baseline: 9.09091
Added 16 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [17:20, 284.69s/it]

Language: hsb    Improvement over baseline: 0.63025


5it [19:37, 231.58s/it]

Language: bh    Improvement over baseline: 1.54185
Added 74 rows of data
Added 0 rows of data
Added 0 rows of data


6it [25:00, 250.01s/it]


Language: cv    Improvement over baseline: 3.24826


1it [03:53, 233.61s/it]

Language: mg    Improvement over baseline: 0.45872


2it [06:40, 194.49s/it]

Language: fo    Improvement over baseline: 1.04530


3it [09:45, 190.18s/it]

Language: co    Improvement over baseline: 6.69856
Added 44 rows of data
Added 1 rows of data
Added 0 rows of data
Added 0 rows of data


4it [15:39, 254.76s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [18:18, 220.08s/it]

Language: bh    Improvement over baseline: 3.30396
Added 1 rows of data


6it [22:52, 228.72s/it]


Language: cv    Improvement over baseline: 4.40835


0it [00:00, ?it/s]

Added 286 rows of data
Added 0 rows of data
Added 0 rows of data
Added 2 rows of data


1it [05:34, 334.65s/it]

Language: mg    Improvement over baseline: 0.45872
Added 379 rows of data
Added 0 rows of data


2it [10:33, 313.71s/it]

Language: fo    Improvement over baseline: 0.17422
Added 16 rows of data


3it [14:55, 289.79s/it]

Language: co    Improvement over baseline: 8.61244
Added 24 rows of data
Added 0 rows of data
Added 2 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 2 rows of data
Added 0 rows of data
Added 7 rows of data
Added 0 rows of data
Added 2 rows of data
Added 2 rows of data
Added 2 rows of data


4it [26:00, 438.20s/it]

Language: hsb    Improvement over baseline: 0.21008


5it [29:04, 346.58s/it]

Language: bh    Improvement over baseline: 4.84581
Added 40 rows of data


6it [33:44, 337.43s/it]


Language: cv    Improvement over baseline: 3.94432


1it [03:21, 201.99s/it]

Language: mg    Improvement over baseline: 0.45872
Added 426 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 8 rows of data


2it [09:56, 315.45s/it]

Language: fo    Improvement over baseline: 0.69686
Added 9 rows of data
Added 3 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [18:10, 396.67s/it]

Language: co    Improvement over baseline: 8.61244
Added 19 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [24:13, 383.41s/it]

Language: hsb    Improvement over baseline: -2.10084


5it [26:55, 303.49s/it]

Language: bh    Improvement over baseline: 6.60793


6it [30:33, 305.62s/it]


Language: cv    Improvement over baseline: 0.92807


1it [02:58, 178.41s/it]

Language: mg    Improvement over baseline: 0.45872


2it [06:10, 186.65s/it]

Language: fo    Improvement over baseline: 1.21951
Added 10 rows of data
Added 7 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [14:06, 318.82s/it]

Language: co    Improvement over baseline: 9.09091
Added 37 rows of data


4it [18:42, 301.59s/it]

Language: hsb    Improvement over baseline: -1.05042
Added 117 rows of data
Added 0 rows of data
Added 12 rows of data
Added 0 rows of data


5it [22:58, 285.40s/it]

Language: bh    Improvement over baseline: 6.16740


6it [27:10, 271.81s/it]


Language: cv    Improvement over baseline: 3.94432


0it [00:00, ?it/s]

Added 307 rows of data


1it [04:10, 250.73s/it]

Language: mg    Improvement over baseline: 0.22936
Added 355 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data


2it [10:40, 332.59s/it]

Language: fo    Improvement over baseline: 0.17422


3it [13:53, 269.02s/it]

Language: co    Improvement over baseline: 9.56938
Added 17 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 6 rows of data
Added 0 rows of data
Added 2 rows of data
Added 0 rows of data
Added 0 rows of data
Added 2 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [23:51, 398.89s/it]

Language: hsb    Improvement over baseline: 2.10084
Added 210 rows of data
Added 1 rows of data
Added 0 rows of data


5it [27:59, 344.29s/it]

Language: bh    Improvement over baseline: 3.96476
Added 101 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


6it [34:28, 344.71s/it]


Language: cv    Improvement over baseline: 0.23202


0it [00:00, ?it/s]

Added 277 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data
Added 128 rows of data
Added 0 rows of data
Added 4 rows of data


1it [06:52, 412.86s/it]

Language: mg    Improvement over baseline: -0.45872
Added 389 rows of data
Added 0 rows of data
Added 0 rows of data


2it [12:24, 364.92s/it]

Language: fo    Improvement over baseline: 0.87108
Added 4 rows of data
Added 0 rows of data
Added 1 rows of data
Added 0 rows of data
Added 2 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [20:18, 414.78s/it]

Language: co    Improvement over baseline: 2.39234
Added 12 rows of data
Added 0 rows of data
Added 25 rows of data


4it [25:53, 383.41s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [27:34, 281.66s/it]

Language: bh    Improvement over baseline: 3.30396


6it [31:18, 313.12s/it]


Language: cv    Improvement over baseline: 3.48028


1it [03:50, 230.16s/it]

Language: mg    Improvement over baseline: 0.45872


2it [06:59, 206.10s/it]

Language: fo    Improvement over baseline: 3.83275


3it [10:10, 199.23s/it]

Language: co    Improvement over baseline: 6.69856
Added 8 rows of data
Added 0 rows of data
Added 1 rows of data


4it [15:39, 250.40s/it]

Language: hsb    Improvement over baseline: -1.47059
Added 103 rows of data


5it [18:56, 231.24s/it]

Language: bh    Improvement over baseline: 2.86344
Added 13 rows of data
Added 0 rows of data


6it [23:56, 239.34s/it]


Language: cv    Improvement over baseline: 2.32019


0it [00:00, ?it/s]

Added 350 rows of data


1it [04:11, 251.41s/it]

Language: mg    Improvement over baseline: 0.00000
Added 378 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 13 rows of data


2it [10:47, 336.69s/it]

Language: fo    Improvement over baseline: 3.31010
Added 9 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [19:12, 413.45s/it]

Language: co    Improvement over baseline: 6.69856
Added 5 rows of data
Added 0 rows of data
Added 0 rows of data
Added 2 rows of data


4it [25:14, 393.19s/it]

Language: hsb    Improvement over baseline: -3.78151


5it [27:36, 302.54s/it]

Language: bh    Improvement over baseline: 4.84581


6it [30:22, 303.68s/it]


Language: cv    Improvement over baseline: 1.16009


1it [03:47, 227.81s/it]

Language: mg    Improvement over baseline: 0.45872
Added 323 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 9 rows of data


2it [10:16, 322.27s/it]

Language: fo    Improvement over baseline: 1.39373


3it [13:49, 272.72s/it]

Language: co    Improvement over baseline: 9.09091
Added 13 rows of data
Added 0 rows of data


4it [18:56, 286.23s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [20:57, 226.43s/it]

Language: bh    Improvement over baseline: 1.76211
Added 75 rows of data
Added 0 rows of data


6it [25:53, 259.00s/it]


Language: cv    Improvement over baseline: 1.85615


0it [00:00, ?it/s]

Added 354 rows of data
Added 0 rows of data
Added 0 rows of data


1it [05:09, 309.35s/it]

Language: mg    Improvement over baseline: 0.45872
Added 268 rows of data
Added 0 rows of data
Added 0 rows of data
Added 4 rows of data
Added 2 rows of data


2it [11:38, 356.49s/it]

Language: fo    Improvement over baseline: 0.52265


3it [14:48, 280.20s/it]

Language: co    Improvement over baseline: 6.22010
Added 9 rows of data
Added 0 rows of data
Added 4 rows of data
Added 0 rows of data
Added 1 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data
Added 156 rows of data


4it [24:59, 410.76s/it]

Language: hsb    Improvement over baseline: -2.31092
Added 207 rows of data
Added 0 rows of data
Added 0 rows of data


5it [29:03, 350.79s/it]

Language: bh    Improvement over baseline: 2.20264


6it [33:04, 330.80s/it]


Language: cv    Improvement over baseline: -0.23202


1it [02:07, 127.29s/it]

Language: mg    Improvement over baseline: 0.22936


2it [05:23, 167.95s/it]

Language: fo    Improvement over baseline: -0.87108
Added 8 rows of data
Added 5 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [12:24, 283.47s/it]

Language: co    Improvement over baseline: 7.17703
Added 11 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [19:41, 343.92s/it]

Language: hsb    Improvement over baseline: 1.26050
Added 200 rows of data
Added 0 rows of data
Added 0 rows of data


5it [23:44, 307.53s/it]

Language: bh    Improvement over baseline: 5.06608
Added 46 rows of data
Added 0 rows of data
Added 0 rows of data
Added 9 rows of data


6it [29:39, 296.51s/it]


Language: cv    Improvement over baseline: 3.24826


1it [03:48, 228.56s/it]

Language: mg    Improvement over baseline: 0.45872
Added 308 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


2it [09:56, 310.52s/it]

Language: fo    Improvement over baseline: 0.69686


3it [13:03, 254.00s/it]

Language: co    Improvement over baseline: 7.17703
Added 5 rows of data
Added 2 rows of data
Added 2 rows of data


4it [18:27, 281.86s/it]

Language: hsb    Improvement over baseline: 1.26050
Added 218 rows of data


5it [21:53, 254.24s/it]

Language: bh    Improvement over baseline: 5.06608
Added 20 rows of data
Added 0 rows of data
Added 0 rows of data


6it [27:26, 274.37s/it]


Language: cv    Improvement over baseline: 2.55220


1it [03:53, 233.91s/it]

Language: mg    Improvement over baseline: 0.45872
Added 275 rows of data


2it [08:35, 261.94s/it]

Language: fo    Improvement over baseline: 1.21951


3it [12:16, 243.25s/it]

Language: co    Improvement over baseline: 3.34928
Added 5 rows of data
Added 1 rows of data


4it [17:11, 263.79s/it]

Language: hsb    Improvement over baseline: -2.10084


5it [20:03, 230.56s/it]

Language: bh    Improvement over baseline: 1.76211


6it [24:14, 242.37s/it]


Language: cv    Improvement over baseline: 2.78422


1it [03:46, 226.15s/it]

Language: mg    Improvement over baseline: 0.22936


2it [07:50, 236.83s/it]

Language: fo    Improvement over baseline: 1.04530


3it [11:22, 225.72s/it]

Language: co    Improvement over baseline: 11.48325
Added 10 rows of data
Added 0 rows of data
Added 0 rows of data


4it [17:05, 271.67s/it]

Language: hsb    Improvement over baseline: -1.26050


5it [19:19, 222.27s/it]

Language: bh    Improvement over baseline: 3.74449
Added 4 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


6it [25:40, 256.67s/it]


Language: cv    Improvement over baseline: 4.64037


0it [00:00, ?it/s]

Added 354 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 25 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 12 rows of data
Added 0 rows of data
Added 2 rows of data
Added 87 rows of data


1it [11:40, 700.19s/it]

Language: mg    Improvement over baseline: -0.91743
Added 318 rows of data
Added 0 rows of data


2it [17:32, 495.31s/it]

Language: fo    Improvement over baseline: 2.09059


3it [21:06, 367.00s/it]

Language: co    Improvement over baseline: 4.30622
Added 1 rows of data
Added 0 rows of data
Added 0 rows of data
Added 3 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data
Added 0 rows of data


4it [29:26, 419.46s/it]

Language: hsb    Improvement over baseline: -0.42017


5it [31:52, 320.86s/it]

Language: bh    Improvement over baseline: 3.74449


6it [35:33, 355.58s/it]


Language: cv    Improvement over baseline: 0.23202


0it [00:00, ?it/s]

Added 298 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [05:32, 332.55s/it]

Language: mg    Improvement over baseline: 0.91743


2it [09:40, 282.98s/it]

Language: fo    Improvement over baseline: 0.87108


3it [13:37, 261.68s/it]

Language: co    Improvement over baseline: 10.04785


4it [15:54, 212.78s/it]

Language: hsb    Improvement over baseline: -5.67227
Added 147 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 10 rows of data


5it [20:33, 236.66s/it]

Language: bh    Improvement over baseline: 1.98238


6it [24:39, 246.55s/it]


Language: cv    Improvement over baseline: -2.32019


1it [03:44, 224.86s/it]

Language: mg    Improvement over baseline: 0.45872
Added 343 rows of data
Added 0 rows of data


2it [08:39, 265.70s/it]

Language: fo    Improvement over baseline: 1.56794


3it [11:17, 216.80s/it]

Language: co    Improvement over baseline: 9.09091
Added 6 rows of data
Added 0 rows of data
Added 0 rows of data


4it [16:40, 258.79s/it]

Language: hsb    Improvement over baseline: -0.84034
Added 110 rows of data


5it [19:52, 234.65s/it]

Language: bh    Improvement over baseline: -0.66079


6it [23:31, 235.32s/it]


Language: cv    Improvement over baseline: 2.08817


0it [00:00, ?it/s]

Added 322 rows of data
Added 0 rows of data


1it [04:35, 275.69s/it]

Language: mg    Improvement over baseline: 0.45872
Added 308 rows of data
Added 0 rows of data
Added 0 rows of data
Added 16 rows of data
Added 1 rows of data


2it [11:04, 342.32s/it]

Language: fo    Improvement over baseline: 2.43902


3it [14:46, 287.26s/it]

Language: co    Improvement over baseline: 2.39234
Added 10 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data


4it [23:00, 368.99s/it]

Language: hsb    Improvement over baseline: 0.84034
Added 124 rows of data
Added 0 rows of data
Added 0 rows of data
Added 11 rows of data
Added 1 rows of data


5it [27:58, 343.37s/it]

Language: bh    Improvement over baseline: 2.64317


6it [31:39, 316.59s/it]


Language: cv    Improvement over baseline: -0.69606


1it [04:09, 249.00s/it]

Language: mg    Improvement over baseline: 0.22936
Added 249 rows of data


2it [09:09, 279.45s/it]

Language: fo    Improvement over baseline: 0.52265


3it [12:13, 235.70s/it]

Language: co    Improvement over baseline: 5.26316
Added 0 rows of data
Added 2 rows of data
Added 0 rows of data
Added 12 rows of data
Added 3 rows of data
Added 8 rows of data
Added 0 rows of data
Added 1 rows of data
Added 0 rows of data


4it [21:21, 359.07s/it]

Language: hsb    Improvement over baseline: 2.31092


5it [23:39, 279.47s/it]

Language: bh    Improvement over baseline: 2.20264


6it [26:54, 269.15s/it]


Language: cv    Improvement over baseline: 0.46404


1it [03:38, 218.30s/it]

Language: mg    Improvement over baseline: 0.45872


2it [06:41, 197.81s/it]

Language: fo    Improvement over baseline: -2.43902


3it [10:46, 219.06s/it]

Language: co    Improvement over baseline: 9.09091
Added 2 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [18:18, 311.05s/it]

Language: hsb    Improvement over baseline: -0.63025
Added 147 rows of data
Added 0 rows of data
Added 4 rows of data
Added 0 rows of data


5it [22:49, 296.77s/it]

Language: bh    Improvement over baseline: 1.76211
Added 0 rows of data
Added 0 rows of data


6it [27:52, 278.67s/it]


Language: cv    Improvement over baseline: 1.62413


0it [00:00, ?it/s]

Added 257 rows of data
Added 0 rows of data
Added 2 rows of data
Added 86 rows of data
Added 4 rows of data
Added 13 rows of data
Added 5 rows of data
Added 0 rows of data
Added 41 rows of data


1it [08:12, 492.54s/it]

Language: mg    Improvement over baseline: -0.45872
Added 257 rows of data
Added 0 rows of data
Added 0 rows of data
Added 2 rows of data
Added 5 rows of data


2it [14:54, 439.03s/it]

Language: fo    Improvement over baseline: 2.78746
Added 5 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data
Added 1 rows of data


3it [22:30, 446.86s/it]

Language: co    Improvement over baseline: 8.61244
Added 6 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data


4it [29:34, 437.97s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [31:20, 318.10s/it]

Language: bh    Improvement over baseline: 1.10132
Added 2 rows of data
Added 0 rows of data


6it [36:46, 367.81s/it]

Language: cv    Improvement over baseline: 0.23202





9:

class PseudoLabelingConfig(BaseConfig):
    # Hyperparameter snapshot kept under the label "9" -- presumably the
    # settings of an earlier pseudo-labeling run; TODO confirm.
    EPOCHS                = 25
    PATIENCE              = 5
    BERT_LEARNING_RATE    = 0.00002
    LSTM_LEARNING_RATE    = 0.003
    CRF_LEARNING_RATE     = 0.00003

    # The sweep cell assigns CONFIDENCE_QUANTILE itself on every iteration.
    CONFIDENCE_QUANTILE   = 0.95
    PSEUDO_DELAY          = 8
    ENTROPY_THRESHOLD     = 0.2

10:

class PseudoLabelingConfig(BaseConfig):
    # Hyperparameter snapshot kept under the label "10" -- presumably the
    # settings of a later pseudo-labeling run; TODO confirm. Compared with
    # the snapshot labelled "9", only CONFIDENCE_QUANTILE (0.95 there) and
    # ENTROPY_THRESHOLD (0.2 there) differ.
    EPOCHS                = 25
    PATIENCE              = 5
    BERT_LEARNING_RATE    = 0.00002
    LSTM_LEARNING_RATE    = 0.003
    CRF_LEARNING_RATE     = 0.00003

    CONFIDENCE_QUANTILE   = 0.90
    PSEUDO_DELAY          = 8
    ENTROPY_THRESHOLD     = 0.15

In [20]:
# Load the saved pseudo-labeling results (one row per language and run,
# tagged with the confidence quantile used) for inspection.
df = pd.read_csv("results/full_iterative_pseudo_labeling.csv")
df

Unnamed: 0,language,train_f1,val_f1,test_f1,improvement,confidence_quantile
0,mg,0.999543,0.943878,0.960352,0.000000,0.88
1,fo,0.992799,0.908273,0.912088,1.219512,0.88
2,co,0.998431,0.852308,0.883268,8.612440,0.88
3,hsb,0.963006,0.933468,0.838420,-1.890756,0.88
4,bh,0.995533,0.903052,0.826855,3.083700,0.88
...,...,...,...,...,...,...
175,fo,0.995790,0.917266,0.926217,2.787456,0.93
176,co,0.991126,0.870769,0.883268,8.612440,0.93
177,hsb,0.989406,0.951613,0.838420,-1.890756,0.93
178,bh,0.977077,0.881508,0.810954,1.101322,0.93


In [21]:
# Show only the rows for the `hsb` language.
df.loc[df["language"].eq("hsb")]

Unnamed: 0,language,train_f1,val_f1,test_f1,improvement,confidence_quantile
3,hsb,0.963006,0.933468,0.83842,-1.890756,0.88
9,hsb,0.982419,0.939516,0.847397,-0.840336,0.88
15,hsb,0.982341,0.96371,0.856373,0.210084,0.88
21,hsb,0.989991,0.961694,0.825853,-3.361345,0.88
27,hsb,0.971733,0.933468,0.847397,-0.840336,0.88
33,hsb,0.985929,0.939516,0.867145,1.470588,0.89
39,hsb,0.940353,0.917339,0.813285,-4.831933,0.89
45,hsb,0.973022,0.959677,0.842011,-1.470588,0.89
51,hsb,0.99649,0.941532,0.859964,0.630252,0.89
57,hsb,0.989977,0.947581,0.83842,-1.890756,0.89
