# **Machine learning for low-resource NLP**: Advancing AI for Linguistic Inclusion
Cross-lingual transfer learning and pseudo-labeling for multilingual named entity recognition

In [1]:
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from IPython.display import display, Markdown

import torch
import torch.optim as optim
from datasets import load_dataset
from sklearn.model_selection import train_test_split

In [2]:
from model import BertBilstmCrf
from dataloader import create_dataloaders
from training import train_model, train_pseudo_labeling, evaluate_epoch
from config import BaseConfig, TrainConfig, FineTuneConfig, PseudoLabelingConfig

In [3]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(BaseConfig.RANDOM_STATE)

In [4]:
def load_wikiann_datasets(language_codes, cutoff=None):

    language_data = {}
    for lang in language_codes:

        # Load raw data from hugging face
        lang_dataset = load_dataset("unimelb-nlp/wikiann", name=lang)

        # Get data from different splits and combine
        train_df = pd.DataFrame(lang_dataset["train"])
        val_df = pd.DataFrame(lang_dataset["validation"])
        test_df = pd.DataFrame(lang_dataset["test"])

        complete_df = pd.concat([train_df, val_df, test_df]).reset_index(drop=True)
        complete_df = complete_df.head(cutoff) if cutoff else complete_df

        # Split data into new train/val/test splits
        train, temp = train_test_split(
            complete_df, test_size=0.2, random_state=BaseConfig.RANDOM_STATE
        )
        val, test = train_test_split(
            temp, test_size=0.5, random_state=BaseConfig.RANDOM_STATE
        )

        language_data[lang] = {"train": train, "val": val, "test": test}

    return language_data


# Download and store data
low_resource_datasets = load_wikiann_datasets(BaseConfig.low_resource_langs)

In [5]:
def setup_optimizer(model, CONFIG):
    param_groups = []
    # Check model layers and add appropiate learning rates
    if hasattr(model, "bert"):
        param_groups.append(
            {"params": model.bert.parameters(), "lr": TrainConfig.BERT_LEARNING_RATE}
        )
    if hasattr(model, "lstm"):
        param_groups.append(
            {"params": model.lstm.parameters(), "lr": TrainConfig.LSTM_LEARNING_RATE}
        )
    if hasattr(model, "crf"):
        param_groups.append(
            {"params": model.crf.parameters(), "lr": TrainConfig.CRF_LEARNING_RATE}
        )
    optimizer = optim.Adam(param_groups, weight_decay=TrainConfig.WEIGHT_DECAY)

    return optimizer

### Baseline Experiment
Baseline BERT-BiLSTM-CRF model trained on multilingual NER data

In [None]:
baseline_results = []

# Iterate through low-resource languages
for lang, lang_data in tqdm(low_resource_datasets.items(), ncols=80):

    train_loader, val_loader, test_loader = create_dataloaders(lang_data)

    # ------------------------------------------ TRAINING ------------------------------------------ #

    model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
    optimizer = setup_optimizer(model, TrainConfig)
    best_model_state, train_f1, val_f1 = train_model(
        model, optimizer, train_loader, val_loader, TrainConfig
    )

    # ------------------------------------------ EVALUATION ------------------------------------------ #
    eval_model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
    eval_model.load_state_dict(best_model_state, TrainConfig)
    test_loss, test_f1 = evaluate_epoch(eval_model, test_loader)

    # ------------------------------------------ RESULTS ------------------------------------------ #
    torch.save(best_model_state, f"models/{lang}_baseline.pth")

    baseline_results.append(
        {"language": lang, "train_f1": train_f1, "val_f1": val_f1, "test_f1": test_f1}
    )

# Save and display results
baseline = pd.DataFrame(baseline_results)
baseline.to_csv("results/baseline.csv", index=False)

markdown_table = baseline.to_markdown(index=False)
display(Markdown(markdown_table))

100%|█████████████████████████████████████████████| 6/6 [05:28<00:00, 54.80s/it]


| language   |   train_f1 |   val_f1 |   test_f1 |
|:-----------|-----------:|---------:|----------:|
| mg         |   0.993728 | 0.933673 |  0.960352 |
| fo         |   0.974224 | 0.897482 |  0.901099 |
| co         |   0.956204 | 0.852308 |  0.81323  |
| hsb        |   0.951443 | 0.923387 |  0.854578 |
| bh         |   0.981549 | 0.888689 |  0.80212  |
| cv         |   0.977741 | 0.892617 |  0.830443 |

### Cross-lingual Transfer Learning
A technique where a model trained on one language (usually with more labeled data) is adapted to perform well on another language, leveraging shared linguistic representations.

In [7]:
transfer_results = []

for augmentation_factor in tqdm(range(1, 24), ncols=80):

    high_resource_datasets = load_wikiann_datasets(
        BaseConfig.high_resource_langs, augmentation_factor * 240
    )

    # Iterate through low-resource and adjacent high-resource languages
    for (low_resource_lang, low_resource_data), (
        high_resource_lang,
        high_resource_data,
    ) in tqdm(
        zip(low_resource_datasets.items(), high_resource_datasets.items()),
        ncols=80,
        leave=False,
    ):

        high_train_loader, high_val_loader, _ = create_dataloaders(high_resource_data)
        low_train_loader, low_val_loader, low_test_loader = create_dataloaders(
            low_resource_data
        )

        # ------------------------------------------ PRE-TRAINING ------------------------------------------ #

        high_resource_model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
        optimizer = setup_optimizer(high_resource_model, TrainConfig)

        high_resource_model_state, train_f1, val_f1 = train_model(
            high_resource_model,
            optimizer,
            high_train_loader,
            high_val_loader,
            TrainConfig,
        )

        # ------------------------------------------ FINE-TUNING ------------------------------------------ #

        model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
        model.load_state_dict(high_resource_model_state)
        optimizer = setup_optimizer(model, FineTuneConfig)

        best_model_state, train_f1, val_f1 = train_model(
            model, optimizer, low_train_loader, low_val_loader, FineTuneConfig
        )

        # ------------------------------------------ EVALUATION ------------------------------------------ #

        eval_model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
        eval_model.load_state_dict(best_model_state)
        test_loss, test_f1 = evaluate_epoch(eval_model, low_test_loader)

        # ------------------------------------------ RESULTS ------------------------------------------ #
        torch.save(
            best_model_state,
            f"models/{low_resource_lang}_{high_resource_lang}_transfer.pth",
        )

        baseline_performance = baseline.loc[
            baseline["language"] == low_resource_lang, "test_f1"
        ].item()
        improvement = (test_f1 - baseline_performance) / baseline_performance * 100

        transfer_results.append(
            {
                "high_resource_language": high_resource_lang,
                "low_resource_lang": low_resource_lang,
                "augmentation_factor": augmentation_factor,
                "train_f1": train_f1,
                "val_f1": val_f1,
                "test_f1": test_f1,
                "improvement": improvement,
            }
        )

        print(
            f"Aug: {augmentation_factor}  {low_resource_lang} Improvement over baseline: {improvement:.5f}"
        )

transfer_data = pd.DataFrame(transfer_results)
transfer_data.to_csv("results/test_transfer_learning.csv", index=False)

  0%|                                                    | 0/23 [00:00<?, ?it/s]

Aug: 1  mg Improvement over baseline: 0.00000




Aug: 1  fo Improvement over baseline: -0.34843




Aug: 1  co Improvement over baseline: 3.34928




Aug: 1  hsb Improvement over baseline: -2.94118




Aug: 1  bh Improvement over baseline: 8.59031


  4%|█▊                                       | 1/23 [06:41<2:27:19, 401.80s/it]

Aug: 1  cv Improvement over baseline: 3.24826


  4%|█▊                                       | 1/23 [07:28<2:44:24, 448.37s/it]


KeyboardInterrupt: 

### Iterative Pseudo Labeling
A semi-supervised learning approach where a model generates predictions on unlabeled data, selects confident predictions as pseudo-labels, and retrains iteratively to improve performance.

In [None]:
iterative_pseudo_labeling_results = []

# Iterate through low-resource languages
high_resource_datasets = load_wikiann_datasets(BaseConfig.high_resource_langs, 10000)

for conf_quantile in [
    0.82,
    0.83,
    0.84,
    0.85,
    0.86,
    0.87,
    0.88,
    0.89,
    0.9,
    0.91,
    0.92,
    0.93,
    0.94,
    0.95,
    0.96,
    0.97,
    0.98,
    0.99,
]:
    PseudoLabelingConfig.CONFIDENCE_QUANTILE = conf_quantile
    for _ in range(3):

        # Iterate through low-resource and adjacent high-resource languages
        for (lang, low_resource_data), (_, high_resource_data) in tqdm(
            zip(low_resource_datasets.items(), high_resource_datasets.items()),
            ncols=80,
        ):

            train_loader, val_loader, test_loader = create_dataloaders(
                low_resource_data
            )
            unlabeled_data = high_resource_data["train"]

            # ------------------------------------------ TRAINING ------------------------------------------ #

            model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
            model.load_state_dict(torch.load(f"models/{lang}_baseline.pth"))
            optimizer = setup_optimizer(model, PseudoLabelingConfig)
            best_model_state, train_f1, val_f1 = train_pseudo_labeling(
                model,
                optimizer,
                train_loader,
                val_loader,
                unlabeled_data,
                PseudoLabelingConfig,
            )

            # ------------------------------------------ EVALUATION ------------------------------------------ #

            eval_model = BertBilstmCrf(BaseConfig.NUM_TAGS).to(BaseConfig.DEVICE)
            eval_model.load_state_dict(best_model_state)
            test_loss, test_f1 = evaluate_epoch(eval_model, test_loader)

            # ------------------------------------------ RESULTS ------------------------------------------ #
            torch.save(best_model_state, f"models/{lang}_iterative_pseudo_labeling.pth")

            baseline_performance = baseline.loc[
                baseline["language"] == lang, "test_f1"
            ].item()
            improvement = (test_f1 - baseline_performance) / baseline_performance * 100

            iterative_pseudo_labeling_results.append(
                {
                    "language": lang,
                    "train_f1": train_f1,
                    "val_f1": val_f1,
                    "test_f1": test_f1,
                    "improvement": improvement,
                    "confidence_quantile": conf_quantile,
                }
            )

            print(f"Language: {lang}    Improvement over baseline: {improvement:.5f}")

# Save results
iterative_pseudo_labeling = pd.DataFrame(iterative_pseudo_labeling_results)
iterative_pseudo_labeling.to_csv(
    "results/second_full_iterative_pseudo_labeling.csv", index=False
)

0it [00:00, ?it/s]

Added 467 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [08:42, 522.31s/it]

Language: mg    Improvement over baseline: 0.22936


2it [12:53, 363.06s/it]

Language: fo    Improvement over baseline: -2.09059


3it [16:56, 308.25s/it]

Language: co    Improvement over baseline: -1.91388
Added 104 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [26:06, 403.51s/it]

Language: hsb    Improvement over baseline: -2.31092


5it [30:04, 343.79s/it]

Language: bh    Improvement over baseline: -0.44053


6it [35:04, 350.80s/it]


Language: cv    Improvement over baseline: 4.17633


0it [00:00, ?it/s]

Added 365 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [08:42, 522.73s/it]

Language: mg    Improvement over baseline: 0.00000


2it [12:37, 353.17s/it]

Language: fo    Improvement over baseline: -1.56794


3it [15:04, 259.21s/it]

Language: co    Improvement over baseline: 0.47847
Added 113 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [21:32, 310.06s/it]

Language: hsb    Improvement over baseline: -1.26050


5it [23:57, 250.50s/it]

Language: bh    Improvement over baseline: 4.84581


6it [27:34, 275.75s/it]


Language: cv    Improvement over baseline: 5.33643


1it [04:06, 246.83s/it]

Language: mg    Improvement over baseline: 0.45872


2it [07:48, 231.96s/it]

Language: fo    Improvement over baseline: 0.69686
Added 11 rows of data


3it [12:56, 266.57s/it]

Language: co    Improvement over baseline: 8.13397
Added 1559 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [21:34, 365.88s/it]

Language: hsb    Improvement over baseline: -0.42017


5it [23:33, 276.93s/it]

Language: bh    Improvement over baseline: -1.98238
Added 594 rows of data


6it [28:50, 288.36s/it]


Language: cv    Improvement over baseline: 2.32019


0it [00:00, ?it/s]

Added 303 rows of data


1it [04:53, 293.86s/it]

Language: mg    Improvement over baseline: 0.45872
Added 751 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


2it [12:06, 375.49s/it]

Language: fo    Improvement over baseline: -1.39373


3it [15:02, 284.19s/it]

Language: co    Improvement over baseline: 0.00000
Added 197 rows of data
Added 0 rows of data
Added 0 rows of data


4it [22:17, 343.99s/it]

Language: hsb    Improvement over baseline: -2.52101
Added 151 rows of data
Added 0 rows of data


5it [26:58, 321.03s/it]

Language: bh    Improvement over baseline: 3.96476
Added 717 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


6it [34:34, 345.69s/it]


Language: cv    Improvement over baseline: 4.87239


1it [02:42, 162.19s/it]

Language: mg    Improvement over baseline: 0.00000
Added 689 rows of data
Added 0 rows of data
Added 0 rows of data


2it [08:46, 281.29s/it]

Language: fo    Improvement over baseline: -0.52265
Added 15 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [15:16, 330.76s/it]

Language: co    Improvement over baseline: 8.13397


4it [18:38, 280.06s/it]

Language: hsb    Improvement over baseline: -0.84034
Added 166 rows of data
Added 0 rows of data


5it [22:27, 261.60s/it]

Language: bh    Improvement over baseline: 4.62555
Added 464 rows of data
Added 0 rows of data
Added 0 rows of data


6it [28:23, 283.95s/it]


Language: cv    Improvement over baseline: 3.01624


0it [00:00, ?it/s]

Added 404 rows of data


1it [04:46, 286.18s/it]

Language: mg    Improvement over baseline: 0.45872
Added 824 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


2it [11:35, 358.54s/it]

Language: fo    Improvement over baseline: 1.74216


3it [13:59, 260.76s/it]

Language: co    Improvement over baseline: 5.26316
Added 70 rows of data
Added 0 rows of data


4it [19:19, 284.10s/it]

Language: hsb    Improvement over baseline: -2.52101


5it [21:49, 235.85s/it]

Language: bh    Improvement over baseline: 4.62555


6it [24:20, 243.44s/it]


Language: cv    Improvement over baseline: 1.85615


1it [04:00, 240.84s/it]

Language: mg    Improvement over baseline: 0.45872


2it [06:29, 186.46s/it]

Language: fo    Improvement over baseline: -5.05226
Added 7 rows of data
Added 0 rows of data
Added 0 rows of data


3it [12:46, 273.68s/it]

Language: co    Improvement over baseline: 8.13397
Added 74 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [20:37, 351.56s/it]

Language: hsb    Improvement over baseline: -1.05042
Added 107 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


5it [26:11, 345.28s/it]

Language: bh    Improvement over baseline: 9.25110


6it [30:05, 300.86s/it]


Language: cv    Improvement over baseline: 3.48028


0it [00:00, ?it/s]

Added 436 rows of data
Added 0 rows of data


1it [05:46, 346.42s/it]

Language: mg    Improvement over baseline: 0.00000
Added 211 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


2it [13:05, 401.08s/it]

Language: fo    Improvement over baseline: 0.87108
Added 13 rows of data
Added 0 rows of data
Added 8 rows of data


3it [19:34, 395.42s/it]

Language: co    Improvement over baseline: 3.82775


4it [24:35, 358.07s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [26:28, 269.95s/it]

Language: bh    Improvement over baseline: -4.62555
Added 370 rows of data


6it [31:43, 317.19s/it]


Language: cv    Improvement over baseline: 3.94432


1it [03:47, 227.73s/it]

Language: mg    Improvement over baseline: 0.45872
Added 662 rows of data
Added 0 rows of data


2it [09:39, 300.47s/it]

Language: fo    Improvement over baseline: 1.56794


3it [14:02, 283.56s/it]

Language: co    Improvement over baseline: 4.78469
Added 35 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [20:27, 323.76s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [23:40, 276.60s/it]

Language: bh    Improvement over baseline: 0.88106
Added 335 rows of data
Added 0 rows of data


6it [29:07, 291.17s/it]


Language: cv    Improvement over baseline: 0.92807


1it [04:02, 242.12s/it]

Language: mg    Improvement over baseline: -1.60550
Added 899 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


2it [11:28, 361.98s/it]

Language: fo    Improvement over baseline: 1.39373
Added 27 rows of data


3it [16:12, 326.72s/it]

Language: co    Improvement over baseline: 8.13397
Added 18 rows of data
Added 0 rows of data


4it [21:28, 322.21s/it]

Language: hsb    Improvement over baseline: -1.68067


5it [23:34, 251.64s/it]

Language: bh    Improvement over baseline: 1.32159
Added 382 rows of data
Added 0 rows of data


6it [28:49, 288.31s/it]


Language: cv    Improvement over baseline: 1.85615


1it [02:42, 162.67s/it]

Language: mg    Improvement over baseline: 0.68807
Added 247 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


2it [09:28, 305.43s/it]

Language: fo    Improvement over baseline: -1.39373


3it [13:40, 281.37s/it]

Language: co    Improvement over baseline: 3.82775
Added 302 rows of data
Added 0 rows of data


4it [19:01, 296.82s/it]

Language: hsb    Improvement over baseline: 1.26050


5it [22:04, 255.99s/it]

Language: bh    Improvement over baseline: 1.32159
Added 164 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


6it [28:51, 288.63s/it]


Language: cv    Improvement over baseline: 2.32019


1it [03:38, 218.58s/it]

Language: mg    Improvement over baseline: 0.00000


2it [06:40, 197.06s/it]

Language: fo    Improvement over baseline: -3.31010


3it [09:54, 195.78s/it]

Language: co    Improvement over baseline: 2.87081
Added 441 rows of data
Added 0 rows of data


4it [15:17, 245.80s/it]

Language: hsb    Improvement over baseline: -3.36134


5it [18:04, 217.55s/it]

Language: bh    Improvement over baseline: -3.30396
Added 135 rows of data
Added 0 rows of data
Added 0 rows of data


6it [23:51, 238.63s/it]


Language: cv    Improvement over baseline: 3.48028


0it [00:00, ?it/s]

Added 383 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [06:26, 386.47s/it]

Language: mg    Improvement over baseline: 0.45872


2it [09:18, 260.39s/it]

Language: fo    Improvement over baseline: -0.34843
Added 31 rows of data
Added 0 rows of data
Added 0 rows of data


3it [14:57, 296.45s/it]

Language: co    Improvement over baseline: 11.96172
Added 347 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [21:58, 345.35s/it]

Language: hsb    Improvement over baseline: -2.52101


5it [24:26, 274.16s/it]

Language: bh    Improvement over baseline: 1.98238


6it [28:18, 283.07s/it]


Language: cv    Improvement over baseline: 0.23202


0it [00:00, ?it/s]

Added 382 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [06:24, 384.27s/it]

Language: mg    Improvement over baseline: 0.45872


2it [10:18, 296.08s/it]

Language: fo    Improvement over baseline: -0.17422
Added 10 rows of data


3it [14:58, 288.82s/it]

Language: co    Improvement over baseline: 6.22010
Added 134 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [21:45, 335.49s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [24:14, 267.96s/it]

Language: bh    Improvement over baseline: 2.64317


6it [28:34, 285.82s/it]


Language: cv    Improvement over baseline: 3.48028


0it [00:00, ?it/s]

Added 388 rows of data
Added 0 rows of data


1it [05:02, 302.71s/it]

Language: mg    Improvement over baseline: 0.00000


2it [08:26, 244.31s/it]

Language: fo    Improvement over baseline: 1.21951
Added 14 rows of data


3it [13:03, 259.59s/it]

Language: co    Improvement over baseline: 11.00478
Added 544 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [20:10, 325.43s/it]

Language: hsb    Improvement over baseline: 0.21008
Added 199 rows of data


5it [23:41, 284.33s/it]

Language: bh    Improvement over baseline: 3.96476


6it [27:32, 275.50s/it]


Language: cv    Improvement over baseline: 8.81671


0it [00:00, ?it/s]

Added 207 rows of data


1it [04:31, 271.65s/it]

Language: mg    Improvement over baseline: 0.00000


2it [08:25, 249.17s/it]

Language: fo    Improvement over baseline: 0.00000
Added 6 rows of data


3it [13:08, 264.69s/it]

Language: co    Improvement over baseline: 8.13397


4it [16:34, 241.74s/it]

Language: hsb    Improvement over baseline: -3.36134
Added 426 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


5it [21:24, 259.03s/it]

Language: bh    Improvement over baseline: 9.47137


6it [24:20, 243.41s/it]


Language: cv    Improvement over baseline: 2.78422


0it [00:00, ?it/s]

Added 381 rows of data


1it [04:31, 271.41s/it]

Language: mg    Improvement over baseline: 0.45872
Added 251 rows of data
Added 0 rows of data


2it [09:47, 297.89s/it]

Language: fo    Improvement over baseline: 1.74216


3it [12:11, 227.52s/it]

Language: co    Improvement over baseline: 2.87081
Added 173 rows of data
Added 0 rows of data


4it [17:40, 267.68s/it]

Language: hsb    Improvement over baseline: 0.00000


5it [19:48, 217.09s/it]

Language: bh    Improvement over baseline: 3.08370
Added 613 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


6it [26:22, 263.80s/it]


Language: cv    Improvement over baseline: 6.72854


1it [03:38, 218.36s/it]

Language: mg    Improvement over baseline: 0.45872
Added 644 rows of data
Added 0 rows of data
Added 0 rows of data


2it [09:34, 299.19s/it]

Language: fo    Improvement over baseline: 1.56794


3it [11:56, 227.72s/it]

Language: co    Improvement over baseline: 1.43541
Added 27 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 15 rows of data


4it [18:43, 298.38s/it]

Language: hsb    Improvement over baseline: -1.89076


5it [20:49, 236.30s/it]

Language: bh    Improvement over baseline: 1.32159
Added 399 rows of data


6it [25:37, 256.33s/it]


Language: cv    Improvement over baseline: 3.24826


1it [03:29, 209.93s/it]

Language: mg    Improvement over baseline: -1.83486
Added 591 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


2it [10:37, 337.74s/it]

Language: fo    Improvement over baseline: -0.69686


3it [14:47, 297.84s/it]

Language: co    Improvement over baseline: 6.69856
Added 54 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [21:29, 339.01s/it]

Language: hsb    Improvement over baseline: 0.00000


5it [23:38, 263.08s/it]

Language: bh    Improvement over baseline: 0.88106


6it [26:30, 265.16s/it]


Language: cv    Improvement over baseline: 4.40835


1it [03:33, 213.79s/it]

Language: mg    Improvement over baseline: 1.37615


2it [06:02, 175.40s/it]

Language: fo    Improvement over baseline: -3.31010


3it [09:43, 196.11s/it]

Language: co    Improvement over baseline: 7.65550
Added 32 rows of data
Added 0 rows of data


4it [15:03, 245.10s/it]

Language: hsb    Improvement over baseline: -1.05042
Added 163 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


5it [20:00, 264.06s/it]

Language: bh    Improvement over baseline: 3.52423
Added 379 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


6it [26:57, 269.65s/it]


Language: cv    Improvement over baseline: 5.33643


0it [00:00, ?it/s]

Added 271 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [05:51, 351.39s/it]

Language: mg    Improvement over baseline: 0.22936


2it [09:38, 278.08s/it]

Language: fo    Improvement over baseline: -0.69686


3it [12:01, 216.41s/it]

Language: co    Improvement over baseline: 1.91388
Added 44 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [18:53, 293.93s/it]

Language: hsb    Improvement over baseline: 0.21008


5it [21:01, 233.92s/it]

Language: bh    Improvement over baseline: 2.42291


6it [24:46, 247.81s/it]


Language: cv    Improvement over baseline: 3.24826


0it [00:00, ?it/s]

Added 279 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [05:56, 356.86s/it]

Language: mg    Improvement over baseline: 0.00000


2it [09:13, 262.56s/it]

Language: fo    Improvement over baseline: 1.39373
Added 22 rows of data
Added 0 rows of data


3it [14:13, 279.88s/it]

Language: co    Improvement over baseline: 8.61244
Added 74 rows of data


4it [18:59, 282.13s/it]

Language: hsb    Improvement over baseline: 0.00000


5it [21:44, 239.96s/it]

Language: bh    Improvement over baseline: 2.42291
Added 89 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


6it [27:59, 279.88s/it]


Language: cv    Improvement over baseline: 2.08817


1it [02:43, 163.64s/it]

Language: mg    Improvement over baseline: 0.22936
Added 427 rows of data
Added 0 rows of data
Added 0 rows of data


2it [08:39, 276.60s/it]

Language: fo    Improvement over baseline: 1.04530
Added 5 rows of data
Added 0 rows of data
Added 6 rows of data


3it [14:10, 301.58s/it]

Language: co    Improvement over baseline: 5.74163
Added 103 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [21:00, 344.31s/it]

Language: hsb    Improvement over baseline: -2.31092


5it [23:11, 267.40s/it]

Language: bh    Improvement over baseline: -1.76211
Added 274 rows of data
Added 0 rows of data


6it [28:34, 285.79s/it]


Language: cv    Improvement over baseline: 3.01624


0it [00:00, ?it/s]

Added 43 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [05:48, 348.06s/it]

Language: mg    Improvement over baseline: -0.45872
Added 780 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


2it [12:26, 377.91s/it]

Language: fo    Improvement over baseline: 2.96167
Added 3 rows of data
Added 2 rows of data
Added 10 rows of data
Added 2 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [20:44, 432.36s/it]

Language: co    Improvement over baseline: 8.13397


4it [23:40, 331.25s/it]

Language: hsb    Improvement over baseline: -1.05042


5it [25:26, 249.95s/it]

Language: bh    Improvement over baseline: -0.66079
Added 92 rows of data
Added 0 rows of data
Added 0 rows of data


6it [31:14, 312.49s/it]


Language: cv    Improvement over baseline: 1.85615


0it [00:00, ?it/s]

Added 136 rows of data


1it [04:29, 269.62s/it]

Language: mg    Improvement over baseline: -0.45872


2it [08:19, 246.09s/it]

Language: fo    Improvement over baseline: -0.87108
Added 4 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 1 rows of data


3it [16:22, 354.59s/it]

Language: co    Improvement over baseline: 5.26316
Added 8 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


4it [24:09, 398.86s/it]

Language: hsb    Improvement over baseline: -1.26050
Added 10 rows of data
Added 24 rows of data
Added 3 rows of data
Added 0 rows of data


5it [28:39, 352.41s/it]

Language: bh    Improvement over baseline: 4.40529
Added 18 rows of data
Added 0 rows of data


6it [33:56, 339.42s/it]


Language: cv    Improvement over baseline: 4.40835


0it [00:00, ?it/s]

Added 190 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


1it [06:17, 377.53s/it]

Language: mg    Improvement over baseline: 0.45872


2it [10:05, 289.31s/it]

Language: fo    Improvement over baseline: 1.56794


3it [13:00, 237.44s/it]

Language: co    Improvement over baseline: 7.65550


4it [17:21, 246.68s/it]

Language: hsb    Improvement over baseline: -4.20168


5it [19:26, 202.66s/it]

Language: bh    Improvement over baseline: 0.88106


6it [23:17, 232.99s/it]


Language: cv    Improvement over baseline: 3.71230


1it [04:06, 246.71s/it]

Language: mg    Improvement over baseline: 0.45872


2it [07:26, 218.98s/it]

Language: fo    Improvement over baseline: -1.21951
Added 7 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


3it [13:24, 282.55s/it]

Language: co    Improvement over baseline: 10.52632
Added 3 rows of data


4it [18:12, 284.88s/it]

Language: hsb    Improvement over baseline: -2.31092
Added 30 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data
Added 0 rows of data


5it [23:06, 288.15s/it]

Language: bh    Improvement over baseline: 2.42291


6it [25:59, 259.95s/it]

Language: cv    Improvement over baseline: 2.55220





9:

class PseudoLabelingConfig(BaseConfig):
    EPOCHS                = 25
    PATIENCE              = 5
    BERT_LEARNING_RATE    = 0.00002
    LSTM_LEARNING_RATE    = 0.003
    CRF_LEARNING_RATE     = 0.00003

    CONFIDENCE_QUANTILE   = 0.95
    PSEUDO_DELAY          = 8
    ENTROPY_THRESHOLD     = 0.2

10:

class PseudoLabelingConfig(BaseConfig):
    EPOCHS                = 25
    PATIENCE              = 5
    BERT_LEARNING_RATE    = 0.00002
    LSTM_LEARNING_RATE    = 0.003
    CRF_LEARNING_RATE     = 0.00003

    CONFIDENCE_QUANTILE   = 0.90
    PSEUDO_DELAY          = 8
    ENTROPY_THRESHOLD     = 0.15

In [20]:
df = pd.read_csv("results/full_iterative_pseudo_labeling.csv")
df

Unnamed: 0,language,train_f1,val_f1,test_f1,improvement,confidence_quantile
0,mg,0.999543,0.943878,0.960352,0.000000,0.88
1,fo,0.992799,0.908273,0.912088,1.219512,0.88
2,co,0.998431,0.852308,0.883268,8.612440,0.88
3,hsb,0.963006,0.933468,0.838420,-1.890756,0.88
4,bh,0.995533,0.903052,0.826855,3.083700,0.88
...,...,...,...,...,...,...
175,fo,0.995790,0.917266,0.926217,2.787456,0.93
176,co,0.991126,0.870769,0.883268,8.612440,0.93
177,hsb,0.989406,0.951613,0.838420,-1.890756,0.93
178,bh,0.977077,0.881508,0.810954,1.101322,0.93


In [21]:
df[df["language"] == "hsb"]

Unnamed: 0,language,train_f1,val_f1,test_f1,improvement,confidence_quantile
3,hsb,0.963006,0.933468,0.83842,-1.890756,0.88
9,hsb,0.982419,0.939516,0.847397,-0.840336,0.88
15,hsb,0.982341,0.96371,0.856373,0.210084,0.88
21,hsb,0.989991,0.961694,0.825853,-3.361345,0.88
27,hsb,0.971733,0.933468,0.847397,-0.840336,0.88
33,hsb,0.985929,0.939516,0.867145,1.470588,0.89
39,hsb,0.940353,0.917339,0.813285,-4.831933,0.89
45,hsb,0.973022,0.959677,0.842011,-1.470588,0.89
51,hsb,0.99649,0.941532,0.859964,0.630252,0.89
57,hsb,0.989977,0.947581,0.83842,-1.890756,0.89
