In [4]:
# Needed in colab:
% pip install transformers[torch] datasets evaluate wandb

In [1]:
from main_script import main
import argparse

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Optional: log in to Weights & Biases for experiment tracking.
# If you do not use wandb, remove the --wandb_log flag from the run arguments.
# NOTE(review): the original note offered to hand out a personal API key;
# prefer that every user logs in with their own key instead of sharing one.
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mindooradventurer[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# Command-line interface for a single experiment run (copied from
# main_script.py). Each run string is parsed with this parser and the
# resulting namespace is passed to main().
parser = argparse.ArgumentParser(
    description="A simple script to quickly run different models and "
                "different (filtered) versions of the dataset with the "
                "exact same hyperparameters, etc."
)

# Required arguments. They deliberately carry no `default=`: argparse never
# falls back to a default for a required option, and the former default
# ("maximedb/sick_nl") was a dataset name rather than a list of columns.
parser.add_argument("--experiment", type=str, required=True,
                    help="Name of the experiment for saving.")
parser.add_argument("--model", type=str, required=True,
                    help="'🤗 transformers' model name "
                         "(e.g. 'GroNLP/bert-base-dutch-cased')")
parser.add_argument("--train_inp_cols", type=str, required=True,
                    help="Columns for train input (comma separate them!)")
parser.add_argument("--test_inp_cols", type=str, required=True,
                    help="Columns for test input (comma separate them!)")

# Optional switch: absent -> False, present -> True.
parser.add_argument("--wandb_log", action="store_true",
                    help="Log to wandb. Project name is experiment name.")

# Filtering arguments (both optional, both default to None):
parser.add_argument("--filter_cols", type=str,
                    help="The columns in the training dataset to apply a "
                    "threshold filter to (comma separate them!). "
                    "IMPORTANT! --filter_thv must contain equally many "
                    "comma separated threshold values!")
# The trailing semicolon keeps the notebook from echoing the _StoreAction repr
# of the last expression as cell output.
parser.add_argument("--filter_thv", type=str,
                    help="Threshold values corresponding to the columns "
                    "specified in --filter_cols (comma separate them!). "
                    "Will filter out everything below those values.");

_StoreAction(option_strings=['--filter_thv'], dest='filter_thv', nargs=None, const=None, default=None, type=<class 'str'>, choices=None, required=False, help='Threshold values corresponding to the colums specified in --filter_cols (comma separate them!). Will filter out everything below those values.', metavar=None)

In [4]:
# Arguments for different runs:
# TODO: change these to the ones you need to run!
#
# The command lines are generated from the small tables below instead of being
# spelled out one by one. That removes the copy-paste duplication and the risk
# of a missing comma silently gluing two adjacent string literals into a
# single, malformed command line.
EXPERIMENT = "ik-nlp-mt-quality-filter"
MODEL = "google-bert/bert-base-multilingual-cased"

# (--train_inp_cols, --test_inp_cols): every filter setting is run once per
# pair, in this order.
INPUT_COLUMN_PAIRS = [
    ("premise_nl,hypothesis_nl", "sentence_A,sentence_B"),
    ("premise_en,hypothesis_en", "sentence_A_original,sentence_B_original"),
]

DA_COLS = "da_premise,da_hypothesis"
MQM_COLS = "mqm_premise,mqm_hypothesis"
DA_MQM_COLS = DA_COLS + "," + MQM_COLS

# (label, --filter_cols, --filter_thv). A None filter means "no filtering".
# Uncomment a 10% line to include that setting in the sweep.
FILTER_SETTINGS = [
    ("100% of the dataset", None, None),
    ("50% da", DA_COLS, "0.2624,0.2624"),
    ("25% da", DA_COLS, "0.4861,0.4861"),
    # ("10% da", DA_COLS, "0.6651,0.6651"),
    ("50% mqm", MQM_COLS, "0.107,0.107"),
    ("25% mqm", MQM_COLS, "0.1183,0.1183"),
    # ("10% mqm", MQM_COLS, "0.127,0.127"),
    ("50% by pruning both da and mqm", DA_MQM_COLS,
     "0.201648,0.201648,0.0986,0.0986"),
    ("25% by pruning both da and mqm", DA_MQM_COLS,
     "0.3926,0.3926,0.111188,0.111188"),
    # ("10% by pruning both da and mqm", DA_MQM_COLS,
    #  "0.555,0.555,0.1219,0.1219"),
]


def build_run_command(train_cols, test_cols, filter_cols=None, filter_thv=None):
    """Return the command-line string for one experiment run.

    Args:
        train_cols: Comma-separated value for --train_inp_cols.
        test_cols: Comma-separated value for --test_inp_cols.
        filter_cols: Comma-separated value for --filter_cols, or None to run
            without filtering.
        filter_thv: Comma-separated value for --filter_thv; must list exactly
            as many values as `filter_cols` lists columns.

    Returns:
        A whitespace-separated argument string that always includes
        --wandb_log.

    Raises:
        ValueError: If only one of `filter_cols` / `filter_thv` is given, or
            if they contain a different number of comma-separated entries.
    """
    command = (
        f"--experiment {EXPERIMENT} --model {MODEL} "
        f"--train_inp_cols {train_cols} --test_inp_cols {test_cols} "
        "--wandb_log"
    )
    if filter_cols is None and filter_thv is None:
        return command
    if filter_cols is None or filter_thv is None:
        raise ValueError("filter_cols and filter_thv must be given together")
    if len(filter_cols.split(",")) != len(filter_thv.split(",")):
        raise ValueError(
            "filter_cols and filter_thv must contain equally many entries: "
            f"{filter_cols!r} vs {filter_thv!r}"
        )
    return command + f" --filter_cols {filter_cols} --filter_thv {filter_thv}"


# One command line per (filter setting, input column pair), grouped by filter
# setting and following the order of INPUT_COLUMN_PAIRS within each group.
run_args = [
    build_run_command(train_cols, test_cols, filter_cols, filter_thv)
    for _label, filter_cols, filter_thv in FILTER_SETTINGS
    for train_cols, test_cols in INPUT_COLUMN_PAIRS
]

# run_args = [
#     # 100% of the dataset:
#     # "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log"
#     # "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log",

#     # 50% da:
#     # "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols da_premise,da_hypothesis --filter_thv 0.2624,0.2624",
#     # "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols da_premise,da_hypothesis --filter_thv 0.2624,0.2624",

#     # 25% da:
#     # "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols da_premise,da_hypothesis --filter_thv 0.4861,0.4861",
#     # "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols da_premise,da_hypothesis --filter_thv 0.4861,0.4861",

#     # 10% da:
#     # "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols da_premise,da_hypothesis --filter_thv 0.6651,0.6651",
#     # "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols da_premise,da_hypothesis --filter_thv 0.6651,0.6651"

#     # 50% by pruning both da and mqm:
#     # "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols da_premise,da_hypothesis,mqm_premise,mqm_hypothesis --filter_thv 0.201648,0.201648,0.0986,0.0986",
#     # "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols da_premise,da_hypothesis,mqm_premise,mqm_hypothesis --filter_thv 0.201648,0.201648,0.0986,0.0986",

#     # 25% by pruning both da and mqm:
#     # "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols da_premise,da_hypothesis,mqm_premise,mqm_hypothesis --filter_thv 0.3926,0.3926,0.111188,0.111188",
#     # "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols da_premise,da_hypothesis,mqm_premise,mqm_hypothesis --filter_thv 0.3926,0.3926,0.111188,0.111188"

#     # 10% by pruning both da and mqm:
#     # "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols da_premise,da_hypothesis,mqm_premise,mqm_hypothesis --filter_thv 0.555,0.555,0.1219,0.1219"
#     # "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols da_premise,da_hypothesis,mqm_premise,mqm_hypothesis --filter_thv 0.555,0.555,0.1219,0.1219"
# ]   

# These runs were already done, except the 10% ones
# run_args = [
#     # 100% of the dataset:
#     "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log",
#     "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log",
    
#     # 50% mqm:
#     "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols mqm_premise,mqm_hypothesis --filter_thv 0.107,0.107",
#     "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols mqm_premise,mqm_hypothesis --filter_thv 0.107,0.107",
    
#     # 25% mqm:
#     "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols mqm_premise,mqm_hypothesis --filter_thv 0.1183,0.1183",
#     "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols mqm_premise,mqm_hypothesis --filter_thv 0.1183,0.1183",
    
#     # 10% mqm:
#     "--experiment ik-nlp-mt-quality-filter --model GroNLP/bert-base-dutch-cased --train_inp_cols premise_nl,hypothesis_nl --test_inp_cols sentence_A,sentence_B --wandb_log --filter_cols mqm_premise,mqm_hypothesis --filter_thv 0.127,0.127",
#     "--experiment ik-nlp-mt-quality-filter --model google-bert/bert-base-cased --train_inp_cols premise_en,hypothesis_en --test_inp_cols sentence_A_original,sentence_B_original --wandb_log --filter_cols mqm_premise,mqm_hypothesis --filter_thv 0.127,0.127",
# ]

In [None]:
# Launch the configured experiments back to back: each entry of run_args is a
# single command line that is split on whitespace, parsed, and handed to main().
for run in run_args:
    main(parser.parse_args(run.split()))

Running experiment with:
    model:           google-bert/bert-base-multilingual-cased
    train_cols:      premise_nl,hypothesis_nl
    test_cols:       sentence_A,sentence_B

Getting the training set.
Filtering the training dataset.
No filters specified. Using full dataset!
Tokenizing dataset for training.


Training the model! Using 🤗 defaults, and batch_size=32.


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss,Validation Loss,Accuracy
500,0.9078,0.764028,0.67405
1000,0.8083,0.725526,0.688376
1500,0.7725,0.76603,0.685328
2000,0.7434,0.675592,0.727698
2500,0.7107,0.650844,0.736944
