# Hyperparameter optimization

In [2]:
import pandas as pd
import numpy as np
import torch

# Ask PyTorch to hand back cached-but-unused GPU memory before anything else in
# this cell runs. NOTE(review): this presumably targets leftovers from earlier
# executions in the same kernel; it is harmless on a fresh kernel.
torch.cuda.empty_cache()

def read_file(fname: str, correct_labels=True) -> pd.DataFrame:
    """Read a tab-separated dataset file and return its texts and labels.

    The file is read without a header row and its columns are named
    ``text``, ``labels`` and ``role``. The ``role`` column is always dropped
    from the result.

    Args:
        fname: Path of the TSV file to read.
        correct_labels: When true, ``labels`` is binarised: rows whose label
            equals ``"Acceptable speech"`` become ``0`` and every other row
            becomes ``1``. When false, labels are returned exactly as read
            and a warning is emitted.

    Returns:
        A DataFrame with the columns ``text`` and ``labels``.
    """
    df = pd.read_table(fname, sep="\t", header=None, names=["text", "labels", "role"])
    if correct_labels:
        # Assign the whole column at once. Chained indexing such as
        # `df.labels[mask] = 1` triggers SettingWithCopyWarning and does not
        # modify `df` when pandas copy-on-write is active; it also leaves the
        # column with object dtype instead of integers.
        df["labels"] = (df["labels"] != "Acceptable speech").astype(int)
    else:
        import warnings
        warnings.warn("Labels not corrected!")
    return df.drop(columns=["role"])


def run_hyperparams_optimization(model_name, model_type, lang, count=50):
    """Run a Weights & Biases Bayesian sweep over fine-tuning hyperparameters.

    For each trial a fresh ``ClassificationModel`` is built from
    ``model_type`` / ``model_name``, trained on
    ``../data/merged-{lang}.train.tsv`` and evaluated on
    ``../data/merged-{lang}.test.tsv``. The sweep searches over
    ``num_train_epochs``, ``learning_rate`` and ``train_batch_size`` and
    maximises the metric named ``mcc``.

    Args:
        model_name: Model identifier passed to ``ClassificationModel``
            (e.g. ``"xlm-roberta-base"``); also used to build the wandb
            project name.
        model_type: simpletransformers model type (e.g. ``"xlmroberta"``).
        lang: Dataset language code; one of ``"sl"``, ``"hr"``, ``"en"``.
        count: Number of sweep trials the agent runs. Defaults to 50.

    Raises:
        AttributeError: If ``lang`` is not one of the supported codes.
    """
    import gc

    if lang not in {"sl", "hr", "en"}:
        # ValueError would be the natural type here; AttributeError is kept so
        # that existing callers catching it continue to work.
        raise AttributeError(f"Language {lang} is not valid")

    from simpletransformers.classification import ClassificationModel, ClassificationArgs
    import wandb

    torch.cuda.empty_cache()

    # NOTE(review): the sweep is scored on the *.test.tsv split, so the chosen
    # hyperparameters are tuned against test data. Confirm whether a separate
    # dev split should be used here instead.
    train_file = f"../data/merged-{lang}.train.tsv"
    eval_file = f"../data/merged-{lang}.test.tsv"

    model_args = ClassificationArgs()

    # Early stopping on MCC (higher is better).
    model_args.use_early_stopping = True
    model_args.early_stopping_delta = 0.01
    model_args.early_stopping_metric = "mcc"
    model_args.early_stopping_metric_minimize = False
    model_args.early_stopping_patience = 3

    # Evaluation schedule and reproducibility.
    model_args.evaluate_during_training = True
    model_args.evaluate_during_training_steps = 1000
    model_args.eval_batch_size = 8
    model_args.manual_seed = 4
    model_args.use_multiprocessing = True
    model_args.labels_list = [0, 1]
    model_args.wandb_project = "task3"

    # Nothing is persisted between trials: features are rebuilt each time and
    # no checkpoints are written.
    model_args.reprocess_input_data = True
    model_args.overwrite_output_dir = True
    model_args.no_save = True

    sweep_config = {
        "method": "bayes",  # grid, random
        "metric": {"name": "mcc", "goal": "maximize"},
        "parameters": {
            "num_train_epochs": {"max": 15, "min": 3},
            "learning_rate": {"min": 1e-6, "max": 1e-4},
            "train_batch_size": {"max": 100, "min": 5},
        },
    }

    # One wandb project per model; the same name is handed to the sweep and to
    # the agent so the agent does not depend on ambient wandb settings.
    sweep_project = "task3_" + model_name.replace("/", "_") + model_type
    sweep_id = wandb.sweep(sweep_config, project=sweep_project)

    train_df = read_file(train_file, correct_labels=True)
    eval_df = read_file(eval_file, correct_labels=True)

    def train():
        """Run a single sweep trial with the hyperparameters in wandb.config."""
        # Initialize a new wandb run
        wandb.init()

        # overwrite_output_dir / no_save are already carried by model_args, so
        # nothing further needs to be set on the model object itself.
        model = ClassificationModel(
            model_type,
            model_name,
            use_cuda=True,
            args=model_args,
            sweep_config=wandb.config,
        )

        try:
            model.train_model(train_df, eval_df=eval_df)
            model.eval_model(eval_df)
        finally:
            # Drop this trial's model and release cached GPU memory so the
            # next trial (possibly with a larger batch size) starts clean.
            del model
            gc.collect()
            torch.cuda.empty_cache()

        # Close the run; wandb.finish() replaces the deprecated wandb.join().
        wandb.finish()

    wandb.agent(sweep_id, function=train, project=sweep_project, count=count)


In [None]:
# Sweep target for this notebook run: English data, XLM-RoBERTa base checkpoint.
language = "en"
model_type = "xlmroberta"
model_name = "xlm-roberta-base"

run_hyperparams_optimization(
    model_name=model_name,
    model_type=model_type,
    lang=language,
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: ypeq6ds8
Sweep URL: https://wandb.ai/5roop/task3_xlm-roberta-basexlmroberta/sweeps/ypeq6ds8


[34m[1mwandb[0m: Agent Starting Run: mvqg2hej with config:
[34m[1mwandb[0m: 	learning_rate: 7.831626915734913e-05
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 79
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m5roop[0m (use `wandb login --relogin` to force relogin)


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=119.0, style=ProgressStyle(de…

  torch.nn.utils.clip_grad_norm_(





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 13', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 13', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 13', max=119.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 13', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 13', max=119.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 13', max=119.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 13', max=119.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98676050792…

0,1
Training loss,0.00462
lr,0.0
global_step,1547.0
_runtime,443.0
_timestamp,1631632748.0
_step,46.0
tp,613.0
tn,1173.0
fp,227.0
fn,289.0


0,1
Training loss,██▇▅▄▅▅▅▅▃▂▂▃▂▂▁▁▂▁▁▁▁▂▂▁▁▁▁▁▁
lr,▅███▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,█▁▆▃▃▁▃▅▅▄▆▅▄▄
tn,▁█▅▇▇█▇▆▆▇▅▆▇▆
fp,█▁▄▂▂▁▂▃▃▂▄▃▂▃
fn,▁█▃▆▆█▆▄▄▅▃▄▅▅


[34m[1mwandb[0m: Agent Starting Run: s78aio2w with config:
[34m[1mwandb[0m: 	learning_rate: 7.634763102811421e-05
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 62
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 15', max=151.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 15', max=151.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 15', max=151.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 15', max=151.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 15', max=151.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 15', max=151.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 14 of 15', max=151.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98471466098…

0,1
Training loss,0.00018
lr,0.0
global_step,2265.0
_runtime,551.0
_timestamp,1631633311.0
_step,64.0
tp,636.0
tn,1142.0
fp,258.0
fn,266.0


0,1
Training loss,██▇▆▆▆▅▆▄▅▃▄▃▃▃▃▃▂▁▂▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▅████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,█▁▆▃▃▅▄▃▅▅▅▅▄▄▄▅▅
tn,▁█▅█▇▆▇▇▆▆▆▆▇▇▇▆▆
fp,█▁▄▁▂▃▂▂▃▃▃▃▂▂▂▃▃
fn,▁█▃▆▆▄▅▆▄▄▄▄▅▅▅▄▄


[34m[1mwandb[0m: Agent Starting Run: hdzti2j9 with config:
[34m[1mwandb[0m: 	learning_rate: 4.4324890522344835e-05
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 73
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 13', max=128.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 13', max=128.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 13', max=128.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 13', max=128.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98813809383…

0,1
Training loss,0.0821
lr,0.0
global_step,1664.0
_runtime,454.0
_timestamp,1631633776.0
_step,49.0
tp,636.0
tn,1147.0
fp,253.0
fn,266.0


0,1
Training loss,██▆█▆▆▆▄▅▅▄▄▃▂▄▃▂▁▄▂▂▁▁▁▁▁▁▃▁▂▂▁▂
lr,▅███▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,██▅▁▄▁▇▄▅▂▅▄▃▄
tn,▁▁▅█▆▇▄▆▅▇▅▆▆▅
fp,██▄▁▃▂▅▃▄▂▄▃▃▄
fn,▁▁▄█▅█▂▅▄▇▄▅▆▅


[34m[1mwandb[0m: Agent Starting Run: c9ox7bgv with config:
[34m[1mwandb[0m: 	learning_rate: 1.9998729905462498e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 25
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=374.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=374.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=374.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=374.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=374.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=374.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=374.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=374.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=374.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=374.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=374.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=374.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=374.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 14', max=374.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98649648538…

0,1
Training loss,0.00077
lr,0.0
global_step,5000.0
_runtime,687.0
_timestamp,1631634475.0
_step,120.0
tp,604.0
tn,1164.0
fp,236.0
fn,298.0


0,1
Training loss,██▇▅▃█▆▄▄▂▃▇▃▄▃▂▄▂▁▄▃▁▁▂▁▁▁▁▅▁▁▁▁▃▁▁▁▁▂▁
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,█▁▅▇█▃▇▃▂▄▇▂▄▄▃▄▄▃
tn,▁█▅▂▃▇▃▇▇▆▄▇▆▆▇▆▆▇
fp,█▁▄▇▆▂▆▂▂▃▅▂▃▃▂▃▃▂
fn,▁█▄▂▁▆▂▆▇▅▂▇▅▅▆▅▅▆


[34m[1mwandb[0m: Agent Starting Run: km9g304l with config:
[34m[1mwandb[0m: 	learning_rate: 4.743363596039793e-05
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 95
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 13', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 13', max=99.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 13', max=99.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 13', max=99.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98886881600…

0,1
Training loss,0.06832
lr,0.0
global_step,1287.0
_runtime,431.0
_timestamp,1631634918.0
_step,41.0
tp,621.0
tn,1175.0
fp,225.0
fn,281.0


0,1
Training loss,█▇▆▅▅▄▅▃▂▂▂▂▂▂▂▂▂▁▃▁▁▁▁▁▂
lr,▅██▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
tp,▆▇█▁▃▂▇▃▆▅▅▅▄▄
tn,▂▃▁█▆█▄▇▅▆▆▆▆▆
fp,▇▆█▁▃▁▅▂▄▃▃▃▃▃
fn,▃▂▁█▆▇▂▆▃▄▄▄▅▅


[34m[1mwandb[0m: Agent Starting Run: aq1f9ak0 with config:
[34m[1mwandb[0m: 	learning_rate: 4.2532382925377725e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=94.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98868657304…

0,1
Training loss,0.07321
lr,0.0
global_step,940.0
_runtime,327.0
_timestamp,1631635256.0
_step,30.0
tp,599.0
tn,1175.0
fp,225.0
fn,303.0


0,1
Training loss,█▇▆▅▇▅▅▃▃▂▃▂▂▁▂▁▁▁
lr,▅██▇▇▆▆▅▅▄▄▄▃▃▂▂▁▁
global_step,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇██
_runtime,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
tp,█▁▅▁▂▄▃▄▃▃
tn,▁█▆█▇▇▇▆▇▇
fp,█▁▃▁▂▂▂▃▂▂
fn,▁█▄█▇▅▆▅▆▆


[34m[1mwandb[0m: Agent Starting Run: mhjjvzbj with config:
[34m[1mwandb[0m: 	learning_rate: 6.883214982532322e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	train_batch_size: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=8.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 8', max=125.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 8', max=125.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 8', max=125.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 8', max=125.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 8', max=125.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 8', max=125.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 8', max=125.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 8', max=125.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98891730605…

0,1
Training loss,0.01607
lr,0.0
global_step,1000.0
_runtime,290.0
_timestamp,1631635557.0
_step,31.0
tp,609.0
tn,1184.0
fp,216.0
fn,293.0


0,1
Training loss,█▆▇█▆▅▆▄▇▆▅▄▄▄▃▁▂▁▂▁
lr,▅██▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▁▁
global_step,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
_runtime,▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇█████
_timestamp,▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇█████
_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇███
tp,▆▁█▄▆▅▆▆▆
tn,▄█▁▇▄▅▄▅▅
fp,▅▁█▂▅▄▅▄▄
fn,▃█▁▅▃▄▃▃▃


[34m[1mwandb[0m: Agent Starting Run: 6rmwouo1 with config:
[34m[1mwandb[0m: 	learning_rate: 1.9789497644198483e-06
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 48
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 15', max=195.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 15', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 15', max=195.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 15', max=195.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 15', max=195.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 15', max=195.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 14 of 15', max=195.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.99006941528…

0,1
Training loss,0.58752
lr,0.0
global_step,2925.0
_runtime,590.0
_timestamp,1631636158.0
_step,77.0
tp,631.0
tn,1148.0
fp,252.0
fn,271.0


0,1
Training loss,██▆▆█▇▆▅█▅▄▅▆▂▅▄▆▅▄▄▃▆▄▄▄▄▃▂▅▁▇▃▄▂▅▄▃▃▄▆
lr,▅████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▅▆▇▇█▇██▇███████
tn,█▅▄▃▃▂▃▁▁▃▂▂▁▂▂▃▂
fp,▁▄▅▆▆▇▆██▆▇▇█▇▇▆▇
fn,█▄▃▂▂▁▂▁▁▂▁▁▁▁▁▁▁


[34m[1mwandb[0m: Agent Starting Run: dei5xdlv with config:
[34m[1mwandb[0m: 	learning_rate: 3.5898006758946255e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 68
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=138.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=138.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=138.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.46587
lr,0.0
global_step,414.0
_runtime,122.0
_timestamp,1631636293.0
_step,13.0
tp,660.0
tn,1154.0
fp,246.0
fn,242.0


0,1
Training loss,█▆▄▂▆▁▂▃
lr,▅█▇▆▅▃▂▁
global_step,▁▂▃▃▄▅▅▆▇██
_runtime,▁▂▃▃▄▄▅▅▆▇████
_timestamp,▁▂▃▃▄▄▅▅▆▇████
_step,▁▂▂▃▃▄▄▅▅▆▆▇▇█
tp,█▄▁
tn,▁▆█
fp,█▃▁
fn,▁▅█


[34m[1mwandb[0m: Agent Starting Run: lprl5eq1 with config:
[34m[1mwandb[0m: 	learning_rate: 9.677742308897944e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 22
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=425.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=425.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=425.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=425.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.82168614772…

0,1
Training loss,0.62055
lr,1e-05
global_step,4000.0
_runtime,515.0
_timestamp,1631636821.0
_step,95.0
tp,0.0
tn,1400.0
fp,0.0
fn,902.0


0,1
Training loss,▆▅█▆▂▂▅▆█▁▂▄▂▄█▂▅▆▇▆▄▇▁▃▆▅▇▆▅▂▄▇▆▅▅▆▇▂▆▃
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▁▁▁▁▁▁▁▁▁▁▁▁
tn,▁▁▁▁▁▁▁▁▁▁▁▁▁
fp,▁▁▁▁▁▁▁▁▁▁▁▁▁
fn,▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: Agent Starting Run: pp7i6v6u with config:
[34m[1mwandb[0m: 	learning_rate: 5.0437877383530234e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 98
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98982661004…

0,1
Training loss,0.40803
lr,1e-05
global_step,288.0
_runtime,114.0
_timestamp,1631636947.0
_step,10.0
tp,656.0
tn,1155.0
fp,245.0
fn,246.0


0,1
Training loss,█▆▇▂▁
lr,▄█▆▃▁
global_step,▁▂▂▄▅▅▇█
_runtime,▁▂▃▄▅▅▆▇███
_timestamp,▁▂▃▄▅▅▆▇███
_step,▁▂▂▃▄▅▅▆▇▇█
tp,█▆▁
tn,▁▄█
fp,█▅▁
fn,▁▃█


[34m[1mwandb[0m: Agent Starting Run: kjrjnqge with config:
[34m[1mwandb[0m: 	learning_rate: 6.407958942042518e-06
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 98
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.57498
lr,0.0
global_step,288.0
_runtime,114.0
_timestamp,1631637072.0
_step,10.0
tp,540.0
tn,1174.0
fp,226.0
fn,362.0


0,1
Training loss,▆█▆▁▁
lr,▄█▆▃▁
global_step,▁▂▂▄▅▅▇█
_runtime,▁▂▃▄▅▅▆▇███
_timestamp,▁▂▃▄▅▅▆▇███
_step,▁▂▂▃▄▅▅▆▇▇█
tp,▁▆█
tn,█▄▁
fp,▁▅█
fn,█▃▁


[34m[1mwandb[0m: Agent Starting Run: ym5sl6uk with config:
[34m[1mwandb[0m: 	learning_rate: 9.695278202270423e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 99
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=95.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=95.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=95.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.41213
lr,2e-05
global_step,285.0
_runtime,115.0
_timestamp,1631637198.0
_step,10.0
tp,679.0
tn,1135.0
fp,265.0
fn,223.0


0,1
Training loss,█▇▆▁▂
lr,▄█▆▃▁
global_step,▁▂▂▄▅▅▇█
_runtime,▁▂▃▄▅▅▆▇███
_timestamp,▁▂▃▄▅▅▆▇███
_step,▁▂▂▃▄▅▅▆▇▇█
tp,█▁▄
tn,▁█▇
fp,█▁▂
fn,▁█▅


[34m[1mwandb[0m: Agent Starting Run: oex6etrs with config:
[34m[1mwandb[0m: 	learning_rate: 7.403105630986895e-05
[34m[1mwandb[0m: 	num_train_epochs: 7
[34m[1mwandb[0m: 	train_batch_size: 98
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=7.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 7', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 7', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 7', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 7', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 7', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 7', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 7', max=96.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.08008
lr,0.0
global_step,672.0
_runtime,238.0
_timestamp,1631637446.0
_step,22.0
tp,619.0
tn,1175.0
fp,225.0
fn,283.0


0,1
Training loss,▇▆██▇▄▆▄▆▃▃▂▁
lr,▅█▇▇▆▅▅▄▄▃▂▂▁
global_step,▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██
_runtime,▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
_timestamp,▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
tp,▆▁█▂▅▅▅
tn,▂█▁█▆▆▆
fp,▇▁█▁▃▃▃
fn,▃█▁▇▄▄▄


[34m[1mwandb[0m: Agent Starting Run: k0bd3x9a with config:
[34m[1mwandb[0m: 	learning_rate: 8.084645324142018e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 84
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=112.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=112.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=112.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98953174540…

0,1
Training loss,0.36858
lr,1e-05
global_step,336.0
_runtime,118.0
_timestamp,1631637574.0
_step,11.0
tp,659.0
tn,1131.0
fp,269.0
fn,243.0


0,1
Training loss,█▆▃▆▁▁
lr,▄█▆▅▃▁
global_step,▁▂▃▃▅▅▆▇█
_runtime,▁▂▃▃▄▅▅▆▇███
_timestamp,▁▂▃▃▄▅▅▆▇███
_step,▁▂▂▃▄▄▅▅▆▇▇█
tp,█▅▁
tn,▁▆█
fp,█▃▁
fn,▁▄█


[34m[1mwandb[0m: Agent Starting Run: h7h3uzhu with config:
[34m[1mwandb[0m: 	learning_rate: 4.567392896324436e-06
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 26
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=360.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=360.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=360.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.99021021755…

0,1
Training loss,0.50093
lr,0.0
global_step,1080.0
_runtime,170.0
_timestamp,1631637755.0
_step,27.0
tp,625.0
tn,1138.0
fp,262.0
fn,277.0


0,1
Training loss,▇▆▇▅▅█▇█▅▇▅▅▄▄▃▃▅▂▃▁▄
lr,▅██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇▇██
_runtime,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇████
_timestamp,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇████
_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
tp,█▁▅▄
tn,▁███
fp,█▁▁▁
fn,▁█▅▅


[34m[1mwandb[0m: Agent Starting Run: 8ams3tue with config:
[34m[1mwandb[0m: 	learning_rate: 9.659378022083072e-05
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 99
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 15', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 15', max=95.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 15', max=95.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 15', max=95.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 15', max=95.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 14 of 15', max=95.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98630770812…

0,1
Training loss,0.00032
lr,0.0
global_step,1425.0
_runtime,489.0
_timestamp,1631638256.0
_step,46.0
tp,638.0
tn,1159.0
fp,241.0
fn,264.0


0,1
Training loss,█▇▇▆▇▄▅▄▅▃▅▂▃▃▂▂▁▂▁▁▁▂▁▂▁▁▂▁
lr,▅██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,█▇▇▆▁▂▃▄▅▃▂▄▃▄▄▄
tn,▁▂▄▄██▆▆▆▇▇▆▇▆▆▇
fp,█▇▅▅▁▁▃▃▃▂▂▃▂▃▃▂
fn,▁▂▂▃█▇▆▅▄▆▇▅▆▅▅▅


[34m[1mwandb[0m: Agent Starting Run: i291q287 with config:
[34m[1mwandb[0m: 	learning_rate: 5.413829305272383e-05
[34m[1mwandb[0m: 	num_train_epochs: 6
[34m[1mwandb[0m: 	train_batch_size: 83
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=6.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 6', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 6', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 6', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 6', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 6', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 6', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98960415314…

0,1
Training loss,0.09071
lr,0.0
global_step,678.0
_runtime,216.0
_timestamp,1631638483.0
_step,21.0
tp,612.0
tn,1176.0
fp,224.0
fn,290.0


0,1
Training loss,██▆▅▆▅▂▄▅▂▂▂▁
lr,▅█▇▇▆▅▅▄▄▃▂▂▁
global_step,▁▂▂▂▃▃▃▄▄▄▅▅▅▆▇▇▇██
_runtime,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
_timestamp,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
tp,█▆▆▁▆▄
tn,▁▄▄█▄▆
fp,█▅▅▁▅▃
fn,▁▃▃█▃▅


[34m[1mwandb[0m: Agent Starting Run: hoqzd5du with config:
[34m[1mwandb[0m: 	learning_rate: 2.275061382845685e-06
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 98
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 15', max=96.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 15', max=96.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 15', max=96.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 15', max=96.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 15', max=96.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 14 of 15', max=96.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98984327449…

0,1
Training loss,0.45298
lr,0.0
global_step,1440.0
_runtime,493.0
_timestamp,1631638988.0
_step,46.0
tp,624.0
tn,1155.0
fp,245.0
fn,278.0


0,1
Training loss,▇██▇▇▅▇▅▄▅▃▆▄▆▅▃▅▃▃▅▄▄▃▄▅▁▃▃
lr,▅██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▁▁▅▇▇▇▇▇████████
tn,██▅▂▁▂▂▂▁▃▁▂▁▂▂▂
fp,▁▁▄▇█▇▇▇█▆█▇█▇▇▇
fn,██▄▂▂▂▂▂▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: Agent Starting Run: p4xih84t with config:
[34m[1mwandb[0m: 	learning_rate: 6.13821176247279e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 92
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.34821
lr,0.0
global_step,306.0
_runtime,117.0
_timestamp,1631639116.0
_step,11.0
tp,660.0
tn,1157.0
fp,243.0
fn,242.0


0,1
Training loss,█▅▅▅▃▁
lr,▅█▆▄▃▁
global_step,▁▂▂▄▅▅▆██
_runtime,▁▂▂▃▄▅▆▇▇███
_timestamp,▁▂▂▃▄▅▆▇▇███
_step,▁▂▂▃▄▄▅▅▆▇▇█
tp,█▄▁
tn,▁▅█
fp,█▄▁
fn,▁▅█


[34m[1mwandb[0m: Agent Starting Run: oa0p23je with config:
[34m[1mwandb[0m: 	learning_rate: 4.9539853374673174e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 81
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=116.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=116.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=116.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98957980566…

0,1
Training loss,0.504
lr,1e-05
global_step,348.0
_runtime,119.0
_timestamp,1631639245.0
_step,11.0
tp,659.0
tn,1136.0
fp,264.0
fn,243.0


0,1
Training loss,█▂▄▃▁▃
lr,▄█▆▄▃▁
global_step,▁▂▃▃▅▅▆▇█
_runtime,▁▂▃▃▄▅▅▆▇███
_timestamp,▁▂▃▃▄▅▅▆▇███
_step,▁▂▂▃▄▄▅▅▆▇▇█
tp,█▁▃
tn,▁█▇
fp,█▁▂
fn,▁█▆


[34m[1mwandb[0m: Agent Starting Run: 06qsefwd with config:
[34m[1mwandb[0m: 	learning_rate: 5.913295807676334e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=94.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=94.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=94.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.31558
lr,1e-05
global_step,282.0
_runtime,115.0
_timestamp,1631639371.0
_step,10.0
tp,665.0
tn,1147.0
fp,253.0
fn,237.0


0,1
Training loss,█▆▅▁▁
lr,▄█▆▃▁
global_step,▁▂▃▄▅▆▇█
_runtime,▁▂▃▄▅▅▆▇███
_timestamp,▁▂▃▄▅▅▆▇███
_step,▁▂▂▃▄▅▅▆▇▇█
tp,█▁▁
tn,▁▇█
fp,█▂▁
fn,▁█▇


[34m[1mwandb[0m: Agent Starting Run: 8v5ypngp with config:
[34m[1mwandb[0m: 	learning_rate: 6.492744044745257e-05
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 91
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=103.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=103.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=103.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 15', max=103.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 15', max=103.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 15', max=103.0, style=ProgressStyle(de…

In [2]:
# Sweep target for this cell: the large XLM-RoBERTa checkpoint on the
# English ("en") data. The three names are kept as notebook-level globals.
model_name, model_type, language = "xlm-roberta-large", "xlmroberta", "en"

# Kick off the hyperparameter sweep; progress and per-run summaries are
# streamed to the cell output below.
run_hyperparams_optimization(model_name, model_type, language)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: ji6bzb1f
Sweep URL: https://wandb.ai/5roop/task3_xlm-roberta-largexlmroberta/sweeps/ji6bzb1f


[34m[1mwandb[0m: Agent Starting Run: 4cj84xsw with config:
[34m[1mwandb[0m: 	learning_rate: 7.519539080462987e-05
[34m[1mwandb[0m: 	num_train_epochs: 7
[34m[1mwandb[0m: 	train_batch_size: 89
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m5roop[0m (use `wandb login --relogin` to force relogin)


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=7.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 7', max=105.0, style=ProgressStyle(des…

  torch.nn.utils.clip_grad_norm_(





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 7', max=105.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 7', max=105.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 7', max=105.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 7', max=105.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 7', max=105.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 7', max=105.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.68305
lr,0.0
global_step,735.0
_runtime,577.0
_timestamp,1631718188.0
_step,23.0
tp,0.0
tn,1400.0
fp,0.0
fn,902.0


0,1
Training loss,▁▅▄▅▄▃▆▇▅▅█▅▆▅
lr,█▇▇▆▆▅▅▄▄▃▃▂▂▁
global_step,▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
_runtime,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
_timestamp,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
_step,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
tp,▁▁▁▁▁▁▁
tn,▁▁▁▁▁▁▁
fp,▁▁▁▁▁▁▁
fn,▁▁▁▁▁▁▁


[34m[1mwandb[0m: Agent Starting Run: m6kdgpt7 with config:
[34m[1mwandb[0m: 	learning_rate: 2.525535574536162e-06
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 21
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=445.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=445.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=445.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=445.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=445.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=445.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=445.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=445.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=445.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=445.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=445.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=445.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98991660348…

0,1
Training loss,0.16135
lr,0.0
global_step,5000.0
_runtime,1454.0
_timestamp,1631719653.0
_step,118.0
tp,668.0
tn,1177.0
fp,223.0
fn,234.0


0,1
Training loss,▇██▇▄▇▅▄▄▄▅▅▃▅▃▄▄▆▅▃▃▄▄▇▁▄▄▄▃▄▂▂▃▂▁▆▃▁▁▂
lr,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▁▃▄▄▆▄▇▆▅█▄▅▆▃▅
tn,██▇▅▇▃▇▃▄▇▁▇▆▅▇▆
fp,▁▁▂▄▂▆▂▆▅▂█▂▃▄▂▃
fn,██▆▅▅▃▅▂▃▄▁▅▄▃▆▄


[34m[1mwandb[0m: Agent Starting Run: w8tlo6iq with config:
[34m[1mwandb[0m: 	learning_rate: 8.91638487617816e-06
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 37
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=253.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=253.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=253.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=253.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=253.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=253.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 13', max=253.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 13', max=253.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 13', max=253.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 13', max=253.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 13', max=253.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 13', max=253.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 13', max=253.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98731319554…

0,1
Training loss,0.01485
lr,0.0
global_step,3289.0
_runtime,1316.0
_timestamp,1631720981.0
_step,83.0
tp,674.0
tn,1182.0
fp,218.0
fn,228.0


0,1
Training loss,▇█▇▇▄▇▅▇▅▃▃▄▄▃▄▄▂▃▃▂▄▂▁▄▁▂▁▃▁▂▁▁▁▂▃▂▁▂▁▁
lr,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▅▃▅█▄▃▆▄▂▃▁▅▁▅▄▄
tn,▁▅▅▁▆▆▃▃█▆▇▄█▃▄▅
fp,█▄▄█▃▃▆▆▁▃▂▅▁▆▅▄
fn,▄▆▄▁▅▆▃▅▇▆█▄█▄▅▅


[34m[1mwandb[0m: Agent Starting Run: fyfx0cc2 with config:
[34m[1mwandb[0m: 	learning_rate: 1.47949572942896e-06
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	train_batch_size: 9
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=8.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 8', max=1038.0, style=ProgressStyle(de…



HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 8', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 8', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 8', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 8', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 8', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 8', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 8', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98964886047…

0,1
Training loss,0.21238
lr,0.0
global_step,8304.0
_runtime,1645.0
_timestamp,1631722639.0
_step,184.0
tp,695.0
tn,1170.0
fp,230.0
fn,207.0


0,1
Training loss,▄▃▄▄▃▃▅▄▂▃▂▄▂▅▂▂▂▂▂▃▁▁▂▂▃▂▂▁▂▂▂▁▂▂█▂▂▃▂▂
lr,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▃▅▁█▄▅▅▇▆█▆▇▅▇▇▆
tn,▆▄█▁▆▆▆▄▅▂▆▅▆▄▅▅
fp,▃▅▁█▃▃▃▅▄▇▃▄▃▅▄▄
fn,▆▄█▁▅▄▄▂▃▁▃▂▄▂▂▃


[34m[1mwandb[0m: Agent Starting Run: fmmzu2tx with config:
[34m[1mwandb[0m: 	learning_rate: 3.888251632718864e-05
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 7
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=1335.0, style=ProgressStyle(d…



HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=1335.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=1335.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.81988788691…

0,1
Training loss,0.6371
lr,3e-05
global_step,4000.0
_runtime,756.0
_timestamp,1631723406.0
_step,88.0
tp,0.0
tn,1400.0
fp,0.0
fn,902.0


0,1
Training loss,▆▇█▇█▂▅▇▅▄▆▃▅▁▇▅▆▄▆▃▆▅▆▇▄▄▆▆▅▇▄▅▃▅▆▄▇▆▂▄
lr,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▁▁▁▁▁▁
tn,▁▁▁▁▁▁
fp,▁▁▁▁▁▁
fn,▁▁▁▁▁▁


[34m[1mwandb[0m: Agent Starting Run: xcss28fp with config:
[34m[1mwandb[0m: 	learning_rate: 3.036137125109726e-06
[34m[1mwandb[0m: 	num_train_epochs: 11
[34m[1mwandb[0m: 	train_batch_size: 18
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=11.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 11', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 11', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 11', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 11', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 11', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 11', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 11', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 11', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 11', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 11', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 11', max=519.0, style=ProgressStyle(d…

[34m[1mwandb[0m: Network error resolved after 0:01:27.774265, resuming normal operation.





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98982960461…

0,1
Training loss,0.57828
lr,0.0
global_step,5709.0
_runtime,1493.0
_timestamp,1631724911.0
_step,132.0
tp,670.0
tn,1174.0
fp,226.0
fn,232.0


0,1
Training loss,▇▇█▅▅▇▇▅▆█▆▆▇▄▇▃▄▃▃▂▄▂▆▃▄▂▄▃▂▄▃▁▄▃▅▁▂▁▁▇
lr,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▁▄▂▆▅▆▁▆█▅▁█▂▄▄▂
tn,▆▂▄▂▂▄▇▃▁▄█▁▆▄▄▆
fp,▃▇▅▇▇▅▂▆█▅▁█▃▅▅▃
fn,█▅▇▃▄▃█▃▁▄█▁▇▅▅▇


[34m[1mwandb[0m: Agent Starting Run: lif7b1bz with config:
[34m[1mwandb[0m: 	learning_rate: 2.6402902283873576e-06
[34m[1mwandb[0m: 	num_train_epochs: 9
[34m[1mwandb[0m: 	train_batch_size: 46
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=9.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 9', max=204.0, style=ProgressStyle(des…






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 9', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 9', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 9', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 9', max=204.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 9', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 9', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 9', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 9', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98937735798…

0,1
Training loss,0.38336
lr,0.0
global_step,1836.0
_runtime,864.0
_timestamp,1631725787.0
_step,48.0
tp,680.0
tn,1184.0
fp,216.0
fn,222.0


0,1
Training loss,█▇▆▇▅▄▄▆▃▇▃▄▅▄▃▂▄▄▃▂▃▅▃▂▃▁▁▂▂▁▄▁▂▁▃▃
lr,███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▆█▇▇▇▇▇▇▇
tn,█▅▁▃▄▃▃▃▄▃
fp,▁▄█▆▅▆▆▆▅▆
fn,█▃▁▂▂▂▂▂▂▂


[34m[1mwandb[0m: Agent Starting Run: 3fvtudm7 with config:
[34m[1mwandb[0m: 	learning_rate: 1.8362215463578205e-06
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 56
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 15', max=167.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 15', max=167.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 15', max=167.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 15', max=167.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 15', max=167.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 15', max=167.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 14 of 15', max=167.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98942986948…

0,1
Training loss,0.27732
lr,0.0
global_step,2505.0
_runtime,1341.0
_timestamp,1631727140.0
_step,69.0
tp,674.0
tn,1177.0
fp,223.0
fn,228.0


0,1
Training loss,▇▇█▆▆▄▆▅▆▃▄▄▂▂▄▂▂▃▂▃▃▃▄▄▃▁▂▂▅▂▅▃▃▃▄▃▁▂▃▁
lr,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▁▆█▇████▇████████
tn,█▅▁▂▁▁▁▂▃▃▃▂▂▂▂▂▂
fp,▁▄█▇███▇▆▆▆▇▇▇▇▇▇
fn,█▃▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: Agent Starting Run: c053jz35 with config:
[34m[1mwandb[0m: 	learning_rate: 5.010506273650324e-06
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 18
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=519.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=519.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=519.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.99094694912…

0,1
Training loss,0.63365
lr,0.0
global_step,1557.0
_runtime,426.0
_timestamp,1631727578.0
_step,37.0
tp,139.0
tn,1326.0
fp,74.0
fn,763.0


0,1
Training loss,▃▃▃▅▅▆▇▄▂▆▁█▆▆▃▁▅▇▃▅▃▆▄▃▃▄▄▃▁▅▃
lr,███▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇████
_runtime,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
_timestamp,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▁▁▁█
tn,███▁
fp,▁▁▁█
fn,███▁


[34m[1mwandb[0m: Agent Starting Run: 1he15a13 with config:
[34m[1mwandb[0m: 	learning_rate: 2.724148634070338e-06
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 57
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=12.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 12', max=164.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 12', max=164.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 12', max=164.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 12', max=164.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98964988831…

0,1
Training loss,0.28826
lr,0.0
global_step,1968.0
_runtime,1066.0
_timestamp,1631728657.0
_step,54.0
tp,678.0
tn,1183.0
fp,217.0
fn,224.0


0,1
Training loss,█▇█▇▆▅▆▄▄▆▅▅▆▇▄▄▄▃▄▅▃▂▃▂▂▂▄▃▃▅▂▃▃▄▅▁▂▃▃
lr,███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▁▅█▇▇▇▇▇▇▇▇▇▇
tn,█▆▁▄▄▃▄▄▅▅▄▄▄
fp,▁▃█▅▅▆▅▅▄▄▅▅▅
fn,█▄▁▂▂▂▂▂▂▂▂▂▂


[34m[1mwandb[0m: Agent Starting Run: t5wt5idu with config:
[34m[1mwandb[0m: 	learning_rate: 1.1564048441100714e-06
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 36
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=260.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=260.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=260.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=260.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=260.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 14', max=260.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=283.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=283.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=283.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=283.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=283.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=283.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=283.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=283.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98946163561…

0,1
Training loss,0.60316
lr,0.0
global_step,2830.0
_runtime,1066.0
_timestamp,1631731182.0
_step,70.0
tp,676.0
tn,1171.0
fp,229.0
fn,226.0


0,1
Training loss,██▇▇▇█▅▃▆▅▅▅▅▆▅▄▂▄▄▄▃▇▅▃▂▅▃▁▃▃▃▃▄▃▆▃▂▃▃▇
lr,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▆█▇▇▇▇▇█▇▇▇
tn,█▇▁▄▄▅▃▄▂▄▄▄
fp,▁▂█▅▅▄▆▅▇▅▅▅
fn,█▃▁▂▂▂▂▂▁▂▂▂


[34m[1mwandb[0m: Agent Starting Run: hctu39zp with config:
[34m[1mwandb[0m: 	learning_rate: 5.984024881652892e-06
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 92
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=102.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=102.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=102.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 15', max=102.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 15', max=102.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 15', max=102.0, style=ProgressStyle(de…

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
# Sweep target: the `distilroberta-base` checkpoint (simpletransformers model
# type "roberta") on the "en" split, i.e. ../data/merged-en.{train,test}.tsv.
language = "en"
model_type = "roberta"
model_name = "distilroberta-base"

# Start the W&B hyperparameter sweep defined at the top of the notebook.
run_hyperparams_optimization(
    model_name=model_name,
    model_type=model_type,
    lang=language,
)

Create sweep with ID: o4vv4mee
Sweep URL: https://wandb.ai/5roop/task3_distilroberta-baseroberta/sweeps/o4vv4mee


[34m[1mwandb[0m: Agent Starting Run: u1j44lke with config:
[34m[1mwandb[0m: 	learning_rate: 6.76031027740937e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	train_batch_size: 46
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=480.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=331070498.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355863.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=8.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 8', max=204.0, style=ProgressStyle(des…





VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run u1j44lke errored: RuntimeError('CUDA out of memory. Tried to allocate 36.00 MiB (GPU 0; 31.75 GiB total capacity; 30.28 GiB already allocated; 20.75 MiB free; 30.53 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run u1j44lke errored: RuntimeError('CUDA out of memory. Tried to allocate 36.00 MiB (GPU 0; 31.75 GiB total capacity; 30.28 GiB already allocated; 20.75 MiB free; 30.53 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: p7v8nz3y with config:
[34m[1mwandb[0m: 	learning_rate: 2.716734918698293e-05
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 79
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run p7v8nz3y errored: RuntimeError('CUDA out of memory. Tried to allocate 148.00 MiB (GPU 0; 31.75 GiB total capacity; 30.28 GiB already allocated; 20.75 MiB free; 30.53 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run p7v8nz3y errored: RuntimeError('CUDA out of memory. Tried to allocate 148.00 MiB (GPU 0; 31.75 GiB total capacity; 30.28 GiB already allocated; 20.75 MiB free; 30.53 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 3e7bx8m4 with config:
[34m[1mwandb[0m: 	learning_rate: 2.491796858311881e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 13
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 3e7bx8m4 errored: RuntimeError('CUDA out of memory. Tried to allocate 148.00 MiB (GPU 0; 31.75 GiB total capacity; 30.28 GiB already allocated; 20.75 MiB free; 30.53 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 3e7bx8m4 errored: RuntimeError('CUDA out of memory. Tried to allocate 148.00 MiB (GPU 0; 31.75 GiB total capacity; 30.28 GiB already allocated; 20.75 MiB free; 30.53 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: esxz3a8g with config:
[34m[1mwandb[0m: 	learning_rate: 6.598315567791847e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 36
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=260.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=260.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=260.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=260.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=260.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=260.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 14', max=260.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98562466391…

0,1
Training loss,0.02745
lr,0.0
global_step,3640.0
_runtime,453.0
_timestamp,1631700006.0
_step,91.0
tp,589.0
tn,1207.0
fp,193.0
fn,313.0


0,1
Training loss,█▆▅▅▅▆▃▂▃▆▂▁▁▂▁▁▃▁▁▁▁▁▂▁▂▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▅███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▇█▆▆█▆▄▅▄▄▅▆▆▆▅▅
tn,█▃▂▄▃▁▃▆▅▅▅▅▄▃▄▅▅
fp,▁▆▇▅▆█▆▃▄▄▄▄▅▆▅▄▄
fn,█▂▁▃▃▁▃▅▄▅▅▄▃▃▃▄▄


[34m[1mwandb[0m: Agent Starting Run: ypwu9bel with config:
[34m[1mwandb[0m: 	learning_rate: 7.72642547673056e-05
[34m[1mwandb[0m: 	num_train_epochs: 11
[34m[1mwandb[0m: 	train_batch_size: 97
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=11.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 11', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 11', max=97.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98765979570…

0,1
Training loss,0.00036
lr,0.0
global_step,1067.0
_runtime,292.0
_timestamp,1631700309.0
_step,35.0
tp,620.0
tn,1205.0
fp,195.0
fn,282.0


0,1
Training loss,█▅▆▄▅▂▂▂▂▁▂▂▁▁▂▁▁▂▁▁▁
lr,▅██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
tp,██▇▃▄▅▄▅▄▁▅▄
tn,▁▃▅▇▆▆▆▅▆█▆▇
fp,█▆▄▂▃▃▃▄▃▁▃▂
fn,▁▁▂▆▅▄▅▄▅█▄▅


[34m[1mwandb[0m: Agent Starting Run: age7vwl9 with config:
[34m[1mwandb[0m: 	learning_rate: 7.500667819799288e-05
[34m[1mwandb[0m: 	num_train_epochs: 11
[34m[1mwandb[0m: 	train_batch_size: 99
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=11.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 11', max=95.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 11', max=95.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.00174
lr,0.0
global_step,1045.0
_runtime,292.0
_timestamp,1631700613.0
_step,34.0
tp,635.0
tn,1160.0
fp,240.0
fn,267.0


0,1
Training loss,█▇▇▄▅▄▄▁▃▂▂▁▁▁▁▂▁▁▁▁
lr,▄██▇▇▆▆▆▅▅▄▄▄▃▃▃▂▂▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
tp,▁█▆▅▅▄▄▅▇▅▅▆
tn,█▁▄▄▄▅▆▄▃▃▄▃
fp,▁█▅▅▄▄▃▅▆▆▅▆
fn,█▁▃▄▄▅▅▄▂▄▄▃


[34m[1mwandb[0m: Agent Starting Run: 197rg7hf with config:
[34m[1mwandb[0m: 	learning_rate: 9.048027506921707e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=125.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=125.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98800561629…

0,1
Training loss,0.02327
lr,0.0
global_step,1250.0
_runtime,278.0
_timestamp,1631700903.0
_step,38.0
tp,605.0
tn,1200.0
fp,200.0
fn,297.0


0,1
Training loss,█▇▇▅▆▅▆▃▃▃▂▂▁▂▂▂▂▁▁▂▁▁▁▁▁
lr,▅██▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
_runtime,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▄▅█▂▄▅▅▅▄▄
tn,█▆▅▁█▇▆▆▆▆▆
fp,▁▃▄█▁▂▃▃▃▃▃
fn,█▅▄▁▇▅▄▄▄▅▅


[34m[1mwandb[0m: Agent Starting Run: khe6nwai with config:
[34m[1mwandb[0m: 	learning_rate: 9.411512011912206e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	train_batch_size: 66
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=8.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 8', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 8', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 8', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 8', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 8', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 8', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 8', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 8', max=142.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98659776629…

0,1
Training loss,0.01142
lr,0.0
global_step,1136.0
_runtime,238.0
_timestamp,1631701157.0
_step,33.0
tp,612.0
tn,1184.0
fp,216.0
fn,290.0


0,1
Training loss,█▇▆▅█▄▄▄▁▃▃▂▃▂▂▁▂▁▁▁▁▁
lr,▄██▇▇▇▆▆▆▅▅▄▄▄▃▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇██
_runtime,▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇████
_timestamp,▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇████
_step,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
tp,▅█▅▁▆▅▅▄▄
tn,▅▁▆█▄▆▆▆▆
fp,▄█▃▁▅▃▃▃▃
fn,▄▁▅█▃▄▄▅▅


[34m[1mwandb[0m: Agent Starting Run: 251p8ylm with config:
[34m[1mwandb[0m: 	learning_rate: 7.648050485855302e-05
[34m[1mwandb[0m: 	num_train_epochs: 7
[34m[1mwandb[0m: 	train_batch_size: 54
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=7.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 7', max=173.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 7', max=173.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 7', max=173.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 7', max=173.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 7', max=173.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 7', max=173.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 7', max=173.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98889599199…

0,1
Training loss,0.01101
lr,0.0
global_step,1211.0
_runtime,220.0
_timestamp,1631701389.0
_step,34.0
tp,648.0
tn,1172.0
fp,228.0
fn,254.0


0,1
Training loss,▇██▅▅▇▃▃▅▄▄▃▁▁▂▂▂▁▂▁▁▂▁▁
lr,▅██▇▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
tp,▂█▆▆▁▃▂▄
tn,█▁▄▄█▇▇▇
fp,▁█▅▅▁▂▂▂
fn,▇▁▃▃█▆▇▅


[34m[1mwandb[0m: Agent Starting Run: zjgppatm with config:
[34m[1mwandb[0m: 	learning_rate: 5.723551193478569e-06
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 44
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=12.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 12', max=213.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 12', max=213.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 12', max=213.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 12', max=213.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 12', max=213.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 12', max=213.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 12', max=213.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 12', max=213.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 12', max=213.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 12', max=213.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 12', max=213.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 12', max=213.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98962811415…

0,1
Training loss,0.24657
lr,0.0
global_step,2556.0
_runtime,369.0
_timestamp,1631701770.0
_step,67.0
tp,638.0
tn,1193.0
fp,207.0
fn,264.0


0,1
Training loss,██▇▇▄█▆▄▃▆▄▃▃▃▃▃▂▆▃▂▃▅▄▂▃▃▃▂▂▄▂▂▃▄▁▃▂▁▃▃
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▃▅█▆▇█▇▃▅▆▇▅▆
tn,█▇▆▁▅▄▂▄▇▆▄▃▅▅
fp,▁▂▃█▄▅▇▅▂▃▅▆▄▄
fn,█▆▄▁▃▂▁▂▆▄▃▂▄▃


[34m[1mwandb[0m: Agent Starting Run: 909pqfcl with config:
[34m[1mwandb[0m: 	learning_rate: 1.804124912660686e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	train_batch_size: 11
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=849.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=849.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=849.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=849.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98998628257…

0,1
Training loss,0.78083
lr,0.0
global_step,3396.0
_runtime,246.0
_timestamp,1631702029.0
_step,76.0
tp,690.0
tn,1166.0
fp,234.0
fn,212.0


0,1
Training loss,▅▄▄▂▄▄▄▅▅▃▅▂▃▄▂▃▆▂▅▅▂▁▁▃▃▂▁▇▁▁▂█▅▄▃▁▃▂▁▆
lr,▅███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇██
_timestamp,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇██
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▇█▁▄▂▄▇
tn,▂▁▇▆█▆▄
fp,▇█▂▃▁▃▅
fn,▂▁█▅▇▅▂


[34m[1mwandb[0m: Agent Starting Run: wl78rzpf with config:
[34m[1mwandb[0m: 	learning_rate: 5.8408095590610094e-05
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 97
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 13', max=97.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 13', max=97.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 13', max=97.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 13', max=97.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98763761608…

0,1
Training loss,0.00047
lr,0.0
global_step,1261.0
_runtime,340.0
_timestamp,1631702381.0
_step,41.0
tp,652.0
tn,1170.0
fp,230.0
fn,250.0


0,1
Training loss,█▅▅▄▅▂▃▁▂▂▂▂▁▁▂▁▁▁▁▁▁▁▂▁▁
lr,▅██▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
tp,█▂▅▃▆▅▂▃▁▃▃▃▃▄
tn,▁█▅▆▄▅▇▆█▇▇▇▇▆
fp,█▁▄▃▅▄▂▃▁▂▂▂▂▃
fn,▁▇▄▆▃▄▇▆█▆▆▆▆▅


[34m[1mwandb[0m: Agent Starting Run: h1u2f8h9 with config:
[34m[1mwandb[0m: 	learning_rate: 1.7343032344688447e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 53
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98956270197…

0,1
Training loss,0.25141
lr,0.0
global_step,531.0
_runtime,108.0
_timestamp,1631702501.0
_step,15.0
tp,638.0
tn,1191.0
fp,209.0
fn,264.0


0,1
Training loss,█▅▇▄▄▂▃▂▃▁
lr,▄█▇▆▅▅▄▃▂▁
global_step,▁▂▂▃▃▄▅▅▅▆▇██
_runtime,▁▁▂▃▃▃▄▄▅▅▆▆▇███
_timestamp,▁▁▂▃▃▃▄▄▅▅▆▆▇███
_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
tp,▁▇█
tn,█▂▁
fp,▁▇█
fn,█▂▁


[34m[1mwandb[0m: Agent Starting Run: w85b5bfp with config:
[34m[1mwandb[0m: 	learning_rate: 1.0717265314448978e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 48
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=195.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=195.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98983985496…

0,1
Training loss,0.17195
lr,0.0
global_step,1950.0
_runtime,304.0
_timestamp,1631702816.0
_step,52.0
tp,635.0
tn,1176.0
fp,224.0
fn,267.0


0,1
Training loss,█▇▆▅▅▅▄▅▄▄▄▄▅▃▅▄▃▃▂▃▅▂▄▃▁▁▂▃▄▂▃▃▁▃▂▂▃▂▂
lr,▅███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▇▃▇▄▅█▆▆▄▅
tn,█▃█▂▆▄▁▄▃▅▄
fp,▁▆▁▇▃▅█▅▆▄▅
fn,█▂▆▂▅▄▁▃▃▅▄


[34m[1mwandb[0m: Agent Starting Run: 34xx4b2z with config:
[34m[1mwandb[0m: 	learning_rate: 2.477634349238758e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	train_batch_size: 46
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=204.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98945576435…

0,1
Training loss,0.21178
lr,0.0
global_step,816.0
_runtime,138.0
_timestamp,1631702967.0
_step,22.0
tp,661.0
tn,1184.0
fp,216.0
fn,241.0


0,1
Training loss,█▇▆▅▄▅▆▅▇▃▆▄▁▃▁▂
lr,▅█▇▇▇▆▅▅▄▄▃▃▂▂▁▁
global_step,▁▁▂▂▂▃▃▄▄▄▅▅▆▆▆▆▇▇██
_runtime,▁▁▁▂▂▃▃▃▃▄▄▄▅▅▆▆▆▆▇▇███
_timestamp,▁▁▁▂▂▃▃▃▃▄▄▄▅▅▆▆▆▆▇▇███
_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
tp,▁█▃█
tn,▆▁█▃
fp,▃█▁▆
fn,█▁▆▁


[34m[1mwandb[0m: Agent Starting Run: d1n3yqm2 with config:
[34m[1mwandb[0m: 	learning_rate: 6.847099883557179e-05
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 41
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=12.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 12', max=228.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 12', max=228.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 12', max=228.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 12', max=228.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 12', max=228.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 12', max=228.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 12', max=228.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 12', max=228.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 12', max=228.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 12', max=228.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 12', max=228.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 12', max=228.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,9e-05
lr,0.0
global_step,2736.0
_runtime,378.0
_timestamp,1631703356.0
_step,70.0
tp,624.0
tn,1167.0
fp,233.0
fn,278.0


0,1
Training loss,█▇█▆▆▆▆▄▅▃▅▂▄▂▁▂▁▄▂▃▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁█▃▆▅▇█▃▆▅▇▆▅▆
tn,█▂▆▂▄▁▁▆▃▄▃▂▄▄
fp,▁▇▃▇▅██▃▆▅▆▇▅▅
fn,█▁▆▃▄▂▁▆▃▄▂▃▄▃


[34m[1mwandb[0m: Agent Starting Run: 439y6c0d with config:
[34m[1mwandb[0m: 	learning_rate: 5.1123179352624107e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 21
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=445.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=445.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=445.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.33349
lr,0.0
global_step,1335.0
_runtime,140.0
_timestamp,1631703509.0
_step,32.0
tp,662.0
tn,1182.0
fp,218.0
fn,240.0


0,1
Training loss,▆█▆▆▄▆▄▃▅▃▃▃▄▃▄▅▃▃▁▃▄▁▂▃▂▄
lr,▄██▇▇▇▇▆▆▆▅▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁
global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇███
_timestamp,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇███
_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███
tp,▁█▆▆
tn,█▁▄▄
fp,▁█▅▅
fn,█▁▃▃


[34m[1mwandb[0m: Agent Starting Run: lk8xpeti with config:
[34m[1mwandb[0m: 	learning_rate: 7.65989775502069e-05
[34m[1mwandb[0m: 	num_train_epochs: 6
[34m[1mwandb[0m: 	train_batch_size: 93
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=6.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 6', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 6', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 6', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 6', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 6', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 6', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98936761088…

0,1
Training loss,0.02414
lr,0.0
global_step,606.0
_runtime,172.0
_timestamp,1631703694.0
_step,20.0
tp,619.0
tn,1186.0
fp,214.0
fn,283.0


0,1
Training loss,█▅▅▅▃▃▃▂▁▂▁▁
lr,▅█▇▇▆▅▄▄▃▂▂▁
global_step,▁▂▂▂▃▃▄▄▄▅▅▅▆▇▇▇██
_runtime,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
_timestamp,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
_step,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇▇██
tp,▄▃▃█▁▁
tn,▅▇█▁██
fp,▄▂▁█▁▁
fn,▅▆▆▁██


[34m[1mwandb[0m: Agent Starting Run: 3ht5ywjd with config:
[34m[1mwandb[0m: 	learning_rate: 2.6779059231054434e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=187.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=187.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=187.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=187.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=187.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 14', max=187.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.00027
lr,0.0
global_step,2618.0
_runtime,414.0
_timestamp,1631704119.0
_step,70.0
tp,618.0
tn,1195.0
fp,205.0
fn,284.0


0,1
Training loss,██▇▆▅▇▃▃▅▂▃▂▂▃▂▃▁▁▂▂▁▂▃▂▁▁▁▁▂▁▁▃▁▂▁▁▁▁▁▁
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▆█▃▅▅▁▅█▅▆▅▆▄▆▂▄
tn,▃▁█▅▆█▄▁▅▃▄▄▅▄▇▆
fp,▆█▁▄▃▁▅█▄▆▅▅▄▅▂▃
fn,▃▁▆▄▄█▄▁▄▃▄▃▅▃▇▅


[34m[1mwandb[0m: Agent Starting Run: 1w53u6yv with config:
[34m[1mwandb[0m: 	learning_rate: 8.266969867390682e-05
[34m[1mwandb[0m: 	num_train_epochs: 9
[34m[1mwandb[0m: 	train_batch_size: 66
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=9.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 9', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 9', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 9', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 9', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 9', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 9', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 9', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 9', max=142.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 9', max=142.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98745058668…

0,1
Training loss,0.00026
lr,0.0
global_step,1278.0
_runtime,271.0
_timestamp,1631704403.0
_step,37.0
tp,613.0
tn,1192.0
fp,208.0
fn,289.0


0,1
Training loss,██▆▅▇▄▃▄▂▃▃▃▂▂▁▁▁▁▁▁▁▂▁▂▁
lr,▄██▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇███
_runtime,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▄█▇▃█▄▁▃▄▃
tn,▅▂▄█▁▆█▇▇▇
fp,▄▇▅▁█▃▁▂▂▂
fn,▅▁▂▆▁▅█▆▅▆


[34m[1mwandb[0m: Agent Starting Run: ri1dfv7i with config:
[34m[1mwandb[0m: 	learning_rate: 3.9278745597322515e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	train_batch_size: 68
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=138.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=138.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=138.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=138.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98984551955…

0,1
Training loss,0.14453
lr,0.0
global_step,552.0
_runtime,173.0
_timestamp,1631704588.0
_step,17.0
tp,649.0
tn,1188.0
fp,212.0
fn,253.0


0,1
Training loss,█▆▄▅▇▄▄▄▃▃▁
lr,▅█▇▆▆▅▄▃▃▂▁
global_step,▁▂▂▂▃▄▄▄▅▆▆▇▇██
_runtime,▁▁▂▂▂▂▃▃▄▄▅▆▆▇▇███
_timestamp,▁▁▂▂▂▂▃▃▄▄▅▆▆▇▇███
_step,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
tp,▃▃▁█
tn,▁██▃
fp,█▁▁▆
fn,▆▆█▁


[34m[1mwandb[0m: Agent Starting Run: b8zdlejv with config:
[34m[1mwandb[0m: 	learning_rate: 7.238604711051839e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	train_batch_size: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=187.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=187.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=187.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=187.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.14695
lr,1e-05
global_step,748.0
_runtime,137.0
_timestamp,1631704737.0
_step,20.0
tp,643.0
tn,1173.0
fp,227.0
fn,259.0


0,1
Training loss,▆██▅▅▄▆▂▁▁▃▁▂▂
lr,▄█▇▇▆▆▅▅▄▃▃▂▂▁
global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▆▇▇██
_runtime,▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▆▇███
_timestamp,▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▆▇███
_step,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇▇██
tp,█▇▁▄
tn,▁▃█▆
fp,█▆▁▃
fn,▁▂█▅


[34m[1mwandb[0m: Agent Starting Run: gxefuni9 with config:
[34m[1mwandb[0m: 	learning_rate: 7.983529852791669e-05
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 79
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=12.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 12', max=119.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 12', max=119.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 12', max=119.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 12', max=119.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98619906387…

0,1
Training loss,0.00025
lr,0.0
global_step,1428.0
_runtime,554.0
_timestamp,1631705303.0
_step,43.0
tp,636.0
tn,1181.0
fp,219.0
fn,266.0


0,1
Training loss,▇█▄▇▅▄▆▂▂▂▂▂▄▃▂▁▁▂▁▁▁▁▁▁▁▁▁▁
lr,▅██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
_runtime,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▇█▂▂▁▃▅▅▂▃▃▄▄
tn,▂▁▇▇█▇▅▄▇▇▇▆▆
fp,▇█▂▂▁▂▄▅▂▂▂▃▃
fn,▂▁▇▇█▆▄▄▇▆▆▅▅


[34m[1mwandb[0m: Agent Starting Run: j4tpbgu0 with config:
[34m[1mwandb[0m: 	learning_rate: 7.716014830070566e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 95
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=99.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=99.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=99.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=99.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 14', max=99.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98796061465…

0,1
Training loss,0.06613
lr,0.0
global_step,1386.0
_runtime,373.0
_timestamp,1631705689.0
_step,44.0
tp,618.0
tn,1186.0
fp,214.0
fn,284.0


0,1
Training loss,█▆▇▅▄▃▄▂▃▂▂▁▂▁▂▁▁▁▁▂▁▁▁▁▁▁▂
lr,▄██▇▇▇▇▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,█▇▁▄▃▄▆▂▅█▂▁▆▄▄
tn,▁▄█▄▅▅▄▆▅▃▆▇▄▅▅
fp,█▅▁▅▄▄▅▃▄▆▃▂▅▄▄
fn,▁▂█▅▆▅▃▇▄▁▇█▃▅▅


[34m[1mwandb[0m: Agent Starting Run: 970kgsn1 with config:
[34m[1mwandb[0m: 	learning_rate: 1.9007844390291922e-06
[34m[1mwandb[0m: 	num_train_epochs: 11
[34m[1mwandb[0m: 	train_batch_size: 13
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=11.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 11', max=719.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 11', max=719.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 11', max=719.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 11', max=719.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 11', max=719.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 11', max=719.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 11', max=719.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 11', max=719.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 11', max=719.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 11', max=719.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98958171759…

0,1
Training loss,0.65175
lr,0.0
global_step,7000.0
_runtime,504.0
_timestamp,1631706207.0
_step,158.0
tp,629.0
tn,1218.0
fp,182.0
fn,273.0


0,1
Training loss,▇█▇▅▄▃▃▆▃▄▅▅▇▄▂▄▃▃▆▆▅▅▃▂▂▅▃▇▂▇▂█▄▁▃▃▇▁▁▇
lr,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▅▅▅▂▄▄██▇▇▂▇▇▅▅
tn,▅▁▃▄▇▅▆▁▁▃▂█▃▂▆▆
fp,▄█▆▅▂▄▃██▆▇▁▆▇▃▃
fn,█▄▄▄▇▅▅▁▁▂▂▇▂▂▅▄


[34m[1mwandb[0m: Agent Starting Run: lsjka7bs with config:
[34m[1mwandb[0m: 	learning_rate: 5.549823770663299e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 64
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=146.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=146.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=146.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=146.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=146.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 14', max=146.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98716224868…

0,1
Training loss,0.00014
lr,0.0
global_step,2044.0
_runtime,418.0
_timestamp,1631706638.0
_step,58.0
tp,597.0
tn,1190.0
fp,210.0
fn,305.0


0,1
Training loss,█▇▅▅▅▃▄▅▄▂▃▃▄▂▂▁▁▂▂▂▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
lr,▄███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▃█▇▆▄▄▅▅▄▁▆▄▄▄▄▄
tn,▇▁▃▃▅▆▄▄▅█▃▅▅▅▅▅
fp,▂█▆▆▄▃▅▅▄▁▆▄▄▄▄▄
fn,▆▁▂▃▅▅▄▄▅█▃▅▅▅▅▅


[34m[1mwandb[0m: Agent Starting Run: mzsyuq5r with config:
[34m[1mwandb[0m: 	learning_rate: 1.694080541245898e-06
[34m[1mwandb[0m: 	num_train_epochs: 9
[34m[1mwandb[0m: 	train_batch_size: 92
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=9.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 9', max=102.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.44597
lr,0.0
global_step,918.0
_runtime,243.0
_timestamp,1631706894.0
_step,29.0
tp,604.0
tn,1187.0
fp,213.0
fn,298.0


0,1
Training loss,█▇▇▅▅▄▂▂▁▄▂▃▁▃▁▁▂▂
lr,▅██▇▇▆▆▅▅▅▄▄▃▃▂▂▁▁
global_step,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
_runtime,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇████
_timestamp,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇████
_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
tp,▁▁▇▇█▇█▇█
tn,██▃▃▂▃▁▂▂
fp,▁▁▆▆▇▆█▇▇
fn,██▂▂▁▂▁▂▁


[34m[1mwandb[0m: Agent Starting Run: kdn3upoc with config:
[34m[1mwandb[0m: 	learning_rate: 5.63723648440059e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	train_batch_size: 65
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=144.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=144.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=144.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=144.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98986673371…

0,1
Training loss,0.13769
lr,0.0
global_step,576.0
_runtime,133.0
_timestamp,1631707039.0
_step,17.0
tp,622.0
tn,1217.0
fp,183.0
fn,280.0


0,1
Training loss,█▇▆▅▂▃▄▃▁▂▁
lr,▄█▇▆▆▅▄▃▃▂▁
global_step,▁▂▂▂▃▄▄▄▅▆▆▆▇██
_runtime,▁▁▂▂▃▃▄▄▄▅▆▆▆▆▇███
_timestamp,▁▁▂▂▃▃▄▄▄▅▆▆▆▆▇███
_step,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
tp,█▁▃▆
tn,▁██▆
fp,█▁▁▃
fn,▁█▆▃


[34m[1mwandb[0m: Agent Starting Run: 9itmiw97 with config:
[34m[1mwandb[0m: 	learning_rate: 6.993979659278217e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 89
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=105.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=105.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=105.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=105.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=105.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 14', max=105.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98812696156…

0,1
Training loss,0.00017
lr,0.0
global_step,1470.0
_runtime,372.0
_timestamp,1631707422.0
_step,46.0
tp,628.0
tn,1173.0
fp,227.0
fn,274.0


0,1
Training loss,████▅▄▅▃▁▂▁▁▂▁▂▁▁▁▂▁▁▁▂▁▁▁▁▁▁
lr,▅██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▁▂▅▇▅▂▅▇▂▇█▄▃▄▄
tn,▆█▅▁▄▅▄▂▇▃▁▅▆▄▄
fp,▃▁▄█▅▄▅▇▂▆█▄▃▅▅
fn,█▇▄▂▄▇▄▂▇▂▁▅▆▅▅


[34m[1mwandb[0m: Agent Starting Run: 0h4xa0cd with config:
[34m[1mwandb[0m: 	learning_rate: 8.705505954435653e-05
[34m[1mwandb[0m: 	num_train_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 18
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=12.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 12', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 12', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 12', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 12', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 12', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 12', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 12', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 12', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 12', max=519.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 12', max=519.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 12', max=519.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 12', max=519.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.03MB of 0.03MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.00032
lr,0.0
global_step,6228.0
_runtime,504.0
_timestamp,1631707940.0
_step,144.0
tp,639.0
tn,1152.0
fp,248.0
fn,263.0


0,1
Training loss,█▆▇▅▃▃▇▃▄▃▇▁▂▆▁▁▃▁▇▂▂▁▁▁▁▃▃▁▄▁▁▁▁▁▁▁▁▁▁▁
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,█▂▁▁▅▂▅▄▁▄▄▃▃▅▄▃▄▄
tn,▁▇██▅▇▅▆▇▆▆▇▇▅▆▇▆▆
fp,█▂▁▁▄▂▄▃▂▃▃▂▂▄▃▂▃▃
fn,▁▇██▅▇▄▅█▅▅▆▆▄▅▆▅▅


[34m[1mwandb[0m: Agent Starting Run: noj2gdzy with config:
[34m[1mwandb[0m: 	learning_rate: 1.0795820754680292e-05
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 39
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=240.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=240.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=240.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=240.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=240.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=240.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 13', max=240.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 13', max=240.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 13', max=240.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 13', max=240.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 13', max=240.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 13', max=240.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 13', max=240.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.01594
lr,0.0
global_step,3120.0
_runtime,466.0
_timestamp,1631708418.0
_step,80.0
tp,626.0
tn,1187.0
fp,213.0
fn,276.0


0,1
Training loss,█▇▆▆▅▄▄▅▅▅▅▂▂▃▃▃▃▃▃▃▁▂▃▃▂▃▁▂▃▄▁▂▃▂▂▂▃▃▁▁
lr,▅███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▇▅▅▅█▅▆▃▄▂▂▄▅▆▄
tn,█▄▇▅█▁▆▃▇▆▇▇▅▄▂▄
fp,▁▅▂▄▁█▃▆▂▃▂▂▄▅▇▅
fn,█▂▄▄▄▁▄▃▇▅▇▇▅▄▃▅


[34m[1mwandb[0m: Agent Starting Run: jv236wm0 with config:
[34m[1mwandb[0m: 	learning_rate: 6.74131685503647e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 22
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=425.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=425.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=425.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=425.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=425.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98437193999…

0,1
Training loss,0.32968
lr,2e-05
global_step,4000.0
_runtime,366.0
_timestamp,1631708796.0
_step,95.0
tp,678.0
tn,1118.0
fp,282.0
fn,224.0


0,1
Training loss,█▅▇▇▅▆▅▅▄▃▇▂▄▂▁▄▂▃▄▁▁▂▁▁▂▁▁▅▁▁▁▁▁▁▁▁▁▃▄▅
lr,▃███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,█▁▅▄▅▃▆▁▇▇▂▅█
tn,▂█▅▅▃▆▄▆▂▁▆▄▁
fp,▇▁▄▄▆▃▅▃▇█▃▅█
fn,▁█▄▅▄▆▃█▂▂▇▄▁


[34m[1mwandb[0m: Agent Starting Run: d5dyt0ot with config:
[34m[1mwandb[0m: 	learning_rate: 4.913009109160786e-05
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: 	train_batch_size: 61
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=15.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 15', max=154.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 15', max=154.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 15', max=154.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 15', max=154.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 15', max=154.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 15', max=154.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 14 of 15', max=154.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98709781209…

0,1
Training loss,6e-05
lr,0.0
global_step,2310.0
_runtime,434.0
_timestamp,1631709243.0
_step,65.0
tp,628.0
tn,1183.0
fp,217.0
fn,274.0


0,1
Training loss,█▆▇▅▄▆▅▄▃▃▄▃▄▂▂▁▂▁▃▁▁▁▂▁▁▁▁▂▂▂▁▁▂▁▁▁▁▁▁▁
lr,▄████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▄█▅▅▄▆▁▄▃▄▆▄▆▄▄▃▄
tn,▆▁▅▅▇▄█▆▆▆▄▅▄▆▆▆▆
fp,▃█▄▄▂▅▁▃▃▃▅▄▅▃▃▃▃
fn,▅▁▄▄▅▃█▅▆▅▃▅▃▅▅▆▅


[34m[1mwandb[0m: Agent Starting Run: ge4wooq3 with config:
[34m[1mwandb[0m: 	learning_rate: 1.319173891021718e-05
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 73
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 13', max=128.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 13', max=128.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 13', max=128.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 13', max=128.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 13', max=128.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.99025659646…

0,1
Training loss,0.08209
lr,0.0
global_step,1664.0
_runtime,369.0
_timestamp,1631709624.0
_step,49.0
tp,642.0
tn,1171.0
fp,229.0
fn,260.0


0,1
Training loss,█▇▅▆▆▅▅▅▄▃▃▄▃▃▄▄▄▂▂▂▁▁▂▂▃▁▁▂▃▁▁▁▁
lr,▅███▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▃▅▃█▇▇▆▄▄▁█▇▆▆
tn,▆▅█▂▄▃▅▅▅▇▁▂▃▄
fp,▃▄▁▇▅▆▄▄▄▂█▇▆▅
fn,▆▄▆▁▂▂▃▅▄█▁▂▃▃


[34m[1mwandb[0m: Agent Starting Run: rgvojp05 with config:
[34m[1mwandb[0m: 	learning_rate: 4.036101637716679e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 43
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=218.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=218.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=218.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=218.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=218.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=218.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=218.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=218.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=218.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=218.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=218.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=218.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=218.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 14', max=218.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98585958819…

0,1
Training loss,8e-05
lr,0.0
global_step,3052.0
_runtime,440.0
_timestamp,1631710076.0
_step,80.0
tp,628.0
tn,1200.0
fp,200.0
fn,274.0


0,1
Training loss,█▇▆▆▅▆▄▄▃▂▃▂▃▃▁▂▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▄█▂▅▄▃▂▄▁▁▁▃▂▂▃▃▃
tn,▇▁█▆▇▇▇▇███▇▇▇▇▇▇
fp,▂█▁▃▂▂▂▂▁▁▁▂▂▂▂▂▂
fn,▅▁▇▄▅▆▇▅███▆▇▇▆▆▆


[34m[1mwandb[0m: Agent Starting Run: 51hqzclt with config:
[34m[1mwandb[0m: 	learning_rate: 8.752439010549867e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 93
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=101.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98963790446…

0,1
Training loss,0.01801
lr,0.0
global_step,505.0
_runtime,151.0
_timestamp,1631710240.0
_step,17.0
tp,620.0
tn,1194.0
fp,206.0
fn,282.0


0,1
Training loss,█▆▅▅▄▃▂▂▂▁
lr,▅█▇▆▅▄▄▃▂▁
global_step,▁▂▂▃▃▃▄▅▅▆▆▆▇██
_runtime,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇███
_timestamp,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇███
_step,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
tp,▁▅▁█▁
tn,▆▅█▁█
fp,▃▄▁█▁
fn,█▄█▁█


[34m[1mwandb[0m: Agent Starting Run: 5eyy10hu with config:
[34m[1mwandb[0m: 	learning_rate: 5.2885931236027756e-05
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 35
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=267.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=267.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=267.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=267.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=267.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=267.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 13', max=267.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 13', max=267.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 13', max=267.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 13', max=267.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 13', max=267.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 13', max=267.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 13', max=267.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98539920919…

0,1
Training loss,8e-05
lr,0.0
global_step,3471.0
_runtime,433.0
_timestamp,1631710685.0
_step,87.0
tp,622.0
tn,1187.0
fp,213.0
fn,280.0


0,1
Training loss,█▆▆▅▆▆▆▃▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▃▁▁▂▃▄▁▁▁▁▁▁▁▁▁▁
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁▇▇▄█▅▆▃▃▂▇▅▅▃▃▄
tn,█▃▃▆▁▅▄▆▆▇▃▅▅▇▆▅
fp,▁▆▆▃█▄▅▃▃▂▆▄▄▂▃▄
fn,█▂▂▅▁▄▃▆▆▇▂▄▄▆▆▅


[34m[1mwandb[0m: Agent Starting Run: v6syhpoc with config:
[34m[1mwandb[0m: 	learning_rate: 6.960479748942418e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	train_batch_size: 86
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=109.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=109.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=109.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=109.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.99012522892…

0,1
Training loss,0.13506
lr,1e-05
global_step,436.0
_runtime,130.0
_timestamp,1631710827.0
_step,14.0
tp,651.0
tn,1171.0
fp,229.0
fn,251.0


0,1
Training loss,█▇▆▇▃▃▁▁
lr,▄█▇▆▄▃▂▁
global_step,▁▂▂▃▄▄▅▆▆▆▇█
_runtime,▁▁▂▃▃▄▄▅▆▆▆▇███
_timestamp,▁▁▂▃▃▄▄▅▆▆▆▇███
_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
tp,▁▇▅█
tn,█▄▆▁
fp,▁▅▃█
fn,█▂▄▁


[34m[1mwandb[0m: Agent Starting Run: bggq46di with config:
[34m[1mwandb[0m: 	learning_rate: 4.135516295781849e-05
[34m[1mwandb[0m: 	num_train_epochs: 11
[34m[1mwandb[0m: 	train_batch_size: 83
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=11.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 11', max=113.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 11', max=113.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 11', max=113.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98824654310…

0,1
Training loss,0.00052
lr,0.0
global_step,1243.0
_runtime,304.0
_timestamp,1631711142.0
_step,38.0
tp,628.0
tn,1202.0
fp,198.0
fn,274.0


0,1
Training loss,█▆▅▅▄▃▂▃▃▁▂▂▂▁▂▁▁▁▂▁▁▁▁▁
lr,▄██▇▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
_runtime,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,█▆▇▁▄▂▃▂▅▃▂▃
tn,▁▅▅█▆▇▆▇▆▇██
fp,█▄▄▁▃▂▃▂▃▂▁▁
fn,▁▃▂█▅▇▆▇▄▆▇▆


[34m[1mwandb[0m: Agent Starting Run: 4so9zoqk with config:
[34m[1mwandb[0m: 	learning_rate: 8.815047548907498e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	train_batch_size: 29
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=323.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=323.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=323.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=323.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98930502757…

0,1
Training loss,0.00995
lr,0.0
global_step,1292.0
_runtime,166.0
_timestamp,1631711322.0
_step,32.0
tp,617.0
tn,1194.0
fp,206.0
fn,285.0


0,1
Training loss,▇▅▆▇▇▆▄▇▅█▄▇▅▃▃▃▅▄▃▁▂▄▁▁▁
lr,▄██▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇██
_runtime,▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇███
_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███
tp,▁▅█▄▃
tn,█▇▁▄▇
fp,▁▂█▅▂
fn,█▄▁▅▆


[34m[1mwandb[0m: Agent Starting Run: wb75rh22 with config:
[34m[1mwandb[0m: 	learning_rate: 5.803195455200624e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	train_batch_size: 36
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=8.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 8', max=260.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 8', max=260.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 8', max=260.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 8', max=260.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 8', max=260.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 8', max=260.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 8', max=260.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 8', max=260.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.19098
lr,0.0
global_step,2080.0
_runtime,325.0
_timestamp,1631711660.0
_step,53.0
tp,607.0
tn,1206.0
fp,194.0
fn,295.0


0,1
Training loss,█▆▇▅▇▅▆▄▇▅▂▂▂▃▃▃▄▁▂▃▁▂▂▃▂▁▁▂▁▃▁▁▁▁▁▁▁▁▁▃
lr,▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▁█▇▆█▇▅▅▅▅
tn,█▃▄▄▁▄▅▆▅▅
fp,▁▆▅▅█▅▄▃▄▄
fn,█▁▂▃▁▂▄▄▄▄


[34m[1mwandb[0m: Agent Starting Run: yu5j8s1r with config:
[34m[1mwandb[0m: 	learning_rate: 1.0082084305396556e-05
[34m[1mwandb[0m: 	num_train_epochs: 13
[34m[1mwandb[0m: 	train_batch_size: 9
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=13.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 13', max=1038.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 13', max=1038.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 13', max=1038.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 13', max=1038.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 13', max=1038.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 13', max=1038.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98904761904…

0,1
Training loss,0.00553
lr,1e-05
global_step,6000.0
_runtime,519.0
_timestamp,1631712191.0
_step,133.0
tp,587.0
tn,1224.0
fp,176.0
fn,315.0


0,1
Training loss,▆▄▄▄▅▄▄▅▅▃▃▄▂▃▃▃▂▅▃▇▃▆▇▁▇▃▃▁▁▅██▂▁▁▁▁▅▁▁
lr,▁████▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▇▇▅█▁▂▃▃▃▇▃
tn,▁▁▆▁██▇▆▆▁▇
fp,██▃█▁▁▂▃▃█▂
fn,▂▂▄▁█▇▆▆▆▂▆


[34m[1mwandb[0m: Agent Starting Run: fwvm3nel with config:
[34m[1mwandb[0m: 	learning_rate: 2.0559506028991944e-05
[34m[1mwandb[0m: 	num_train_epochs: 7
[34m[1mwandb[0m: 	train_batch_size: 9
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=7.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 7', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 7', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 7', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 7', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 7', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 7', max=1038.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98663741523…

0,1
Training loss,0.00026
lr,0.0
global_step,6000.0
_runtime,624.0
_timestamp,1631712828.0
_step,133.0
tp,590.0
tn,1231.0
fp,169.0
fn,312.0


0,1
Training loss,▅▃▄▃▄▄▄▅▅▂▃▅▁▂▂▄▁▄▁▅▂▆▄█▁▁▅▂▁▁▁▂▁▁▁▁▁▁▁▁
lr,▄███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
_timestamp,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▇█▆█▁▃▅▂▃▅▄
tn,▁▁▅▂█▇▅▇▆▅▆
fp,██▄▇▁▂▄▂▃▄▃
fn,▂▁▃▁█▆▄▇▆▄▅


[34m[1mwandb[0m: Agent Starting Run: gpynirez with config:
[34m[1mwandb[0m: 	learning_rate: 9.580575859321413e-06
[34m[1mwandb[0m: 	num_train_epochs: 9
[34m[1mwandb[0m: 	train_batch_size: 60
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=9.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 9', max=156.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 9', max=156.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 9', max=156.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 9', max=156.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 9', max=156.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 9', max=156.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 9', max=156.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 9', max=156.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 9', max=156.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.99015136574…

0,1
Training loss,0.1678
lr,0.0
global_step,1404.0
_runtime,471.0
_timestamp,1631713312.0
_step,40.0
tp,637.0
tn,1192.0
fp,208.0
fn,265.0


0,1
Training loss,█▆▆▅▄▆▆▄▃▃▂▅▄▁▃▃▂▃▂▂▁▃▂▁▁▂▁▁
lr,▅██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
global_step,▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
tp,▂▁▃█▃▆▆▆▅▅
tn,▇█▇▁█▄▃▃▆▅
fp,▂▁▂█▁▅▆▆▃▄
fn,▇█▆▁▆▃▃▃▄▄


[34m[1mwandb[0m: Agent Starting Run: iepkqn76 with config:
[34m[1mwandb[0m: 	learning_rate: 3.269522076733203e-05
[34m[1mwandb[0m: 	num_train_epochs: 7
[34m[1mwandb[0m: 	train_batch_size: 53
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=7.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 7', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 7', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 7', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 7', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 7', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 7', max=177.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 7', max=177.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98955868202…

0,1
Training loss,0.00683
lr,0.0
global_step,1239.0
_runtime,340.0
_timestamp,1631713664.0
_step,34.0
tp,630.0
tn,1178.0
fp,222.0
fn,272.0


0,1
Training loss,█▇▇▅▅▄▆▃▄▃▃▄▃▃▂▂▃▁▁▁▁▁▁▁
lr,▄██▇▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇██
_runtime,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
tp,▇█▅▅▂▁▆▂
tn,▁▃▇▄██▃▇
fp,█▆▂▅▁▁▆▂
fn,▂▁▄▄▇█▃▇


[34m[1mwandb[0m: Agent Starting Run: 5t3iuxc7 with config:
[34m[1mwandb[0m: 	learning_rate: 5.420143428398187e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 19
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=492.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=492.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=492.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98971494607…

0,1
Training loss,0.1754
lr,0.0
global_step,1476.0
_runtime,150.0
_timestamp,1631713828.0
_step,35.0
tp,655.0
tn,1181.0
fp,219.0
fn,247.0


0,1
Training loss,▇▇▄▇▄▇▆█▆▄▃▄▄▄▄▂▇▆▃▄▁▄▅▃▃▂▅▂▂
lr,▅██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇███
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
tp,▃▃█▁
tn,▅▆▁█
fp,▄▃█▁
fn,▆▆▁█


[34m[1mwandb[0m: Agent Starting Run: n0petes6 with config:
[34m[1mwandb[0m: 	learning_rate: 1.731649200133443e-05
[34m[1mwandb[0m: 	num_train_epochs: 14
[34m[1mwandb[0m: 	train_batch_size: 58
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'cl

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=14.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 14', max=162.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 14', max=162.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 14', max=162.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 14', max=162.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 14', max=162.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





In [2]:
# Checkpoint name, simpletransformers model family and data-set language for this sweep.
model_name, model_type, language = "roberta-base", "roberta", "en"

# Start the hyperparameter sweep for this model/language combination.
run_hyperparams_optimization(model_name, model_type, language)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: vmews0h8
Sweep URL: https://wandb.ai/5roop/task3_roberta-baseroberta/sweeps/vmews0h8


[34m[1mwandb[0m: Agent Starting Run: 4dq3apwg with config:
[34m[1mwandb[0m: 	learning_rate: 4.069461985137065e-05
[34m[1mwandb[0m: 	num_train_epochs: 11
[34m[1mwandb[0m: 	train_batch_size: 51
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m5roop[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=11.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 11', max=184.0, style=ProgressStyle(de…

  torch.nn.utils.clip_grad_norm_(





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 11', max=184.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 11', max=184.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 11', max=184.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 11', max=184.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 11', max=184.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 11', max=184.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 11', max=184.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 11', max=184.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 11', max=184.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 11', max=184.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98335886078…

0,1
Training loss,0.00016
lr,0.0
global_step,2024.0
_runtime,378.0
_timestamp,1631780288.0
_step,55.0
tp,605.0
tn,1216.0
fp,184.0
fn,297.0


0,1
Training loss,█▆▇▅▄▆▆▄▃▄▄▄▃▄▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁
lr,▄▇███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
tp,▁▄█▅▅▄▄█▅▅▆▄▄
tn,█▆▃▅▄▆▄▁▆▅▅▆▆
fp,▁▃▆▄▅▃▅█▃▄▄▃▃
fn,█▅▁▄▄▅▅▁▄▄▃▅▅


[34m[1mwandb[0m: Agent Starting Run: yfsti9ps with config:
[34m[1mwandb[0m: 	learning_rate: 1.3932447517956938e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 31
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=302.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=302.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=302.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98960552985…

0,1
Training loss,0.48292
lr,0.0
global_step,906.0
_runtime,131.0
_timestamp,1631780430.0
_step,23.0
tp,663.0
tn,1205.0
fp,195.0
fn,239.0


0,1
Training loss,▇▇▅▅▇▃█▃▂▆▂▂▂▁▂▁▂▄
lr,▄▇██▇▇▆▆▅▅▄▄▃▃▂▂▁▁
global_step,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███
_runtime,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
_timestamp,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
_step,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
tp,▁█▅
tn,█▁█
fp,▁█▁
fn,█▁▄


[34m[1mwandb[0m: Agent Starting Run: ete4e127 with config:
[34m[1mwandb[0m: 	learning_rate: 7.984704288279768e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	train_batch_size: 35
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=267.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=267.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=267.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=267.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=288.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98898067844…

0,1
Training loss,0.16472
lr,0.0
global_step,1068.0
_runtime,167.0
_timestamp,1631780608.0
_step,28.0
tp,643.0
tn,1184.0
fp,216.0
fn,259.0


0,1
Training loss,█▆▆▇▆▅▆▅▄▇▅▃▄▄▃▂▃▁▂▁▃
lr,▄▇██▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▁▁
global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇████
_runtime,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇████
_timestamp,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇████
_step,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
tp,▁▇▆█▇
tn,█▃▄▁▃
fp,▁▆▅█▆
fn,█▂▃▁▂


[34m[1mwandb[0m: Agent Starting Run: 2qc6mo1e with config:
[34m[1mwandb[0m: 	learning_rate: 1.0988705049660534e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	train_batch_size: 30
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=9339.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=312.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=312.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=312.0, style=ProgressStyle(des…

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


#  Fine-tuning

In [9]:
from dataclasses import dataclass, field

@dataclass
class Model:
    """Bundle one fine-tuning candidate with its hyperparameters and scores.

    The first three fields identify what to train; the remaining fields are
    optional and default to "no results yet, seven runs, no extra arguments".
    """

    # Checkpoint identifier, e.g. "roberta-base".
    model_name: str
    # Model family name handed to simpletransformers, e.g. "roberta".
    model_type: str
    # Data-set language code, e.g. "en".
    language: str
    # Accuracy of each fine-tuning run (empty until the model has been evaluated).
    accuracies: list = field(default_factory=list)
    # Macro F1 score of each fine-tuning run, parallel to `accuracies`.
    f1scores: list = field(default_factory=list)
    # How many times the model is fine-tuned and evaluated.
    num_iters: int = field(default=7)
    # Training hyperparameters, e.g. num_train_epochs / learning_rate / train_batch_size.
    model_args: dict = field(default_factory=dict)


# Candidate models for the English ("en") data set. Every entry below already
# carries seven accuracy / macro-F1 values, so the fine-tuning loop (which skips
# any model whose `accuracies` list is non-empty) will not retrain them.
# NOTE(review): presumably these scores were pasted back from earlier fine-tuning
# runs and the hyperparameters were picked from the sweeps above — confirm.

# xlm-roberta-large: all seven runs report exactly the same accuracy and F1.
# NOTE(review): identical scores on every run look like a degenerate model
# (e.g. always predicting one class) — confirm before drawing conclusions.
xlarge = Model(
    "xlm-roberta-large",
    "xlmroberta",
    "en",
    model_args= {
        "num_train_epochs": 9,
        "learning_rate": 0.000141,
        "train_batch_size": 46},
    accuracies = [0.6081668114682884, 0.6081668114682884, 0.6081668114682884, 0.6081668114682884, 0.6081668114682884, 0.6081668114682884, 0.6081668114682884],
    f1scores = [0.37817396002160997, 0.37817396002160997, 0.37817396002160997, 0.37817396002160997, 0.37817396002160997, 0.37817396002160997, 0.37817396002160997]
)


# xlm-roberta-base: runs 4 and 7 have the same scores as every
# xlm-roberta-large run above; the other five runs are in the 0.76-0.78 range.
xbase = Model(
    "xlm-roberta-base",
    "xlmroberta",
    "en",
    model_args= {
        "num_train_epochs": 3,
        "learning_rate": 0.000097,
        "train_batch_size": 99},
    accuracies = [0.7697654213727194, 0.760642919200695, 0.7832319721980886, 0.6081668114682884, 0.7780191138140747, 0.7819287576020851, 0.6081668114682884],
    f1scores = [0.7573856527317431, 0.7479285719254002, 0.7690206413905999, 0.37817396002160997, 0.7603538620851371, 0.7698192581497165, 0.37817396002160997]
)


# distilroberta-base (loaded through the "roberta" model type).
distilroberta = Model(
    "distilroberta-base",
    "roberta",
    "en",
    model_args= {
        "num_train_epochs": 4,
        "learning_rate": 0.0000563,
        "train_batch_size": 65},
    accuracies = [0.8053866203301477, 0.7993049522154648, 0.7966985230234579, 0.7893136403127715, 0.7993049522154648, 0.7971329278887923, 0.7958297132927888],
    f1scores = [0.7938755229581946, 0.788071029574932, 0.7862911960389911, 0.776211651538248, 0.788071029574932, 0.7878714869422752, 0.7833737110821903]
)

# roberta-base.
roberta = Model(
    "roberta-base",
    "roberta",
    "en",
    model_args= {
        "num_train_epochs": 4,
        "learning_rate": 0.0000354,
        "train_batch_size": 76},
    accuracies = [0.8049522154648132, 0.8023457862728063, 0.7975673327541268, 0.8040834057341443, 0.8001737619461338, 0.8019113814074718, 0.8084274543874891],
    f1scores = [0.7938156134306567, 0.7907003000386662, 0.7856867727912031, 0.7933341946050813, 0.7893417576471337, 0.7884840908486412, 0.7978319119382253],
)


# Collect the candidates; the bare name on the last line makes the cell display the list.
models = [roberta, distilroberta, xbase, xlarge]
models

[Model(model_name='roberta-base', model_type='roberta', language='en', accuracies=[0.8049522154648132, 0.8023457862728063, 0.7975673327541268, 0.8040834057341443, 0.8001737619461338, 0.8019113814074718, 0.8084274543874891], f1scores=[0.7938156134306567, 0.7907003000386662, 0.7856867727912031, 0.7933341946050813, 0.7893417576471337, 0.7884840908486412, 0.7978319119382253], num_iters=7, model_args={'num_train_epochs': 4, 'learning_rate': 3.54e-05, 'train_batch_size': 76}),
 Model(model_name='distilroberta-base', model_type='roberta', language='en', accuracies=[0.8053866203301477, 0.7993049522154648, 0.7966985230234579, 0.7893136403127715, 0.7993049522154648, 0.7971329278887923, 0.7958297132927888], f1scores=[0.7938755229581946, 0.788071029574932, 0.7862911960389911, 0.776211651538248, 0.788071029574932, 0.7878714869422752, 0.7833737110821903], num_iters=7, model_args={'num_train_epochs': 4, 'learning_rate': 5.63e-05, 'train_batch_size': 65}),
 Model(model_name='xlm-roberta-base', model_t

In [10]:
import pandas as pd
import numpy as np
import torch

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')


def _snapshot_best_model(current_dir, best_dir):
    """Replace ``best_dir`` with a fresh copy of ``current_dir``.

    Args:
        current_dir: Directory the latest training run wrote its output to.
        best_dir: Directory that should hold the best run seen so far.

    Returns:
        True when the copy was made, False when ``current_dir`` does not exist
        (in which case a message is printed and nothing is changed).
    """
    import os
    import shutil

    if not os.path.isdir(current_dir):
        # Losing the snapshot is not worth aborting a long series of training runs.
        print(f"No saved model found in {current_dir}; best-model copy skipped.")
        return False
    # Drop the previous snapshot first: copying into an existing directory would
    # either nest the new copy inside it or leave files from the older run behind.
    shutil.rmtree(best_dir, ignore_errors=True)
    shutil.copytree(current_dir, best_dir)
    return True


def fine_tune_and_evaluate(
    model_type,
    model_name,
    language,
    model_args,
    num_iters,
):
    """Fine-tune a model several times and score every run on the test split.

    Each run builds a fresh ``ClassificationModel``, trains it on
    ``../data/merged-{language}.train.tsv`` and predicts on
    ``../data/merged-{language}.test.tsv``. Whenever a run reaches the highest
    accuracy seen so far (ties included, first run included), its output
    directory ``finetuned_models/<type>__<name>_<language>_current`` is copied
    to the matching ``..._best`` directory.

    Args:
        model_type: simpletransformers model family, e.g. "roberta".
        model_name: Checkpoint name; any "/" is replaced by "_" in directory names.
        language: One of "sl", "hr" or "en".
        model_args: Dict of training arguments. It is copied, so the caller's
            dict is left untouched; output/saving related keys are overridden.
        num_iters: Number of train/evaluate runs. At least one run is always
            performed, even for values below 1.

    Returns:
        ``(accuracies, f1s)``: two lists with one accuracy and one macro F1
        score per run, in run order.

    Raises:
        AttributeError: If ``language`` is not one of the supported codes.
    """
    import torch
    torch.cuda.empty_cache()
    if language not in {"sl", "hr", "en"}:
        raise AttributeError(f"Language {language} is not valid")
    eval_file, train_file = f"../data/merged-{language}.test.tsv", f"../data/merged-{language}.train.tsv"
    train = read_file(train_file, correct_labels=True)
    test = read_file(eval_file, correct_labels=True)

    from simpletransformers.classification import ClassificationModel
    from sklearn.metrics import accuracy_score, f1_score

    # One sanitized stem for both directories, so the snapshot source always
    # matches the directory the training run actually wrote to.
    run_stem = f"finetuned_models/{model_type}__{model_name.replace('/', '_')}_{language}"
    current_dir = f"{run_stem}_current"
    best_dir = f"{run_stem}_best"

    # Work on a copy: the caller's dict (e.g. Model.model_args) must not pick up
    # the bookkeeping keys added below.
    run_args = dict(model_args)
    run_args["overwrite_output_dir"] = True
    run_args["output_dir"] = f"{current_dir}/"
    run_args["silent"] = False
    run_args["save_model_every_epoch"] = False
    run_args["save_steps"] = -1
    # manual_seed is intentionally left unset (it was commented out originally).
    # NOTE(review): presumably so that repeated runs can differ — confirm.

    y_true = test["labels"].tolist()
    accuracies = list()
    f1s = list()

    for _ in range(max(num_iters, 1)):
        model = ClassificationModel(
            model_type, model_name, use_cuda=True,
            args=run_args
        )
        model.train_model(train)
        y_pred = model.predict(list(test["text"].values))[0].tolist()
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average="macro")

        accuracies.append(accuracy)
        f1s.append(f1)
        # The run just appended is the best so far (or ties it): keep its model.
        # Assumes simpletransformers has written the trained model to
        # output_dir by the time train_model returns — TODO confirm.
        if accuracy == max(accuracies):
            _snapshot_best_model(current_dir, best_dir)
    print(f"Model: {model_name}, {model_type}, {language=}")
    print(f"Accuracies: {accuracies}")
    print(f"F1 scores: {f1s}")
    return accuracies, f1s


# Fine-tune only the models that have no recorded scores yet; models whose
# `accuracies` list is already filled are left as they are.
for model in models:
    if model.accuracies == []:
        accs, f1s = fine_tune_and_evaluate(
            model_type=model.model_type,
            model_name=model.model_name,
            language=model.language,
            model_args=model.model_args,
            num_iters=model.num_iters,
        )

        # Store the per-run scores on the model and show the updated record.
        model.accuracies, model.f1scores = accs, f1s
        print(model)

In [11]:
models

[Model(model_name='roberta-base', model_type='roberta', language='en', accuracies=[0.8049522154648132, 0.8023457862728063, 0.7975673327541268, 0.8040834057341443, 0.8001737619461338, 0.8019113814074718, 0.8084274543874891], f1scores=[0.7938156134306567, 0.7907003000386662, 0.7856867727912031, 0.7933341946050813, 0.7893417576471337, 0.7884840908486412, 0.7978319119382253], num_iters=7, model_args={'num_train_epochs': 4, 'learning_rate': 3.54e-05, 'train_batch_size': 76}),
 Model(model_name='distilroberta-base', model_type='roberta', language='en', accuracies=[0.8053866203301477, 0.7993049522154648, 0.7966985230234579, 0.7893136403127715, 0.7993049522154648, 0.7971329278887923, 0.7958297132927888], f1scores=[0.7938755229581946, 0.788071029574932, 0.7862911960389911, 0.776211651538248, 0.788071029574932, 0.7878714869422752, 0.7833737110821903], num_iters=7, model_args={'num_train_epochs': 4, 'learning_rate': 5.63e-05, 'train_batch_size': 65}),
 Model(model_name='xlm-roberta-base', model_t

In [14]:
import numpy as np

# Markdown table header; the leading empty entry yields the blank line that
# precedes the table in the printed output.
header_lines = [
    "",
    "|model name| model type| accuracy | macro f1 score|",
    "| ---      | ---       | ---      | ---           |",
]
print("\n".join(header_lines))

# One table row per model: mean +/- standard deviation over its runs,
# formatted to three significant digits.
for model in models:
    acc_summary = f"{np.mean(model.accuracies):0.3} +/- {np.std(model.accuracies):0.3}"
    f1_summary = f"{np.mean(model.f1scores):0.3} +/- {np.std(model.f1scores):0.3}"
    print(f"|{model.model_name}| {model.model_type}| {acc_summary}| {f1_summary}|")


|model name| model type| accuracy | macro f1 score|
| ---      | ---       | ---      | ---           |
|roberta-base| roberta| 0.803 +/- 0.00323| 0.791 +/- 0.00372|
|distilroberta-base| roberta| 0.798 +/- 0.00446| 0.786 +/- 0.00502|
|xlm-roberta-base| xlmroberta| 0.727 +/- 0.0756| 0.652 +/- 0.173|
|xlm-roberta-large| xlmroberta| 0.608 +/- 0.0| 0.378 +/- 5.55e-17|


In [23]:
import numpy as np
from scipy import stats


def _significance_p_values(higher, lower):
    """Return the p-values of three tests comparing two lists of scores.

    Parameters
    ----------
    higher : sequence of float
        Scores of the model expected to perform better.
    lower : sequence of float
        Scores of the model it is compared against.

    Returns
    -------
    tuple of float
        ``(wilcoxon_p, mannwhitney_p, student_p)``.

    Notes
    -----
    ``stats.wilcoxon`` is a paired test, so both lists must have the same
    length. The Wilcoxon and Mann-Whitney tests are run one-sided
    (``alternative='greater'``).
    """
    wilcoxon_p = stats.wilcoxon(higher, lower, alternative="greater").pvalue
    mannwhitney_p = stats.mannwhitneyu(higher, lower, alternative="greater").pvalue
    # NOTE(review): no `alternative` is passed here, so this t-test uses
    # scipy's default (two-sided), unlike the two one-sided tests above.
    # Kept as is to preserve the reported numbers -- confirm this is intended.
    student_p = stats.ttest_ind(higher, lower).pvalue
    return wilcoxon_p, mannwhitney_p, student_p


# Rank the models by mean accuracy, best first. `sorted` returns a new list,
# so popping the winner below does not modify `models`.
sorted_models = sorted(models, key=lambda m: np.mean(m.accuracies), reverse=True)

best_model = sorted_models.pop(0)
rest_models = sorted_models

# Compare the best model against each remaining model and print one markdown
# table of p-values per pair.
for model in rest_models:
    wilcoxon_acc, mannwhitney_acc, student_acc = _significance_p_values(
        best_model.accuracies, model.accuracies
    )
    wilcoxon_f1, mannwhitney_f1, student_f1 = _significance_p_values(
        best_model.f1scores, model.f1scores
    )

    # Every p-value in the first two rows is shown with three significant
    # digits (the Wilcoxon F1 cell previously lacked the format spec).
    print(f"""
#### `{best_model.model_name}` vs `{model.model_name}`:

| test | accuracy p-value | macro F1 p-value|
| --- | --- | --- |
|Wilcoxon|{wilcoxon_acc:0.3}|{wilcoxon_f1:0.3}|
|Mann Whithney|{mannwhitney_acc:0.3}|{mannwhitney_f1:0.3}|
|Student t-test | {student_acc:0.3e} | {student_f1:0.3e}|

""")


#### `roberta-base` vs `distilroberta-base`:

| test | accuracy p-value | macro F1 p-value|
| --- | --- | --- |
|Wilcoxon|0.0156|0.0390625|
|Mann Whithney|0.0203|0.0367|
|Student t-test | 3.894e-02 | 7.056e-02|



#### `roberta-base` vs `xlm-roberta-base`:

| test | accuracy p-value | macro F1 p-value|
| --- | --- | --- |
|Wilcoxon|0.00781|0.0078125|
|Mann Whithney|0.00107|0.00107|
|Student t-test | 3.062e-02 | 7.135e-02|



#### `roberta-base` vs `xlm-roberta-large`:

| test | accuracy p-value | macro F1 p-value|
| --- | --- | --- |
|Wilcoxon|0.00781|0.0078125|
|Mann Whithney|0.000529|0.000529|
|Student t-test | 6.386e-21 | 4.070e-24|


