In [1]:
import pandas as pd
import numpy as np
import torch



def read_file(fname: str, correct_labels=False) -> pd.DataFrame:
    """Reads a filename, return df with text and labels."""

    df = pd.read_table(fname, sep="\t", header=None, names="text,labels,role".split(","))
    if correct_labels:
        offensive_ids = df.labels != "Acceptable speech"
        df.labels[offensive_ids] = 1
        df.labels[~offensive_ids] = 0

    df = df.drop(columns=["role"])
    return df

en_test, en_train = "../data/merged-en.test.tsv" , "../data/merged-en.train.tsv"
hr_test, hr_train = "../data/merged-hr.test.tsv" , "../data/merged-hr.train.tsv"
sl_test, sl_train = "../data/merged-sl.test.tsv",  "../data/merged-sl.train.tsv"


In [2]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import wandb


model_args = ClassificationArgs()
model_args.use_early_stopping = True
model_args.early_stopping_delta = 0.01
model_args.early_stopping_metric = "mcc"
model_args.early_stopping_metric_minimize = False
model_args.early_stopping_patience = 3
model_args.evaluate_during_training_steps = 1000

model_args.evaluate_during_training = True
model_args.manual_seed = 4
model_args.use_multiprocessing = True
model_args.eval_batch_size = 8
model_args.labels_list = [0, 1]
model_args.wandb_project = "task2"
model_args.reprocess_input_data = True
model_args.overwrite_output_dir = True


sweep_config = {
    "method": "bayes",  # grid, random
    "metric": {"name": "mcc", "goal": "maximize"},
    "parameters": {
        "num_train_epochs": {"values": [5, 10, 20]},
        "learning_rate": {"values": [0.00002927]},
        "train_batch_size": {"values": [40,80,160]}
    },
}

sweep_id = wandb.sweep(sweep_config, project="task2-cont")

train_df = read_file(hr_train, correct_labels=True)
eval_df = read_file(hr_test, correct_labels=True)

def train():
    # Initialize a new wandb run
    wandb.init()

    # Create a TransformerModel
    model = ClassificationModel(
        "electra",
        "classla/bcms-bertic",
        use_cuda=True,
        args=model_args,
        sweep_config=wandb.config,
    )

    # Train the model
    model.train_model(train_df, eval_df=eval_df)

    # Evaluate the model
    model.eval_model(eval_df)

    # Sync wandb
    wandb.join()

wandb.agent(sweep_id, train)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: qvhsksia
Sweep URL: https://wandb.ai/5roop/task2-cont/sweeps/qvhsksia


[34m[1mwandb[0m: Agent Starting Run: 9g63ch48 with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m5roop[0m (use `wandb login --relogin` to force relogin)


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=222.0, style=ProgressStyle(de…

  torch.nn.utils.clip_grad_norm_(





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98515399955…

0,1
Training loss,0.00067
lr,0.0
global_step,2220.0
_runtime,392.0
_timestamp,1629272781.0
_step,58.0
tp,1134.0
tn,633.0
fp,196.0
fn,157.0


0,1
Training loss,█▅▆▅▅▅▅▅▄▃▃▄▃▁▃▂▂▃▁▃▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁
lr,▄▆███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▄▁█▁▃▃▅▅▆▄▄▄
tn,▅█▁█▆▅▄▄▄▅▅▅
fp,▄▁█▁▃▄▅▅▅▄▄▄
fn,▅█▁█▆▆▄▄▃▅▅▅


[34m[1mwandb[0m: Agent Starting Run: wxe8i4u8 with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 160
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98742819474…

0,1
Training loss,0.20884
lr,1e-05
global_step,280.0
_runtime,159.0
_timestamp,1629272953.0
_step,12.0
tp,1118.0
tn,640.0
fp,189.0
fn,173.0


0,1
Training loss,█▇▅▂▁
lr,▃▇█▅▁
global_step,▁▁▃▃▄▅▆▆▇█
_runtime,▁▁▂▃▄▅▅▆▇████
_timestamp,▁▁▂▃▄▅▅▆▇████
_step,▁▂▂▃▃▄▅▅▆▆▇▇█
tp,█▁▂▃▃
tn,▁████
fp,█▁▁▁▁
fn,▁█▇▆▆


[34m[1mwandb[0m: Agent Starting Run: 5702pwtk with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 20
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=20.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 20', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 20', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 20', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 20', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 20', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 20', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 20', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 20', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 20', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 20', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 20', max=222.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 20', max=222.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 20', max=222.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 20', max=222.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 14 of 20', max=222.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 15 of 20', max=222.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 16 of 20', max=222.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 17 of 20', max=222.0, style=ProgressStyle(d…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 18 of 20', max=222.0, style=ProgressStyle(d…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98420019627…

0,1
Training loss,8e-05
lr,0.0
global_step,4000.0
_runtime,680.0
_timestamp,1629273644.0
_step,104.0
tp,1125.0
tn,636.0
fp,193.0
fn,166.0


0,1
Training loss,█▆▅▅▄▃▄▁▂▂▁▁▁▁▂▃▁▁▃▁▂▁▁▁▁▁▁▃▁▁▁▁▁▁▂▁▁▁▁▁
lr,▃███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_runtime,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▆▁█▄▆▇▆▆▆▆▆▃▅▅▄▇▆▆▆▆▆▆
tn,▃█▁▆▃▂▃▃▂▃▃▆▅▄▆▂▄▃▄▃▄▄
fp,▆▁█▃▆▇▆▆▇▆▆▃▄▅▃▇▅▆▅▆▅▅
fn,▃█▁▅▃▂▃▃▃▃▃▆▄▄▅▂▃▃▃▃▃▃


[34m[1mwandb[0m: Agent Starting Run: rd0yvz4e with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=222.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98729470586…

0,1
Training loss,0.01268
lr,0.0
global_step,1110.0
_runtime,207.0
_timestamp,1629273863.0
_step,30.0
tp,1130.0
tn,639.0
fp,190.0
fn,161.0


0,1
Training loss,█▅▆▅▅▄▅▅▃▃▃▃▄▃▂▃▂▂▄▁▂▁
lr,▄▆██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
tp,▁▂█▅▁▂
tn,▆▇▁▆█▇
fp,▃▂█▃▁▂
fn,█▇▁▄█▇


[34m[1mwandb[0m: Agent Starting Run: 4qy24az4 with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98515399955…

0,1
Training loss,0.00067
lr,0.0
global_step,2220.0
_runtime,391.0
_timestamp,1629274268.0
_step,58.0
tp,1134.0
tn,633.0
fp,196.0
fn,157.0


0,1
Training loss,█▅▆▅▅▅▅▅▄▃▃▄▃▁▃▂▂▃▁▃▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁
lr,▄▆███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▄▁█▁▃▃▅▅▆▄▄▄
tn,▅█▁█▆▅▄▄▄▅▅▅
fp,▄▁█▁▃▄▅▅▅▄▄▄
fn,▅█▁█▆▆▄▄▃▅▅▅


[34m[1mwandb[0m: Agent Starting Run: qirjalsm with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98515399955…

0,1
Training loss,0.00067
lr,0.0
global_step,2220.0
_runtime,390.0
_timestamp,1629274669.0
_step,58.0
tp,1134.0
tn,633.0
fp,196.0
fn,157.0


0,1
Training loss,█▅▆▅▅▅▅▅▄▃▃▄▃▁▃▂▂▃▁▃▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁
lr,▄▆███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▄▁█▁▃▃▅▅▆▄▄▄
tn,▅█▁█▆▅▄▄▄▅▅▅
fp,▄▁█▁▃▄▅▅▅▄▄▄
fn,▅█▁█▆▆▄▄▃▅▅▅


[34m[1mwandb[0m: Agent Starting Run: rauqokb7 with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=222.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98729470586…

0,1
Training loss,0.01268
lr,0.0
global_step,1110.0
_runtime,209.0
_timestamp,1629274890.0
_step,30.0
tp,1130.0
tn,639.0
fp,190.0
fn,161.0


0,1
Training loss,█▅▆▅▅▄▅▅▃▃▃▃▄▃▂▃▂▂▄▁▂▁
lr,▄▆██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
_timestamp,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
tp,▁▂█▅▁▂
tn,▆▇▁▆█▇
fp,▃▂█▃▁▂
fn,█▇▁▄█▇


[34m[1mwandb[0m: Agent Starting Run: jupd0aqe with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 160
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=56.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.20884
lr,1e-05
global_step,280.0
_runtime,165.0
_timestamp,1629275067.0
_step,12.0
tp,1118.0
tn,640.0
fp,189.0
fn,173.0


0,1
Training loss,█▇▅▂▁
lr,▃▇█▅▁
global_step,▁▁▃▃▄▅▆▆▇█
_runtime,▁▁▂▃▄▅▅▆▇████
_timestamp,▁▁▂▃▄▅▅▆▇████
_step,▁▂▂▃▃▄▅▅▆▆▇▇█
tp,█▁▂▃▃
tn,▁████
fp,█▁▁▁▁
fn,▁█▇▆▆


[34m[1mwandb[0m: Agent Starting Run: snnxhnn4 with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98515399955…

0,1
Training loss,0.00067
lr,0.0
global_step,2220.0
_runtime,392.0
_timestamp,1629275470.0
_step,58.0
tp,1134.0
tn,633.0
fp,196.0
fn,157.0


0,1
Training loss,█▅▆▅▅▅▅▅▄▃▃▄▃▁▃▂▂▃▁▃▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁
lr,▄▆███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▄▁█▁▃▃▅▅▆▄▄▄
tn,▅█▁█▆▅▄▄▄▅▅▅
fp,▄▁█▁▃▄▅▅▅▄▄▄
fn,▅█▁█▆▆▄▄▃▅▅▅


[34m[1mwandb[0m: Agent Starting Run: zwquxyid with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 80
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=111.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=111.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98689083341…

0,1
Training loss,0.00475
lr,0.0
global_step,1110.0
_runtime,326.0
_timestamp,1629275810.0
_step,35.0
tp,1128.0
tn,619.0
fp,210.0
fn,163.0


0,1
Training loss,█▆▅▆▄▄▂▄▃▂▁▁▂▁▂▂▂▁▂▁▁▁
lr,▄▆██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
_timestamp,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
tp,█▁▆▅▆▅▅▂▄▅▅
tn,▁█▅▅▄▅▅▆▅▅▅
fp,█▁▄▄▅▄▄▃▄▄▄
fn,▁█▃▄▃▄▄▇▅▄▄


[34m[1mwandb[0m: Agent Starting Run: ue69qlms with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=222.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=222.0, style=ProgressStyle(des…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98729470586…

0,1
Training loss,0.01268
lr,0.0
global_step,1110.0
_runtime,209.0
_timestamp,1629276032.0
_step,30.0
tp,1130.0
tn,639.0
fp,190.0
fn,161.0


0,1
Training loss,█▅▆▅▅▄▅▅▃▃▃▃▄▃▂▃▂▂▄▁▂▁
lr,▄▆██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
_timestamp,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
tp,▁▂█▅▁▂
tn,▆▇▁▆█▇
fp,▃▂█▃▁▂
fn,█▇▁▄█▇


[34m[1mwandb[0m: Agent Starting Run: sfh6evoj with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	train_batch_size: 40
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 10', max=222.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 10', max=222.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=265.0, style=ProgressStyle(descr…




VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98515399955…

0,1
Training loss,0.00067
lr,0.0
global_step,2220.0
_runtime,395.0
_timestamp,1629276441.0
_step,58.0
tp,1134.0
tn,633.0
fp,196.0
fn,157.0


0,1
Training loss,█▅▆▅▅▅▅▅▄▃▃▄▃▁▃▂▂▃▁▃▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁
lr,▄▆███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
tp,▄▁█▁▃▃▅▅▆▄▄▄
tn,▅█▁█▆▅▄▄▄▅▅▅
fp,▄▁█▁▃▄▅▅▅▄▄▄
fn,▅█▁█▆▆▄▄▃▅▅▅


[34m[1mwandb[0m: Agent Starting Run: vzmn4cfh with config:
[34m[1mwandb[0m: 	learning_rate: 2.927e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 80
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Some weights of the model checkpoint at classla/bcms-bertic were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at classla/bcms-bertic and are newly initialized: ['pooler.dense.bias', 'classifier.bias', '

HBox(children=(FloatProgress(value=0.0, max=8851.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=111.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, max=2120.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=111.0, style=ProgressStyle(des…

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.





In [21]:
train_df = read_file(hr_train, correct_labels=True)
eval_df = read_file(hr_test, correct_labels=True)

eval_df

Unnamed: 0,text,labels
0,"Ovi ne idu nigdje za boljim životom, pošto im ...",1
1,Na zalost sve moguce,0
2,Ocemu se tu radi ne kuzim,0
3,Tako je kad vi pisete istoriju i pola miliona ...,1
4,Meni se ovo jako svida to smo htjeli nasi hrva...,1
...,...,...
2115,Nekome trebaju naočale. Ovo je šezdesetak ljud...,0
2116,Slusate li vi uopce sta oni gove?,0
2117,Jadno..,1
2118,"pa mi stvarno nismo normalni kao narod, zasto ...",1
