In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
### RUN IF IN COLAB: uncomment the lines below first, then run this cell

# # torch==1.10.2
# !pip install transformers==4.16.2 datasets==1.17.0 tokenizers==0.11.6 wandb==0.12.14

# !git clone -b kretov/small_addons https://github.com/DevKretov/ntu_nlp_al
# %cd ntu_nlp_al/

In [44]:
import os

import torch
import wandb
from datasets import list_metrics, load_metric
from transformers import get_scheduler
from transformers import AutoTokenizer
from transformers import BertForSequenceClassification

from active_learning_trainer import ALTrainer
from dataset import Dataset
from model import Model
from strategies import RandomStrategy

In [36]:
# Central run configuration: every later cell reads its settings from this dict.
parameters = dict()
parameters['use_gpu'] = True

# --- Weights & Biases logging ---
parameters['weights_and_biases_on'] = False
# The API key is a credential and must not live in the notebook: it is read
# from the WANDB_API_KEY environment variable (None when the variable is unset).
# NOTE(review): a key was previously hardcoded here and committed; it should be
# revoked and replaced.
parameters['weights_and_biases_key'] = os.environ.get('WANDB_API_KEY')
parameters['weights_and_biases_run_name'] = 'run_2'
# TODO: implement it
parameters['weights_and_biases_save_predictions'] = False

# --- Pretrained checkpoint ---
parameters['pretrained_model_name'] = 'prajjwal1/bert-tiny' #'distilbert-base-uncased'


# --- Dataset files (alternative IMDB dataset kept for reference) ---
# parameters['train_dataset_file_path'] = 'data/imdb/train_IMDB.csv'
# parameters['val_dataset_file_path'] = 'data/imdb/test_IMDB.csv'
# parameters['test_dataset_file_path'] = 'data/imdb/test_IMDB.csv'

parameters['train_dataset_file_path'] = 'data/news/train.csv'
parameters['val_dataset_file_path'] = 'data/news/val.csv'
parameters['test_dataset_file_path'] = 'data/news/test.csv'
parameters['dataset_file_delimiter'] = ','

# Names of the CSV columns holding the input text and the target label.
parameters['dataset_text_column_name'] = 'text_cleaned' #'text'
parameters['dataset_label_column_name'] = 'label_reduced'#'airline_sentiment'

# --- Loss / training ---
# TODO: implement this with CrossEntropyLoss
parameters['loss'] = 'cross_entropy'
parameters['loss_weighted'] = False

parameters['class_imbalance_reweight'] = True
parameters['train_batch_size'] = 32
parameters['val_batch_size'] = 64
parameters['test_batch_size'] = 64
parameters['epochs'] = 5
parameters['finetuned_model_type'] = 'classification'

# --- Active learning loop ---
parameters['al_iterations'] = 100
parameters['init_dataset_size'] = 32
parameters['add_dataset_size'] = 32
parameters['al_strategy'] = 'least_confidence' #'least_confidence'
parameters['full_train'] = False
parameters['debug'] = False

In [None]:
# Optional experiment tracking: log in, start a run and record the configuration.
if parameters['weights_and_biases_on']:
    # Use the key from the configuration rather than repeating the credential
    # as a second literal in this cell.
    wandb.login(key=parameters['weights_and_biases_key'])
    wandb.init(
        name=parameters['weights_and_biases_run_name'],
        project='ntu_al',
        reinit=True
    )

    # Record the hyperparameters of this run, but never the API key itself:
    # anything placed in wandb.config is stored with the run.
    wandb.config.update(
        {
            name: value
            for name, value in parameters.items()
            if name != 'weights_and_biases_key'
        }
    )

# Choose the compute device: the first CUDA GPU when GPU use is requested and
# CUDA is available, otherwise the CPU.
if parameters['use_gpu']:
    cuda_ready = torch.cuda.is_available()
    device = torch.device('cuda:0' if cuda_ready else 'cpu')
else:
    # GPU use disabled in the configuration: keep the plain 'cpu' string.
    device = 'cpu'

print(f'Device set to {device}!')

In [37]:
# Tokenizer for the pretrained checkpoint named in the configuration; the
# project Dataset wrapper is built around it.
tokenizer = AutoTokenizer.from_pretrained(parameters['pretrained_model_name'])
dataset_obj = Dataset(tokenizer)

# Map every split name to a single-element list with its CSV path.
data_files = {
    split: [parameters[f'{split}_dataset_file_path']]
    for split in ('train', 'val', 'test')
}
dataset_obj.load_csv_dataset(data_files, delimiter=parameters['dataset_file_delimiter'])

# Truncate each split with a fixed size argument.
# NOTE(review): presumably this caps the split at that many rows; confirm in dataset.py.
for split_name, max_rows in (('train', 10000), ('val', 1000), ('test', 10000)):
    dataset_obj.truncate_dataset(split_name, max_rows)

Training mean loss: 1.899296794618879: 100%|██████████| 14/14 [31:38<00:00, 135.61s/it]
Eval mean loss: 1.9060639824186052:  44%|████▍     | 7/16 [31:34<40:35, 270.59s/it]
AL evaluation iteration. Batch   246/295:  83%|████████▎ | 246/295 [09:07<01:49,  2.23s/it]


Using custom data configuration default-e159e2f034d47ce8
Reusing dataset csv (/Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e)
100%|██████████| 3/3 [00:00<00:00, 544.95it/s]


In [38]:
# Prepare the label column first, then encode the text column; both column
# names come from the configuration.
label_column = parameters['dataset_label_column_name']
dataset_obj.prepare_labels(label_column)

text_column = parameters['dataset_text_column_name']
dataset_obj.encode_dataset(text_column)

Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e/cache-ac32778fd5425486.arrow
Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e/cache-6799fe5e1a63afb8.arrow
Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e/cache-6853f82358e53875.arrow
Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e/cache-d7b79c90d7fbe0d7.arrow
Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d

In [39]:
# Show the categories known to the dataset and keep their count for the
# model's classification head.
all_categories = dataset_obj.get_all_categories()
print(f'Categories: {all_categories}')

num_labels = dataset_obj.get_num_categories()

Categories: {'alt': 0, 'comp': 1, 'misc': 2, 'rec': 3, 'sci': 4, 'soc': 5, 'talk': 6}


In [40]:
# Build the project Model wrapper from the configured checkpoint and model
# type, sized for the number of labels found in the dataset.
checkpoint_name = parameters['pretrained_model_name']
model = Model(
    checkpoint_name,
    num_labels=num_labels,
    model_type=parameters['finetuned_model_type'],
)

Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

In [41]:
# Create the active-learning trainer and attach the model and dataset to it.
trainer = ALTrainer(
    imbalanced_training=parameters['class_imbalance_reweight'],
    wandb_on=parameters['weights_and_biases_on'],
)
trainer.set_model(model)

# TODO: add strategy
trainer.set_strategy(None)
trainer.set_dataset(dataset_obj)

# Collect the three configured batch sizes and pass them as keyword arguments.
batch_sizes = {
    f'{split}_batch_size': parameters[f'{split}_batch_size']
    for split in ('train', 'val', 'test')
}
trainer.prepare_dataloaders(**batch_sizes)

In [42]:
# AdamW over the parameters of the wrapped model, registered with the trainer.
trainable_params = model.model.parameters()
optimizer = torch.optim.AdamW(trainable_params, lr=5e-5)
trainer.set_optimizer(optimizer)

# Total scheduler length: configured epochs times the step count reported by
# the trainer.
num_training_steps = parameters['epochs'] * trainer.get_training_steps_num()

# Linear learning-rate schedule without warm-up over the whole training run.
scheduler_options = dict(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)
lr_scheduler = get_scheduler(**scheduler_options)

trainer.set_lr_scheduler(lr_scheduler)
trainer.set_device(device)



In [None]:
# Register the evaluation metrics with the trainer, in this order.
# NOTE(review): the recorded output lists seven categories, while the f1,
# precision and recall metrics default to binary averaging; confirm that the
# trainer passes an `average` argument when it computes them.
for metric_name in ('accuracy', 'f1', 'precision', 'recall'):
    trainer.add_evaluation_metric(load_metric(metric_name))

In [43]:
# Keyword arguments shared by full training and the active-learning loop.
shared_train_kwargs = dict(
    train_epochs=parameters['epochs'],
    train_batch_size=parameters['train_batch_size'],
    val_batch_size=parameters['val_batch_size'],
    test_batch_size=parameters['test_batch_size'],
    debug=parameters['debug'],
)

# Optional full-dataset training, enabled through the configuration.
if parameters['full_train']:
    trainer.full_train(**shared_train_kwargs)

# NOTE(review): the active-learning loop is started unconditionally, so with
# full_train=True both phases run back to back; confirm that this is intended.
trainer.al_train(
    al_iterations=parameters['al_iterations'],
    init_dataset_size=parameters['init_dataset_size'],
    add_dataset_size=parameters['add_dataset_size'],
    strategy=parameters['al_strategy'],
    **shared_train_kwargs
)

Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e/cache-9bd4ab3a27a7fb3a.arrow
Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e/cache-264ab8efe04d123a.arrow
Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e/cache-fd085d63684e4b7c.arrow
Loading cached processed dataset at /Users/antonkretov/.cache/huggingface/datasets/csv/default-e159e2f034d47ce8/0.0.0/6b9057d9e23d9d8a2f05b985917a0da84d70c5dae3d22ddd8a3f22fb01c69d9e/cache-078a83154302888e.arrow


Training initialized!
AL train dataset length: 32, rest dataset length: 9968
Training is run on 1 batches!
Evaluation is run on 16 batches!
Testing is run on 16 batches!


AL BADGE strategy applied!


AL iteration   1/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9942021369934082: 100%|██████████| 1/1 [00:00<00:00,  3.63it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.935079112648964:  94%|█████████▍| 15/16 [00:02<00:00,  7.10it/s] 
Eval mean loss: 1.935079112648964: 100%|██████████| 16/16 [00:02<00:00,  7.25it/s]


Epoch   2
Training mean loss: 1.9942021369934082: 100%|██████████| 1/1 [00:02<00:00,  2.48s/it]
Training mean loss: 1.969994306564331: 100%|██████████| 1/1 [00:00<00:00,  4.01it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9267708659172058:  94%|█████████▍| 15/16 [00:02<00:00,  7.23it/s]
Eval mean loss: 1.9267708659172058: 100%|██████████| 16/16 [00:02<00:00,  7.46it/s]


Epoch   3
Training mean loss: 1.969994306564331: 100%|██████████| 1/1 [00:02<00:00,  2.40s/it]
Training mean loss: 1.9879329204559326: 100%|██████████| 1/1 [00:00<00:00,  3.64it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.91963689029216

100%|██████████| 10/10 [00:01<00:00,  5.87ba/s]
100%|██████████| 9936/9936 [00:03<00:00, 3183.28ex/s]
100%|██████████| 64/64 [00:00<00:00, 3129.24ex/s]



Updated AL datasets: train size = 64, unlabelled size = 9936, sum: 10000 


AL iteration   2/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9718313217163086: 100%|██████████| 2/2 [00:00<00:00,  3.98it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8992066755890846: 100%|██████████| 16/16 [00:02<00:00,  7.01it/s]
Eval mean loss: 1.8992066755890846: 100%|██████████| 16/16 [00:02<00:00,  6.97it/s]


Epoch   2
Training mean loss: 1.9718313217163086: 100%|██████████| 2/2 [00:02<00:00,  1.40s/it]
Training mean loss: 1.9352800250053406: 100%|██████████| 2/2 [00:00<00:00,  3.71it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8962380439043045: 100%|██████████| 16/16 [00:02<00:00,  7.08it/s]
Eval mean loss: 1.8962380439043045: 100%|██████████| 16/16 [00:02<00:00,  7.05it/s]


Epoch   3
Training mean loss: 1.9352800250053406: 100%|██████████| 2/2 [00:02<00:00,  1.41s/it]
Training mean loss: 1.9230751991271973: 100%|██████████| 2/2 [00:00<00:00,  3.92it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.89594065397

100%|██████████| 10/10 [00:01<00:00,  5.68ba/s]
100%|██████████| 9904/9904 [00:03<00:00, 3110.85ex/s]
100%|██████████| 96/96 [00:00<00:00, 3122.48ex/s]



Updated AL datasets: train size = 96, unlabelled size = 9904, sum: 10000 


AL iteration   3/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9621433814366658: 100%|██████████| 3/3 [00:00<00:00,  4.00it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8992003723978996: 100%|██████████| 16/16 [00:02<00:00,  7.19it/s]
Eval mean loss: 1.8992003723978996: 100%|██████████| 16/16 [00:02<00:00,  7.18it/s]


Epoch   2
Training mean loss: 1.9621433814366658: 100%|██████████| 3/3 [00:02<00:00,  1.01it/s]
Training mean loss: 1.9631332556406658: 100%|██████████| 3/3 [00:00<00:00,  3.92it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8962132707238197:  94%|█████████▍| 15/16 [00:02<00:00,  6.70it/s]
Eval mean loss: 1.8962132707238197: 100%|██████████| 16/16 [00:02<00:00,  6.89it/s]


Epoch   3
Training mean loss: 1.9631332556406658: 100%|██████████| 3/3 [00:03<00:00,  1.03s/it]
Training mean loss: 1.9245431025822957: 100%|██████████| 3/3 [00:00<00:00,  4.00it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.89309287816

100%|██████████| 10/10 [00:01<00:00,  5.63ba/s]
100%|██████████| 9872/9872 [00:03<00:00, 3081.89ex/s]
100%|██████████| 128/128 [00:00<00:00, 3387.33ex/s]



Updated AL datasets: train size = 128, unlabelled size = 9872, sum: 10000 


AL iteration   4/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.957252949476242: 100%|██████████| 4/4 [00:00<00:00,  4.08it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.924219585955143:  94%|█████████▍| 15/16 [00:02<00:00,  6.99it/s] 
Eval mean loss: 1.924219585955143: 100%|██████████| 16/16 [00:02<00:00,  7.14it/s]


Epoch   2
Training mean loss: 1.957252949476242: 100%|██████████| 4/4 [00:03<00:00,  1.24it/s]
Training mean loss: 1.90103080868721: 100%|██████████| 4/4 [00:01<00:00,  3.88it/s]  

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.922018401324749:  94%|█████████▍| 15/16 [00:02<00:00,  6.80it/s] 
Eval mean loss: 1.922018401324749: 100%|██████████| 16/16 [00:02<00:00,  6.99it/s]


Epoch   3
Training mean loss: 1.90103080868721: 100%|██████████| 4/4 [00:03<00:00,  1.20it/s]
Training mean loss: 1.9102621376514435: 100%|██████████| 4/4 [00:01<00:00,  3.99it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9159388020634651

100%|██████████| 10/10 [00:01<00:00,  5.68ba/s]
100%|██████████| 9840/9840 [00:03<00:00, 2905.32ex/s]
100%|██████████| 160/160 [00:00<00:00, 3231.86ex/s]



Updated AL datasets: train size = 160, unlabelled size = 9840, sum: 10000 


AL iteration   5/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9160380125045777: 100%|██████████| 5/5 [00:01<00:00,  3.95it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9310956597328186:  94%|█████████▍| 15/16 [00:02<00:00,  6.85it/s]
Eval mean loss: 1.9310956597328186: 100%|██████████| 16/16 [00:02<00:00,  7.01it/s]


Epoch   2
Training mean loss: 1.9160380125045777: 100%|██████████| 5/5 [00:03<00:00,  1.41it/s]
Training mean loss: 1.872106909751892: 100%|██████████| 5/5 [00:01<00:00,  3.92it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9311582371592522:  94%|█████████▍| 15/16 [00:02<00:00,  6.70it/s]
Eval mean loss: 1.9311582371592522: 100%|██████████| 16/16 [00:02<00:00,  6.83it/s]


Epoch   3
Training mean loss: 1.872106909751892: 100%|██████████| 5/5 [00:03<00:00,  1.38it/s]
Training mean loss: 1.8538269996643066: 100%|██████████| 5/5 [00:01<00:00,  3.73it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.929515585303

100%|██████████| 10/10 [00:01<00:00,  5.51ba/s]
100%|██████████| 9808/9808 [00:03<00:00, 3070.68ex/s]
100%|██████████| 192/192 [00:00<00:00, 3082.81ex/s]



Updated AL datasets: train size = 192, unlabelled size = 9808, sum: 10000 


AL iteration   6/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9212560057640076: 100%|██████████| 6/6 [00:01<00:00,  3.97it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9188627004623413:  94%|█████████▍| 15/16 [00:02<00:00,  6.81it/s]
Eval mean loss: 1.9188627004623413: 100%|██████████| 16/16 [00:02<00:00,  6.98it/s]


Epoch   2
Training mean loss: 1.9212560057640076: 100%|██████████| 6/6 [00:03<00:00,  1.58it/s]
Training mean loss: 1.8921994765599568: 100%|██████████| 6/6 [00:01<00:00,  3.91it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.903777375817299: 100%|██████████| 16/16 [00:02<00:00,  6.92it/s] 
Eval mean loss: 1.903777375817299: 100%|██████████| 16/16 [00:02<00:00,  6.87it/s]


Epoch   3
Training mean loss: 1.8921994765599568: 100%|██████████| 6/6 [00:03<00:00,  1.55it/s]
Training mean loss: 1.872847318649292: 100%|██████████| 6/6 [00:01<00:00,  3.91it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.890290290117

100%|██████████| 10/10 [00:01<00:00,  5.40ba/s]
100%|██████████| 9776/9776 [00:03<00:00, 3005.06ex/s]
100%|██████████| 224/224 [00:00<00:00, 3410.73ex/s]



Updated AL datasets: train size = 224, unlabelled size = 9776, sum: 10000 


AL iteration   7/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9177392039980208: 100%|██████████| 7/7 [00:01<00:00,  4.06it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9153546243906021: 100%|██████████| 16/16 [00:02<00:00,  6.84it/s]
Eval mean loss: 1.9153546243906021: 100%|██████████| 16/16 [00:02<00:00,  6.83it/s]


Epoch   2
Training mean loss: 1.9177392039980208: 100%|██████████| 7/7 [00:04<00:00,  1.71it/s]
Training mean loss: 1.8987752028873988: 100%|██████████| 7/7 [00:01<00:00,  3.74it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8959802389144897:  94%|█████████▍| 15/16 [00:02<00:00,  6.21it/s]
Eval mean loss: 1.8959802389144897: 100%|██████████| 16/16 [00:02<00:00,  6.41it/s]


Epoch   3
Training mean loss: 1.8987752028873988: 100%|██████████| 7/7 [00:04<00:00,  1.60it/s]
Training mean loss: 1.8822299923215593: 100%|██████████| 7/7 [00:01<00:00,  3.60it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.88333261758

100%|██████████| 10/10 [00:01<00:00,  5.45ba/s]
100%|██████████| 9744/9744 [00:03<00:00, 3009.40ex/s]
100%|██████████| 256/256 [00:00<00:00, 3093.06ex/s]



Updated AL datasets: train size = 256, unlabelled size = 9744, sum: 10000 


AL iteration   8/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9464155435562134: 100%|██████████| 8/8 [00:02<00:00,  3.88it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9377797171473503: 100%|██████████| 16/16 [00:02<00:00,  6.05it/s]
Eval mean loss: 1.9377797171473503: 100%|██████████| 16/16 [00:02<00:00,  6.08it/s]


Epoch   2
Training mean loss: 1.9464155435562134: 100%|██████████| 8/8 [00:04<00:00,  1.71it/s]
Training mean loss: 1.9260640889406204: 100%|██████████| 8/8 [00:02<00:00,  3.64it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9144911468029022: 100%|██████████| 16/16 [00:02<00:00,  6.47it/s]
Eval mean loss: 1.9144911468029022: 100%|██████████| 16/16 [00:02<00:00,  6.37it/s]


Epoch   3
Training mean loss: 1.9260640889406204: 100%|██████████| 8/8 [00:04<00:00,  1.70it/s]
Training mean loss: 1.8914835900068283: 100%|██████████| 8/8 [00:02<00:00,  3.90it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.89478412270

100%|██████████| 10/10 [00:01<00:00,  5.14ba/s]
100%|██████████| 9712/9712 [00:03<00:00, 3063.89ex/s]
100%|██████████| 288/288 [00:00<00:00, 3111.66ex/s]



Updated AL datasets: train size = 288, unlabelled size = 9712, sum: 10000 


AL iteration   9/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9205819633271959: 100%|██████████| 9/9 [00:02<00:00,  3.91it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9188989400863647: 100%|██████████| 16/16 [00:02<00:00,  6.58it/s]
Eval mean loss: 1.9188989400863647: 100%|██████████| 16/16 [00:02<00:00,  6.53it/s]


Epoch   2
Training mean loss: 1.9205819633271959: 100%|██████████| 9/9 [00:04<00:00,  1.90it/s]
Training mean loss: 1.8879464202457004: 100%|██████████| 9/9 [00:02<00:00,  3.79it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9126798212528229: 100%|██████████| 16/16 [00:03<00:00,  5.23it/s]
Eval mean loss: 1.9126798212528229: 100%|██████████| 16/16 [00:03<00:00,  5.26it/s]


Epoch   3
Training mean loss: 1.8879464202457004: 100%|██████████| 9/9 [00:05<00:00,  1.66it/s]
Training mean loss: 1.8625552654266357: 100%|██████████| 9/9 [00:02<00:00,  3.81it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.89439445734

100%|██████████| 10/10 [00:01<00:00,  5.55ba/s]
100%|██████████| 9680/9680 [00:03<00:00, 3033.15ex/s]
100%|██████████| 320/320 [00:00<00:00, 3431.46ex/s]



Updated AL datasets: train size = 320, unlabelled size = 9680, sum: 10000 


AL iteration  10/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9450373291969298: 100%|██████████| 10/10 [00:02<00:00,  4.01it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9204617366194725:  94%|█████████▍| 15/16 [00:02<00:00,  6.66it/s]
Eval mean loss: 1.9204617366194725: 100%|██████████| 16/16 [00:02<00:00,  6.83it/s]


Epoch   2
Training mean loss: 1.9450373291969298: 100%|██████████| 10/10 [00:04<00:00,  2.07it/s]
Training mean loss: 1.9100677728652955: 100%|██████████| 10/10 [00:02<00:00,  3.86it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9051586166024208:  94%|█████████▍| 15/16 [00:02<00:00,  6.62it/s]
Eval mean loss: 1.9051586166024208: 100%|██████████| 16/16 [00:02<00:00,  6.79it/s]


Epoch   3
Training mean loss: 1.9100677728652955: 100%|██████████| 10/10 [00:04<00:00,  2.02it/s]
Training mean loss: 1.8914218425750733: 100%|██████████| 10/10 [00:02<00:00,  3.87it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8

100%|██████████| 10/10 [00:01<00:00,  5.47ba/s]
100%|██████████| 9648/9648 [00:03<00:00, 3119.02ex/s]
100%|██████████| 352/352 [00:00<00:00, 3451.05ex/s]



Updated AL datasets: train size = 352, unlabelled size = 9648, sum: 10000 


AL iteration  11/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9399024573239414: 100%|██████████| 11/11 [00:02<00:00,  4.08it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9531132727861404: 100%|██████████| 16/16 [00:02<00:00,  6.72it/s]
Eval mean loss: 1.9531132727861404: 100%|██████████| 16/16 [00:02<00:00,  6.73it/s]


Epoch   2
Training mean loss: 1.9399024573239414: 100%|██████████| 11/11 [00:05<00:00,  2.17it/s]
Training mean loss: 1.926098563454368: 100%|██████████| 11/11 [00:03<00:00,  3.64it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9412172511219978: 100%|██████████| 16/16 [00:02<00:00,  5.96it/s]
Eval mean loss: 1.9412172511219978: 100%|██████████| 16/16 [00:02<00:00,  5.96it/s]


Epoch   3
Training mean loss: 1.926098563454368: 100%|██████████| 11/11 [00:05<00:00,  1.93it/s]
Training mean loss: 1.8870601654052734: 100%|██████████| 11/11 [00:03<00:00,  3.49it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.92

100%|██████████| 10/10 [00:01<00:00,  5.88ba/s]
100%|██████████| 9616/9616 [00:02<00:00, 3241.79ex/s]
100%|██████████| 384/384 [00:00<00:00, 3528.50ex/s]



Updated AL datasets: train size = 384, unlabelled size = 9616, sum: 10000 


AL iteration  12/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.956448604663213: 100%|██████████| 12/12 [00:02<00:00,  4.02it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.928953379392624: 100%|██████████| 16/16 [00:02<00:00,  6.42it/s] 
Eval mean loss: 1.928953379392624: 100%|██████████| 16/16 [00:02<00:00,  6.40it/s]


Epoch   2
Training mean loss: 1.956448604663213: 100%|██████████| 12/12 [00:05<00:00,  2.19it/s]
Training mean loss: 1.9117964903513591: 100%|██████████| 12/12 [00:03<00:00,  3.62it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8923099115490913: 100%|██████████| 16/16 [00:02<00:00,  5.97it/s]
Eval mean loss: 1.8923099115490913: 100%|██████████| 16/16 [00:02<00:00,  5.90it/s]


Epoch   3
Training mean loss: 1.9117964903513591: 100%|██████████| 12/12 [00:06<00:00,  1.99it/s]
Training mean loss: 1.8461479445298512: 100%|██████████| 12/12 [00:03<00:00,  3.38it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.858

100%|██████████| 10/10 [00:01<00:00,  6.09ba/s]
100%|██████████| 9584/9584 [00:02<00:00, 3199.20ex/s]
100%|██████████| 416/416 [00:00<00:00, 3357.36ex/s]



Updated AL datasets: train size = 416, unlabelled size = 9584, sum: 10000 


AL iteration  13/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.929385863817655: 100%|██████████| 13/13 [00:03<00:00,  4.19it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9339825510978699:  94%|█████████▍| 15/16 [00:02<00:00,  6.50it/s]
Eval mean loss: 1.9339825510978699: 100%|██████████| 16/16 [00:02<00:00,  6.69it/s]


Epoch   2
Training mean loss: 1.929385863817655: 100%|██████████| 13/13 [00:05<00:00,  2.37it/s]
Training mean loss: 1.893907537827125: 100%|██████████| 13/13 [00:03<00:00,  3.48it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8943746834993362: 100%|██████████| 16/16 [00:02<00:00,  6.33it/s]
Eval mean loss: 1.8943746834993362: 100%|██████████| 16/16 [00:02<00:00,  6.24it/s]


Epoch   3
Training mean loss: 1.893907537827125: 100%|██████████| 13/13 [00:06<00:00,  2.08it/s]
Training mean loss: 1.8489335041779738: 100%|██████████| 13/13 [00:03<00:00,  3.64it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.830

100%|██████████| 10/10 [00:01<00:00,  6.16ba/s]
100%|██████████| 9552/9552 [00:02<00:00, 3310.92ex/s]
100%|██████████| 448/448 [00:00<00:00, 3421.37ex/s]



Updated AL datasets: train size = 448, unlabelled size = 9552, sum: 10000 


AL iteration  14/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.949265718460083: 100%|██████████| 14/14 [00:03<00:00,  4.18it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.976825788617134:  94%|█████████▍| 15/16 [00:02<00:00,  6.41it/s] 
Eval mean loss: 1.976825788617134: 100%|██████████| 16/16 [00:02<00:00,  6.60it/s]


Epoch   2
Training mean loss: 1.949265718460083: 100%|██████████| 14/14 [00:05<00:00,  2.42it/s]
Training mean loss: 1.903927709375109: 100%|██████████| 14/14 [00:03<00:00,  3.75it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.917906977236271: 100%|██████████| 16/16 [00:02<00:00,  5.72it/s] 
Eval mean loss: 1.917906977236271: 100%|██████████| 16/16 [00:02<00:00,  5.77it/s]


Epoch   3
Training mean loss: 1.903927709375109: 100%|██████████| 14/14 [00:06<00:00,  2.15it/s]
Training mean loss: 1.8537040267671858: 100%|██████████| 14/14 [00:04<00:00,  3.30it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.86335

100%|██████████| 10/10 [00:01<00:00,  5.60ba/s]
100%|██████████| 9520/9520 [00:04<00:00, 2231.90ex/s]
100%|██████████| 480/480 [00:00<00:00, 3127.24ex/s]



Updated AL datasets: train size = 480, unlabelled size = 9520, sum: 10000 


AL iteration  15/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9408115943272908: 100%|██████████| 15/15 [00:03<00:00,  3.85it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.927008517086506: 100%|██████████| 16/16 [00:02<00:00,  6.17it/s] 
Eval mean loss: 1.927008517086506: 100%|██████████| 16/16 [00:02<00:00,  6.12it/s]


Epoch   2
Training mean loss: 1.9408115943272908: 100%|██████████| 15/15 [00:06<00:00,  2.31it/s]
Training mean loss: 1.8868653694788615: 100%|██████████| 15/15 [00:04<00:00,  3.43it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8979755640029907: 100%|██████████| 16/16 [00:02<00:00,  6.00it/s]
Eval mean loss: 1.8979755640029907: 100%|██████████| 16/16 [00:02<00:00,  5.96it/s]


Epoch   3
Training mean loss: 1.8868653694788615: 100%|██████████| 15/15 [00:07<00:00,  2.13it/s]
Training mean loss: 1.8443925301233928: 100%|██████████| 15/15 [00:04<00:00,  3.47it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.87

100%|██████████| 10/10 [00:01<00:00,  6.04ba/s]
100%|██████████| 9488/9488 [00:02<00:00, 3347.13ex/s]
100%|██████████| 512/512 [00:00<00:00, 3379.68ex/s]



Updated AL datasets: train size = 512, unlabelled size = 9488, sum: 10000 


AL iteration  16/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9366025775671005: 100%|██████████| 16/16 [00:04<00:00,  3.87it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.945161059498787: 100%|██████████| 16/16 [00:02<00:00,  6.16it/s] 
Eval mean loss: 1.945161059498787: 100%|██████████| 16/16 [00:02<00:00,  6.08it/s]


Epoch   2
Training mean loss: 1.9366025775671005: 100%|██████████| 16/16 [00:06<00:00,  2.37it/s]
Training mean loss: 1.8857799172401428: 100%|██████████| 16/16 [00:05<00:00,  3.06it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.916857823729515: 100%|██████████| 16/16 [00:02<00:00,  5.99it/s] 
Eval mean loss: 1.916857823729515: 100%|██████████| 16/16 [00:02<00:00,  5.90it/s]


Epoch   3
Training mean loss: 1.8857799172401428: 100%|██████████| 16/16 [00:08<00:00,  1.97it/s]
Training mean loss: 1.8406940922141075: 100%|██████████| 16/16 [00:04<00:00,  3.47it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.880

100%|██████████| 10/10 [00:01<00:00,  6.09ba/s]
100%|██████████| 9456/9456 [00:03<00:00, 3099.30ex/s]
100%|██████████| 544/544 [00:00<00:00, 3369.55ex/s]



Updated AL datasets: train size = 544, unlabelled size = 9456, sum: 10000 


AL iteration  17/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9619506176780253: 100%|██████████| 17/17 [00:04<00:00,  4.01it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9049307256937027: 100%|██████████| 16/16 [00:02<00:00,  6.31it/s]
Eval mean loss: 1.9049307256937027: 100%|██████████| 16/16 [00:02<00:00,  6.27it/s]


Epoch   2
Training mean loss: 1.9619506176780253: 100%|██████████| 17/17 [00:06<00:00,  2.51it/s]
Training mean loss: 1.9016128568088306: 100%|██████████| 17/17 [00:04<00:00,  3.64it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.839160494506359: 100%|██████████| 16/16 [00:02<00:00,  6.10it/s] 
Eval mean loss: 1.839160494506359: 100%|██████████| 16/16 [00:02<00:00,  6.04it/s]


Epoch   3
Training mean loss: 1.9016128568088306: 100%|██████████| 17/17 [00:07<00:00,  2.32it/s]
Training mean loss: 1.856194306822384: 100%|██████████| 17/17 [00:04<00:00,  3.59it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.77

100%|██████████| 10/10 [00:01<00:00,  6.12ba/s]
100%|██████████| 9424/9424 [00:02<00:00, 3333.86ex/s]
100%|██████████| 576/576 [00:00<00:00, 3575.87ex/s]



Updated AL datasets: train size = 576, unlabelled size = 9424, sum: 10000 


AL iteration  18/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9521409670511882: 100%|██████████| 18/18 [00:04<00:00,  4.02it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8994583040475845: 100%|██████████| 16/16 [00:02<00:00,  6.46it/s]
Eval mean loss: 1.8994583040475845: 100%|██████████| 16/16 [00:02<00:00,  6.42it/s]


Epoch   2
Training mean loss: 1.9521409670511882: 100%|██████████| 18/18 [00:06<00:00,  2.59it/s]
Training mean loss: 1.9058416220876906: 100%|██████████| 18/18 [00:04<00:00,  3.60it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.890235498547554: 100%|██████████| 16/16 [00:02<00:00,  5.98it/s] 
Eval mean loss: 1.890235498547554: 100%|██████████| 16/16 [00:02<00:00,  5.92it/s]


Epoch   3
Training mean loss: 1.9058416220876906: 100%|██████████| 18/18 [00:07<00:00,  2.35it/s]
Training mean loss: 1.8616016970740423: 100%|██████████| 18/18 [00:05<00:00,  3.58it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.85

100%|██████████| 10/10 [00:01<00:00,  6.00ba/s]
100%|██████████| 9392/9392 [00:02<00:00, 3361.94ex/s]
100%|██████████| 608/608 [00:00<00:00, 3540.36ex/s]



Updated AL datasets: train size = 608, unlabelled size = 9392, sum: 10000 


AL iteration  19/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9406635196585404: 100%|██████████| 19/19 [00:04<00:00,  3.86it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.919901229441166: 100%|██████████| 16/16 [00:02<00:00,  6.39it/s] 
Eval mean loss: 1.919901229441166: 100%|██████████| 16/16 [00:02<00:00,  6.36it/s]


Epoch   2
Training mean loss: 1.9406635196585404: 100%|██████████| 19/19 [00:07<00:00,  2.57it/s]
Training mean loss: 1.8944886609127647: 100%|██████████| 19/19 [00:05<00:00,  3.59it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8790254592895508: 100%|██████████| 16/16 [00:02<00:00,  5.99it/s]
Eval mean loss: 1.8790254592895508: 100%|██████████| 16/16 [00:02<00:00,  5.95it/s]


Epoch   3
Training mean loss: 1.8944886609127647: 100%|██████████| 19/19 [00:07<00:00,  2.38it/s]
Training mean loss: 1.842972291143317: 100%|██████████| 19/19 [00:05<00:00,  3.54it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.84

100%|██████████| 10/10 [00:01<00:00,  5.82ba/s]
100%|██████████| 9360/9360 [00:02<00:00, 3294.87ex/s]
100%|██████████| 640/640 [00:00<00:00, 3463.54ex/s]



Updated AL datasets: train size = 640, unlabelled size = 9360, sum: 10000 


AL iteration  20/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.919357216358185: 100%|██████████| 20/20 [00:05<00:00,  3.71it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8953564018011093: 100%|██████████| 16/16 [00:02<00:00,  5.87it/s]
Eval mean loss: 1.8953564018011093: 100%|██████████| 16/16 [00:02<00:00,  5.84it/s]


Epoch   2
Training mean loss: 1.919357216358185: 100%|██████████| 20/20 [00:08<00:00,  2.48it/s]
Training mean loss: 1.8543145060539246: 100%|██████████| 20/20 [00:05<00:00,  3.41it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8418582081794739: 100%|██████████| 16/16 [00:02<00:00,  5.64it/s]
Eval mean loss: 1.8418582081794739: 100%|██████████| 16/16 [00:02<00:00,  5.55it/s]


Epoch   3
Training mean loss: 1.8543145060539246: 100%|██████████| 20/20 [00:08<00:00,  2.28it/s]
Training mean loss: 1.7964115202426911: 100%|██████████| 20/20 [00:06<00:00,  3.26it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.78

100%|██████████| 10/10 [00:01<00:00,  5.61ba/s]
100%|██████████| 9328/9328 [00:03<00:00, 2681.22ex/s]
100%|██████████| 672/672 [00:00<00:00, 3123.64ex/s]



Updated AL datasets: train size = 672, unlabelled size = 9328, sum: 10000 


AL iteration  21/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9239700862339564: 100%|██████████| 21/21 [00:07<00:00,  2.58it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8987674415111542: 100%|██████████| 16/16 [00:03<00:00,  4.77it/s]
Eval mean loss: 1.8987674415111542: 100%|██████████| 16/16 [00:03<00:00,  4.69it/s]


Epoch   2
Training mean loss: 1.9239700862339564: 100%|██████████| 21/21 [00:11<00:00,  1.88it/s]
Training mean loss: 1.8817743460337322: 100%|██████████| 21/21 [00:07<00:00,  2.95it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8513756692409515: 100%|██████████| 16/16 [00:03<00:00,  5.02it/s]
Eval mean loss: 1.8513756692409515: 100%|██████████| 16/16 [00:03<00:00,  4.95it/s]


Epoch   3
Training mean loss: 1.8817743460337322: 100%|██████████| 21/21 [00:10<00:00,  2.03it/s]
Training mean loss: 1.807324312982105: 100%|██████████| 21/21 [00:08<00:00,  2.51it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8

100%|██████████| 10/10 [00:01<00:00,  5.47ba/s]
100%|██████████| 9296/9296 [00:03<00:00, 2963.34ex/s]
100%|██████████| 704/704 [00:00<00:00, 3021.39ex/s]



Updated AL datasets: train size = 704, unlabelled size = 9296, sum: 10000 


AL iteration  22/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9215812520547346: 100%|██████████| 22/22 [00:06<00:00,  3.43it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9064511731266975: 100%|██████████| 16/16 [00:03<00:00,  4.95it/s]
Eval mean loss: 1.9064511731266975: 100%|██████████| 16/16 [00:03<00:00,  5.00it/s]


Epoch   2
Training mean loss: 1.9215812520547346: 100%|██████████| 22/22 [00:09<00:00,  2.31it/s]
Training mean loss: 1.8563367399302395: 100%|██████████| 22/22 [00:08<00:00,  2.78it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8742096424102783: 100%|██████████| 16/16 [00:03<00:00,  5.12it/s]
Eval mean loss: 1.8742096424102783: 100%|██████████| 16/16 [00:03<00:00,  5.11it/s]


Epoch   3
Training mean loss: 1.8563367399302395: 100%|██████████| 22/22 [00:11<00:00,  1.94it/s]
Training mean loss: 1.7861398566852917: 100%|██████████| 22/22 [00:07<00:00,  2.99it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7

100%|██████████| 10/10 [00:01<00:00,  5.40ba/s]
100%|██████████| 9264/9264 [00:03<00:00, 2937.10ex/s]
100%|██████████| 736/736 [00:00<00:00, 3163.71ex/s]



Updated AL datasets: train size = 736, unlabelled size = 9264, sum: 10000 


AL iteration  23/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9099978312202122: 100%|██████████| 23/23 [00:06<00:00,  3.48it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8951840475201607: 100%|██████████| 16/16 [00:03<00:00,  4.54it/s]
Eval mean loss: 1.8951840475201607: 100%|██████████| 16/16 [00:03<00:00,  4.63it/s]


Epoch   2
Training mean loss: 1.9099978312202122: 100%|██████████| 23/23 [00:09<00:00,  2.31it/s]
Training mean loss: 1.8225088378657466: 100%|██████████| 23/23 [00:08<00:00,  2.69it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.808168038725853: 100%|██████████| 16/16 [00:03<00:00,  5.08it/s] 
Eval mean loss: 1.808168038725853: 100%|██████████| 16/16 [00:03<00:00,  5.08it/s]


Epoch   3
Training mean loss: 1.8225088378657466: 100%|██████████| 23/23 [00:12<00:00,  1.91it/s]
Training mean loss: 1.742727429970451: 100%|██████████| 23/23 [00:07<00:00,  3.03it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.73

100%|██████████| 10/10 [00:01<00:00,  5.30ba/s]
100%|██████████| 9232/9232 [00:02<00:00, 3089.66ex/s]
100%|██████████| 768/768 [00:00<00:00, 3246.10ex/s]



Updated AL datasets: train size = 768, unlabelled size = 9232, sum: 10000 


AL iteration  24/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9058224856853485: 100%|██████████| 24/24 [00:06<00:00,  3.37it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.877868391573429: 100%|██████████| 16/16 [00:03<00:00,  5.11it/s] 
Eval mean loss: 1.877868391573429: 100%|██████████| 16/16 [00:03<00:00,  5.10it/s]


Epoch   2
Training mean loss: 1.9058224856853485: 100%|██████████| 24/24 [00:10<00:00,  2.37it/s]
Training mean loss: 1.8264886736869812: 100%|██████████| 24/24 [00:07<00:00,  3.10it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8102940320968628: 100%|██████████| 16/16 [00:03<00:00,  5.19it/s]
Eval mean loss: 1.8102940320968628: 100%|██████████| 16/16 [00:03<00:00,  5.17it/s]


Epoch   3
Training mean loss: 1.8264886736869812: 100%|██████████| 24/24 [00:10<00:00,  2.21it/s]
Training mean loss: 1.7336094031731288: 100%|██████████| 24/24 [00:07<00:00,  3.06it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.71

100%|██████████| 10/10 [00:01<00:00,  5.28ba/s]
100%|██████████| 9200/9200 [00:03<00:00, 2848.70ex/s]
100%|██████████| 800/800 [00:00<00:00, 3183.31ex/s]



Updated AL datasets: train size = 800, unlabelled size = 9200, sum: 10000 


AL iteration  25/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9203940105438233: 100%|██████████| 25/25 [00:07<00:00,  3.40it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8658234626054764: 100%|██████████| 16/16 [00:03<00:00,  5.04it/s]
Eval mean loss: 1.8658234626054764: 100%|██████████| 16/16 [00:03<00:00,  4.98it/s]


Epoch   2
Training mean loss: 1.9203940105438233: 100%|██████████| 25/25 [00:10<00:00,  2.40it/s]
Training mean loss: 1.8339196348190308: 100%|██████████| 25/25 [00:08<00:00,  2.86it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8221522197127342: 100%|██████████| 16/16 [00:03<00:00,  4.58it/s]
Eval mean loss: 1.8221522197127342: 100%|██████████| 16/16 [00:03<00:00,  4.57it/s]


Epoch   3
Training mean loss: 1.8339196348190308: 100%|██████████| 25/25 [00:12<00:00,  2.06it/s]
Training mean loss: 1.7641521549224854: 100%|██████████| 25/25 [00:08<00:00,  2.87it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7

100%|██████████| 10/10 [00:01<00:00,  5.55ba/s]
100%|██████████| 9168/9168 [00:03<00:00, 2967.97ex/s]
100%|██████████| 832/832 [00:00<00:00, 2997.76ex/s]



Updated AL datasets: train size = 832, unlabelled size = 9168, sum: 10000 


AL iteration  26/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9038928701327398: 100%|██████████| 26/26 [00:07<00:00,  3.35it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9181133136153221: 100%|██████████| 16/16 [00:03<00:00,  5.21it/s]
Eval mean loss: 1.9181133136153221: 100%|██████████| 16/16 [00:03<00:00,  5.18it/s]


Epoch   2
Training mean loss: 1.9038928701327398: 100%|██████████| 26/26 [00:10<00:00,  2.43it/s]
Training mean loss: 1.8347325508411114: 100%|██████████| 26/26 [00:08<00:00,  2.97it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8315921872854233: 100%|██████████| 16/16 [00:03<00:00,  3.96it/s]
Eval mean loss: 1.8315921872854233: 100%|██████████| 16/16 [00:03<00:00,  4.12it/s]


Epoch   3
Training mean loss: 1.8347325508411114: 100%|██████████| 26/26 [00:12<00:00,  2.08it/s]
Training mean loss: 1.752861357652224: 100%|██████████| 26/26 [00:09<00:00,  2.88it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7

100%|██████████| 10/10 [00:02<00:00,  4.10ba/s]
100%|██████████| 9136/9136 [00:04<00:00, 2220.81ex/s]
100%|██████████| 864/864 [00:00<00:00, 2201.17ex/s]



Updated AL datasets: train size = 864, unlabelled size = 9136, sum: 10000 


AL iteration  27/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9339228603574965: 100%|██████████| 27/27 [00:11<00:00,  2.55it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8872238993644714: 100%|██████████| 16/16 [00:03<00:00,  4.34it/s]
Eval mean loss: 1.8872238993644714: 100%|██████████| 16/16 [00:03<00:00,  4.19it/s]


Epoch   2
Training mean loss: 1.9339228603574965: 100%|██████████| 27/27 [00:14<00:00,  1.82it/s]
Training mean loss: 1.8837562490392614: 100%|██████████| 27/27 [00:09<00:00,  2.94it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8538880422711372: 100%|██████████| 16/16 [00:03<00:00,  4.44it/s]
Eval mean loss: 1.8538880422711372: 100%|██████████| 16/16 [00:03<00:00,  4.39it/s]


Epoch   3
Training mean loss: 1.8837562490392614: 100%|██████████| 27/27 [00:12<00:00,  2.10it/s]
Training mean loss: 1.8031203923402008: 100%|██████████| 27/27 [00:09<00:00,  2.97it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7

100%|██████████| 10/10 [00:01<00:00,  5.40ba/s]
100%|██████████| 9104/9104 [00:03<00:00, 3008.78ex/s]
100%|██████████| 896/896 [00:00<00:00, 3264.70ex/s]



Updated AL datasets: train size = 896, unlabelled size = 9104, sum: 10000 


AL iteration  28/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.915033791746412: 100%|██████████| 28/28 [00:08<00:00,  3.42it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8476508036255836: 100%|██████████| 16/16 [00:02<00:00,  5.45it/s]
Eval mean loss: 1.8476508036255836: 100%|██████████| 16/16 [00:02<00:00,  5.40it/s]


Epoch   2
Training mean loss: 1.915033791746412: 100%|██████████| 28/28 [00:10<00:00,  2.55it/s]
Training mean loss: 1.839085659810475: 100%|██████████| 28/28 [00:08<00:00,  3.20it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.785171516239643: 100%|██████████| 16/16 [00:03<00:00,  5.23it/s] 
Eval mean loss: 1.785171516239643: 100%|██████████| 16/16 [00:03<00:00,  5.19it/s]


Epoch   3
Training mean loss: 1.839085659810475: 100%|██████████| 28/28 [00:11<00:00,  2.36it/s]
Training mean loss: 1.7355124865259444: 100%|██████████| 28/28 [00:08<00:00,  3.24it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7263

100%|██████████| 10/10 [00:02<00:00,  4.93ba/s]
100%|██████████| 9072/9072 [00:03<00:00, 2824.60ex/s]
100%|██████████| 928/928 [00:00<00:00, 3004.54ex/s]



Updated AL datasets: train size = 928, unlabelled size = 9072, sum: 10000 


AL iteration  29/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9285580413094883: 100%|██████████| 29/29 [00:09<00:00,  2.80it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8787869289517403: 100%|██████████| 16/16 [00:04<00:00,  3.11it/s]
Eval mean loss: 1.8787869289517403: 100%|██████████| 16/16 [00:04<00:00,  3.25it/s]


Epoch   2
Training mean loss: 1.9285580413094883: 100%|██████████| 29/29 [00:14<00:00,  1.95it/s]
Training mean loss: 1.8430202870533383: 100%|██████████| 29/29 [00:10<00:00,  2.67it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8217025324702263: 100%|██████████| 16/16 [00:03<00:00,  4.96it/s]
Eval mean loss: 1.8217025324702263: 100%|██████████| 16/16 [00:03<00:00,  4.91it/s]


Epoch   3
Training mean loss: 1.8430202870533383: 100%|██████████| 29/29 [00:14<00:00,  2.05it/s]
Training mean loss: 1.742256102890804: 100%|██████████| 29/29 [00:11<00:00,  2.51it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7

100%|██████████| 10/10 [00:01<00:00,  5.73ba/s]
100%|██████████| 9040/9040 [00:02<00:00, 3041.44ex/s]
100%|██████████| 960/960 [00:00<00:00, 3265.52ex/s]



Updated AL datasets: train size = 960, unlabelled size = 9040, sum: 10000 


AL iteration  30/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.919098138809204: 100%|██████████| 30/30 [00:09<00:00,  3.27it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8502372279763222: 100%|██████████| 16/16 [00:03<00:00,  5.20it/s]
Eval mean loss: 1.8502372279763222: 100%|██████████| 16/16 [00:03<00:00,  5.17it/s]


Epoch   2
Training mean loss: 1.919098138809204: 100%|██████████| 30/30 [00:12<00:00,  2.46it/s]
Training mean loss: 1.8277937332789103: 100%|██████████| 30/30 [00:09<00:00,  3.19it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7802342772483826: 100%|██████████| 16/16 [00:03<00:00,  5.09it/s]
Eval mean loss: 1.7802342772483826: 100%|██████████| 16/16 [00:03<00:00,  5.09it/s]


Epoch   3
Training mean loss: 1.8277937332789103: 100%|██████████| 30/30 [00:12<00:00,  2.39it/s]
Training mean loss: 1.7315956910451253: 100%|██████████| 30/30 [00:09<00:00,  3.10it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.68

100%|██████████| 10/10 [00:01<00:00,  5.71ba/s]
100%|██████████| 9008/9008 [00:02<00:00, 3101.46ex/s]
100%|██████████| 992/992 [00:00<00:00, 3317.76ex/s]



Updated AL datasets: train size = 992, unlabelled size = 9008, sum: 10000 


AL iteration  31/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9197309940092024: 100%|██████████| 31/31 [00:08<00:00,  3.48it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9472358152270317: 100%|██████████| 16/16 [00:02<00:00,  5.45it/s]
Eval mean loss: 1.9472358152270317: 100%|██████████| 16/16 [00:02<00:00,  5.42it/s]


Epoch   2
Training mean loss: 1.9197309940092024: 100%|██████████| 31/31 [00:11<00:00,  2.65it/s]
Training mean loss: 1.846867818986216: 100%|██████████| 31/31 [00:09<00:00,  3.27it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8358825668692589: 100%|██████████| 16/16 [00:02<00:00,  5.49it/s]
Eval mean loss: 1.8358825668692589: 100%|██████████| 16/16 [00:02<00:00,  5.44it/s]


Epoch   3
Training mean loss: 1.846867818986216: 100%|██████████| 31/31 [00:12<00:00,  2.50it/s]
Training mean loss: 1.7451713585084485: 100%|██████████| 31/31 [00:09<00:00,  3.22it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.74

100%|██████████| 10/10 [00:01<00:00,  5.88ba/s]
100%|██████████| 8976/8976 [00:02<00:00, 3008.93ex/s]
100%|██████████| 1024/1024 [00:00<00:00, 3212.67ex/s]



Updated AL datasets: train size = 1024, unlabelled size = 8976, sum: 10000 


AL iteration  32/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.941043883562088: 100%|██████████| 32/32 [00:08<00:00,  3.50it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9438636228442192: 100%|██████████| 16/16 [00:02<00:00,  5.51it/s]
Eval mean loss: 1.9438636228442192: 100%|██████████| 16/16 [00:02<00:00,  5.46it/s]


Epoch   2
Training mean loss: 1.941043883562088: 100%|██████████| 32/32 [00:11<00:00,  2.70it/s]
Training mean loss: 1.8748849220573902: 100%|██████████| 32/32 [00:09<00:00,  3.28it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8763578161597252: 100%|██████████| 16/16 [00:03<00:00,  5.17it/s]
Eval mean loss: 1.8763578161597252: 100%|██████████| 16/16 [00:03<00:00,  5.13it/s]


Epoch   3
Training mean loss: 1.8748849220573902: 100%|██████████| 32/32 [00:12<00:00,  2.47it/s]
Training mean loss: 1.7787422947585583: 100%|██████████| 32/32 [00:09<00:00,  3.24it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.77

100%|██████████| 9/9 [00:01<00:00,  5.21ba/s]
100%|██████████| 8944/8944 [00:02<00:00, 3233.53ex/s]
100%|██████████| 1056/1056 [00:00<00:00, 3212.27ex/s]



Updated AL datasets: train size = 1056, unlabelled size = 8944, sum: 10000 


AL iteration  33/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9110220634576045: 100%|██████████| 33/33 [00:09<00:00,  3.48it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8870662674307823: 100%|██████████| 16/16 [00:02<00:00,  5.49it/s]
Eval mean loss: 1.8870662674307823: 100%|██████████| 16/16 [00:02<00:00,  5.45it/s]


Epoch   2
Training mean loss: 1.9110220634576045: 100%|██████████| 33/33 [00:12<00:00,  2.72it/s]
Training mean loss: 1.8006477572701194: 100%|██████████| 33/33 [00:09<00:00,  3.34it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.796244852244854: 100%|██████████| 16/16 [00:03<00:00,  5.36it/s] 
Eval mean loss: 1.796244852244854: 100%|██████████| 16/16 [00:03<00:00,  5.31it/s]


Epoch   3
Training mean loss: 1.8006477572701194: 100%|██████████| 33/33 [00:12<00:00,  2.56it/s]
Training mean loss: 1.7055787967913079: 100%|██████████| 33/33 [00:09<00:00,  3.34it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.70

100%|██████████| 9/9 [00:01<00:00,  5.30ba/s]
100%|██████████| 8912/8912 [00:02<00:00, 3241.84ex/s]
100%|██████████| 1088/1088 [00:00<00:00, 3215.41ex/s]



Updated AL datasets: train size = 1088, unlabelled size = 8912, sum: 10000 


AL iteration  34/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.896860872997957: 100%|██████████| 34/34 [00:09<00:00,  3.51it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8944601267576218: 100%|██████████| 16/16 [00:02<00:00,  5.55it/s]
Eval mean loss: 1.8944601267576218: 100%|██████████| 16/16 [00:02<00:00,  5.53it/s]


Epoch   2
Training mean loss: 1.896860872997957: 100%|██████████| 34/34 [00:12<00:00,  2.75it/s]
Training mean loss: 1.789033269180971: 100%|██████████| 34/34 [00:10<00:00,  3.35it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7828496843576431: 100%|██████████| 16/16 [00:02<00:00,  5.49it/s]
Eval mean loss: 1.7828496843576431: 100%|██████████| 16/16 [00:02<00:00,  5.43it/s]


Epoch   3
Training mean loss: 1.789033269180971: 100%|██████████| 34/34 [00:13<00:00,  2.59it/s]
Training mean loss: 1.6538739204406738: 100%|██████████| 34/34 [00:10<00:00,  3.33it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.675

100%|██████████| 9/9 [00:01<00:00,  4.90ba/s]
100%|██████████| 8880/8880 [00:02<00:00, 3195.53ex/s]
100%|██████████| 1120/1120 [00:00<00:00, 3224.23ex/s]



Updated AL datasets: train size = 1120, unlabelled size = 8880, sum: 10000 


AL iteration  35/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9089647769927978: 100%|██████████| 35/35 [00:09<00:00,  3.44it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.853128731250763: 100%|██████████| 16/16 [00:02<00:00,  5.51it/s] 
Eval mean loss: 1.853128731250763: 100%|██████████| 16/16 [00:02<00:00,  5.46it/s]


Epoch   2
Training mean loss: 1.9089647769927978: 100%|██████████| 35/35 [00:12<00:00,  2.73it/s]
Training mean loss: 1.810372359412057: 100%|██████████| 35/35 [00:10<00:00,  3.33it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.721199445426464: 100%|██████████| 16/16 [00:03<00:00,  5.32it/s] 
Eval mean loss: 1.721199445426464: 100%|██████████| 16/16 [00:03<00:00,  5.33it/s]


Epoch   3
Training mean loss: 1.810372359412057: 100%|██████████| 35/35 [00:13<00:00,  2.60it/s]
Training mean loss: 1.6681345292500087: 100%|██████████| 35/35 [00:10<00:00,  3.23it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5859

100%|██████████| 9/9 [00:01<00:00,  5.58ba/s]
100%|██████████| 8848/8848 [00:02<00:00, 2968.36ex/s]
100%|██████████| 1152/1152 [00:00<00:00, 2752.83ex/s]



Updated AL datasets: train size = 1152, unlabelled size = 8848, sum: 10000 


AL iteration  36/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9133381843566895: 100%|██████████| 36/36 [00:10<00:00,  3.26it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8520342707633972: 100%|██████████| 16/16 [00:03<00:00,  5.36it/s]
Eval mean loss: 1.8520342707633972: 100%|██████████| 16/16 [00:03<00:00,  5.33it/s]


Epoch   2
Training mean loss: 1.9133381843566895: 100%|██████████| 36/36 [00:13<00:00,  2.57it/s]
Training mean loss: 1.8091103070312076: 100%|██████████| 36/36 [00:12<00:00,  2.76it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7554801180958748: 100%|██████████| 16/16 [00:03<00:00,  4.12it/s]
Eval mean loss: 1.7554801180958748: 100%|██████████| 16/16 [00:03<00:00,  4.14it/s]


Epoch   3
Training mean loss: 1.8091103070312076: 100%|██████████| 36/36 [00:16<00:00,  2.22it/s]
Training mean loss: 1.6575589776039124: 100%|██████████| 36/36 [00:13<00:00,  2.69it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6

100%|██████████| 9/9 [00:01<00:00,  5.13ba/s]
100%|██████████| 8816/8816 [00:02<00:00, 3085.01ex/s]
100%|██████████| 1184/1184 [00:00<00:00, 2943.69ex/s]



Updated AL datasets: train size = 1184, unlabelled size = 8816, sum: 10000 


AL iteration  37/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.92162663227803: 100%|██████████| 37/37 [00:10<00:00,  3.34it/s]  

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.904245287179947: 100%|██████████| 16/16 [00:03<00:00,  5.21it/s] 
Eval mean loss: 1.904245287179947: 100%|██████████| 16/16 [00:03<00:00,  5.14it/s]


Epoch   2
Training mean loss: 1.92162663227803: 100%|██████████| 37/37 [00:13<00:00,  2.66it/s]
Training mean loss: 1.820349870501338: 100%|██████████| 37/37 [00:12<00:00,  3.03it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.775683306157589: 100%|██████████| 16/16 [00:02<00:00,  5.48it/s] 
Eval mean loss: 1.775683306157589: 100%|██████████| 16/16 [00:02<00:00,  5.42it/s]


Epoch   3
Training mean loss: 1.820349870501338: 100%|██████████| 37/37 [00:15<00:00,  2.37it/s]
Training mean loss: 1.672164424045666: 100%|██████████| 37/37 [00:12<00:00,  2.93it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.640869

100%|██████████| 9/9 [00:01<00:00,  5.16ba/s]
100%|██████████| 8784/8784 [00:02<00:00, 3140.87ex/s]
100%|██████████| 1216/1216 [00:00<00:00, 3075.52ex/s]



Updated AL datasets: train size = 1216, unlabelled size = 8784, sum: 10000 


AL iteration  38/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9278350786158913: 100%|██████████| 38/38 [00:11<00:00,  3.34it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.867920957505703: 100%|██████████| 16/16 [00:03<00:00,  5.32it/s] 
Eval mean loss: 1.867920957505703: 100%|██████████| 16/16 [00:03<00:00,  5.29it/s]


Epoch   2
Training mean loss: 1.9278350786158913: 100%|██████████| 38/38 [00:14<00:00,  2.68it/s]
Training mean loss: 1.8286557354425128: 100%|██████████| 38/38 [00:12<00:00,  3.14it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7474044188857079: 100%|██████████| 16/16 [00:03<00:00,  5.32it/s]
Eval mean loss: 1.7474044188857079: 100%|██████████| 16/16 [00:03<00:00,  5.27it/s]


Epoch   3
Training mean loss: 1.8286557354425128: 100%|██████████| 38/38 [00:15<00:00,  2.51it/s]
Training mean loss: 1.7042201161384583: 100%|██████████| 38/38 [00:12<00:00,  3.17it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.63

100%|██████████| 9/9 [00:01<00:00,  5.23ba/s]
100%|██████████| 8752/8752 [00:02<00:00, 3113.11ex/s]
100%|██████████| 1248/1248 [00:00<00:00, 2845.28ex/s]



Updated AL datasets: train size = 1248, unlabelled size = 8752, sum: 10000 


AL iteration  39/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9050076405207317: 100%|██████████| 39/39 [00:11<00:00,  3.40it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.881641186773777: 100%|██████████| 16/16 [00:02<00:00,  5.38it/s] 
Eval mean loss: 1.881641186773777: 100%|██████████| 16/16 [00:02<00:00,  5.35it/s]


Epoch   2
Training mean loss: 1.9050076405207317: 100%|██████████| 39/39 [00:14<00:00,  2.76it/s]
Training mean loss: 1.7829940227361827: 100%|██████████| 39/39 [00:11<00:00,  3.28it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7258931249380112: 100%|██████████| 16/16 [00:02<00:00,  5.52it/s]
Eval mean loss: 1.7258931249380112: 100%|██████████| 16/16 [00:02<00:00,  5.49it/s]


Epoch   3
Training mean loss: 1.7829940227361827: 100%|██████████| 39/39 [00:14<00:00,  2.63it/s]
Training mean loss: 1.6393893498640795: 100%|██████████| 39/39 [00:11<00:00,  3.28it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.58

100%|██████████| 9/9 [00:01<00:00,  5.61ba/s]
100%|██████████| 8720/8720 [00:02<00:00, 3016.25ex/s]
100%|██████████| 1280/1280 [00:00<00:00, 3229.34ex/s]



Updated AL datasets: train size = 1280, unlabelled size = 8720, sum: 10000 


AL iteration  40/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9018277436494828: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.903024509549141: 100%|██████████| 16/16 [00:02<00:00,  5.57it/s] 
Eval mean loss: 1.903024509549141: 100%|██████████| 16/16 [00:02<00:00,  5.53it/s]


Epoch   2
Training mean loss: 1.9018277436494828: 100%|██████████| 40/40 [00:14<00:00,  2.82it/s]
Training mean loss: 1.8012755274772645: 100%|██████████| 40/40 [00:12<00:00,  3.34it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.759307086467743: 100%|██████████| 16/16 [00:02<00:00,  5.54it/s] 
Eval mean loss: 1.759307086467743: 100%|██████████| 16/16 [00:02<00:00,  5.52it/s]


Epoch   3
Training mean loss: 1.8012755274772645: 100%|██████████| 40/40 [00:14<00:00,  2.68it/s]
Training mean loss: 1.6547959953546525: 100%|██████████| 40/40 [00:12<00:00,  3.34it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.642

100%|██████████| 9/9 [00:01<00:00,  5.55ba/s]
100%|██████████| 8688/8688 [00:02<00:00, 3226.73ex/s]
100%|██████████| 1312/1312 [00:00<00:00, 3255.90ex/s]



Updated AL datasets: train size = 1312, unlabelled size = 8688, sum: 10000 


AL iteration  41/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9183831592885459: 100%|██████████| 41/41 [00:11<00:00,  3.51it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8709742724895477: 100%|██████████| 16/16 [00:02<00:00,  5.59it/s]
Eval mean loss: 1.8709742724895477: 100%|██████████| 16/16 [00:02<00:00,  5.54it/s]


Epoch   2
Training mean loss: 1.9183831592885459: 100%|██████████| 41/41 [00:14<00:00,  2.87it/s]
Training mean loss: 1.802161554010903: 100%|██████████| 41/41 [00:12<00:00,  3.39it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7065984606742859: 100%|██████████| 16/16 [00:02<00:00,  5.57it/s]
Eval mean loss: 1.7065984606742859: 100%|██████████| 16/16 [00:02<00:00,  5.52it/s]


Epoch   3
Training mean loss: 1.802161554010903: 100%|██████████| 41/41 [00:15<00:00,  2.71it/s]
Training mean loss: 1.651031543568867: 100%|██████████| 41/41 [00:12<00:00,  3.39it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.59

100%|██████████| 9/9 [00:01<00:00,  5.48ba/s]
100%|██████████| 8656/8656 [00:02<00:00, 3227.82ex/s]
100%|██████████| 1344/1344 [00:00<00:00, 3290.99ex/s]



Updated AL datasets: train size = 1344, unlabelled size = 8656, sum: 10000 


AL iteration  42/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9265788311050052: 100%|██████████| 42/42 [00:11<00:00,  3.52it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.9078754112124443: 100%|██████████| 16/16 [00:02<00:00,  5.50it/s]
Eval mean loss: 1.9078754112124443: 100%|██████████| 16/16 [00:02<00:00,  5.50it/s]


Epoch   2
Training mean loss: 1.9265788311050052: 100%|██████████| 42/42 [00:14<00:00,  2.89it/s]
Training mean loss: 1.8334808746973674: 100%|██████████| 42/42 [00:12<00:00,  3.42it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7876886427402496: 100%|██████████| 16/16 [00:02<00:00,  5.63it/s]
Eval mean loss: 1.7876886427402496: 100%|██████████| 16/16 [00:02<00:00,  5.57it/s]


Epoch   3
Training mean loss: 1.8334808746973674: 100%|██████████| 42/42 [00:15<00:00,  2.76it/s]
Training mean loss: 1.7105418159848167: 100%|██████████| 42/42 [00:12<00:00,  3.43it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6

100%|██████████| 9/9 [00:01<00:00,  5.90ba/s]
100%|██████████| 8624/8624 [00:02<00:00, 3397.81ex/s]
100%|██████████| 1376/1376 [00:00<00:00, 3269.71ex/s]



Updated AL datasets: train size = 1376, unlabelled size = 8624, sum: 10000 


AL iteration  43/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9166295472965684: 100%|██████████| 43/43 [00:11<00:00,  3.62it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.826769433915615: 100%|██████████| 16/16 [00:02<00:00,  5.90it/s] 
Eval mean loss: 1.826769433915615: 100%|██████████| 16/16 [00:02<00:00,  5.86it/s]


Epoch   2
Training mean loss: 1.9166295472965684: 100%|██████████| 43/43 [00:14<00:00,  3.00it/s]
Training mean loss: 1.8059569985367532: 100%|██████████| 43/43 [00:12<00:00,  3.33it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7004890367388725: 100%|██████████| 16/16 [00:02<00:00,  5.44it/s]
Eval mean loss: 1.7004890367388725: 100%|██████████| 16/16 [00:02<00:00,  5.37it/s]


Epoch   3
Training mean loss: 1.8059569985367532: 100%|██████████| 43/43 [00:15<00:00,  2.75it/s]
Training mean loss: 1.6610705353492914: 100%|██████████| 43/43 [00:12<00:00,  3.54it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.57

100%|██████████| 9/9 [00:01<00:00,  6.04ba/s]
100%|██████████| 8592/8592 [00:02<00:00, 3235.56ex/s]
100%|██████████| 1408/1408 [00:00<00:00, 3435.37ex/s]



Updated AL datasets: train size = 1408, unlabelled size = 8592, sum: 10000 


AL iteration  44/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9085309532555668: 100%|██████████| 44/44 [00:11<00:00,  3.72it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8383569419384003: 100%|██████████| 16/16 [00:02<00:00,  6.01it/s]
Eval mean loss: 1.8383569419384003: 100%|██████████| 16/16 [00:02<00:00,  5.94it/s]


Epoch   2
Training mean loss: 1.9085309532555668: 100%|██████████| 44/44 [00:14<00:00,  3.09it/s]
Training mean loss: 1.7617259242317893: 100%|██████████| 44/44 [00:12<00:00,  3.55it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6957696676254272: 100%|██████████| 16/16 [00:02<00:00,  5.99it/s]
Eval mean loss: 1.6957696676254272: 100%|██████████| 16/16 [00:02<00:00,  5.95it/s]


Epoch   3
Training mean loss: 1.7617259242317893: 100%|██████████| 44/44 [00:15<00:00,  2.92it/s]
Training mean loss: 1.6089009940624237: 100%|██████████| 44/44 [00:12<00:00,  3.55it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5

100%|██████████| 9/9 [00:01<00:00,  5.88ba/s]
100%|██████████| 8560/8560 [00:02<00:00, 3391.12ex/s]
100%|██████████| 1440/1440 [00:00<00:00, 3466.86ex/s]



Updated AL datasets: train size = 1440, unlabelled size = 8560, sum: 10000 


AL iteration  45/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.880365326669481: 100%|██████████| 45/45 [00:12<00:00,  3.64it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8726669177412987: 100%|██████████| 16/16 [00:02<00:00,  5.98it/s]
Eval mean loss: 1.8726669177412987: 100%|██████████| 16/16 [00:02<00:00,  5.92it/s]


Epoch   2
Training mean loss: 1.880365326669481: 100%|██████████| 45/45 [00:14<00:00,  3.05it/s]
Training mean loss: 1.7504318131340875: 100%|██████████| 45/45 [00:12<00:00,  3.55it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.73284462839365: 100%|██████████| 16/16 [00:02<00:00,  5.96it/s]  
Eval mean loss: 1.73284462839365: 100%|██████████| 16/16 [00:02<00:00,  5.88it/s]


Epoch   3
Training mean loss: 1.7504318131340875: 100%|██████████| 45/45 [00:15<00:00,  2.91it/s]
Training mean loss: 1.5889264424641927: 100%|██████████| 45/45 [00:12<00:00,  3.55it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5733

100%|██████████| 9/9 [00:01<00:00,  5.97ba/s]
100%|██████████| 8528/8528 [00:02<00:00, 3425.34ex/s]
100%|██████████| 1472/1472 [00:00<00:00, 3409.08ex/s]



Updated AL datasets: train size = 1472, unlabelled size = 8528, sum: 10000 


AL iteration  46/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8993429178776948: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8242539167404175: 100%|██████████| 16/16 [00:02<00:00,  6.00it/s]
Eval mean loss: 1.8242539167404175: 100%|██████████| 16/16 [00:02<00:00,  5.95it/s]


Epoch   2
Training mean loss: 1.8993429178776948: 100%|██████████| 46/46 [00:14<00:00,  3.08it/s]
Training mean loss: 1.765443488307621: 100%|██████████| 46/46 [00:12<00:00,  3.56it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7200799211859703: 100%|██████████| 16/16 [00:02<00:00,  6.00it/s]
Eval mean loss: 1.7200799211859703: 100%|██████████| 16/16 [00:02<00:00,  5.93it/s]


Epoch   3
Training mean loss: 1.765443488307621: 100%|██████████| 46/46 [00:15<00:00,  2.94it/s]
Training mean loss: 1.6141714168631511: 100%|██████████| 46/46 [00:12<00:00,  3.55it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.56

100%|██████████| 9/9 [00:01<00:00,  6.09ba/s]
100%|██████████| 8496/8496 [00:02<00:00, 3413.73ex/s]
100%|██████████| 1504/1504 [00:00<00:00, 3425.52ex/s]



Updated AL datasets: train size = 1504, unlabelled size = 8496, sum: 10000 


AL iteration  47/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8960535678457706: 100%|██████████| 47/47 [00:12<00:00,  3.68it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8423478156328201: 100%|██████████| 16/16 [00:02<00:00,  6.01it/s]
Eval mean loss: 1.8423478156328201: 100%|██████████| 16/16 [00:02<00:00,  5.96it/s]


Epoch   2
Training mean loss: 1.8960535678457706: 100%|██████████| 47/47 [00:15<00:00,  3.12it/s]
Training mean loss: 1.7702551471426131: 100%|██████████| 47/47 [00:13<00:00,  3.55it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7106670215725899: 100%|██████████| 16/16 [00:02<00:00,  6.03it/s]
Eval mean loss: 1.7106670215725899: 100%|██████████| 16/16 [00:02<00:00,  5.97it/s]


Epoch   3
Training mean loss: 1.7702551471426131: 100%|██████████| 47/47 [00:15<00:00,  2.95it/s]
Training mean loss: 1.6131260648686836: 100%|██████████| 47/47 [00:13<00:00,  3.57it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5

100%|██████████| 9/9 [00:01<00:00,  6.20ba/s]
100%|██████████| 8464/8464 [00:02<00:00, 3235.53ex/s]
100%|██████████| 1536/1536 [00:00<00:00, 3434.21ex/s]



Updated AL datasets: train size = 1536, unlabelled size = 8464, sum: 10000 


AL iteration  48/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8940538465976715: 100%|██████████| 48/48 [00:12<00:00,  3.69it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8478355705738068: 100%|██████████| 16/16 [00:02<00:00,  6.10it/s]
Eval mean loss: 1.8478355705738068: 100%|██████████| 16/16 [00:02<00:00,  6.07it/s]


Epoch   2
Training mean loss: 1.8940538465976715: 100%|██████████| 48/48 [00:15<00:00,  3.15it/s]
Training mean loss: 1.7489435200889905: 100%|██████████| 48/48 [00:13<00:00,  3.57it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6986618041992188: 100%|██████████| 16/16 [00:02<00:00,  5.92it/s]
Eval mean loss: 1.6986618041992188: 100%|██████████| 16/16 [00:02<00:00,  5.93it/s]


Epoch   3
Training mean loss: 1.7489435200889905: 100%|██████████| 48/48 [00:16<00:00,  2.97it/s]
Training mean loss: 1.5907014931241672: 100%|██████████| 48/48 [00:13<00:00,  3.57it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5

100%|██████████| 9/9 [00:01<00:00,  6.10ba/s]
100%|██████████| 8432/8432 [00:02<00:00, 3384.20ex/s]
100%|██████████| 1568/1568 [00:00<00:00, 3470.28ex/s]



Updated AL datasets: train size = 1568, unlabelled size = 8432, sum: 10000 


AL iteration  49/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.925371637149733: 100%|██████████| 49/49 [00:13<00:00,  3.64it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8593071475625038: 100%|██████████| 16/16 [00:02<00:00,  6.05it/s]
Eval mean loss: 1.8593071475625038: 100%|██████████| 16/16 [00:02<00:00,  6.02it/s]


Epoch   2
Training mean loss: 1.925371637149733: 100%|██████████| 49/49 [00:15<00:00,  3.12it/s]
Training mean loss: 1.7915527698945026: 100%|██████████| 49/49 [00:13<00:00,  3.58it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7562273368239403: 100%|██████████| 16/16 [00:02<00:00,  6.07it/s]
Eval mean loss: 1.7562273368239403: 100%|██████████| 16/16 [00:02<00:00,  6.02it/s]


Epoch   3
Training mean loss: 1.7915527698945026: 100%|██████████| 49/49 [00:16<00:00,  2.99it/s]
Training mean loss: 1.6499535210278569: 100%|██████████| 49/49 [00:13<00:00,  3.58it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.60

100%|██████████| 9/9 [00:01<00:00,  6.34ba/s]
100%|██████████| 8400/8400 [00:02<00:00, 3410.33ex/s]
100%|██████████| 1600/1600 [00:00<00:00, 3435.65ex/s]



Updated AL datasets: train size = 1600, unlabelled size = 8400, sum: 10000 


AL iteration  50/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.873131754398346: 100%|██████████| 50/50 [00:13<00:00,  3.70it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.815676011145115: 100%|██████████| 16/16 [00:02<00:00,  6.12it/s] 
Eval mean loss: 1.815676011145115: 100%|██████████| 16/16 [00:02<00:00,  6.04it/s]


Epoch   2
Training mean loss: 1.873131754398346: 100%|██████████| 50/50 [00:15<00:00,  3.15it/s]
Training mean loss: 1.7079169464111328: 100%|██████████| 50/50 [00:13<00:00,  3.59it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6794340834021568: 100%|██████████| 16/16 [00:02<00:00,  6.05it/s]
Eval mean loss: 1.6794340834021568: 100%|██████████| 16/16 [00:02<00:00,  6.01it/s]


Epoch   3
Training mean loss: 1.7079169464111328: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s]
Training mean loss: 1.5498944449424743: 100%|██████████| 50/50 [00:14<00:00,  3.58it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.497

100%|██████████| 9/9 [00:01<00:00,  6.21ba/s]
100%|██████████| 8368/8368 [00:02<00:00, 3395.64ex/s]
100%|██████████| 1632/1632 [00:00<00:00, 3404.65ex/s]



Updated AL datasets: train size = 1632, unlabelled size = 8368, sum: 10000 


AL iteration  51/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8983858753653133: 100%|██████████| 51/51 [00:13<00:00,  3.66it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7995286881923676: 100%|██████████| 16/16 [00:02<00:00,  5.95it/s]
Eval mean loss: 1.7995286881923676: 100%|██████████| 16/16 [00:02<00:00,  5.90it/s]


Epoch   2
Training mean loss: 1.8983858753653133: 100%|██████████| 51/51 [00:16<00:00,  3.13it/s]
Training mean loss: 1.7354315472584145: 100%|██████████| 51/51 [00:14<00:00,  3.50it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.631982073187828: 100%|██████████| 16/16 [00:02<00:00,  5.86it/s] 
Eval mean loss: 1.631982073187828: 100%|██████████| 16/16 [00:02<00:00,  5.80it/s]


Epoch   3
Training mean loss: 1.7354315472584145: 100%|██████████| 51/51 [00:17<00:00,  2.97it/s]
Training mean loss: 1.55239204565684: 100%|██████████| 51/51 [00:14<00:00,  3.55it/s]  

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.47

100%|██████████| 9/9 [00:01<00:00,  6.17ba/s]
100%|██████████| 8336/8336 [00:02<00:00, 3223.87ex/s]
100%|██████████| 1664/1664 [00:00<00:00, 3326.97ex/s]



Updated AL datasets: train size = 1664, unlabelled size = 8336, sum: 10000 


AL iteration  52/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9175331042363093: 100%|██████████| 52/52 [00:13<00:00,  3.66it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8959768563508987: 100%|██████████| 16/16 [00:02<00:00,  6.06it/s]
Eval mean loss: 1.8959768563508987: 100%|██████████| 16/16 [00:02<00:00,  6.03it/s]


Epoch   2
Training mean loss: 1.9175331042363093: 100%|██████████| 52/52 [00:16<00:00,  3.16it/s]
Training mean loss: 1.77865476333178: 100%|██████████| 52/52 [00:14<00:00,  3.58it/s]  

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7466548085212708: 100%|██████████| 16/16 [00:02<00:00,  6.08it/s]
Eval mean loss: 1.7466548085212708: 100%|██████████| 16/16 [00:02<00:00,  6.01it/s]


Epoch   3
Training mean loss: 1.77865476333178: 100%|██████████| 52/52 [00:17<00:00,  3.03it/s]
Training mean loss: 1.616149026613969: 100%|██████████| 52/52 [00:14<00:00,  3.56it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.594

100%|██████████| 9/9 [00:01<00:00,  6.37ba/s]
100%|██████████| 8304/8304 [00:02<00:00, 3392.14ex/s]
100%|██████████| 1696/1696 [00:00<00:00, 3355.07ex/s]



Updated AL datasets: train size = 1696, unlabelled size = 8304, sum: 10000 


AL iteration  53/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.879479208082523: 100%|██████████| 53/53 [00:14<00:00,  3.67it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8462266474962234: 100%|██████████| 16/16 [00:02<00:00,  6.12it/s]
Eval mean loss: 1.8462266474962234: 100%|██████████| 16/16 [00:02<00:00,  6.09it/s]


Epoch   2
Training mean loss: 1.879479208082523: 100%|██████████| 53/53 [00:16<00:00,  3.18it/s]
Training mean loss: 1.685259076784242: 100%|██████████| 53/53 [00:14<00:00,  3.63it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6506536900997162: 100%|██████████| 16/16 [00:02<00:00,  6.05it/s]
Eval mean loss: 1.6506536900997162: 100%|██████████| 16/16 [00:02<00:00,  5.98it/s]


Epoch   3
Training mean loss: 1.685259076784242: 100%|██████████| 53/53 [00:17<00:00,  3.05it/s]
Training mean loss: 1.500184214340066: 100%|██████████| 53/53 [00:14<00:00,  3.60it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.512

100%|██████████| 9/9 [00:01<00:00,  6.25ba/s]
100%|██████████| 8272/8272 [00:02<00:00, 3368.06ex/s]
100%|██████████| 1728/1728 [00:00<00:00, 3438.65ex/s]



Updated AL datasets: train size = 1728, unlabelled size = 8272, sum: 10000 


AL iteration  54/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8870117664337158: 100%|██████████| 54/54 [00:14<00:00,  3.66it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8194900676608086: 100%|██████████| 16/16 [00:02<00:00,  6.15it/s]
Eval mean loss: 1.8194900676608086: 100%|██████████| 16/16 [00:02<00:00,  6.11it/s]


Epoch   2
Training mean loss: 1.8870117664337158: 100%|██████████| 54/54 [00:16<00:00,  3.19it/s]
Training mean loss: 1.7254047967769481: 100%|██████████| 54/54 [00:15<00:00,  3.59it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.596312366425991: 100%|██████████| 16/16 [00:02<00:00,  6.11it/s] 
Eval mean loss: 1.596312366425991: 100%|██████████| 16/16 [00:02<00:00,  6.08it/s]


Epoch   3
Training mean loss: 1.7254047967769481: 100%|██████████| 54/54 [00:17<00:00,  3.06it/s]
Training mean loss: 1.5323674700878285: 100%|██████████| 54/54 [00:14<00:00,  3.61it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.41

100%|██████████| 9/9 [00:01<00:00,  6.24ba/s]
100%|██████████| 8240/8240 [00:02<00:00, 3369.14ex/s]
100%|██████████| 1760/1760 [00:00<00:00, 3435.53ex/s]



Updated AL datasets: train size = 1760, unlabelled size = 8240, sum: 10000 


AL iteration  55/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8771252458745784: 100%|██████████| 55/55 [00:14<00:00,  3.64it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.829019233584404: 100%|██████████| 16/16 [00:02<00:00,  5.85it/s] 
Eval mean loss: 1.829019233584404: 100%|██████████| 16/16 [00:02<00:00,  5.84it/s]


Epoch   2
Training mean loss: 1.8771252458745784: 100%|██████████| 55/55 [00:17<00:00,  3.16it/s]
Training mean loss: 1.717892174287276: 100%|██████████| 55/55 [00:15<00:00,  3.54it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6722902804613113: 100%|██████████| 16/16 [00:02<00:00,  6.03it/s]
Eval mean loss: 1.6722902804613113: 100%|██████████| 16/16 [00:02<00:00,  5.99it/s]


Epoch   3
Training mean loss: 1.717892174287276: 100%|██████████| 55/55 [00:18<00:00,  3.03it/s]
Training mean loss: 1.5393982258709995: 100%|██████████| 55/55 [00:15<00:00,  3.39it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.520

100%|██████████| 9/9 [00:01<00:00,  6.27ba/s]
100%|██████████| 8208/8208 [00:02<00:00, 3017.56ex/s]
100%|██████████| 1792/1792 [00:00<00:00, 3395.97ex/s]



Updated AL datasets: train size = 1792, unlabelled size = 8208, sum: 10000 


AL iteration  56/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9235670587846212: 100%|██████████| 56/56 [00:15<00:00,  3.50it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.854847863316536: 100%|██████████| 16/16 [00:02<00:00,  5.64it/s] 
Eval mean loss: 1.854847863316536: 100%|██████████| 16/16 [00:02<00:00,  5.64it/s]


Epoch   2
Training mean loss: 1.9235670587846212: 100%|██████████| 56/56 [00:18<00:00,  2.99it/s]
Training mean loss: 1.7796858698129654: 100%|██████████| 56/56 [00:17<00:00,  3.33it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6687035411596298: 100%|██████████| 16/16 [00:02<00:00,  5.62it/s]
Eval mean loss: 1.6687035411596298: 100%|██████████| 16/16 [00:02<00:00,  5.62it/s]


Epoch   3
Training mean loss: 1.7796858698129654: 100%|██████████| 56/56 [00:19<00:00,  2.80it/s]
Training mean loss: 1.5785055458545685: 100%|██████████| 56/56 [00:17<00:00,  3.33it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.49

100%|██████████| 9/9 [00:01<00:00,  5.98ba/s]
100%|██████████| 8176/8176 [00:02<00:00, 3201.39ex/s]
100%|██████████| 1824/1824 [00:00<00:00, 3279.54ex/s]



Updated AL datasets: train size = 1824, unlabelled size = 8176, sum: 10000 


AL iteration  57/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.868081021727177: 100%|██████████| 57/57 [00:18<00:00,  2.98it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7889713272452354: 100%|██████████| 16/16 [00:03<00:00,  4.99it/s]
Eval mean loss: 1.7889713272452354: 100%|██████████| 16/16 [00:03<00:00,  4.91it/s]


Epoch   2
Training mean loss: 1.868081021727177: 100%|██████████| 57/57 [00:22<00:00,  2.58it/s]
Training mean loss: 1.6703763928329736: 100%|██████████| 57/57 [00:19<00:00,  2.96it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6544612795114517: 100%|██████████| 16/16 [00:03<00:00,  4.55it/s]
Eval mean loss: 1.6544612795114517: 100%|██████████| 16/16 [00:03<00:00,  4.51it/s]


Epoch   3
Training mean loss: 1.6703763928329736: 100%|██████████| 57/57 [00:22<00:00,  2.50it/s]
Training mean loss: 1.4949724235032733: 100%|██████████| 57/57 [00:19<00:00,  3.19it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.50

100%|██████████| 9/9 [00:01<00:00,  6.13ba/s]
100%|██████████| 8144/8144 [00:02<00:00, 3200.14ex/s]
100%|██████████| 1856/1856 [00:00<00:00, 3264.22ex/s]



Updated AL datasets: train size = 1856, unlabelled size = 8144, sum: 10000 


AL iteration  58/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8785966120917221: 100%|██████████| 58/58 [00:16<00:00,  3.44it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8102768808603287: 100%|██████████| 16/16 [00:02<00:00,  5.52it/s]
Eval mean loss: 1.8102768808603287: 100%|██████████| 16/16 [00:02<00:00,  5.50it/s]


Epoch   2
Training mean loss: 1.8785966120917221: 100%|██████████| 58/58 [00:19<00:00,  2.98it/s]
Training mean loss: 1.7152179849558864: 100%|██████████| 58/58 [00:17<00:00,  3.39it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.645602509379387: 100%|██████████| 16/16 [00:02<00:00,  5.39it/s] 
Eval mean loss: 1.645602509379387: 100%|██████████| 16/16 [00:02<00:00,  5.36it/s]


Epoch   3
Training mean loss: 1.7152179849558864: 100%|██████████| 58/58 [00:20<00:00,  2.88it/s]
Training mean loss: 1.5243177783900295: 100%|██████████| 58/58 [00:19<00:00,  2.65it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.47

100%|██████████| 9/9 [00:01<00:00,  6.23ba/s]
100%|██████████| 8112/8112 [00:02<00:00, 3174.90ex/s]
100%|██████████| 1888/1888 [00:00<00:00, 3241.22ex/s]



Updated AL datasets: train size = 1888, unlabelled size = 8112, sum: 10000 


AL iteration  59/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.876427159471027: 100%|██████████| 59/59 [00:17<00:00,  3.26it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8012342900037766: 100%|██████████| 16/16 [00:03<00:00,  5.32it/s]
Eval mean loss: 1.8012342900037766: 100%|██████████| 16/16 [00:03<00:00,  5.30it/s]


Epoch   2
Training mean loss: 1.876427159471027: 100%|██████████| 59/59 [00:20<00:00,  2.83it/s]
Training mean loss: 1.692901112265506: 100%|██████████| 59/59 [00:19<00:00,  3.07it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5805975645780563: 100%|██████████| 16/16 [00:03<00:00,  5.20it/s]
Eval mean loss: 1.5805975645780563: 100%|██████████| 16/16 [00:03<00:00,  5.16it/s]


Epoch   3
Training mean loss: 1.692901112265506: 100%|██████████| 59/59 [00:22<00:00,  2.60it/s]
Training mean loss: 1.4980283510887016: 100%|██████████| 59/59 [00:22<00:00,  2.69it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.429

100%|██████████| 9/9 [00:01<00:00,  4.68ba/s]
100%|██████████| 8080/8080 [00:02<00:00, 2849.81ex/s]
100%|██████████| 1920/1920 [00:00<00:00, 3278.69ex/s]



Updated AL datasets: train size = 1920, unlabelled size = 8080, sum: 10000 


AL iteration  60/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9013391514619191: 100%|██████████| 60/60 [00:18<00:00,  3.02it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.833951711654663: 100%|██████████| 16/16 [00:03<00:00,  4.65it/s] 
Eval mean loss: 1.833951711654663: 100%|██████████| 16/16 [00:03<00:00,  4.60it/s]


Epoch   2
Training mean loss: 1.9013391514619191: 100%|██████████| 60/60 [00:21<00:00,  2.73it/s]
Training mean loss: 1.7488296627998352: 100%|██████████| 60/60 [00:19<00:00,  3.19it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6929139271378517: 100%|██████████| 16/16 [00:02<00:00,  5.37it/s]
Eval mean loss: 1.6929139271378517: 100%|██████████| 16/16 [00:02<00:00,  5.36it/s]


Epoch   3
Training mean loss: 1.7488296627998352: 100%|██████████| 60/60 [00:22<00:00,  2.66it/s]
Training mean loss: 1.550994336605072: 100%|██████████| 60/60 [00:19<00:00,  3.13it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.49

100%|██████████| 9/9 [00:01<00:00,  5.66ba/s]
100%|██████████| 8048/8048 [00:02<00:00, 2727.27ex/s]
100%|██████████| 1952/1952 [00:00<00:00, 2801.41ex/s]



Updated AL datasets: train size = 1952, unlabelled size = 8048, sum: 10000 


AL iteration  61/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.899953631104016: 100%|██████████| 61/61 [00:19<00:00,  2.92it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.793937973678112: 100%|██████████| 16/16 [00:03<00:00,  4.92it/s] 
Eval mean loss: 1.793937973678112: 100%|██████████| 16/16 [00:03<00:00,  4.83it/s]


Epoch   2
Training mean loss: 1.899953631104016: 100%|██████████| 61/61 [00:22<00:00,  2.69it/s]
Training mean loss: 1.6899896410645032: 100%|██████████| 61/61 [00:20<00:00,  2.96it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5462063252925873: 100%|██████████| 16/16 [00:03<00:00,  4.71it/s]
Eval mean loss: 1.5462063252925873: 100%|██████████| 16/16 [00:03<00:00,  4.74it/s]


Epoch   3
Training mean loss: 1.6899896410645032: 100%|██████████| 61/61 [00:23<00:00,  2.58it/s]
Training mean loss: 1.4815153313464806: 100%|██████████| 61/61 [00:19<00:00,  3.05it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.376

100%|██████████| 9/9 [00:01<00:00,  5.73ba/s]
100%|██████████| 8016/8016 [00:02<00:00, 2747.86ex/s]
100%|██████████| 1984/1984 [00:00<00:00, 3028.49ex/s]



Updated AL datasets: train size = 1984, unlabelled size = 8016, sum: 10000 


AL iteration  62/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8666792492712698: 100%|██████████| 62/62 [00:20<00:00,  2.92it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8064804822206497: 100%|██████████| 16/16 [00:04<00:00,  3.95it/s]
Eval mean loss: 1.8064804822206497: 100%|██████████| 16/16 [00:04<00:00,  3.81it/s]


Epoch   2
Training mean loss: 1.8666792492712698: 100%|██████████| 62/62 [00:24<00:00,  2.51it/s]
Training mean loss: 1.6584865431631766: 100%|██████████| 62/62 [00:20<00:00,  3.01it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5943466797471046: 100%|██████████| 16/16 [00:03<00:00,  5.05it/s]
Eval mean loss: 1.5943466797471046: 100%|██████████| 16/16 [00:03<00:00,  5.01it/s]


Epoch   3
Training mean loss: 1.6584865431631766: 100%|██████████| 62/62 [00:23<00:00,  2.64it/s]
Training mean loss: 1.4572616219520569: 100%|██████████| 62/62 [00:19<00:00,  3.24it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4

100%|██████████| 9/9 [00:01<00:00,  5.13ba/s]
100%|██████████| 7984/7984 [00:02<00:00, 3122.19ex/s]
100%|██████████| 2016/2016 [00:00<00:00, 2517.94ex/s]



Updated AL datasets: train size = 2016, unlabelled size = 7984, sum: 10000 


AL iteration  63/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8557132217619154: 100%|██████████| 63/63 [00:18<00:00,  3.35it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7642033770680428: 100%|██████████| 16/16 [00:02<00:00,  5.45it/s]
Eval mean loss: 1.7642033770680428: 100%|██████████| 16/16 [00:02<00:00,  5.40it/s]


Epoch   2
Training mean loss: 1.8557132217619154: 100%|██████████| 63/63 [00:21<00:00,  2.97it/s]
Training mean loss: 1.666289696617732: 100%|██████████| 63/63 [00:19<00:00,  3.30it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.573330894112587: 100%|██████████| 16/16 [00:02<00:00,  5.55it/s] 
Eval mean loss: 1.573330894112587: 100%|██████████| 16/16 [00:02<00:00,  5.52it/s]


Epoch   3
Training mean loss: 1.666289696617732: 100%|██████████| 63/63 [00:22<00:00,  2.85it/s]
Training mean loss: 1.4792524292355491: 100%|██████████| 63/63 [00:18<00:00,  3.35it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.422

100%|██████████| 8/8 [00:01<00:00,  5.39ba/s]
100%|██████████| 7952/7952 [00:02<00:00, 3217.43ex/s]
100%|██████████| 2048/2048 [00:00<00:00, 3152.76ex/s]



Updated AL datasets: train size = 2048, unlabelled size = 7952, sum: 10000 


AL iteration  64/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.870743289589882: 100%|██████████| 64/64 [00:18<00:00,  3.27it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.789358913898468: 100%|██████████| 16/16 [00:02<00:00,  5.36it/s] 
Eval mean loss: 1.789358913898468: 100%|██████████| 16/16 [00:02<00:00,  5.36it/s]


Epoch   2
Training mean loss: 1.870743289589882: 100%|██████████| 64/64 [00:21<00:00,  2.93it/s]
Training mean loss: 1.65057073533535: 100%|██████████| 64/64 [00:19<00:00,  3.33it/s]  

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5324776843190193: 100%|██████████| 16/16 [00:02<00:00,  5.52it/s]
Eval mean loss: 1.5324776843190193: 100%|██████████| 16/16 [00:02<00:00,  5.46it/s]


Epoch   3
Training mean loss: 1.65057073533535: 100%|██████████| 64/64 [00:22<00:00,  2.88it/s]
Training mean loss: 1.4240075703710318: 100%|██████████| 64/64 [00:19<00:00,  3.30it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.36838

100%|██████████| 8/8 [00:01<00:00,  4.78ba/s]
100%|██████████| 7920/7920 [00:02<00:00, 3119.65ex/s]
100%|██████████| 2080/2080 [00:00<00:00, 3139.00ex/s]



Updated AL datasets: train size = 2080, unlabelled size = 7920, sum: 10000 


AL iteration  65/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8938327642587516: 100%|██████████| 65/65 [00:19<00:00,  3.25it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8024813160300255: 100%|██████████| 16/16 [00:02<00:00,  5.39it/s]
Eval mean loss: 1.8024813160300255: 100%|██████████| 16/16 [00:02<00:00,  5.35it/s]


Epoch   2
Training mean loss: 1.8938327642587516: 100%|██████████| 65/65 [00:22<00:00,  2.89it/s]
Training mean loss: 1.6899855265250572: 100%|██████████| 65/65 [00:20<00:00,  3.11it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5449484586715698: 100%|██████████| 16/16 [00:03<00:00,  5.18it/s]
Eval mean loss: 1.5449484586715698: 100%|██████████| 16/16 [00:03<00:00,  5.16it/s]


Epoch   3
Training mean loss: 1.6899855265250572: 100%|██████████| 65/65 [00:23<00:00,  2.72it/s]
Training mean loss: 1.479631282733037: 100%|██████████| 65/65 [00:20<00:00,  3.23it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.3

100%|██████████| 8/8 [00:01<00:00,  4.97ba/s]
100%|██████████| 7888/7888 [00:03<00:00, 2509.06ex/s]
100%|██████████| 2112/2112 [00:00<00:00, 3004.82ex/s]



Updated AL datasets: train size = 2112, unlabelled size = 7888, sum: 10000 


AL iteration  66/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9013267683260369: 100%|██████████| 66/66 [00:23<00:00,  2.63it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8172499760985374: 100%|██████████| 16/16 [00:03<00:00,  4.35it/s]
Eval mean loss: 1.8172499760985374: 100%|██████████| 16/16 [00:03<00:00,  4.34it/s]


Epoch   2
Training mean loss: 1.9013267683260369: 100%|██████████| 66/66 [00:27<00:00,  2.41it/s]
Training mean loss: 1.70169464747111: 100%|██████████| 66/66 [00:22<00:00,  2.93it/s]  

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5762196853756905: 100%|██████████| 16/16 [00:03<00:00,  5.17it/s]
Eval mean loss: 1.5762196853756905: 100%|██████████| 16/16 [00:03<00:00,  5.14it/s]


Epoch   3
Training mean loss: 1.70169464747111: 100%|██████████| 66/66 [00:26<00:00,  2.54it/s]
Training mean loss: 1.4944454576029922: 100%|██████████| 66/66 [00:21<00:00,  3.10it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.402

100%|██████████| 8/8 [00:01<00:00,  5.16ba/s]
100%|██████████| 7856/7856 [00:02<00:00, 2788.22ex/s]
100%|██████████| 2144/2144 [00:00<00:00, 2915.17ex/s]



Updated AL datasets: train size = 2144, unlabelled size = 7856, sum: 10000 


AL iteration  67/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8840083933588285: 100%|██████████| 67/67 [00:20<00:00,  3.14it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7690018266439438: 100%|██████████| 16/16 [00:03<00:00,  5.10it/s]
Eval mean loss: 1.7690018266439438: 100%|██████████| 16/16 [00:03<00:00,  5.03it/s]


Epoch   2
Training mean loss: 1.8840083933588285: 100%|██████████| 67/67 [00:23<00:00,  2.85it/s]
Training mean loss: 1.6924921558864081: 100%|██████████| 67/67 [00:24<00:00,  2.88it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.558418519794941: 100%|██████████| 16/16 [00:03<00:00,  5.21it/s] 
Eval mean loss: 1.558418519794941: 100%|██████████| 16/16 [00:03<00:00,  5.17it/s]


Epoch   3
Training mean loss: 1.6924921558864081: 100%|██████████| 67/67 [00:27<00:00,  2.46it/s]
Training mean loss: 1.4510964973648983: 100%|██████████| 67/67 [00:21<00:00,  3.05it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.35

100%|██████████| 8/8 [00:01<00:00,  5.00ba/s]
100%|██████████| 7824/7824 [00:02<00:00, 3041.84ex/s]
100%|██████████| 2176/2176 [00:00<00:00, 2958.44ex/s]



Updated AL datasets: train size = 2176, unlabelled size = 7824, sum: 10000 


AL iteration  68/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8663092904231127: 100%|██████████| 68/68 [00:23<00:00,  2.96it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8044074326753616: 100%|██████████| 16/16 [00:03<00:00,  4.19it/s]
Eval mean loss: 1.8044074326753616: 100%|██████████| 16/16 [00:03<00:00,  4.17it/s]


Epoch   2
Training mean loss: 1.8663092904231127: 100%|██████████| 68/68 [00:26<00:00,  2.53it/s]
Training mean loss: 1.66279077705215: 100%|██████████| 68/68 [00:22<00:00,  3.11it/s]  

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6057433187961578: 100%|██████████| 16/16 [00:03<00:00,  5.26it/s]
Eval mean loss: 1.6057433187961578: 100%|██████████| 16/16 [00:03<00:00,  5.21it/s]


Epoch   3
Training mean loss: 1.66279077705215: 100%|██████████| 68/68 [00:25<00:00,  2.65it/s]
Training mean loss: 1.4648745936505936: 100%|██████████| 68/68 [00:21<00:00,  3.19it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.394

100%|██████████| 8/8 [00:01<00:00,  5.20ba/s]
100%|██████████| 7792/7792 [00:02<00:00, 3104.51ex/s]
100%|██████████| 2208/2208 [00:00<00:00, 2962.07ex/s]



Updated AL datasets: train size = 2208, unlabelled size = 7792, sum: 10000 


AL iteration  69/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8987054617508599: 100%|██████████| 69/69 [00:21<00:00,  3.04it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7854673936963081: 100%|██████████| 16/16 [00:03<00:00,  5.17it/s]
Eval mean loss: 1.7854673936963081: 100%|██████████| 16/16 [00:03<00:00,  5.18it/s]


Epoch   2
Training mean loss: 1.8987054617508599: 100%|██████████| 69/69 [00:24<00:00,  2.83it/s]
Training mean loss: 1.671845819639123: 100%|██████████| 69/69 [00:22<00:00,  3.07it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5524756088852882: 100%|██████████| 16/16 [00:03<00:00,  4.88it/s]
Eval mean loss: 1.5524756088852882: 100%|██████████| 16/16 [00:03<00:00,  4.92it/s]


Epoch   3
Training mean loss: 1.671845819639123: 100%|██████████| 69/69 [00:26<00:00,  2.63it/s]
Training mean loss: 1.4334405090497888: 100%|██████████| 69/69 [00:22<00:00,  3.13it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.37

100%|██████████| 8/8 [00:01<00:00,  5.12ba/s]
100%|██████████| 7760/7760 [00:02<00:00, 3051.88ex/s]
100%|██████████| 2240/2240 [00:00<00:00, 2718.30ex/s]



Updated AL datasets: train size = 2240, unlabelled size = 7760, sum: 10000 


AL iteration  70/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.9021447454180036: 100%|██████████| 70/70 [00:20<00:00,  3.31it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8415569737553596: 100%|██████████| 16/16 [00:02<00:00,  5.50it/s]
Eval mean loss: 1.8415569737553596: 100%|██████████| 16/16 [00:02<00:00,  5.45it/s]


Epoch   2
Training mean loss: 1.9021447454180036: 100%|██████████| 70/70 [00:23<00:00,  2.93it/s]
Training mean loss: 1.7242310762405395: 100%|██████████| 70/70 [00:23<00:00,  3.04it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6302071288228035: 100%|██████████| 16/16 [00:02<00:00,  5.44it/s]
Eval mean loss: 1.6302071288228035: 100%|██████████| 16/16 [00:02<00:00,  5.39it/s]


Epoch   3
Training mean loss: 1.7242310762405395: 100%|██████████| 70/70 [00:26<00:00,  2.69it/s]
Training mean loss: 1.4887556961604527: 100%|██████████| 70/70 [00:22<00:00,  2.99it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4

100%|██████████| 8/8 [00:01<00:00,  5.15ba/s]
100%|██████████| 7728/7728 [00:02<00:00, 2959.90ex/s]
100%|██████████| 2272/2272 [00:00<00:00, 3008.47ex/s]



Updated AL datasets: train size = 2272, unlabelled size = 7728, sum: 10000 


AL iteration  71/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8474515794028699: 100%|██████████| 71/71 [00:20<00:00,  3.35it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.774951048195362: 100%|██████████| 16/16 [00:02<00:00,  5.53it/s] 
Eval mean loss: 1.774951048195362: 100%|██████████| 16/16 [00:02<00:00,  5.47it/s]


Epoch   2
Training mean loss: 1.8474515794028699: 100%|██████████| 71/71 [00:23<00:00,  3.01it/s]
Training mean loss: 1.6109327497616621: 100%|██████████| 71/71 [00:21<00:00,  3.28it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5504138842225075: 100%|██████████| 16/16 [00:02<00:00,  5.60it/s]
Eval mean loss: 1.5504138842225075: 100%|██████████| 16/16 [00:02<00:00,  5.55it/s]


Epoch   3
Training mean loss: 1.6109327497616621: 100%|██████████| 71/71 [00:24<00:00,  2.89it/s]
Training mean loss: 1.3937072166254822: 100%|██████████| 71/71 [00:21<00:00,  3.32it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.35

100%|██████████| 8/8 [00:01<00:00,  5.57ba/s]
100%|██████████| 7696/7696 [00:02<00:00, 3180.16ex/s]
100%|██████████| 2304/2304 [00:00<00:00, 3068.28ex/s]



Updated AL datasets: train size = 2304, unlabelled size = 7696, sum: 10000 


AL iteration  72/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8621395412418578: 100%|██████████| 72/72 [00:21<00:00,  3.32it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.733150526881218: 100%|██████████| 16/16 [00:02<00:00,  5.46it/s] 
Eval mean loss: 1.733150526881218: 100%|██████████| 16/16 [00:02<00:00,  5.43it/s]


Epoch   2
Training mean loss: 1.8621395412418578: 100%|██████████| 72/72 [00:23<00:00,  3.00it/s]
Training mean loss: 1.6158038824796677: 100%|██████████| 72/72 [00:21<00:00,  3.31it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4864384308457375: 100%|██████████| 16/16 [00:03<00:00,  5.36it/s]
Eval mean loss: 1.4864384308457375: 100%|██████████| 16/16 [00:03<00:00,  5.31it/s]


Epoch   3
Training mean loss: 1.6158038824796677: 100%|██████████| 72/72 [00:25<00:00,  2.88it/s]
Training mean loss: 1.3996779885556963: 100%|██████████| 72/72 [00:22<00:00,  3.25it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.31

100%|██████████| 8/8 [00:01<00:00,  5.34ba/s]
100%|██████████| 7664/7664 [00:02<00:00, 3142.12ex/s]
100%|██████████| 2336/2336 [00:00<00:00, 3147.54ex/s]



Updated AL datasets: train size = 2336, unlabelled size = 7664, sum: 10000 


AL iteration  73/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8304772132063565: 100%|██████████| 73/73 [00:21<00:00,  3.30it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7597011551260948: 100%|██████████| 16/16 [00:02<00:00,  5.54it/s]
Eval mean loss: 1.7597011551260948: 100%|██████████| 16/16 [00:02<00:00,  5.50it/s]


Epoch   2
Training mean loss: 1.8304772132063565: 100%|██████████| 73/73 [00:24<00:00,  3.00it/s]
Training mean loss: 1.6144774368364516: 100%|██████████| 73/73 [00:22<00:00,  3.32it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5891580358147621: 100%|██████████| 16/16 [00:03<00:00,  5.33it/s]
Eval mean loss: 1.5891580358147621: 100%|██████████| 16/16 [00:03<00:00,  5.30it/s]


Epoch   3
Training mean loss: 1.6144774368364516: 100%|██████████| 73/73 [00:25<00:00,  2.89it/s]
Training mean loss: 1.3952685023007327: 100%|██████████| 73/73 [00:22<00:00,  3.27it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.3

100%|██████████| 8/8 [00:01<00:00,  5.30ba/s]
100%|██████████| 7632/7632 [00:02<00:00, 3117.72ex/s]
100%|██████████| 2368/2368 [00:00<00:00, 3172.91ex/s]



Updated AL datasets: train size = 2368, unlabelled size = 7632, sum: 10000 


AL iteration  74/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8612856816601109: 100%|██████████| 74/74 [00:22<00:00,  3.26it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.778394490480423: 100%|██████████| 16/16 [00:02<00:00,  5.38it/s] 
Eval mean loss: 1.778394490480423: 100%|██████████| 16/16 [00:02<00:00,  5.34it/s]


Epoch   2
Training mean loss: 1.8612856816601109: 100%|██████████| 74/74 [00:25<00:00,  2.94it/s]
Training mean loss: 1.624162812490721: 100%|██████████| 74/74 [00:23<00:00,  3.13it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5359016209840775: 100%|██████████| 16/16 [00:03<00:00,  5.32it/s]
Eval mean loss: 1.5359016209840775: 100%|██████████| 16/16 [00:03<00:00,  5.29it/s]


Epoch   3
Training mean loss: 1.624162812490721: 100%|██████████| 74/74 [00:26<00:00,  2.82it/s]
Training mean loss: 1.406998648836806: 100%|██████████| 74/74 [00:23<00:00,  3.22it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.329

100%|██████████| 8/8 [00:01<00:00,  4.44ba/s]
100%|██████████| 7600/7600 [00:02<00:00, 3043.85ex/s]
100%|██████████| 2400/2400 [00:00<00:00, 3161.19ex/s]



Updated AL datasets: train size = 2400, unlabelled size = 7600, sum: 10000 


AL iteration  75/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8784199555714924: 100%|██████████| 75/75 [00:22<00:00,  3.25it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7936020269989967: 100%|██████████| 16/16 [00:03<00:00,  4.63it/s]
Eval mean loss: 1.7936020269989967: 100%|██████████| 16/16 [00:03<00:00,  4.62it/s]


Epoch   2
Training mean loss: 1.8784199555714924: 100%|██████████| 75/75 [00:25<00:00,  2.90it/s]
Training mean loss: 1.6447085332870484: 100%|██████████| 75/75 [00:24<00:00,  3.18it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5367621555924416: 100%|██████████| 16/16 [00:02<00:00,  5.38it/s]
Eval mean loss: 1.5367621555924416: 100%|██████████| 16/16 [00:02<00:00,  5.34it/s]


Epoch   3
Training mean loss: 1.6447085332870484: 100%|██████████| 75/75 [00:27<00:00,  2.76it/s]
Training mean loss: 1.387658076286316: 100%|██████████| 75/75 [00:23<00:00,  3.16it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.3

100%|██████████| 8/8 [00:01<00:00,  5.42ba/s]
100%|██████████| 7568/7568 [00:02<00:00, 3010.33ex/s]
100%|██████████| 2432/2432 [00:00<00:00, 2995.96ex/s]



Updated AL datasets: train size = 2432, unlabelled size = 7568, sum: 10000 


AL iteration  76/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8941036682379873: 100%|██████████| 76/76 [00:27<00:00,  3.01it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8436779901385307: 100%|██████████| 16/16 [00:03<00:00,  5.12it/s]
Eval mean loss: 1.8436779901385307: 100%|██████████| 16/16 [00:03<00:00,  5.07it/s]


Epoch   2
Training mean loss: 1.8941036682379873: 100%|██████████| 76/76 [00:30<00:00,  2.51it/s]
Training mean loss: 1.665217300778941: 100%|██████████| 76/76 [00:24<00:00,  3.10it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.572368137538433: 100%|██████████| 16/16 [00:03<00:00,  5.37it/s] 
Eval mean loss: 1.572368137538433: 100%|██████████| 16/16 [00:03<00:00,  5.32it/s]


Epoch   3
Training mean loss: 1.665217300778941: 100%|██████████| 76/76 [00:27<00:00,  2.73it/s]
Training mean loss: 1.4153765032165928: 100%|██████████| 76/76 [00:24<00:00,  3.15it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.344

100%|██████████| 8/8 [00:01<00:00,  5.37ba/s]
100%|██████████| 7536/7536 [00:02<00:00, 2961.93ex/s]
100%|██████████| 2464/2464 [00:00<00:00, 2842.61ex/s]



Updated AL datasets: train size = 2464, unlabelled size = 7536, sum: 10000 


AL iteration  77/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8696091562122494: 100%|██████████| 77/77 [00:23<00:00,  3.17it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7615100741386414: 100%|██████████| 16/16 [00:02<00:00,  5.44it/s]
Eval mean loss: 1.7615100741386414: 100%|██████████| 16/16 [00:02<00:00,  5.41it/s]


Epoch   2
Training mean loss: 1.8696091562122494: 100%|██████████| 77/77 [00:26<00:00,  2.91it/s]
Training mean loss: 1.6318293097731356: 100%|██████████| 77/77 [00:25<00:00,  3.00it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5455397963523865: 100%|██████████| 16/16 [00:03<00:00,  5.12it/s]
Eval mean loss: 1.5455397963523865: 100%|██████████| 16/16 [00:03<00:00,  5.11it/s]


Epoch   3
Training mean loss: 1.6318293097731356: 100%|██████████| 77/77 [00:28<00:00,  2.73it/s]
Training mean loss: 1.4115629892844659: 100%|██████████| 77/77 [00:25<00:00,  3.01it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.3

100%|██████████| 8/8 [00:01<00:00,  5.32ba/s]
100%|██████████| 7504/7504 [00:02<00:00, 3072.86ex/s]
100%|██████████| 2496/2496 [00:00<00:00, 2803.75ex/s]



Updated AL datasets: train size = 2496, unlabelled size = 7504, sum: 10000 


AL iteration  78/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8597534696261089: 100%|██████████| 78/78 [00:25<00:00,  2.90it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7618078589439392: 100%|██████████| 16/16 [00:03<00:00,  4.62it/s]
Eval mean loss: 1.7618078589439392: 100%|██████████| 16/16 [00:03<00:00,  4.64it/s]


Epoch   2
Training mean loss: 1.8597534696261089: 100%|██████████| 78/78 [00:28<00:00,  2.72it/s]
Training mean loss: 1.6157410618586419: 100%|██████████| 78/78 [00:26<00:00,  2.97it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4718330204486847: 100%|██████████| 16/16 [00:03<00:00,  4.92it/s]
Eval mean loss: 1.4718330204486847: 100%|██████████| 16/16 [00:03<00:00,  4.94it/s]


Epoch   3
Training mean loss: 1.6157410618586419: 100%|██████████| 78/78 [00:29<00:00,  2.63it/s]
Training mean loss: 1.3574605446595411: 100%|██████████| 78/78 [00:25<00:00,  3.02it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.2

100%|██████████| 8/8 [00:01<00:00,  5.40ba/s]
100%|██████████| 7472/7472 [00:02<00:00, 3085.96ex/s]
100%|██████████| 2528/2528 [00:00<00:00, 3154.25ex/s]



Updated AL datasets: train size = 2528, unlabelled size = 7472, sum: 10000 


AL iteration  79/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8255467414855957: 100%|██████████| 79/79 [00:24<00:00,  3.16it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7089363262057304: 100%|██████████| 16/16 [00:02<00:00,  5.38it/s]
Eval mean loss: 1.7089363262057304: 100%|██████████| 16/16 [00:02<00:00,  5.35it/s]


Epoch   2
Training mean loss: 1.8255467414855957: 100%|██████████| 79/79 [00:27<00:00,  2.88it/s]
Training mean loss: 1.578860160670703: 100%|██████████| 79/79 [00:25<00:00,  3.05it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4943075776100159: 100%|██████████| 16/16 [00:03<00:00,  5.09it/s]
Eval mean loss: 1.4943075776100159: 100%|██████████| 16/16 [00:03<00:00,  5.09it/s]


Epoch   3
Training mean loss: 1.578860160670703: 100%|██████████| 79/79 [00:28<00:00,  2.75it/s]
Training mean loss: 1.3651431361331214: 100%|██████████| 79/79 [00:27<00:00,  2.85it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.28

100%|██████████| 8/8 [00:01<00:00,  5.28ba/s]
100%|██████████| 7440/7440 [00:02<00:00, 2962.91ex/s]
100%|██████████| 2560/2560 [00:00<00:00, 2975.18ex/s]



Updated AL datasets: train size = 2560, unlabelled size = 7440, sum: 10000 


AL iteration  80/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8295376747846603: 100%|██████████| 80/80 [00:27<00:00,  2.61it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7048466354608536: 100%|██████████| 16/16 [00:03<00:00,  5.15it/s]
Eval mean loss: 1.7048466354608536: 100%|██████████| 16/16 [00:03<00:00,  5.14it/s]


Epoch   2
Training mean loss: 1.8295376747846603: 100%|██████████| 80/80 [00:31<00:00,  2.58it/s]
Training mean loss: 1.5745702683925629: 100%|██████████| 80/80 [00:26<00:00,  3.12it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.473272055387497: 100%|██████████| 16/16 [00:02<00:00,  5.40it/s] 
Eval mean loss: 1.473272055387497: 100%|██████████| 16/16 [00:02<00:00,  5.37it/s]


Epoch   3
Training mean loss: 1.5745702683925629: 100%|██████████| 80/80 [00:29<00:00,  2.73it/s]
Training mean loss: 1.3536683008074761: 100%|██████████| 80/80 [00:26<00:00,  3.02it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.30

100%|██████████| 8/8 [00:01<00:00,  5.37ba/s]
100%|██████████| 7408/7408 [00:02<00:00, 3087.99ex/s]
100%|██████████| 2592/2592 [00:00<00:00, 3191.12ex/s]



Updated AL datasets: train size = 2592, unlabelled size = 7408, sum: 10000 


AL iteration  81/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8434517633767775: 100%|██████████| 81/81 [00:24<00:00,  3.17it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7508996650576591: 100%|██████████| 16/16 [00:03<00:00,  5.32it/s]
Eval mean loss: 1.7508996650576591: 100%|██████████| 16/16 [00:03<00:00,  5.30it/s]


Epoch   2
Training mean loss: 1.8434517633767775: 100%|██████████| 81/81 [00:27<00:00,  2.93it/s]
Training mean loss: 1.5781142505598658: 100%|██████████| 81/81 [00:28<00:00,  2.79it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.48998611420393: 100%|██████████| 16/16 [00:03<00:00,  5.13it/s]  
Eval mean loss: 1.48998611420393: 100%|██████████| 16/16 [00:03<00:00,  5.14it/s]


Epoch   3
Training mean loss: 1.5781142505598658: 100%|██████████| 81/81 [00:31<00:00,  2.54it/s]
Training mean loss: 1.3324583032984791: 100%|██████████| 81/81 [00:29<00:00,  2.63it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.275

100%|██████████| 8/8 [00:01<00:00,  5.12ba/s]
100%|██████████| 7376/7376 [00:03<00:00, 2183.95ex/s]
100%|██████████| 2624/2624 [00:01<00:00, 2083.90ex/s]



Updated AL datasets: train size = 2624, unlabelled size = 7376, sum: 10000 


AL iteration  82/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8534190204085372: 100%|██████████| 82/82 [00:28<00:00,  3.10it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7175771594047546: 100%|██████████| 16/16 [00:03<00:00,  4.49it/s]
Eval mean loss: 1.7175771594047546: 100%|██████████| 16/16 [00:03<00:00,  4.44it/s]


Epoch   2
Training mean loss: 1.8534190204085372: 100%|██████████| 82/82 [00:31<00:00,  2.58it/s]
Training mean loss: 1.6012204696492451: 100%|██████████| 82/82 [00:26<00:00,  3.15it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5098422467708588: 100%|██████████| 16/16 [00:03<00:00,  5.02it/s]
Eval mean loss: 1.5098422467708588: 100%|██████████| 16/16 [00:03<00:00,  5.02it/s]


Epoch   3
Training mean loss: 1.6012204696492451: 100%|██████████| 82/82 [00:30<00:00,  2.72it/s]
Training mean loss: 1.3637330953667803: 100%|██████████| 82/82 [00:26<00:00,  3.18it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.3

100%|██████████| 8/8 [00:01<00:00,  5.66ba/s]
100%|██████████| 7344/7344 [00:02<00:00, 3133.29ex/s]
100%|██████████| 2656/2656 [00:00<00:00, 3168.54ex/s]



Updated AL datasets: train size = 2656, unlabelled size = 7344, sum: 10000 


AL iteration  83/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8797691687043891: 100%|██████████| 83/83 [00:24<00:00,  3.27it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.769577495753765: 100%|██████████| 16/16 [00:02<00:00,  5.43it/s] 
Eval mean loss: 1.769577495753765: 100%|██████████| 16/16 [00:02<00:00,  5.40it/s]


Epoch   2
Training mean loss: 1.8797691687043891: 100%|██████████| 83/83 [00:27<00:00,  3.00it/s]
Training mean loss: 1.6547685574336224: 100%|██████████| 83/83 [00:26<00:00,  2.96it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.527249313890934: 100%|██████████| 16/16 [00:03<00:00,  5.26it/s] 
Eval mean loss: 1.527249313890934: 100%|██████████| 16/16 [00:03<00:00,  5.21it/s]


Epoch   3
Training mean loss: 1.6547685574336224: 100%|██████████| 83/83 [00:29<00:00,  2.81it/s]
Training mean loss: 1.4083786929946347: 100%|██████████| 83/83 [00:27<00:00,  3.00it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.316

100%|██████████| 8/8 [00:01<00:00,  5.28ba/s]
100%|██████████| 7312/7312 [00:02<00:00, 3128.69ex/s]
100%|██████████| 2688/2688 [00:00<00:00, 3220.32ex/s]



Updated AL datasets: train size = 2688, unlabelled size = 7312, sum: 10000 


AL iteration  84/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8233218562035334: 100%|██████████| 84/84 [00:25<00:00,  3.23it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6904493570327759: 100%|██████████| 16/16 [00:02<00:00,  5.43it/s]
Eval mean loss: 1.6904493570327759: 100%|██████████| 16/16 [00:02<00:00,  5.38it/s]


Epoch   2
Training mean loss: 1.8233218562035334: 100%|██████████| 84/84 [00:28<00:00,  2.99it/s]
Training mean loss: 1.5460845019136156: 100%|██████████| 84/84 [00:25<00:00,  3.26it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4759223833680153: 100%|██████████| 16/16 [00:03<00:00,  5.40it/s]
Eval mean loss: 1.4759223833680153: 100%|██████████| 16/16 [00:03<00:00,  5.33it/s]


Epoch   3
Training mean loss: 1.5460845019136156: 100%|██████████| 84/84 [00:28<00:00,  2.91it/s]
Training mean loss: 1.3503793463820504: 100%|██████████| 84/84 [00:25<00:00,  3.25it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.3

100%|██████████| 8/8 [00:01<00:00,  5.67ba/s]
100%|██████████| 7280/7280 [00:02<00:00, 3096.05ex/s]
100%|██████████| 2720/2720 [00:00<00:00, 3040.74ex/s]



Updated AL datasets: train size = 2720, unlabelled size = 7280, sum: 10000 


AL iteration  85/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8337135469212251: 100%|██████████| 85/85 [00:25<00:00,  3.29it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.8085963428020477: 100%|██████████| 16/16 [00:03<00:00,  5.35it/s]
Eval mean loss: 1.8085963428020477: 100%|██████████| 16/16 [00:03<00:00,  5.32it/s]


Epoch   2
Training mean loss: 1.8337135469212251: 100%|██████████| 85/85 [00:28<00:00,  3.01it/s]
Training mean loss: 1.5849675487069523: 100%|██████████| 85/85 [00:26<00:00,  3.23it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5121950656175613: 100%|██████████| 16/16 [00:02<00:00,  5.42it/s]
Eval mean loss: 1.5121950656175613: 100%|██████████| 16/16 [00:02<00:00,  5.41it/s]


Epoch   3
Training mean loss: 1.5849675487069523: 100%|██████████| 85/85 [00:29<00:00,  2.92it/s]
Training mean loss: 1.3694874426897834: 100%|██████████| 85/85 [00:25<00:00,  3.29it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.2

100%|██████████| 8/8 [00:01<00:00,  5.22ba/s]
100%|██████████| 7248/7248 [00:02<00:00, 2820.29ex/s]
100%|██████████| 2752/2752 [00:01<00:00, 2515.60ex/s]



Updated AL datasets: train size = 2752, unlabelled size = 7248, sum: 10000 


AL iteration  86/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8665633922399476: 100%|██████████| 86/86 [00:30<00:00,  2.83it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7360231652855873: 100%|██████████| 16/16 [00:03<00:00,  4.58it/s]
Eval mean loss: 1.7360231652855873: 100%|██████████| 16/16 [00:03<00:00,  4.60it/s]


Epoch   2
Training mean loss: 1.8665633922399476: 100%|██████████| 86/86 [00:34<00:00,  2.51it/s]
Training mean loss: 1.6045132029888243: 100%|██████████| 86/86 [00:30<00:00,  2.87it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4908774644136429: 100%|██████████| 16/16 [00:03<00:00,  4.67it/s]
Eval mean loss: 1.4908774644136429: 100%|██████████| 16/16 [00:03<00:00,  4.64it/s]


Epoch   3
Training mean loss: 1.6045132029888243: 100%|██████████| 86/86 [00:33<00:00,  2.56it/s]
Training mean loss: 1.3435088382210842: 100%|██████████| 86/86 [00:32<00:00,  2.64it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.2

100%|██████████| 8/8 [00:01<00:00,  5.45ba/s]
100%|██████████| 7216/7216 [00:02<00:00, 3000.02ex/s]
100%|██████████| 2784/2784 [00:00<00:00, 3056.42ex/s]



Updated AL datasets: train size = 2784, unlabelled size = 7216, sum: 10000 


AL iteration  87/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8461550309740264: 100%|██████████| 87/87 [00:30<00:00,  2.73it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7262323498725891: 100%|██████████| 16/16 [00:03<00:00,  5.24it/s]
Eval mean loss: 1.7262323498725891: 100%|██████████| 16/16 [00:03<00:00,  5.21it/s]


Epoch   2
Training mean loss: 1.8461550309740264: 100%|██████████| 87/87 [00:33<00:00,  2.61it/s]
Training mean loss: 1.5715297145405034: 100%|██████████| 87/87 [00:32<00:00,  2.81it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4527192413806915: 100%|██████████| 16/16 [00:03<00:00,  4.60it/s]
Eval mean loss: 1.4527192413806915: 100%|██████████| 16/16 [00:03<00:00,  4.54it/s]


Epoch   3
Training mean loss: 1.5715297145405034: 100%|██████████| 87/87 [00:35<00:00,  2.43it/s]
Training mean loss: 1.3167209447115318: 100%|██████████| 87/87 [00:35<00:00,  2.56it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.2

100%|██████████| 8/8 [00:02<00:00,  3.74ba/s]
100%|██████████| 7184/7184 [00:02<00:00, 2882.38ex/s]
100%|██████████| 2816/2816 [00:00<00:00, 3006.00ex/s]



Updated AL datasets: train size = 2816, unlabelled size = 7184, sum: 10000 


AL iteration  88/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8504742180759257: 100%|██████████| 88/88 [00:28<00:00,  2.88it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7876426205039024: 100%|██████████| 16/16 [00:03<00:00,  5.02it/s]
Eval mean loss: 1.7876426205039024: 100%|██████████| 16/16 [00:03<00:00,  5.00it/s]


Epoch   2
Training mean loss: 1.8504742180759257: 100%|██████████| 88/88 [00:32<00:00,  2.74it/s]
Training mean loss: 1.5995676205916838: 100%|██████████| 88/88 [00:31<00:00,  2.89it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5289166048169136: 100%|██████████| 16/16 [00:03<00:00,  4.70it/s]
Eval mean loss: 1.5289166048169136: 100%|██████████| 16/16 [00:03<00:00,  4.71it/s]


Epoch   3
Training mean loss: 1.5995676205916838: 100%|██████████| 88/88 [00:35<00:00,  2.49it/s]
Training mean loss: 1.3831202306530692: 100%|██████████| 88/88 [00:30<00:00,  2.99it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.3

100%|██████████| 8/8 [00:01<00:00,  5.14ba/s]
100%|██████████| 7152/7152 [00:02<00:00, 2850.25ex/s]
100%|██████████| 2848/2848 [00:01<00:00, 2717.99ex/s]



Updated AL datasets: train size = 2848, unlabelled size = 7152, sum: 10000 


AL iteration  89/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8604994034499265: 100%|██████████| 89/89 [00:32<00:00,  2.50it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7711045518517494: 100%|██████████| 16/16 [00:03<00:00,  4.58it/s]
Eval mean loss: 1.7711045518517494: 100%|██████████| 16/16 [00:03<00:00,  4.51it/s]


Epoch   2
Training mean loss: 1.8604994034499265: 100%|██████████| 89/89 [00:35<00:00,  2.48it/s]
Training mean loss: 1.6072268445840043: 100%|██████████| 89/89 [00:33<00:00,  2.80it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5006792172789574: 100%|██████████| 16/16 [00:03<00:00,  4.87it/s]
Eval mean loss: 1.5006792172789574: 100%|██████████| 16/16 [00:03<00:00,  4.85it/s]


Epoch   3
Training mean loss: 1.6072268445840043: 100%|██████████| 89/89 [00:36<00:00,  2.42it/s]
Training mean loss: 1.3398616380905837: 100%|██████████| 89/89 [00:31<00:00,  2.81it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.2

100%|██████████| 8/8 [00:01<00:00,  4.79ba/s]
100%|██████████| 7120/7120 [00:03<00:00, 2243.48ex/s]
100%|██████████| 2880/2880 [00:00<00:00, 2947.23ex/s]



Updated AL datasets: train size = 2880, unlabelled size = 7120, sum: 10000 


AL iteration  90/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8193765534294977: 100%|██████████| 90/90 [00:36<00:00,  2.38it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6849961057305336: 100%|██████████| 16/16 [00:03<00:00,  4.46it/s]
Eval mean loss: 1.6849961057305336: 100%|██████████| 16/16 [00:03<00:00,  4.46it/s]


Epoch   2
Training mean loss: 1.8193765534294977: 100%|██████████| 90/90 [00:40<00:00,  2.23it/s]
Training mean loss: 1.528018335501353: 100%|██████████| 90/90 [00:38<00:00,  2.42it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4316729754209518: 100%|██████████| 16/16 [00:04<00:00,  3.73it/s]
Eval mean loss: 1.4316729754209518: 100%|██████████| 16/16 [00:04<00:00,  3.71it/s]


Epoch   3
Training mean loss: 1.528018335501353: 100%|██████████| 90/90 [00:42<00:00,  2.10it/s]
Training mean loss: 1.2879140814145407: 100%|██████████| 90/90 [00:31<00:00,  2.89it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.24

100%|██████████| 8/8 [00:01<00:00,  4.72ba/s]
100%|██████████| 7088/7088 [00:03<00:00, 2146.50ex/s]
100%|██████████| 2912/2912 [00:01<00:00, 2813.81ex/s]



Updated AL datasets: train size = 2912, unlabelled size = 7088, sum: 10000 


AL iteration  91/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8326489204888816: 100%|██████████| 91/91 [00:34<00:00,  2.58it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7207942008972168: 100%|██████████| 16/16 [00:03<00:00,  4.23it/s]
Eval mean loss: 1.7207942008972168: 100%|██████████| 16/16 [00:03<00:00,  4.19it/s]


Epoch   2
Training mean loss: 1.8326489204888816: 100%|██████████| 91/91 [00:38<00:00,  2.36it/s]
Training mean loss: 1.570155959862929: 100%|██████████| 91/91 [00:41<00:00,  2.31it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4648974314332008: 100%|██████████| 16/16 [00:04<00:00,  3.70it/s]
Eval mean loss: 1.4648974314332008: 100%|██████████| 16/16 [00:04<00:00,  3.54it/s]


Epoch   3
Training mean loss: 1.570155959862929: 100%|██████████| 91/91 [00:45<00:00,  1.99it/s]
Training mean loss: 1.3246206097550444: 100%|██████████| 91/91 [00:36<00:00,  2.50it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.24

100%|██████████| 8/8 [00:01<00:00,  4.59ba/s]
100%|██████████| 7056/7056 [00:03<00:00, 1947.59ex/s]
100%|██████████| 2944/2944 [00:01<00:00, 2440.24ex/s]



Updated AL datasets: train size = 2944, unlabelled size = 7056, sum: 10000 


AL iteration  92/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8573478525099547: 100%|██████████| 92/92 [00:35<00:00,  2.72it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7450670823454857: 100%|██████████| 16/16 [00:04<00:00,  4.01it/s]
Eval mean loss: 1.7450670823454857: 100%|██████████| 16/16 [00:04<00:00,  3.96it/s]


Epoch   2
Training mean loss: 1.8573478525099547: 100%|██████████| 92/92 [00:39<00:00,  2.35it/s]
Training mean loss: 1.591209098048832: 100%|██████████| 92/92 [00:32<00:00,  2.91it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4687381014227867: 100%|██████████| 16/16 [00:03<00:00,  4.35it/s]
Eval mean loss: 1.4687381014227867: 100%|██████████| 16/16 [00:03<00:00,  4.34it/s]


Epoch   3
Training mean loss: 1.591209098048832: 100%|██████████| 92/92 [00:36<00:00,  2.51it/s]
Training mean loss: 1.326767006646032: 100%|██████████| 92/92 [00:34<00:00,  2.70it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.23

100%|██████████| 8/8 [00:01<00:00,  5.35ba/s]
100%|██████████| 7024/7024 [00:02<00:00, 2840.80ex/s]
100%|██████████| 2976/2976 [00:01<00:00, 2940.42ex/s]



Updated AL datasets: train size = 2976, unlabelled size = 7024, sum: 10000 


AL iteration  93/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8284611304601033: 100%|██████████| 93/93 [00:32<00:00,  2.64it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6963530033826828: 100%|██████████| 16/16 [00:03<00:00,  4.37it/s]
Eval mean loss: 1.6963530033826828: 100%|██████████| 16/16 [00:03<00:00,  4.34it/s]


Epoch   2
Training mean loss: 1.8284611304601033: 100%|██████████| 93/93 [00:36<00:00,  2.55it/s]
Training mean loss: 1.5391128242656749: 100%|██████████| 93/93 [00:33<00:00,  2.79it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.41714296489954: 100%|██████████| 16/16 [00:03<00:00,  4.82it/s]  
Eval mean loss: 1.41714296489954: 100%|██████████| 16/16 [00:03<00:00,  4.80it/s]


Epoch   3
Training mean loss: 1.5391128242656749: 100%|██████████| 93/93 [00:37<00:00,  2.50it/s]
Training mean loss: 1.295026748411117: 100%|██████████| 93/93 [00:32<00:00,  2.83it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.195

100%|██████████| 8/8 [00:01<00:00,  5.18ba/s]
100%|██████████| 6992/6992 [00:02<00:00, 2714.63ex/s]
100%|██████████| 3008/3008 [00:01<00:00, 2994.96ex/s]



Updated AL datasets: train size = 3008, unlabelled size = 6992, sum: 10000 


AL iteration  94/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8469967220691925: 100%|██████████| 94/94 [00:33<00:00,  2.78it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7186608836054802: 100%|██████████| 16/16 [00:03<00:00,  4.62it/s]
Eval mean loss: 1.7186608836054802: 100%|██████████| 16/16 [00:03<00:00,  4.56it/s]


Epoch   2
Training mean loss: 1.8469967220691925: 100%|██████████| 94/94 [00:37<00:00,  2.53it/s]
Training mean loss: 1.5624652619057513: 100%|██████████| 94/94 [00:35<00:00,  2.66it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4694457352161407: 100%|██████████| 16/16 [00:03<00:00,  4.66it/s]
Eval mean loss: 1.4694457352161407: 100%|██████████| 16/16 [00:03<00:00,  4.65it/s]


Epoch   3
Training mean loss: 1.5624652619057513: 100%|██████████| 94/94 [00:38<00:00,  2.42it/s]
Training mean loss: 1.3166064062017075: 100%|██████████| 94/94 [00:36<00:00,  2.68it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.2

100%|██████████| 7/7 [00:01<00:00,  3.53ba/s]
100%|██████████| 6960/6960 [00:03<00:00, 2238.57ex/s]
100%|██████████| 3040/3040 [00:02<00:00, 1377.02ex/s]



Updated AL datasets: train size = 3040, unlabelled size = 6960, sum: 10000 


AL iteration  95/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8684027433395385: 100%|██████████| 95/95 [00:34<00:00,  2.68it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7701178938150406: 100%|██████████| 16/16 [00:04<00:00,  3.74it/s]
Eval mean loss: 1.7701178938150406: 100%|██████████| 16/16 [00:04<00:00,  3.76it/s]


Epoch   2
Training mean loss: 1.8684027433395385: 100%|██████████| 95/95 [00:39<00:00,  2.43it/s]
Training mean loss: 1.606755815054241: 100%|██████████| 95/95 [00:36<00:00,  2.76it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.5289260223507881: 100%|██████████| 16/16 [00:03<00:00,  4.60it/s]
Eval mean loss: 1.5289260223507881: 100%|██████████| 16/16 [00:03<00:00,  4.59it/s]


Epoch   3
Training mean loss: 1.606755815054241: 100%|██████████| 95/95 [00:40<00:00,  2.37it/s]
Training mean loss: 1.345809060648868: 100%|██████████| 95/95 [00:37<00:00,  2.58it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.26

100%|██████████| 7/7 [00:01<00:00,  4.58ba/s]
100%|██████████| 6928/6928 [00:02<00:00, 2372.32ex/s]
100%|██████████| 3072/3072 [00:01<00:00, 2831.16ex/s]



Updated AL datasets: train size = 3072, unlabelled size = 6928, sum: 10000 


AL iteration  96/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8446396725873153: 100%|██████████| 96/96 [00:34<00:00,  2.69it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6997895911335945: 100%|██████████| 16/16 [00:03<00:00,  4.40it/s]
Eval mean loss: 1.6997895911335945: 100%|██████████| 16/16 [00:03<00:00,  4.34it/s]


Epoch   2
Training mean loss: 1.8446396725873153: 100%|██████████| 96/96 [00:37<00:00,  2.53it/s]
Training mean loss: 1.5527468932171662: 100%|██████████| 96/96 [00:34<00:00,  2.80it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4225761219859123: 100%|██████████| 16/16 [00:03<00:00,  4.68it/s]
Eval mean loss: 1.4225761219859123: 100%|██████████| 16/16 [00:03<00:00,  4.67it/s]


Epoch   3
Training mean loss: 1.5527468932171662: 100%|██████████| 96/96 [00:37<00:00,  2.54it/s]
Training mean loss: 1.2930782275895278: 100%|██████████| 96/96 [00:39<00:00,  2.13it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.1

100%|██████████| 7/7 [00:01<00:00,  4.63ba/s]
100%|██████████| 6896/6896 [00:02<00:00, 2820.68ex/s]
100%|██████████| 3104/3104 [00:01<00:00, 2745.55ex/s]



Updated AL datasets: train size = 3104, unlabelled size = 6896, sum: 10000 


AL iteration  97/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8733221095861847: 100%|██████████| 97/97 [00:34<00:00,  2.47it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7511575669050217: 100%|██████████| 16/16 [00:03<00:00,  4.80it/s]
Eval mean loss: 1.7511575669050217: 100%|██████████| 16/16 [00:03<00:00,  4.78it/s]


Epoch   2
Training mean loss: 1.8733221095861847: 100%|██████████| 97/97 [00:37<00:00,  2.56it/s]
Training mean loss: 1.5771607430939822: 100%|██████████| 97/97 [00:34<00:00,  2.94it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.444871336221695: 100%|██████████| 16/16 [00:03<00:00,  4.57it/s] 
Eval mean loss: 1.444871336221695: 100%|██████████| 16/16 [00:03<00:00,  4.47it/s]


Epoch   3
Training mean loss: 1.5771607430939822: 100%|██████████| 97/97 [00:37<00:00,  2.58it/s]
Training mean loss: 1.3013587305226277: 100%|██████████| 97/97 [00:35<00:00,  2.82it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.21

100%|██████████| 7/7 [00:01<00:00,  4.66ba/s]
100%|██████████| 6864/6864 [00:02<00:00, 3042.17ex/s]
100%|██████████| 3136/3136 [00:01<00:00, 2963.14ex/s]



Updated AL datasets: train size = 3136, unlabelled size = 6864, sum: 10000 


AL iteration  98/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8558510858185437: 100%|██████████| 98/98 [00:34<00:00,  2.84it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7380525842308998: 100%|██████████| 16/16 [00:03<00:00,  4.91it/s]
Eval mean loss: 1.7380525842308998: 100%|██████████| 16/16 [00:03<00:00,  4.87it/s]


Epoch   2
Training mean loss: 1.8558510858185437: 100%|██████████| 98/98 [00:38<00:00,  2.56it/s]
Training mean loss: 1.562175377291076: 100%|██████████| 98/98 [00:33<00:00,  2.94it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4744038358330727: 100%|██████████| 16/16 [00:03<00:00,  4.87it/s]
Eval mean loss: 1.4744038358330727: 100%|██████████| 16/16 [00:03<00:00,  4.81it/s]


Epoch   3
Training mean loss: 1.562175377291076: 100%|██████████| 98/98 [00:36<00:00,  2.68it/s]
Training mean loss: 1.3045937345952403: 100%|██████████| 98/98 [00:32<00:00,  3.01it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.23

100%|██████████| 7/7 [00:01<00:00,  4.84ba/s]
100%|██████████| 6832/6832 [00:02<00:00, 3022.43ex/s]
100%|██████████| 3168/3168 [00:01<00:00, 3014.52ex/s]



Updated AL datasets: train size = 3168, unlabelled size = 6832, sum: 10000 


AL iteration  99/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.8329676341528844: 100%|██████████| 99/99 [00:38<00:00,  2.30it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.7352888956665993: 100%|██████████| 16/16 [00:03<00:00,  4.56it/s]
Eval mean loss: 1.7352888956665993: 100%|██████████| 16/16 [00:03<00:00,  4.57it/s]


Epoch   2
Training mean loss: 1.8329676341528844: 100%|██████████| 99/99 [00:42<00:00,  2.35it/s]
Training mean loss: 1.549357007248233: 100%|██████████| 99/99 [00:37<00:00,  2.77it/s] 

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.4069626405835152: 100%|██████████| 16/16 [00:03<00:00,  4.29it/s]
Eval mean loss: 1.4069626405835152: 100%|██████████| 16/16 [00:03<00:00,  4.37it/s]


Epoch   3
Training mean loss: 1.549357007248233: 100%|██████████| 99/99 [00:41<00:00,  2.40it/s]
Training mean loss: 1.2822840478685167: 100%|██████████| 99/99 [00:35<00:00,  2.91it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.22

100%|██████████| 7/7 [00:01<00:00,  4.93ba/s]
100%|██████████| 6800/6800 [00:02<00:00, 3015.97ex/s]
100%|██████████| 3200/3200 [00:01<00:00, 2904.14ex/s]



Updated AL datasets: train size = 3200, unlabelled size = 6800, sum: 10000 


AL iteration 100/100


Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Model initialized.


Epoch   1
Training mean loss: 1.817127788066864: 100%|██████████| 100/100 [00:33<00:00,  2.94it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.6549407988786697: 100%|██████████| 16/16 [00:03<00:00,  4.97it/s]
Eval mean loss: 1.6549407988786697: 100%|██████████| 16/16 [00:03<00:00,  4.95it/s]


Epoch   2
Training mean loss: 1.817127788066864: 100%|██████████| 100/100 [00:36<00:00,  2.74it/s]
Training mean loss: 1.5098282980918885: 100%|██████████| 100/100 [00:33<00:00,  2.98it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean loss: 1.3942479118704796: 100%|██████████| 16/16 [00:03<00:00,  4.92it/s]
Eval mean loss: 1.3942479118704796: 100%|██████████| 16/16 [00:03<00:00,  4.92it/s]


Epoch   3
Training mean loss: 1.5098282980918885: 100%|██████████| 100/100 [00:37<00:00,  2.70it/s]
Training mean loss: 1.2763945031166077: 100%|██████████| 100/100 [00:33<00:00,  3.00it/s]

Epoch finished. Evaluation:
Running Eval mode...
Eval mean l

100%|██████████| 7/7 [00:01<00:00,  4.95ba/s]
100%|██████████| 6768/6768 [00:02<00:00, 2603.62ex/s]
100%|██████████| 3232/3232 [00:01<00:00, 2819.40ex/s]



Updated AL datasets: train size = 3232, unlabelled size = 6768, sum: 10000 
