In [1]:
import os
import pickle

import numpy as np
import torch

from utils import get_dataset, get_net, get_strategy

In [2]:
# Load the labeling schedule: cumulative labeled-pool sizes. The experiment
# cell starts from samples[1] labeled items and uses the difference between
# consecutive entries as the query budget of each round.
samples = np.load("samples_bert.npy")

In [3]:
# Display the schedule so the per-round pool sizes are visible in the notebook.
samples

array([  0, 100, 161, 221, 282, 344, 409, 474])

In [4]:
# Active-learning experiment.
#
# Repeats the same schedule N_RUNS times. Each run starts from a fresh
# dataset/network/strategy, trains on samples[1] labeled items, then grows the
# labeled pool round by round (budget = difference between consecutive
# entries of `samples`), retraining and evaluating after every query.
#
# Artifacts written:
#   ./extracted_models/<name>_<strat>_<run><RUN_TAG>.pt  final classifier of each run
#   ./results/{acc,agr,kl}<name><strat><RUN_TAG>         pickled list of per-run curves
#
# NOTE(review): `agr` and `kl` are whatever strategy.predict returns alongside
# the predictions — presumably an agreement score and a KL divergence; confirm
# against utils.

# NOTE(review): seeding is disabled, so every run (and every re-execution of
# this cell) uses different randomness. Un-comment to make results repeatable.
# np.random.seed(1)
# torch.manual_seed(1)
torch.backends.cudnn.enabled = False

name = "BERT"
strat = "LeastConfidence"

N_RUNS = 10        # number of independent repetitions of the whole schedule
NUM_CLASSES = 10   # forwarded to get_net as y_num
RUN_TAG = str(3)   # suffix appended to every artifact name written by this cell
MODEL_DIR = "./extracted_models"
RESULTS_DIR = "./results"

# Create the output directories up front: otherwise a missing directory only
# surfaces as an error after the first (expensive) training has finished.
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

# device (does not change between runs, so it is resolved once)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# One inner list (metric per round) is appended per completed run.
acc_list_kc_list = []
agr_list_kc_list = []
kl_list_kc_list = []

for i in range(N_RUNS):
    dataset = get_dataset(name)                         # load dataset
    net = get_net(name, device, y_num=NUM_CLASSES)      # load network
    strategy = get_strategy(strat)(dataset, net)        # load strategy

    # samples[0] is not used; the initial labeled pool has samples[1] items.
    initial = samples[1]

    # start experiment
    dataset.initialize_labels(initial)
    print(f"number of labeled pool: {initial}")
    print(f"number of unlabeled pool: {dataset.n_pool-initial}")
    print(f"number of testing pool: {dataset.n_test}")
    print()

    # Per-round curves of the current run.
    acc_list_kc = []
    agr_list_kc = []
    kl_list_kc = []

    # round 0 accuracy
    print("Round 0")
    strategy.train()
    preds, agr, kl = strategy.predict(dataset.get_test_data())
    acc = dataset.cal_test_acc(preds)*100
    acc_list_kc.append(acc)
    agr_list_kc.append(agr)
    kl_list_kc.append(kl)
    print(f"Round 0 testing accuracy: {acc} %")
    print("Samples:", samples[1])

    for rd in range(1, len(samples)-1):
        print(f"Round {rd}")
        # Number of new labels needed to reach the next scheduled pool size.
        budget = samples[rd+1] - samples[rd]
        # query
        query_idxs = strategy.query(budget)
        # update labels
        strategy.update(query_idxs)
        strategy.train()

        # calculate accuracy
        preds, agr, kl = strategy.predict(dataset.get_test_data())
        acc = dataset.cal_test_acc(preds)*100
        print('\n')
        print(f"Round {rd} testing accuracy: {acc} %")
        acc_list_kc.append(acc)
        agr_list_kc.append(agr)
        kl_list_kc.append(kl)
        print("Samples:", samples[rd+1])

    # Save the classifier once per run. The path does not depend on the round,
    # so saving inside the round loop only rewrote the same file repeatedly;
    # the file left on disk is the final-round model either way.
    torch.save(strategy.net.clf.state_dict(), f"{MODEL_DIR}/{name}_{strat}_{i}{RUN_TAG}.pt")

    acc_list_kc_list.append(acc_list_kc)
    agr_list_kc_list.append(agr_list_kc)
    kl_list_kc_list.append(kl_list_kc)

    # Re-dump all curves collected so far after every run, so an interrupted
    # experiment still leaves the completed runs on disk.
    for prefix, curves in (
        ("acc", acc_list_kc_list),
        ("agr", agr_list_kc_list),
        ("kl", kl_list_kc_list),
    ):
        with open(f"{RESULTS_DIR}/{prefix}{name}{strat}{RUN_TAG}", "wb") as fp:
            pickle.dump(curves, fp)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2167, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:13<00:27, 13.90s/it]

Train accuracy =  tensor(0.3167, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:24<00:11, 11.78s/it]

Train accuracy =  tensor(0.3833, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:34<00:00, 11.49s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:19<00:00,  1.95it/s]


Round 0 testing accuracy: 19.310344827586206 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3125, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:26<00:52, 26.31s/it]

Train accuracy =  tensor(0.4896, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:52<00:26, 26.25s/it]

Train accuracy =  tensor(0.4167, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:05<00:00, 21.96s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.54it/s]




Round 1 testing accuracy: 31.724137931034484 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3864, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:14<00:28, 14.04s/it]

Train accuracy =  tensor(0.4697, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:28<00:14, 14.46s/it]

Train accuracy =  tensor(0.4697, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:42<00:00, 14.30s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.96it/s]




Round 2 testing accuracy: 46.55172413793103 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4620, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:30<01:00, 30.29s/it]

Train accuracy =  tensor(0.5497, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:49<00:23, 23.71s/it]

Train accuracy =  tensor(0.5965, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:16<00:00, 25.53s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.92it/s]




Round 3 testing accuracy: 46.55172413793103 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4879, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:30<01:00, 30.42s/it]

Train accuracy =  tensor(0.5024, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:50<00:24, 24.06s/it]

Train accuracy =  tensor(0.5700, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:20<00:00, 26.84s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.38it/s]




Round 4 testing accuracy: 55.51724137931034 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4756, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:34<01:08, 34.44s/it]

Train accuracy =  tensor(0.5447, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:57<00:27, 27.96s/it]

Train accuracy =  tensor(0.6545, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:30<00:00, 30.29s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.73it/s]




Round 5 testing accuracy: 51.37931034482759 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5825, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:37<01:15, 37.87s/it]

Train accuracy =  tensor(0.6105, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:59<00:28, 28.51s/it]

Train accuracy =  tensor(0.6526, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:23<00:00, 27.74s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.98it/s]




Round 6 testing accuracy: 60.0 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.1833, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:19,  9.84s/it]

Train accuracy =  tensor(0.2000, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:19<00:09,  9.83s/it]

Train accuracy =  tensor(0.3167, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:30<00:00, 10.25s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.94it/s]


Round 0 testing accuracy: 24.137931034482758 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2604, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:46, 23.32s/it]

Train accuracy =  tensor(0.2604, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:35<00:16, 16.86s/it]

Train accuracy =  tensor(0.4479, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:48<00:00, 16.05s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:10<00:00,  3.62it/s]




Round 1 testing accuracy: 42.06896551724138 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4697, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:56, 28.43s/it]

Train accuracy =  tensor(0.4924, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:21, 21.39s/it]

Train accuracy =  tensor(0.5000, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:58<00:00, 19.62s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.78it/s]




Round 2 testing accuracy: 37.58620689655172 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5497, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:31<01:02, 31.25s/it]

Train accuracy =  tensor(0.5673, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:48<00:22, 22.98s/it]

Train accuracy =  tensor(0.6257, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:07<00:00, 22.51s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.98it/s]




Round 3 testing accuracy: 36.55172413793103 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5845, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:38, 19.27s/it]

Train accuracy =  tensor(0.6860, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:38<00:18, 18.99s/it]

Train accuracy =  tensor(0.7295, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:10<00:00, 23.43s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.96it/s]




Round 4 testing accuracy: 66.20689655172414 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7846, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:21<00:42, 21.33s/it]

Train accuracy =  tensor(1.0000, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:41<00:20, 20.77s/it]

Train accuracy =  tensor(1.0732, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:13<00:00, 24.36s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.90it/s]




Round 5 testing accuracy: 86.89655172413792 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0211, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:31<01:02, 31.23s/it]

Train accuracy =  tensor(1.1579, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:54<00:26, 26.34s/it]

Train accuracy =  tensor(1.1965, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:27<00:00, 29.15s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.68it/s]




Round 6 testing accuracy: 84.82758620689656 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:10<00:20, 10.14s/it]

Train accuracy =  tensor(0.2500, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:22<00:11, 11.34s/it]

Train accuracy =  tensor(0.3500, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:32<00:00, 10.92s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.76it/s]


Round 0 testing accuracy: 21.03448275862069 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3229, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:13<00:26, 13.36s/it]

Train accuracy =  tensor(0.3958, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:25<00:12, 12.59s/it]

Train accuracy =  tensor(0.5729, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.69s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:19<00:00,  1.90it/s]




Round 1 testing accuracy: 37.93103448275862 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4015, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:29<00:58, 29.16s/it]

Train accuracy =  tensor(0.5076, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:45<00:21, 21.38s/it]

Train accuracy =  tensor(0.5682, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:59<00:00, 19.75s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.26it/s]




Round 2 testing accuracy: 38.96551724137931 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4795, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:16<00:33, 16.94s/it]

Train accuracy =  tensor(0.4795, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:47<00:24, 24.89s/it]

Train accuracy =  tensor(0.5497, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:03<00:00, 21.24s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.14it/s]




Round 3 testing accuracy: 43.44827586206896 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5990, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:40, 20.13s/it]

Train accuracy =  tensor(0.8551, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:49<00:25, 25.48s/it]

Train accuracy =  tensor(0.9275, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:06<00:00, 22.19s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:17<00:00,  2.06it/s]




Round 4 testing accuracy: 45.17241379310345 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8455, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:35<01:10, 35.28s/it]

Train accuracy =  tensor(1.0325, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:57<00:27, 27.34s/it]

Train accuracy =  tensor(1.0691, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:25<00:00, 28.39s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.27it/s]




Round 5 testing accuracy: 61.37931034482759 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9614, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:33<01:06, 33.48s/it]

Train accuracy =  tensor(0.9754, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:27, 27.10s/it]

Train accuracy =  tensor(1.0491, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:30<00:00, 30.12s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.63it/s]




Round 6 testing accuracy: 65.51724137931035 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2667, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:11<00:23, 11.51s/it]

Train accuracy =  tensor(0.2500, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:21<00:10, 10.71s/it]

Train accuracy =  tensor(0.4000, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:33<00:00, 11.20s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.85it/s]


Round 0 testing accuracy: 18.96551724137931 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3438, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:15<00:30, 15.11s/it]

Train accuracy =  tensor(0.2396, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:26<00:13, 13.07s/it]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.91s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.88it/s]




Round 1 testing accuracy: 18.96551724137931 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5455, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:14<00:29, 14.54s/it]

Train accuracy =  tensor(0.7727, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:30<00:15, 15.37s/it]

Train accuracy =  tensor(0.8561, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:45<00:00, 15.03s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.97it/s]




Round 2 testing accuracy: 41.724137931034484 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6667, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:57, 28.85s/it]

Train accuracy =  tensor(0.7544, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:45<00:21, 21.59s/it]

Train accuracy =  tensor(0.8012, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:02<00:00, 20.84s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.85it/s]




Round 3 testing accuracy: 51.03448275862069 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7343, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:18<00:36, 18.35s/it]

Train accuracy =  tensor(0.8792, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:40<00:20, 20.71s/it]

Train accuracy =  tensor(0.9227, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:59<00:00, 19.76s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.70it/s]




Round 4 testing accuracy: 61.03448275862069 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8374, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:22<00:45, 22.87s/it]

Train accuracy =  tensor(1.0813, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:22, 22.09s/it]

Train accuracy =  tensor(1.1423, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:19<00:00, 26.49s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.81it/s]




Round 5 testing accuracy: 90.6896551724138 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1404, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:34<01:09, 34.74s/it]

Train accuracy =  tensor(1.2421, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:57<00:27, 27.41s/it]

Train accuracy =  tensor(1.2632, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:30<00:00, 30.15s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.00it/s]




Round 6 testing accuracy: 93.10344827586206 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3667, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:19,  9.65s/it]

Train accuracy =  tensor(0.4000, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:28<00:15, 15.09s/it]

Train accuracy =  tensor(0.4000, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:37<00:00, 12.40s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.66it/s]


Round 0 testing accuracy: 24.482758620689655 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5208, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:47, 23.61s/it]

Train accuracy =  tensor(0.4479, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:37<00:17, 17.62s/it]

Train accuracy =  tensor(0.4062, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:48<00:00, 16.03s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.88it/s]




Round 1 testing accuracy: 24.482758620689655 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3939, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:21<00:42, 21.39s/it]

Train accuracy =  tensor(0.4470, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:35<00:17, 17.16s/it]

Train accuracy =  tensor(0.6136, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:49<00:00, 16.47s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.75it/s]




Round 2 testing accuracy: 24.482758620689655 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7310, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:18<00:37, 18.83s/it]

Train accuracy =  tensor(0.8480, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:50<00:26, 26.12s/it]

Train accuracy =  tensor(0.8421, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:06<00:00, 22.02s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.57it/s]




Round 3 testing accuracy: 43.44827586206896 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7295, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:39, 19.65s/it]

Train accuracy =  tensor(0.8599, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:38<00:19, 19.43s/it]

Train accuracy =  tensor(0.9517, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:08<00:00, 22.95s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.95it/s]




Round 4 testing accuracy: 55.86206896551724 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9675, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:22<00:45, 22.61s/it]

Train accuracy =  tensor(1.0854, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:43<00:21, 21.71s/it]

Train accuracy =  tensor(1.0854, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:16<00:00, 25.61s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.73it/s]




Round 5 testing accuracy: 66.55172413793103 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9789, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:22<00:44, 22.04s/it]

Train accuracy =  tensor(1.1579, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:54<00:27, 27.91s/it]

Train accuracy =  tensor(1.2421, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:28<00:00, 29.56s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.55it/s]




Round 6 testing accuracy: 91.72413793103448 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2167, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:19,  9.64s/it]

Train accuracy =  tensor(0.3667, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:19<00:09,  9.53s/it]

Train accuracy =  tensor(0.3833, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:31<00:00, 10.58s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.82it/s]


Round 0 testing accuracy: 21.379310344827587 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4062, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:11<00:23, 11.65s/it]

Train accuracy =  tensor(0.4792, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:23<00:11, 11.66s/it]

Train accuracy =  tensor(0.5729, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:45<00:00, 15.31s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.89it/s]




Round 1 testing accuracy: 22.758620689655174 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5909, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:13<00:27, 13.82s/it]

Train accuracy =  tensor(0.6894, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:32<00:16, 16.42s/it]

Train accuracy =  tensor(0.6364, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:57<00:00, 19.06s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.83it/s]




Round 2 testing accuracy: 27.241379310344826 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6199, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:18<00:36, 18.41s/it]

Train accuracy =  tensor(0.7719, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:34<00:17, 17.25s/it]

Train accuracy =  tensor(0.7544, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:52<00:00, 17.41s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.03it/s]




Round 3 testing accuracy: 36.206896551724135 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7246, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:30<01:00, 30.05s/it]

Train accuracy =  tensor(0.9614, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:50<00:24, 24.13s/it]

Train accuracy =  tensor(1.0193, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:09<00:00, 23.14s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  2.02it/s]




Round 4 testing accuracy: 59.310344827586206 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9431, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:34<01:08, 34.39s/it]

Train accuracy =  tensor(1.0528, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:07<00:33, 33.39s/it]

Train accuracy =  tensor(1.1016, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:27<00:00, 29.03s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.14it/s]




Round 5 testing accuracy: 62.758620689655174 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0982, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:44<01:28, 44.45s/it]

Train accuracy =  tensor(1.2000, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:06<00:31, 31.51s/it]

Train accuracy =  tensor(1.2456, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:43<00:00, 34.54s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.02it/s]




Round 6 testing accuracy: 84.48275862068965 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2500, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:18,  9.09s/it]

Train accuracy =  tensor(0.1667, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:19<00:09,  9.99s/it]

Train accuracy =  tensor(0.3500, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:29<00:00,  9.76s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.03it/s]


Round 0 testing accuracy: 19.310344827586206 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3646, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:14<00:29, 14.81s/it]

Train accuracy =  tensor(0.4583, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:27<00:13, 13.45s/it]

Train accuracy =  tensor(0.5000, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.78s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:13<00:00,  2.74it/s]




Round 1 testing accuracy: 19.310344827586206 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5152, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:24<00:48, 24.33s/it]

Train accuracy =  tensor(0.5909, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:48<00:24, 24.40s/it]

Train accuracy =  tensor(0.6742, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:02<00:00, 20.82s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.96it/s]




Round 2 testing accuracy: 37.58620689655172 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5439, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:17<00:35, 17.55s/it]

Train accuracy =  tensor(0.5965, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:34<00:17, 17.04s/it]

Train accuracy =  tensor(0.5673, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:51<00:00, 17.11s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.75it/s]




Round 3 testing accuracy: 37.93103448275862 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5556, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:40, 20.38s/it]

Train accuracy =  tensor(0.5700, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:51<00:26, 26.47s/it]

Train accuracy =  tensor(0.7343, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:11<00:00, 23.78s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.78it/s]




Round 4 testing accuracy: 46.89655172413793 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6992, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:41, 20.68s/it]

Train accuracy =  tensor(0.7602, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:42<00:21, 21.16s/it]

Train accuracy =  tensor(0.7520, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:06<00:00, 22.01s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.89it/s]




Round 5 testing accuracy: 52.758620689655174 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8561, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:47, 23.91s/it]

Train accuracy =  tensor(0.9368, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:48<00:24, 24.23s/it]

Train accuracy =  tensor(0.9895, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:22<00:00, 27.55s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.69it/s]




Round 6 testing accuracy: 69.6551724137931 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3500, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:11<00:23, 11.91s/it]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:33<00:17, 17.42s/it]

Train accuracy =  tensor(0.3667, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:43<00:00, 14.38s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.85it/s]


Round 0 testing accuracy: 20.344827586206897 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3750, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:15<00:30, 15.04s/it]

Train accuracy =  tensor(0.4271, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:26<00:12, 12.77s/it]

Train accuracy =  tensor(0.4062, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:37<00:00, 12.48s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.67it/s]




Round 1 testing accuracy: 37.24137931034483 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4015, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:13<00:27, 13.64s/it]

Train accuracy =  tensor(0.4773, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:29<00:14, 14.67s/it]

Train accuracy =  tensor(0.6667, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:44<00:00, 14.81s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:19<00:00,  1.92it/s]




Round 2 testing accuracy: 36.896551724137936 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5380, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:15<00:31, 15.71s/it]

Train accuracy =  tensor(0.6257, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:31<00:15, 15.64s/it]

Train accuracy =  tensor(0.6550, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:56<00:00, 18.91s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.93it/s]




Round 3 testing accuracy: 44.13793103448276 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7005, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:47, 23.56s/it]

Train accuracy =  tensor(0.8502, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:21, 22.00s/it]

Train accuracy =  tensor(0.8937, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:03<00:00, 21.00s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.97it/s]




Round 4 testing accuracy: 54.137931034482754 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8455, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:40, 20.36s/it]

Train accuracy =  tensor(1.0488, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:54<00:28, 28.67s/it]

Train accuracy =  tensor(1.1098, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:26<00:00, 28.93s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.65it/s]




Round 5 testing accuracy: 76.55172413793103 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1., device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:27<00:54, 27.16s/it]

Train accuracy =  tensor(1.0596, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:03<00:32, 32.61s/it]

Train accuracy =  tensor(1.1579, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:26<00:00, 28.95s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.14it/s]




Round 6 testing accuracy: 89.65517241379311 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3500, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:10<00:20, 10.24s/it]

Train accuracy =  tensor(0.3833, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:32<00:17, 17.45s/it]

Train accuracy =  tensor(0.4167, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:42<00:00, 14.23s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.61it/s]


Round 0 testing accuracy: 18.620689655172416 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2396, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:12<00:24, 12.40s/it]

Train accuracy =  tensor(0.2812, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:26<00:13, 13.12s/it]

Train accuracy =  tensor(0.3021, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:49<00:00, 16.53s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.47it/s]




Round 1 testing accuracy: 23.448275862068964 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5909, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:13<00:27, 13.83s/it]

Train accuracy =  tensor(0.8485, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:27<00:13, 13.95s/it]

Train accuracy =  tensor(0.8864, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:41<00:00, 13.79s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.13it/s]




Round 2 testing accuracy: 46.206896551724135 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7602, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:38, 19.16s/it]

Train accuracy =  tensor(0.8596, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:34<00:16, 16.77s/it]

Train accuracy =  tensor(0.8830, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:01<00:00, 20.40s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.01it/s]




Round 3 testing accuracy: 52.06896551724138 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8502, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:18<00:36, 18.29s/it]

Train accuracy =  tensor(1.0338, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:36<00:18, 18.43s/it]

Train accuracy =  tensor(1.1401, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:06<00:00, 22.04s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.57it/s]




Round 4 testing accuracy: 85.17241379310346 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0366, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:32<01:04, 32.08s/it]

Train accuracy =  tensor(1.1463, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:53<00:25, 25.55s/it]

Train accuracy =  tensor(1.1829, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:18<00:00, 26.02s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:13<00:00,  2.84it/s]




Round 5 testing accuracy: 93.10344827586206 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1298, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:47<01:35, 47.94s/it]

Train accuracy =  tensor(1.1825, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:10<00:33, 33.01s/it]

Train accuracy =  tensor(1.2491, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:35<00:00, 31.89s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.98it/s]




Round 6 testing accuracy: 94.13793103448276 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5000, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:12<00:24, 12.23s/it]

Train accuracy =  tensor(0.3667, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:22<00:11, 11.28s/it]

Train accuracy =  tensor(0.6000, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:32<00:00, 10.99s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.66it/s]


Round 0 testing accuracy: 21.72413793103448 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3854, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:12<00:24, 12.43s/it]

Train accuracy =  tensor(0.4792, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:25<00:12, 12.75s/it]

Train accuracy =  tensor(0.4792, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:51<00:00, 17.02s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.96it/s]




Round 1 testing accuracy: 32.068965517241374 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5076, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:15<00:31, 15.57s/it]

Train accuracy =  tensor(0.6667, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:40<00:20, 20.85s/it]

Train accuracy =  tensor(0.7803, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:05<00:00, 21.89s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.17it/s]




Round 2 testing accuracy: 36.206896551724135 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7895, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:38, 19.45s/it]

Train accuracy =  tensor(0.9708, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:34<00:16, 16.92s/it]

Train accuracy =  tensor(0.9766, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:51<00:00, 17.15s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:19<00:00,  1.94it/s]




Round 3 testing accuracy: 50.689655172413794 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9469, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:18<00:36, 18.27s/it]

Train accuracy =  tensor(1.0338, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:36<00:18, 18.46s/it]

Train accuracy =  tensor(1.0580, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:55<00:00, 18.52s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.99it/s]




Round 4 testing accuracy: 59.6551724137931 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8699, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:41, 20.51s/it]

Train accuracy =  tensor(0.9431, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:52<00:27, 27.49s/it]

Train accuracy =  tensor(0.9512, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:15<00:00, 25.25s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.74it/s]




Round 5 testing accuracy: 60.0 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8386, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:36<01:13, 36.94s/it]

Train accuracy =  tensor(0.8842, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:59<00:28, 28.56s/it]

Train accuracy =  tensor(0.8947, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:35<00:00, 31.79s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.07it/s]




Round 6 testing accuracy: 61.03448275862069 %
Samples: 474


In [17]:
acc_list_kc_list

[[19.310344827586206,
  31.724137931034484,
  46.55172413793103,
  46.55172413793103,
  55.51724137931034,
  51.37931034482759,
  60.0],
 [24.137931034482758,
  42.06896551724138,
  37.58620689655172,
  36.55172413793103,
  66.20689655172414,
  86.89655172413792,
  84.82758620689656],
 [21.03448275862069,
  37.93103448275862,
  38.96551724137931,
  43.44827586206896,
  45.17241379310345,
  61.37931034482759,
  65.51724137931035],
 [19.96164712105796,
  29.50883438950384,
  48.512695143976,
  44.679072525800564,
  54.39057618246424,
  49.10938790889123,
  60.325002454240675],
 [22.540053983224638,
  41.111452864068816,
  39.731080579076625,
  34.56282044048536,
  65.30283013005935,
  86.31888070768126,
  87.13711713103736],
 [21.379310344827587,
  22.758620689655174,
  27.241379310344826,
  36.206896551724135,
  59.310344827586206,
  62.758620689655174,
  84.48275862068965],
 [19.310344827586206,
  19.310344827586206,
  37.58620689655172,
  37.93103448275862,
  46.89655172413793,
  52.7