In [1]:
import os
import pickle

import numpy as np
import torch

from utils import get_dataset, get_net, get_strategy

In [2]:
# Load the labeling schedule. The experiment cell below uses samples[1] as the
# initial labeled-pool size and the consecutive differences
# samples[rd+1] - samples[rd] as the per-round query budget.
samples = np.load("samples_bert.npy")

In [3]:
# Display the loaded schedule as a quick visual sanity check.
samples

array([  0, 100, 161, 221, 282, 344, 409, 474])

In [4]:
# Active-learning experiment: repeat the full labeling schedule N_RUNS times
# with the selected query strategy, recording the three values returned by
# strategy.predict() after every round and persisting them after every run.

# ---- configuration ----------------------------------------------------------
torch.backends.cudnn.enabled = False  # cuDNN stays disabled, as in the original setup

name = "BERT"              # key handed to get_dataset / get_net
strat = "KCenterGreedy"    # key handed to get_strategy
N_RUNS = 10                # independent repetitions of the whole schedule
Y_NUM = 10                 # forwarded to get_net as y_num (presumably the class count - confirm in utils)
RUN_TAG = 3                # suffix appended to every checkpoint / result file name
MODEL_DIR = "./extracted_models/"
RESULTS_DIR = "./results/"
# Seeds are derived from RUN_TAG so that differently tagged batches do not
# replay each other, while any single run can still be reproduced on its own.
BASE_SEED = 1000 * RUN_TAG

# ---- device (loop-invariant, so resolved once) ------------------------------
use_cuda = torch.cuda.is_available()
if use_cuda:
    # GPU index 1 is preferred; fall back to index 0 on hosts that expose a
    # single GPU instead of failing when the model is moved to the device.
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

# Make sure the output locations exist before any (expensive) training starts.
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

# The schedule must contain at least the initial pool size at index 1.
assert len(samples) >= 2, "samples must hold at least two entries (index 1 is the initial pool size)"

# One inner list per run; each inner list has one entry per round.
acc_list_kc_list = []
agr_list_kc_list = []
kl_list_kc_list = []

for i in range(N_RUNS):
    # fix random seed (distinct per run, reproducible across notebook executions)
    run_seed = BASE_SEED + i
    np.random.seed(run_seed)
    torch.manual_seed(run_seed)

    dataset = get_dataset(name)                     # load dataset
    net = get_net(name, device, y_num=Y_NUM)        # load network
    strategy = get_strategy(strat)(dataset, net)    # load strategy

    # start experiment
    initial = samples[1]
    dataset.initialize_labels(initial)
    print(f"number of labeled pool: {initial}")
    print(f"number of unlabeled pool: {dataset.n_pool-initial}")
    print(f"number of testing pool: {dataset.n_test}")
    print()

    acc_list_kc = []
    agr_list_kc = []
    kl_list_kc = []

    # round 0: train on the initial labeled pool only
    print("Round 0")
    strategy.train()
    # predict() returns three values; the 2nd and 3rd are stored untouched
    # (presumably agreement and KL divergence - confirm in utils).
    preds, agr, kl = strategy.predict(dataset.get_test_data())
    acc = dataset.cal_test_acc(preds) * 100
    acc_list_kc.append(acc)
    agr_list_kc.append(agr)
    kl_list_kc.append(kl)
    print(f"Round 0 testing accuracy: {acc} %")
    print("Samples:", initial)

    for rd in range(1, len(samples) - 1):
        print(f"Round {rd}")
        # number of new points to label this round
        budget = samples[rd + 1] - samples[rd]

        # query, update labels, retrain
        query_idxs = strategy.query(budget)
        strategy.update(query_idxs)
        strategy.train()

        # calculate accuracy
        preds, agr, kl = strategy.predict(dataset.get_test_data())
        acc = dataset.cal_test_acc(preds) * 100
        print('\n')
        print(f"Round {rd} testing accuracy: {acc} %")
        acc_list_kc.append(acc)
        agr_list_kc.append(agr)
        kl_list_kc.append(kl)
        print("Samples:", samples[rd + 1])

        # Same path every round within a run: the file always holds the most
        # recently trained classifier, so a crash mid-run still leaves a checkpoint.
        torch.save(
            strategy.net.clf.state_dict(),
            f"{MODEL_DIR}{name}_{strat}_{i}{RUN_TAG}.pt",
        )

    acc_list_kc_list.append(acc_list_kc)
    agr_list_kc_list.append(agr_list_kc)
    kl_list_kc_list.append(kl_list_kc)

    # Rewrite the result files after every run so partial results survive an
    # interrupted notebook; each file holds all runs completed so far.
    for metric, history in (
        ("acc", acc_list_kc_list),
        ("agr", agr_list_kc_list),
        ("kl", kl_list_kc_list),
    ):
        with open(f"{RESULTS_DIR}{metric}{name}{strat}{RUN_TAG}", "wb") as fp:
            pickle.dump(history, fp)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2833, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:11<00:23, 11.59s/it]

Train accuracy =  tensor(0.2833, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:22<00:10, 10.98s/it]

Train accuracy =  tensor(0.4667, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:31<00:00, 10.43s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.98it/s]


Round 0 testing accuracy: 25.17241379310345 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 225.89it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3646, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:16<00:32, 16.40s/it]

Train accuracy =  tensor(0.4375, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:38<00:19, 19.79s/it]

Train accuracy =  tensor(0.5000, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:52<00:00, 17.47s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.33it/s]




Round 1 testing accuracy: 31.724137931034484 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 190.43it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5227, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:46, 23.40s/it]

Train accuracy =  tensor(0.5909, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:38<00:18, 18.28s/it]

Train accuracy =  tensor(0.6288, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:52<00:00, 17.61s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.52it/s]




Round 2 testing accuracy: 26.551724137931032 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 157.18it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6842, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:15<00:30, 15.35s/it]

Train accuracy =  tensor(0.7193, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:31<00:15, 15.71s/it]

Train accuracy =  tensor(0.7310, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:47<00:00, 15.72s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.93it/s]




Round 3 testing accuracy: 38.275862068965516 %
Samples: 282
Round 4


100%|███████████████████████████████████████████████████████████████| 62/62 [00:03<00:00, 17.70it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7585, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:40, 20.29s/it]

Train accuracy =  tensor(0.8502, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:40<00:20, 20.16s/it]

Train accuracy =  tensor(0.8406, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:05<00:00, 21.95s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.73it/s]




Round 4 testing accuracy: 41.03448275862069 %
Samples: 344
Round 5


100%|██████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 110.00it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8537, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:27<00:54, 27.01s/it]

Train accuracy =  tensor(0.9146, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:47<00:23, 23.21s/it]

Train accuracy =  tensor(0.9106, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:07<00:00, 22.59s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.89it/s]




Round 5 testing accuracy: 41.03448275862069 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 91.53it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9158, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:24<00:48, 24.42s/it]

Train accuracy =  tensor(0.9789, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:50<00:25, 25.24s/it]

Train accuracy =  tensor(0.9860, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:22<00:00, 27.53s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.13it/s]




Round 6 testing accuracy: 41.37931034482759 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2000, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:19,  9.71s/it]

Train accuracy =  tensor(0.2833, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:23<00:11, 11.87s/it]

Train accuracy =  tensor(0.2667, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:34<00:00, 11.66s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:17<00:00,  2.08it/s]


Round 0 testing accuracy: 24.82758620689655 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 267.55it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3854, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:14<00:28, 14.29s/it]

Train accuracy =  tensor(0.5104, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:26<00:12, 12.94s/it]

Train accuracy =  tensor(0.5729, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.97s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.81it/s]




Round 1 testing accuracy: 28.620689655172416 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 184.72it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4924, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:39, 19.78s/it]

Train accuracy =  tensor(0.5303, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:36<00:18, 18.07s/it]

Train accuracy =  tensor(0.5606, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:50<00:00, 16.83s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:09<00:00,  4.04it/s]




Round 2 testing accuracy: 25.17241379310345 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 147.00it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4971, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:27<00:55, 27.70s/it]

Train accuracy =  tensor(0.6374, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:21, 21.06s/it]

Train accuracy =  tensor(0.6433, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:00<00:00, 20.25s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:19<00:00,  1.92it/s]




Round 3 testing accuracy: 40.0 %
Samples: 282
Round 4


100%|██████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 133.62it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6570, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:57, 28.93s/it]

Train accuracy =  tensor(0.7826, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:46<00:22, 22.29s/it]

Train accuracy =  tensor(0.8454, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:04<00:00, 21.52s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.28it/s]




Round 4 testing accuracy: 68.62068965517241 %
Samples: 344
Round 5


100%|███████████████████████████████████████████████████████████████| 65/65 [00:01<00:00, 45.16it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8902, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:35<01:10, 35.31s/it]

Train accuracy =  tensor(0.9756, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:57<00:27, 27.56s/it]

Train accuracy =  tensor(1.0488, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:31<00:00, 30.50s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.71it/s]




Round 5 testing accuracy: 72.41379310344827 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 70.57it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9860, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:42<01:24, 42.13s/it]

Train accuracy =  tensor(1.1088, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:07<00:32, 32.41s/it]

Train accuracy =  tensor(1.1509, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:31<00:00, 30.44s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:21<00:00,  1.75it/s]




Round 6 testing accuracy: 93.44827586206897 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2500, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:11<00:22, 11.32s/it]

Train accuracy =  tensor(0.3667, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:21<00:10, 10.86s/it]

Train accuracy =  tensor(0.3667, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:31<00:00, 10.62s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.03it/s]


Round 0 testing accuracy: 24.82758620689655 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 270.17it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3438, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:47, 23.74s/it]

Train accuracy =  tensor(0.3646, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:46<00:23, 23.19s/it]

Train accuracy =  tensor(0.5417, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:57<00:00, 19.22s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.01it/s]




Round 1 testing accuracy: 29.310344827586203 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 184.15it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4394, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:13<00:26, 13.13s/it]

Train accuracy =  tensor(0.5379, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:27<00:14, 14.10s/it]

Train accuracy =  tensor(0.5909, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:41<00:00, 13.76s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.87it/s]




Round 2 testing accuracy: 39.310344827586206 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 151.56it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5439, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:18<00:36, 18.37s/it]

Train accuracy =  tensor(0.6550, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:45<00:23, 23.29s/it]

Train accuracy =  tensor(0.6842, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:00<00:00, 20.17s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.97it/s]




Round 3 testing accuracy: 50.0 %
Samples: 282
Round 4


100%|██████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 123.04it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7440, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:38, 19.13s/it]

Train accuracy =  tensor(0.7923, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:50<00:26, 26.32s/it]

Train accuracy =  tensor(0.8406, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:07<00:00, 22.41s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.25it/s]




Round 4 testing accuracy: 56.896551724137936 %
Samples: 344
Round 5


100%|██████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 104.38it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7683, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:39, 19.70s/it]

Train accuracy =  tensor(0.8780, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:40<00:20, 20.56s/it]

Train accuracy =  tensor(0.9675, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:07<00:00, 22.59s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.71it/s]




Round 5 testing accuracy: 64.82758620689654 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 87.33it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9123, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:26<00:52, 26.25s/it]

Train accuracy =  tensor(0.9789, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:01<00:31, 31.29s/it]

Train accuracy =  tensor(0.9965, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:24<00:00, 28.31s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.73it/s]




Round 6 testing accuracy: 72.06896551724138 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2333, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:22<00:45, 22.63s/it]

Train accuracy =  tensor(0.3333, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:32<00:15, 15.29s/it]

Train accuracy =  tensor(0.3333, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:42<00:00, 14.22s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.23it/s]


Round 0 testing accuracy: 28.27586206896552 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 116.37it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3750, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:21<00:42, 21.36s/it]

Train accuracy =  tensor(0.4479, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:32<00:15, 15.34s/it]

Train accuracy =  tensor(0.5521, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:43<00:00, 14.51s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.05it/s]




Round 1 testing accuracy: 46.55172413793103 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 167.43it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5758, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:26<00:52, 26.30s/it]

Train accuracy =  tensor(0.6439, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:51<00:25, 25.61s/it]

Train accuracy =  tensor(0.6818, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:08<00:00, 22.80s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.96it/s]




Round 2 testing accuracy: 47.241379310344826 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 149.23it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7778, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:17<00:34, 17.14s/it]

Train accuracy =  tensor(0.7778, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:33<00:16, 16.61s/it]

Train accuracy =  tensor(0.7778, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:00<00:00, 20.16s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.07it/s]




Round 3 testing accuracy: 47.58620689655172 %
Samples: 282
Round 4


100%|███████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 88.27it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8116, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:18<00:37, 18.69s/it]

Train accuracy =  tensor(0.8068, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:37<00:19, 19.02s/it]

Train accuracy =  tensor(0.8309, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:08<00:00, 22.82s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.75it/s]




Round 4 testing accuracy: 47.93103448275862 %
Samples: 344
Round 5


100%|██████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 112.79it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8862, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:25<00:51, 25.52s/it]

Train accuracy =  tensor(0.8984, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:55<00:28, 28.41s/it]

Train accuracy =  tensor(0.9228, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:25<00:00, 28.64s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:06<00:00,  5.39it/s]




Round 5 testing accuracy: 47.58620689655172 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:01<00:00, 45.42it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8912, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:22<00:44, 22.00s/it]

Train accuracy =  tensor(0.9649, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:29, 29.59s/it]

Train accuracy =  tensor(0.9404, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:20<00:00, 26.77s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:11<00:00,  3.35it/s]




Round 6 testing accuracy: 47.58620689655172 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3333, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:21<00:43, 21.67s/it]

Train accuracy =  tensor(0.3167, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:32<00:15, 15.36s/it]

Train accuracy =  tensor(0.2500, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:42<00:00, 14.15s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.00it/s]


Round 0 testing accuracy: 30.344827586206897 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 216.90it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2604, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:24<00:48, 24.00s/it]

Train accuracy =  tensor(0.3229, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:43<00:21, 21.16s/it]

Train accuracy =  tensor(0.2917, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:58<00:00, 19.46s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.80it/s]




Round 1 testing accuracy: 20.689655172413794 %
Samples: 161
Round 2


100%|███████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 93.88it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4697, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:56, 28.08s/it]

Train accuracy =  tensor(0.5682, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:21, 21.15s/it]

Train accuracy =  tensor(0.5682, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:58<00:00, 19.67s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.88it/s]




Round 2 testing accuracy: 46.206896551724135 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 155.67it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6784, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:31<01:02, 31.09s/it]

Train accuracy =  tensor(0.7953, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:47<00:22, 22.49s/it]

Train accuracy =  tensor(0.8012, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:05<00:00, 21.93s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.41it/s]




Round 3 testing accuracy: 49.6551724137931 %
Samples: 282
Round 4


100%|███████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 70.70it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7729, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:30<01:00, 30.18s/it]

Train accuracy =  tensor(0.8406, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:00<00:30, 30.33s/it]

Train accuracy =  tensor(0.8599, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:19<00:00, 26.59s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.66it/s]




Round 4 testing accuracy: 52.06896551724138 %
Samples: 344
Round 5


100%|██████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 102.85it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8333, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:32<01:04, 32.15s/it]

Train accuracy =  tensor(0.9106, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:51<00:24, 24.76s/it]

Train accuracy =  tensor(0.9593, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:22<00:00, 27.36s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.00it/s]




Round 5 testing accuracy: 60.0 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 91.30it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9439, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:35<01:11, 35.80s/it]

Train accuracy =  tensor(1.0351, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:59<00:28, 28.82s/it]

Train accuracy =  tensor(1.1404, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:31<00:00, 30.36s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.02it/s]




Round 6 testing accuracy: 70.34482758620689 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2667, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:19,  9.98s/it]

Train accuracy =  tensor(0.3333, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:19<00:09,  9.87s/it]

Train accuracy =  tensor(0.2833, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:28<00:00,  9.60s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.93it/s]


Round 0 testing accuracy: 20.689655172413794 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 271.00it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2708, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:15<00:30, 15.25s/it]

Train accuracy =  tensor(0.3542, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:26<00:13, 13.18s/it]

Train accuracy =  tensor(0.4271, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:49<00:00, 16.52s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.60it/s]




Round 1 testing accuracy: 33.44827586206897 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 185.61it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4848, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:21<00:43, 21.65s/it]

Train accuracy =  tensor(0.5682, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:35<00:17, 17.11s/it]

Train accuracy =  tensor(0.6818, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:48<00:00, 16.29s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.93it/s]




Round 2 testing accuracy: 38.62068965517241 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 146.37it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5965, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:26<00:53, 26.79s/it]

Train accuracy =  tensor(0.7485, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:43<00:21, 21.05s/it]

Train accuracy =  tensor(0.7193, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:12<00:00, 24.02s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.09it/s]




Round 3 testing accuracy: 48.275862068965516 %
Samples: 282
Round 4


100%|██████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 129.35it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7778, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:47, 23.56s/it]

Train accuracy =  tensor(0.8599, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:21, 21.78s/it]

Train accuracy =  tensor(0.9324, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:10<00:00, 23.44s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.99it/s]




Round 4 testing accuracy: 63.793103448275865 %
Samples: 344
Round 5


100%|██████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 101.01it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8740, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:41, 20.87s/it]

Train accuracy =  tensor(0.9878, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:51<00:26, 26.87s/it]

Train accuracy =  tensor(1.0244, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:15<00:00, 25.02s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.00it/s]




Round 5 testing accuracy: 84.82758620689656 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 96.73it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9930, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:40<01:20, 40.37s/it]

Train accuracy =  tensor(1.1088, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:05<00:31, 31.43s/it]

Train accuracy =  tensor(1.1789, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:37<00:00, 32.63s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.64it/s]




Round 6 testing accuracy: 88.96551724137932 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3333, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:19,  9.73s/it]

Train accuracy =  tensor(0.3833, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:18<00:09,  9.17s/it]

Train accuracy =  tensor(0.3833, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:27<00:00,  9.24s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:09<00:00,  3.83it/s]


Round 0 testing accuracy: 16.206896551724135 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 117.79it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3333, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:41, 20.59s/it]

Train accuracy =  tensor(0.3854, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:42<00:21, 21.59s/it]

Train accuracy =  tensor(0.4479, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:54<00:00, 18.32s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.56it/s]




Round 1 testing accuracy: 32.068965517241374 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 188.71it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4394, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:14<00:29, 14.57s/it]

Train accuracy =  tensor(0.5227, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:40<00:21, 21.39s/it]

Train accuracy =  tensor(0.6061, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:57<00:00, 19.02s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.06it/s]




Round 2 testing accuracy: 25.17241379310345 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 152.08it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5322, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:56, 28.31s/it]

Train accuracy =  tensor(0.6784, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:21, 21.38s/it]

Train accuracy =  tensor(0.7778, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:02<00:00, 20.82s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.96it/s]




Round 3 testing accuracy: 31.379310344827587 %
Samples: 282
Round 4


100%|██████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 114.10it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8164, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:38, 19.50s/it]

Train accuracy =  tensor(0.9034, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:40<00:20, 20.46s/it]

Train accuracy =  tensor(0.9662, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:11<00:00, 23.72s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.41it/s]




Round 4 testing accuracy: 61.724137931034484 %
Samples: 344
Round 5


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 97.15it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9390, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:21<00:42, 21.28s/it]

Train accuracy =  tensor(1.0528, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:41<00:20, 20.92s/it]

Train accuracy =  tensor(1.0732, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:17<00:00, 25.93s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.71it/s]




Round 5 testing accuracy: 71.37931034482759 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:01<00:00, 49.98it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0596, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:46, 23.38s/it]

Train accuracy =  tensor(1.1439, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:29, 29.01s/it]

Train accuracy =  tensor(1.1825, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:18<00:00, 26.07s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.98it/s]




Round 6 testing accuracy: 74.48275862068967 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2833, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:19,  9.55s/it]

Train accuracy =  tensor(0.5000, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:19<00:09,  9.64s/it]

Train accuracy =  tensor(0.3000, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:39<00:00, 13.23s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.84it/s]


Round 0 testing accuracy: 18.620689655172416 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 247.44it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5000, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:12<00:25, 12.59s/it]

Train accuracy =  tensor(0.5833, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:25<00:12, 12.83s/it]

Train accuracy =  tensor(0.5833, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:37<00:00, 12.53s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.95it/s]




Round 1 testing accuracy: 28.27586206896552 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 185.61it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5227, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:14<00:29, 14.64s/it]

Train accuracy =  tensor(0.5909, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:29<00:14, 14.90s/it]

Train accuracy =  tensor(0.5985, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:56<00:00, 18.98s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  2.01it/s]




Round 2 testing accuracy: 32.41379310344827 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 145.55it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5556, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:17<00:34, 17.22s/it]

Train accuracy =  tensor(0.6491, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:23, 23.35s/it]

Train accuracy =  tensor(0.7427, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:02<00:00, 20.69s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.17it/s]




Round 3 testing accuracy: 46.55172413793103 %
Samples: 282
Round 4


100%|███████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 72.83it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7536, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:18<00:36, 18.32s/it]

Train accuracy =  tensor(0.8551, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:37<00:18, 18.83s/it]

Train accuracy =  tensor(0.8599, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:57<00:00, 19.08s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  1.95it/s]




Round 4 testing accuracy: 54.48275862068965 %
Samples: 344
Round 5


100%|██████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 103.74it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8537, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:46, 23.31s/it]

Train accuracy =  tensor(1.0244, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:28, 28.97s/it]

Train accuracy =  tensor(1.1382, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:17<00:00, 25.84s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.52it/s]




Round 5 testing accuracy: 74.82758620689656 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 89.34it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0596, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:33<01:06, 33.30s/it]

Train accuracy =  tensor(1.2140, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:27, 27.63s/it]

Train accuracy =  tensor(1.2561, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:22<00:00, 27.41s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.64it/s]




Round 6 testing accuracy: 80.0 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2333, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:08<00:17,  8.90s/it]

Train accuracy =  tensor(0.3667, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:17<00:08,  8.76s/it]

Train accuracy =  tensor(0.3833, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:26<00:00,  8.73s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.27it/s]


Round 0 testing accuracy: 36.55172413793103 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 294.27it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3021, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:16<00:32, 16.28s/it]

Train accuracy =  tensor(0.4375, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:29<00:14, 14.40s/it]

Train accuracy =  tensor(0.4792, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:41<00:00, 13.95s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.72it/s]




Round 1 testing accuracy: 41.724137931034484 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 180.64it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6061, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:14<00:28, 14.12s/it]

Train accuracy =  tensor(0.6515, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:31<00:16, 16.06s/it]

Train accuracy =  tensor(0.6136, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:56<00:00, 18.82s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.06it/s]




Round 2 testing accuracy: 44.827586206896555 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 155.17it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6901, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:29<00:58, 29.38s/it]

Train accuracy =  tensor(0.7368, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:45<00:21, 21.46s/it]

Train accuracy =  tensor(0.8363, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:02<00:00, 20.80s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.45it/s]




Round 3 testing accuracy: 60.3448275862069 %
Samples: 282
Round 4


100%|██████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 125.79it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8019, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:21<00:42, 21.19s/it]

Train accuracy =  tensor(0.9179, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:40<00:20, 20.09s/it]

Train accuracy =  tensor(0.9710, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:10<00:00, 23.64s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.72it/s]




Round 4 testing accuracy: 68.27586206896552 %
Samples: 344
Round 5


100%|██████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 103.89it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8984, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:21<00:43, 21.59s/it]

Train accuracy =  tensor(0.9919, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:55<00:28, 28.73s/it]

Train accuracy =  tensor(0.9959, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:16<00:00, 25.52s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:19<00:00,  1.92it/s]




Round 5 testing accuracy: 72.06896551724138 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 78.56it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9895, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:36<01:13, 36.88s/it]

Train accuracy =  tensor(1.0807, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:59<00:28, 28.47s/it]

Train accuracy =  tensor(1.1088, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:31<00:00, 30.41s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.27it/s]




Round 6 testing accuracy: 77.58620689655173 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3333, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:09<00:19,  9.52s/it]

Train accuracy =  tensor(0.4000, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:24<00:12, 12.55s/it]

Train accuracy =  tensor(0.4500, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:33<00:00, 11.26s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:19<00:00,  1.93it/s]


Round 0 testing accuracy: 25.517241379310345 %
Samples: 100
Round 1


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 283.75it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4792, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:11<00:23, 11.89s/it]

Train accuracy =  tensor(0.3229, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:23<00:11, 11.59s/it]

Train accuracy =  tensor(0.4167, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:45<00:00, 15.18s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.24it/s]




Round 1 testing accuracy: 27.93103448275862 %
Samples: 161
Round 2


100%|██████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 183.24it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3864, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:17<00:34, 17.07s/it]

Train accuracy =  tensor(0.4470, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:30<00:15, 15.17s/it]

Train accuracy =  tensor(0.5682, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:47<00:00, 15.75s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:19<00:00,  1.92it/s]




Round 2 testing accuracy: 36.896551724137936 %
Samples: 221
Round 3


100%|██████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 147.67it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4912, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:56, 28.31s/it]

Train accuracy =  tensor(0.6491, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:28, 28.06s/it]

Train accuracy =  tensor(0.5848, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:11<00:00, 23.86s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:09<00:00,  4.08it/s]




Round 3 testing accuracy: 41.37931034482759 %
Samples: 282
Round 4


100%|██████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 123.67it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6763, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:57, 28.65s/it]

Train accuracy =  tensor(0.7681, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:46<00:22, 22.57s/it]

Train accuracy =  tensor(0.7536, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:15<00:00, 25.32s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.14it/s]




Round 4 testing accuracy: 41.724137931034484 %
Samples: 344
Round 5


100%|███████████████████████████████████████████████████████████████| 65/65 [00:01<00:00, 49.29it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8618, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:33<01:07, 33.53s/it]

Train accuracy =  tensor(0.9065, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:05<00:32, 32.86s/it]

Train accuracy =  tensor(0.9512, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:27<00:00, 29.17s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:09<00:00,  4.00it/s]




Round 5 testing accuracy: 54.48275862068965 %
Samples: 409
Round 6


100%|███████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 87.54it/s]


Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9684, device='cuda:1')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:34<01:09, 34.52s/it]

Train accuracy =  tensor(1.0351, device='cuda:1')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:58<00:28, 28.05s/it]

Train accuracy =  tensor(1.0351, device='cuda:1')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:31<00:00, 30.47s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:09<00:00,  4.04it/s]




Round 6 testing accuracy: 58.620689655172406 %
Samples: 474


In [6]:
acc_list_kc_list

[[25.17241379310345,
  31.724137931034484,
  26.551724137931032,
  38.275862068965516,
  41.03448275862069,
  41.03448275862069,
  41.37931034482759],
 [24.82758620689655,
  28.620689655172416,
  25.17241379310345,
  40.0,
  68.62068965517241,
  72.41379310344827,
  93.44827586206897],
 [24.82758620689655,
  29.310344827586203,
  39.310344827586206,
  50.0,
  56.896551724137936,
  64.82758620689654,
  72.06896551724138],
 [28.27586206896552,
  46.55172413793103,
  47.241379310344826,
  47.58620689655172,
  47.93103448275862,
  47.58620689655172,
  47.58620689655172],
 [30.344827586206897,
  20.689655172413794,
  46.206896551724135,
  49.6551724137931,
  52.06896551724138,
  60.0,
  70.34482758620689],
 [20.689655172413794,
  33.44827586206897,
  38.62068965517241,
  48.275862068965516,
  63.793103448275865,
  84.82758620689656,
  88.96551724137932],
 [16.206896551724135,
  32.068965517241374,
  25.17241379310345,
  31.379310344827587,
  61.724137931034484,
  71.37931034482759,
  74.482