In [1]:
import os
import pickle

import numpy as np
import torch

from utils import get_dataset, get_net, get_strategy

In [2]:
# Load the labelling schedule. The experiment cell below reads samples[1] as the
# initial labelled-pool size and uses the differences between consecutive
# entries as the per-round query budget.
samples = np.load("samples_bert.npy")

In [3]:
# Display the loaded schedule to check its values before running the experiment.
samples

array([  0, 100, 161, 221, 282, 344, 409, 474])

In [6]:
# Active-learning experiment: repeat the whole labelling schedule N_RUNS times
# with a freshly built dataset / network / strategy per run, recording the test
# accuracy and the two extra metrics returned by strategy.predict after every
# round. Results are re-pickled after each run so partial progress survives an
# interrupted session.

# --- configuration --------------------------------------------------------
# Optional base seed. None keeps the runs unseeded; an int seeds the global
# NumPy and PyTorch RNGs with SEED + run index at the start of each run.
# NOTE(review): this only helps if the helpers in `utils` draw from those
# global RNGs — confirm before relying on it for reproducibility.
SEED = None
N_RUNS = 10        # independent repetitions of the experiment
N_CLASSES = 10     # forwarded to get_net as y_num
RUN_TAG = str(3)   # suffix appended to every output file name
MODEL_DIR = "./extracted_models/"
RESULTS_DIR = "./results/"

torch.backends.cudnn.enabled = False
name = "BERT"
strat = "RandomSampling"

# Device selection does not change between runs, so resolve it once.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Create the output directories up front: without this a missing directory
# only raises at the first save, i.e. after a training round has already run.
for out_dir in (MODEL_DIR, RESULTS_DIR):
    os.makedirs(out_dir, exist_ok=True)

# One inner list per run; each inner list holds one value per round.
# NOTE(review): the "_kc" names are kept unchanged because other cells may
# read them, even though the strategy here is RandomSampling.
acc_list_kc_list = []
agr_list_kc_list = []
kl_list_kc_list = []

for i in range(N_RUNS):
    if SEED is not None:
        np.random.seed(SEED + i)
        torch.manual_seed(SEED + i)

    # Rebuild everything per run: initialize_labels / update mutate the dataset
    # and training mutates the network.
    dataset = get_dataset(name)                      # load dataset
    net = get_net(name, device, y_num=N_CLASSES)     # load network
    strategy = get_strategy(strat)(dataset, net)     # load strategy

    # samples[0] is not used; the run starts with samples[1] labelled points.
    initial = samples[1]
    dataset.initialize_labels(initial)
    print(f"number of labeled pool: {initial}")
    print(f"number of unlabeled pool: {dataset.n_pool-initial}")
    print(f"number of testing pool: {dataset.n_test}")
    print()

    acc_list_kc = []
    agr_list_kc = []
    kl_list_kc = []

    # Round 0: train on the initial labelled pool only.
    print("Round 0")
    strategy.train()
    # predict returns the predictions plus two metrics stored as "agr" and "kl"
    # (presumably agreement and KL divergence — confirm in utils).
    preds, agr, kl = strategy.predict(dataset.get_test_data())
    acc = dataset.cal_test_acc(preds)*100
    acc_list_kc.append(acc)
    agr_list_kc.append(agr)
    kl_list_kc.append(kl)
    print(f"Round 0 testing accuracy: {acc} %")
    print("Samples:", samples[1])

    # The checkpoint path is fixed for the run; every round overwrites it, so
    # the file always holds the most recently trained classifier.
    model_path = f"{MODEL_DIR}{name}_{strat}_{i}{RUN_TAG}.pt"

    for rd in range(1, len(samples)-1):
        print(f"Round {rd}")
        # Number of new labels needed to reach the next schedule entry.
        budget = samples[rd+1] - samples[rd]
        # query
        query_idxs = strategy.query(budget)
        # update labels
        strategy.update(query_idxs)
        strategy.train()

        # calculate accuracy
        preds, agr, kl = strategy.predict(dataset.get_test_data())
        acc = dataset.cal_test_acc(preds)*100
        print('\n')
        print(f"Round {rd} testing accuracy: {acc} %")
        acc_list_kc.append(acc)
        agr_list_kc.append(agr)
        kl_list_kc.append(kl)
        print("Samples:", samples[rd+1])
        torch.save(strategy.net.clf.state_dict(), model_path)

    acc_list_kc_list.append(acc_list_kc)
    agr_list_kc_list.append(agr_list_kc)
    kl_list_kc_list.append(kl_list_kc)

    # Rewrite the cumulative results after every run (files are overwritten).
    for metric, history in (
        ("acc", acc_list_kc_list),
        ("agr", agr_list_kc_list),
        ("kl", kl_list_kc_list),
    ):
        with open(f"{RESULTS_DIR}{metric}{name}{strat}{RUN_TAG}", "wb") as fp:
            pickle.dump(history, fp)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4167, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:10<00:20, 10.02s/it]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:19<00:09,  9.55s/it]

Train accuracy =  tensor(0.3167, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:29<00:00,  9.73s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:11<00:00,  3.14it/s]


Round 0 testing accuracy: 21.72413793103448 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2604, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:14<00:28, 14.01s/it]

Train accuracy =  tensor(0.4062, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:26<00:13, 13.31s/it]

Train accuracy =  tensor(0.3125, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.71s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.19it/s]




Round 1 testing accuracy: 25.17241379310345 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4015, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:40<01:20, 40.32s/it]

Train accuracy =  tensor(0.5227, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:08<00:33, 33.33s/it]

Train accuracy =  tensor(0.5833, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:31<00:00, 30.55s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:10<00:00,  3.59it/s]




Round 2 testing accuracy: 28.620689655172416 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5731, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:20<00:41, 20.63s/it]

Train accuracy =  tensor(0.5848, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:42<00:21, 21.55s/it]

Train accuracy =  tensor(0.6725, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:32<00:00, 30.86s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  2.01it/s]




Round 3 testing accuracy: 27.241379310344826 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7005, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:50<01:41, 50.55s/it]

Train accuracy =  tensor(0.7488, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:18<00:37, 37.47s/it]

Train accuracy =  tensor(0.7488, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:44<00:00, 34.80s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:26<00:00,  1.39it/s]




Round 4 testing accuracy: 28.620689655172416 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7724, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:36<01:13, 36.96s/it]

Train accuracy =  tensor(0.8699, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:39<00:52, 52.16s/it]

Train accuracy =  tensor(0.9634, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:27<00:00, 49.29s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:15<00:00,  2.33it/s]




Round 5 testing accuracy: 60.689655172413794 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0456, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:32<01:04, 32.42s/it]

Train accuracy =  tensor(1.1123, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:08<00:34, 34.31s/it]

Train accuracy =  tensor(1.1298, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:56<00:00, 38.84s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:27<00:00,  1.36it/s]




Round 6 testing accuracy: 74.48275862068967 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3000, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:15<00:31, 15.82s/it]

Train accuracy =  tensor(0.2667, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:30<00:15, 15.22s/it]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:55<00:00, 18.33s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:11<00:00,  3.35it/s]


Round 0 testing accuracy: 21.379310344827587 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3750, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:16<00:33, 16.57s/it]

Train accuracy =  tensor(0.5000, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:31<00:15, 15.58s/it]

Train accuracy =  tensor(0.4896, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:01<00:00, 20.40s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:15<00:00,  2.34it/s]




Round 1 testing accuracy: 25.862068965517242 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5076, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:38<01:16, 38.43s/it]

Train accuracy =  tensor(0.5909, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:16<00:38, 38.22s/it]

Train accuracy =  tensor(0.6364, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:52<00:00, 37.41s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:10<00:00,  3.63it/s]




Round 2 testing accuracy: 35.86206896551724 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6550, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:36<01:13, 36.70s/it]

Train accuracy =  tensor(0.7778, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:07<00:33, 33.49s/it]

Train accuracy =  tensor(0.8187, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:41<00:00, 33.76s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  2.04it/s]




Round 3 testing accuracy: 45.17241379310345 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.8889, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:56<01:52, 56.01s/it]

Train accuracy =  tensor(1., device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:36<00:46, 46.79s/it]

Train accuracy =  tensor(1.0531, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:13<00:00, 44.63s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:13<00:00,  2.70it/s]




Round 4 testing accuracy: 59.6551724137931 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0650, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:41<01:22, 41.41s/it]

Train accuracy =  tensor(1.1504, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:15<00:37, 37.34s/it]

Train accuracy =  tensor(1.1748, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:49<00:00, 36.63s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:11<00:00,  3.12it/s]




Round 5 testing accuracy: 73.79310344827587 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1544, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:30<01:01, 30.60s/it]

Train accuracy =  tensor(1.2281, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:16<00:39, 39.39s/it]

Train accuracy =  tensor(1.2386, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:16<00:00, 45.49s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:18<00:00,  2.02it/s]




Round 6 testing accuracy: 90.0 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4167, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:17<00:34, 17.20s/it]

Train accuracy =  tensor(0.4000, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:34<00:17, 17.29s/it]

Train accuracy =  tensor(0.3500, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:56<00:00, 18.72s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:12<00:00,  3.08it/s]


Round 0 testing accuracy: 22.758620689655174 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3958, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:15<00:31, 15.72s/it]

Train accuracy =  tensor(0.4062, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:28<00:14, 14.24s/it]

Train accuracy =  tensor(0.4479, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:56<00:00, 18.72s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.31it/s]




Round 1 testing accuracy: 22.758620689655174 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4015, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:38<01:17, 38.76s/it]

Train accuracy =  tensor(0.4621, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:09<00:33, 33.92s/it]

Train accuracy =  tensor(0.5076, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:34<00:00, 31.53s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:10<00:00,  3.66it/s]




Round 2 testing accuracy: 24.137931034482758 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6199, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:47, 23.95s/it]

Train accuracy =  tensor(0.7485, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:45<00:22, 22.77s/it]

Train accuracy =  tensor(0.8012, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:11<00:00, 23.88s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.19it/s]




Round 3 testing accuracy: 42.758620689655174 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9565, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:56<01:52, 56.19s/it]

Train accuracy =  tensor(1.1014, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:36<00:47, 47.01s/it]

Train accuracy =  tensor(1.1401, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:05<00:00, 41.96s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:11<00:00,  3.19it/s]




Round 4 testing accuracy: 71.72413793103448 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1016, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:38<01:16, 38.29s/it]

Train accuracy =  tensor(1.1667, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:07<00:32, 32.91s/it]

Train accuracy =  tensor(1.1911, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:50<00:00, 36.98s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:17<00:00,  2.16it/s]




Round 5 testing accuracy: 74.48275862068967 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1719, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [01:07<02:15, 67.98s/it]

Train accuracy =  tensor(1.2456, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:52<00:53, 53.93s/it]

Train accuracy =  tensor(1.2807, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:26<00:00, 48.78s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:11<00:00,  3.21it/s]




Round 6 testing accuracy: 80.6896551724138 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2167, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:12<00:25, 12.86s/it]

Train accuracy =  tensor(0.3167, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:35<00:18, 18.61s/it]

Train accuracy =  tensor(0.3167, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:50<00:00, 16.83s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.30it/s]


Round 0 testing accuracy: 27.241379310344826 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3125, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:32<01:05, 32.75s/it]

Train accuracy =  tensor(0.3958, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:02<00:31, 31.22s/it]

Train accuracy =  tensor(0.4167, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:17<00:00, 25.84s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:14<00:00,  2.58it/s]




Round 1 testing accuracy: 26.20689655172414 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3864, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:39, 19.64s/it]

Train accuracy =  tensor(0.4848, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:44<00:22, 22.84s/it]

Train accuracy =  tensor(0.5379, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:02<00:00, 20.79s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:10<00:00,  3.62it/s]




Round 2 testing accuracy: 27.93103448275862 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5146, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:44<01:29, 44.52s/it]

Train accuracy =  tensor(0.6316, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:18<00:38, 38.23s/it]

Train accuracy =  tensor(0.6550, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:55<00:00, 38.55s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:14<00:00,  2.60it/s]




Round 3 testing accuracy: 37.24137931034483 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7295, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:30<01:01, 30.81s/it]

Train accuracy =  tensor(0.8454, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:13<00:37, 37.90s/it]

Train accuracy =  tensor(0.9469, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:39<00:00, 33.18s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:23<00:00,  1.58it/s]




Round 4 testing accuracy: 49.310344827586206 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9512, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:29<00:58, 29.03s/it]

Train accuracy =  tensor(1.0447, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:09<00:35, 35.95s/it]

Train accuracy =  tensor(1.1098, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:56<00:00, 38.97s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:27<00:00,  1.34it/s]




Round 5 testing accuracy: 58.9655172413793 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1123, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:38<01:16, 38.47s/it]

Train accuracy =  tensor(1.1684, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:11<00:35, 35.53s/it]

Train accuracy =  tensor(1.1789, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:43<00:00, 34.41s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:13<00:00,  2.79it/s]




Round 6 testing accuracy: 69.3103448275862 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3167, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:26<00:53, 26.80s/it]

Train accuracy =  tensor(0.2833, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:42<00:20, 20.39s/it]

Train accuracy =  tensor(0.4333, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:58<00:00, 19.57s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.24it/s]


Round 0 testing accuracy: 25.17241379310345 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5104, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:29<00:59, 29.65s/it]

Train accuracy =  tensor(0.5833, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:46<00:21, 21.99s/it]

Train accuracy =  tensor(0.6042, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:11<00:00, 23.79s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:08<00:00,  4.39it/s]




Round 1 testing accuracy: 35.86206896551724 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5682, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:16<00:33, 16.86s/it]

Train accuracy =  tensor(0.5833, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:29, 29.98s/it]

Train accuracy =  tensor(0.7727, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:34<00:00, 31.63s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:25<00:00,  1.43it/s]




Round 2 testing accuracy: 39.6551724137931 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9123, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:47, 23.91s/it]

Train accuracy =  tensor(1.0409, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:00<00:31, 31.48s/it]

Train accuracy =  tensor(1.0643, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:23<00:00, 27.90s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.30it/s]




Round 3 testing accuracy: 69.3103448275862 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0290, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [01:02<02:05, 62.60s/it]

Train accuracy =  tensor(1.1498, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:32<00:43, 43.36s/it]

Train accuracy =  tensor(1.1836, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:00<00:00, 40.15s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:21<00:00,  1.69it/s]




Round 4 testing accuracy: 81.37931034482759 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1789, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:32<01:05, 32.78s/it]

Train accuracy =  tensor(1.2114, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:10<00:35, 35.48s/it]

Train accuracy =  tensor(1.2520, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:08<00:00, 42.74s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  4.94it/s]




Round 5 testing accuracy: 93.10344827586206 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.2632, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:40<01:21, 40.92s/it]

Train accuracy =  tensor(1.3018, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:32<00:47, 47.18s/it]

Train accuracy =  tensor(1.3018, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:16<00:00, 45.39s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.31it/s]




Round 6 testing accuracy: 93.79310344827586 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:26<00:52, 26.09s/it]

Train accuracy =  tensor(0.3667, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:41<00:19, 19.69s/it]

Train accuracy =  tensor(0.2167, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:56<00:00, 18.79s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:07<00:00,  5.07it/s]


Round 0 testing accuracy: 20.689655172413794 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2604, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:12<00:24, 12.14s/it]

Train accuracy =  tensor(0.4479, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:31<00:16, 16.26s/it]

Train accuracy =  tensor(0.4375, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:47<00:00, 15.87s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:14<00:00,  2.58it/s]




Round 1 testing accuracy: 25.517241379310345 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4924, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:57, 28.94s/it]

Train accuracy =  tensor(0.5455, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:54<00:26, 26.74s/it]

Train accuracy =  tensor(0.5379, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:11<00:00, 23.93s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:21<00:00,  1.75it/s]




Round 2 testing accuracy: 27.241379310344826 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5380, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:23<00:47, 23.86s/it]

Train accuracy =  tensor(0.5906, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:43<00:21, 21.34s/it]

Train accuracy =  tensor(0.7251, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:06<00:00, 22.19s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.24it/s]




Round 3 testing accuracy: 40.3448275862069 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7440, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:58<01:56, 58.34s/it]

Train accuracy =  tensor(0.8937, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:24<00:39, 39.29s/it]

Train accuracy =  tensor(0.9179, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:48<00:00, 36.27s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:24<00:00,  1.53it/s]




Round 4 testing accuracy: 46.206896551724135 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9024, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:58<01:56, 58.19s/it]

Train accuracy =  tensor(0.9675, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:55<00:57, 57.74s/it]

Train accuracy =  tensor(0.9715, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:25<00:00, 48.57s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:13<00:00,  2.75it/s]




Round 5 testing accuracy: 43.44827586206896 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9825, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:39<01:18, 39.43s/it]

Train accuracy =  tensor(1.0351, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:42<00:53, 53.47s/it]

Train accuracy =  tensor(1.0632, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:35<00:00, 51.92s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:25<00:00,  1.46it/s]




Round 6 testing accuracy: 56.55172413793104 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3000, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:12<00:24, 12.25s/it]

Train accuracy =  tensor(0.2333, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:25<00:12, 12.94s/it]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:35<00:00, 11.93s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:10<00:00,  3.43it/s]


Round 0 testing accuracy: 13.448275862068964 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3125, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:30<01:01, 30.79s/it]

Train accuracy =  tensor(0.4271, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:46<00:22, 22.13s/it]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:08<00:00, 22.91s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:27<00:00,  1.36it/s]




Round 1 testing accuracy: 23.79310344827586 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4394, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:27<00:54, 27.24s/it]

Train accuracy =  tensor(0.5682, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:46<00:22, 22.67s/it]

Train accuracy =  tensor(0.6515, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:16<00:00, 25.40s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:11<00:00,  3.26it/s]




Round 2 testing accuracy: 43.44827586206896 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5789, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:26<00:52, 26.31s/it]

Train accuracy =  tensor(0.7076, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:28, 28.47s/it]

Train accuracy =  tensor(0.7018, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:41<00:00, 33.89s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.24it/s]




Round 3 testing accuracy: 45.17241379310345 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7633, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:39<01:19, 39.73s/it]

Train accuracy =  tensor(0.8502, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:12<00:35, 35.38s/it]

Train accuracy =  tensor(0.9565, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:51<00:00, 37.01s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:14<00:00,  2.53it/s]




Round 4 testing accuracy: 55.86206896551724 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9797, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:36<01:12, 36.49s/it]

Train accuracy =  tensor(1.0732, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:04<00:31, 31.27s/it]

Train accuracy =  tensor(1.1423, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:01<00:00, 40.48s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.29it/s]




Round 5 testing accuracy: 82.06896551724138 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1509, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:52<01:44, 52.43s/it]

Train accuracy =  tensor(1.2632, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:40<00:49, 49.74s/it]

Train accuracy =  tensor(1.2807, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:40<00:00, 53.37s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:15<00:00,  2.43it/s]




Round 6 testing accuracy: 95.17241379310344 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3000, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:13<00:26, 13.06s/it]

Train accuracy =  tensor(0.2500, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:27<00:13, 13.90s/it]

Train accuracy =  tensor(0.2333, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:54<00:00, 18.09s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:15<00:00,  2.46it/s]


Round 0 testing accuracy: 21.03448275862069 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3021, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:12<00:25, 12.76s/it]

Train accuracy =  tensor(0.3750, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:45<00:24, 24.28s/it]

Train accuracy =  tensor(0.3542, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:06<00:00, 22.33s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.24it/s]




Round 1 testing accuracy: 21.72413793103448 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3106, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:34<01:08, 34.45s/it]

Train accuracy =  tensor(0.4621, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:51<00:24, 24.32s/it]

Train accuracy =  tensor(0.3712, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:09<00:00, 23.28s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:13<00:00,  2.81it/s]




Round 2 testing accuracy: 20.344827586206897 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4678, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:41<01:22, 41.24s/it]

Train accuracy =  tensor(0.5906, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:26<00:43, 43.69s/it]

Train accuracy =  tensor(0.6784, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:01<00:00, 40.44s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:24<00:00,  1.52it/s]




Round 3 testing accuracy: 46.206896551724135 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7053, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:52<01:44, 52.26s/it]

Train accuracy =  tensor(0.9324, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:19<00:37, 37.35s/it]

Train accuracy =  tensor(0.9469, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:43<00:00, 34.48s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:27<00:00,  1.34it/s]




Round 4 testing accuracy: 66.20689655172414 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.9878, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:47<01:34, 47.22s/it]

Train accuracy =  tensor(1.1057, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:41<00:51, 51.12s/it]

Train accuracy =  tensor(1.1057, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:18<00:00, 46.07s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:10<00:00,  3.52it/s]




Round 5 testing accuracy: 72.06896551724138 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0982, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:51<01:43, 51.60s/it]

Train accuracy =  tensor(1.1544, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:45<00:52, 52.81s/it]

Train accuracy =  tensor(1.1895, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:46<00:00, 55.41s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:09<00:00,  3.85it/s]




Round 6 testing accuracy: 72.41379310344827 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4333, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:39, 19.94s/it]

Train accuracy =  tensor(0.2667, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:33<00:16, 16.34s/it]

Train accuracy =  tensor(0.4833, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:44<00:00, 14.98s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:22<00:00,  1.64it/s]


Round 0 testing accuracy: 18.96551724137931 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.3229, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:19<00:38, 19.07s/it]

Train accuracy =  tensor(0.4583, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:33<00:16, 16.58s/it]

Train accuracy =  tensor(0.5625, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:55<00:00, 18.47s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.30it/s]




Round 1 testing accuracy: 46.55172413793103 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6212, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:38<01:16, 38.39s/it]

Train accuracy =  tensor(0.7121, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:56<00:26, 26.68s/it]

Train accuracy =  tensor(0.8030, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:16<00:00, 25.54s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:14<00:00,  2.52it/s]




Round 2 testing accuracy: 60.0 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.7953, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:24<00:48, 24.19s/it]

Train accuracy =  tensor(0.9474, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:00<00:31, 31.28s/it]

Train accuracy =  tensor(1.0351, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:18<00:00, 26.30s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.23it/s]




Round 3 testing accuracy: 79.65517241379311 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.0580, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:51<01:42, 51.02s/it]

Train accuracy =  tensor(1.1449, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:24<00:40, 40.71s/it]

Train accuracy =  tensor(1.1884, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:06<00:00, 42.26s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:14<00:00,  2.60it/s]




Round 4 testing accuracy: 80.3448275862069 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.1545, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:33<01:07, 33.89s/it]

Train accuracy =  tensor(1.2154, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:06<00:33, 33.35s/it]

Train accuracy =  tensor(1.2480, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:35<00:00, 31.79s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.26it/s]




Round 5 testing accuracy: 86.89655172413792 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(1.2140, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:29<00:59, 29.96s/it]

Train accuracy =  tensor(1.2737, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:23<00:44, 44.05s/it]

Train accuracy =  tensor(1.2842, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:19<00:00, 46.46s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.21it/s]




Round 6 testing accuracy: 90.3448275862069 %
Samples: 474


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of labeled pool: 100
number of unlabeled pool: 7000
number of testing pool: 290

Round 0


  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.2667, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:24<00:49, 24.61s/it]

Train accuracy =  tensor(0.3333, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:36<00:17, 17.36s/it]

Train accuracy =  tensor(0.3667, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:59<00:00, 19.97s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:09<00:00,  4.06it/s]


Round 0 testing accuracy: 23.103448275862068 %
Samples: 100
Round 1
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4583, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:16<00:32, 16.36s/it]

Train accuracy =  tensor(0.4167, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:37<00:18, 18.98s/it]

Train accuracy =  tensor(0.3542, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:00<00:00, 20.11s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:28<00:00,  1.31it/s]




Round 1 testing accuracy: 36.206896551724135 %
Samples: 161
Round 2
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.4470, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:57, 28.84s/it]

Train accuracy =  tensor(0.4621, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:49<00:23, 23.93s/it]

Train accuracy =  tensor(0.5379, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:12<00:00, 24.13s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:14<00:00,  2.56it/s]




Round 2 testing accuracy: 40.3448275862069 %
Samples: 221
Round 3
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5556, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:25<00:51, 25.67s/it]

Train accuracy =  tensor(0.6374, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [00:45<00:22, 22.03s/it]

Train accuracy =  tensor(0.5497, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:30<00:00, 30.30s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:28<00:00,  1.32it/s]




Round 3 testing accuracy: 40.689655172413794 %
Samples: 282
Round 4
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.5942, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:28<00:56, 28.49s/it]

Train accuracy =  tensor(0.6087, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:12<00:37, 37.64s/it]

Train accuracy =  tensor(0.6522, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [01:52<00:00, 37.44s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:16<00:00,  2.27it/s]




Round 4 testing accuracy: 40.0 %
Samples: 344
Round 5
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6138, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [01:02<02:04, 62.45s/it]

Train accuracy =  tensor(0.6748, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:52<00:54, 54.99s/it]

Train accuracy =  tensor(0.7154, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:40<00:00, 53.41s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:13<00:00,  2.67it/s]




Round 5 testing accuracy: 40.3448275862069 %
Samples: 409
Round 6
Loading last best model


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|                                                                         | 0/3 [00:00<?, ?it/s]

Train accuracy =  tensor(0.6807, device='cuda:0')
saving model


 33%|█████████████████████▋                                           | 1/3 [00:49<01:38, 49.34s/it]

Train accuracy =  tensor(0.7509, device='cuda:0')
saving model


 67%|███████████████████████████████████████████▎                     | 2/3 [01:32<00:45, 45.85s/it]

Train accuracy =  tensor(0.7579, device='cuda:0')
saving model


100%|█████████████████████████████████████████████████████████████████| 3/3 [02:26<00:00, 48.69s/it]
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 37/37 [00:27<00:00,  1.34it/s]




Round 6 testing accuracy: 40.3448275862069 %
Samples: 474


In [7]:
# Display the accuracy results collected by the experiment cell above.
# The value renders as a list of lists of test-accuracy percentages;
# presumably one inner list per repetition of the outer loop, with one
# entry per active-learning round -- TODO confirm against the loop body.
acc_list_kc_list

[[21.72413793103448,
  25.17241379310345,
  28.620689655172416,
  27.241379310344826,
  28.620689655172416,
  60.689655172413794,
  74.48275862068967],
 [21.379310344827587,
  25.862068965517242,
  35.86206896551724,
  45.17241379310345,
  59.6551724137931,
  73.79310344827587,
  90.0],
 [22.758620689655174,
  22.758620689655174,
  24.137931034482758,
  42.758620689655174,
  71.72413793103448,
  74.48275862068967,
  80.6896551724138],
 [27.241379310344826,
  26.20689655172414,
  27.93103448275862,
  37.24137931034483,
  49.310344827586206,
  58.9655172413793,
  69.3103448275862],
 [25.17241379310345,
  35.86206896551724,
  39.6551724137931,
  69.3103448275862,
  81.37931034482759,
  93.10344827586206,
  93.79310344827586],
 [20.689655172413794,
  25.517241379310345,
  27.241379310344826,
  40.3448275862069,
  46.206896551724135,
  43.44827586206896,
  56.55172413793104],
 [13.448275862068964,
  23.79310344827586,
  43.44827586206896,
  45.17241379310345,
  55.86206896551724,
  82.06896

In [8]:
# Average the collected accuracy lists element-wise along the outer axis,
# producing a single mean test accuracy for each position (round).
np.asarray(acc_list_kc_list).mean(axis=0)

array([21.55172414, 28.96551724, 34.75862069, 47.37931034, 57.93103448,
       68.5862069 , 76.31034483])