In [1]:
import boto3
import task_distill_soumyajit
import sys

In [2]:
# --- Run configuration -------------------------------------------------------
# Model checkpoints and dataset location, given as relative paths.
teacher = "models/models--JeremiahZ--bert-base-uncased-rte"
student = "models/models--huawei-noah--TinyBERT_General_4L_312D"
data = "data/glue_data/RTE"
task = "rte"

# Augmentation settings; they are forwarded to the distillation script as
# --aug_train / --aug_mode / --fast_aug by the cells below.
augs = True
fast = 0.0
mode = "fast"

# Root output directory. With augmentation enabled the directory name also
# encodes the augmentation mode and the fast_aug value.
base_output = "models/TinyBERT-RTE"
base_output += f"-({mode})-({fast})/" if augs else "/"

# One sub-directory per pipeline stage.
temp_output = f"{base_output}step1"
output = f"{base_output}step2"
eval_output = f"{base_output}eval"

In [3]:
# Show the three stage directories, one per line, as a sanity check.
print(temp_output, output, eval_output, sep="\n")

models/TinyBERT-RTE-(fast)-(0.0)/step1
models/TinyBERT-RTE-(fast)-(0.0)/step2
models/TinyBERT-RTE-(fast)-(0.0)/eval


In [4]:
# Option sets for the three runs. Keys are the option names handed to the
# script; insertion order is the order in which they appear on the command line.

# Stage 1: distil from the teacher into the general student checkpoint and
# write the result to `temp_output`.
task_distill_step1 = {
    "teacher_model": teacher,
    "student_model": student,
    "data_dir": data,
    "task_name": task,
    "output_dir": temp_output,
    "max_seq_length": 128,
    "train_batch_size": 32,
    "num_train_epochs": 10,
    "do_lower_case": True,
    "aug_train": augs,
    "aug_mode": mode,
    "fast_aug": fast,
}

# Stage 2: start from the stage-1 output (`temp_output`) with pred_distill
# enabled and write the result to `output`.
task_distill_step2 = {
    "teacher_model": teacher,
    "student_model": temp_output,
    "data_dir": data,
    "task_name": task,
    "output_dir": output,
    "learning_rate": 3e-5,
    "eval_step": 100,
    "max_seq_length": 128,
    "train_batch_size": 32,
    "num_train_epochs": 10,
    "do_lower_case": True,
    "pred_distill": True,
    "aug_train": augs,
    "aug_mode": mode,
    "fast_aug": fast,
}

# Evaluation-only run on the stage-2 student (do_eval set, no teacher given).
evaluation = {
    "student_model": output,
    "data_dir": data,
    "task_name": task,
    "output_dir": eval_output,
    "eval_batch_size": 32,
    "max_seq_length": 128,
    "do_lower_case": True,
    "do_eval": True,
}

In [5]:
def getArguments(dict_args):
    """Convert an option dictionary into a list of command-line arguments.

    Args:
        dict_args: Mapping of option name to value. Iteration order is kept.

    Returns:
        A list of strings, one per emitted option:
          * ``True``  -> ``"--name"`` (bare flag)
          * ``False`` -> nothing (the flag is omitted)
          * any other value -> ``"--name=value"``

    Only genuine ``bool`` values are treated as flags. Numeric values that
    merely compare equal to a boolean (``0``, ``1``, ``0.0``, ``1.0``) are
    emitted as ``--name=value``; an equality/membership test against
    ``[True, False]`` would misclassify them, turning ``1`` into a bare flag
    and silently dropping ``0``.
    """
    list_args = []
    for arg_name, arg_val in dict_args.items():
        if isinstance(arg_val, bool):
            # Boolean switch: present when True, absent when False.
            if arg_val:
                list_args.append(f"--{arg_name}")
        else:
            list_args.append(f"--{arg_name}={arg_val}")

    return list_args

In [6]:
# Preview the argv list that the stage-1 options produce.
print(["task_distill_soumyajit.py", *getArguments(task_distill_step1)])

['task_distill_soumyajit.py', '--teacher_model=models/models--JeremiahZ--bert-base-uncased-rte', '--student_model=models/models--huawei-noah--TinyBERT_General_4L_312D', '--data_dir=data/glue_data/RTE', '--task_name=rte', '--output_dir=models/TinyBERT-RTE-(fast)-(0.0)/step1', '--max_seq_length=128', '--train_batch_size=32', '--num_train_epochs=10', '--do_lower_case', '--aug_train', '--aug_mode=fast', '--fast_aug=0.0']


In [7]:
# Stage 1 run. main() is called without parameters, so the options are handed
# over through sys.argv, with the script name in position 0.
arguments = ["task_distill_soumyajit.py", *getArguments(task_distill_step1)]
sys.argv = arguments

print(f"Arguments passed\n{arguments}")
task_distill_soumyajit.main()

Arguments passed
['task_distill_soumyajit.py', '--teacher_model=models/models--JeremiahZ--bert-base-uncased-rte', '--student_model=models/models--huawei-noah--TinyBERT_General_4L_312D', '--data_dir=data/glue_data/RTE', '--task_name=rte', '--output_dir=models/TinyBERT-RTE-(fast)-(0.0)/step1', '--max_seq_length=128', '--train_batch_size=32', '--num_train_epochs=10', '--do_lower_case', '--aug_train', '--aug_mode=fast', '--fast_aug=0.0']
12/13 09:41:13 PM The args: Namespace(data_dir='data/glue_data/RTE', teacher_model='models/models--JeremiahZ--bert-base-uncased-rte', student_model='models/models--huawei-noah--TinyBERT_General_4L_312D', task_name='rte', output_dir='models/TinyBERT-RTE-(fast)-(0.0)/step1', cache_dir='', max_seq_length=128, do_eval=False, do_lower_case=True, train_batch_size=32, eval_batch_size=32, learning_rate=5e-05, weight_decay=0.0001, num_train_epochs=10.0, warmup_proportion=0.1, no_cuda=False, seed=42, gradient_accumulation_steps=1, aug_train=True, aug_mode='fast', fa

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at ../torch/csrc/utils/python_arg_parser.cpp:1630.)
  next_m.mul_(beta1).add_(1 - beta1, grad)


12/13 09:41:37 PM ***** Running evaluation *****
12/13 09:41:37 PM   Epoch = 0 iter 49 step
12/13 09:41:37 PM   Num examples = 277
12/13 09:41:37 PM   Batch size = 32
12/13 09:41:37 PM ***** Eval results *****
12/13 09:41:37 PM   att_loss = 4.695378541946411
12/13 09:41:37 PM   cls_loss = 0.0
12/13 09:41:37 PM   global_step = 49
12/13 09:41:37 PM   loss = 6.638311103898651
12/13 09:41:37 PM   rep_loss = 1.9429325157282304
12/13 09:41:37 PM ***** Save model *****
12/13 09:41:46 PM ***** Running evaluation *****
12/13 09:41:46 PM   Epoch = 0 iter 99 step
12/13 09:41:46 PM   Num examples = 277
12/13 09:41:46 PM   Batch size = 32
12/13 09:41:46 PM ***** Eval results *****
12/13 09:41:46 PM   att_loss = 4.445144906188503
12/13 09:41:46 PM   cls_loss = 0.0
12/13 09:41:46 PM   global_step = 99
12/13 09:41:46 PM   loss = 6.16315485732724
12/13 09:41:46 PM   rep_loss = 1.7180099378932605
12/13 09:41:46 PM ***** Save model *****
12/13 09:41:54 PM ***** Running evaluation *****
12/13 09:41:54 PM 

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:43:04 PM ***** Running evaluation *****
12/13 09:43:04 PM   Epoch = 1 iter 549 step
12/13 09:43:04 PM   Num examples = 277
12/13 09:43:04 PM   Batch size = 32
12/13 09:43:04 PM ***** Eval results *****
12/13 09:43:04 PM   att_loss = 3.7034560527120317
12/13 09:43:04 PM   cls_loss = 0.0
12/13 09:43:04 PM   global_step = 549
12/13 09:43:04 PM   loss = 4.8051718558583945
12/13 09:43:04 PM   rep_loss = 1.1017157690865653
12/13 09:43:04 PM ***** Save model *****
12/13 09:43:12 PM ***** Running evaluation *****
12/13 09:43:12 PM   Epoch = 1 iter 599 step
12/13 09:43:12 PM   Num examples = 277
12/13 09:43:12 PM   Batch size = 32
12/13 09:43:12 PM ***** Eval results *****
12/13 09:43:12 PM   att_loss = 3.7442278464635215
12/13 09:43:12 PM   cls_loss = 0.0
12/13 09:43:12 PM   global_step = 599
12/13 09:43:12 PM   loss = 4.846582330190218
12/13 09:43:12 PM   rep_loss = 1.1023544776134002
12/13 09:43:12 PM ***** Save model *****
12/13 09:43:21 PM ***** Running evaluation *****
12/13 09:4

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:44:30 PM ***** Running evaluation *****
12/13 09:44:30 PM   Epoch = 2 iter 1049 step
12/13 09:44:30 PM   Num examples = 277
12/13 09:44:30 PM   Batch size = 32
12/13 09:44:30 PM ***** Eval results *****
12/13 09:44:30 PM   att_loss = 3.576209919793265
12/13 09:44:30 PM   cls_loss = 0.0
12/13 09:44:30 PM   global_step = 1049
12/13 09:44:30 PM   loss = 4.600911242621286
12/13 09:44:30 PM   rep_loss = 1.0247013824326652
12/13 09:44:30 PM ***** Save model *****
12/13 09:44:39 PM ***** Running evaluation *****
12/13 09:44:39 PM   Epoch = 2 iter 1099 step
12/13 09:44:39 PM   Num examples = 277
12/13 09:44:39 PM   Batch size = 32
12/13 09:44:39 PM ***** Eval results *****
12/13 09:44:39 PM   att_loss = 3.7090222459090385
12/13 09:44:39 PM   cls_loss = 0.0
12/13 09:44:39 PM   global_step = 1099
12/13 09:44:39 PM   loss = 4.745470231039482
12/13 09:44:39 PM   rep_loss = 1.0364479966330946
12/13 09:44:39 PM ***** Save model *****
12/13 09:44:48 PM ***** Running evaluation *****
12/13 09

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:46:08 PM ***** Running evaluation *****
12/13 09:46:08 PM   Epoch = 3 iter 1599 step
12/13 09:46:08 PM   Num examples = 277
12/13 09:46:08 PM   Batch size = 32
12/13 09:46:08 PM ***** Eval results *****
12/13 09:46:08 PM   att_loss = 3.5120571388138666
12/13 09:46:08 PM   cls_loss = 0.0
12/13 09:46:08 PM   global_step = 1599
12/13 09:46:08 PM   loss = 4.501198808352153
12/13 09:46:08 PM   rep_loss = 0.9891416562928094
12/13 09:46:08 PM ***** Save model *****
12/13 09:46:17 PM ***** Running evaluation *****
12/13 09:46:17 PM   Epoch = 3 iter 1649 step
12/13 09:46:17 PM   Num examples = 277
12/13 09:46:17 PM   Batch size = 32
12/13 09:46:17 PM ***** Eval results *****
12/13 09:46:17 PM   att_loss = 3.5518198096474936
12/13 09:46:17 PM   cls_loss = 0.0
12/13 09:46:17 PM   global_step = 1649
12/13 09:46:17 PM   loss = 4.543899222861889
12/13 09:46:17 PM   rep_loss = 0.9920794263828633
12/13 09:46:17 PM ***** Save model *****
12/13 09:46:26 PM ***** Running evaluation *****
12/13 0

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:47:37 PM ***** Running evaluation *****
12/13 09:47:37 PM   Epoch = 4 iter 2099 step
12/13 09:47:37 PM   Num examples = 277
12/13 09:47:37 PM   Batch size = 32
12/13 09:47:37 PM ***** Eval results *****
12/13 09:47:37 PM   att_loss = 3.4930458227793375
12/13 09:47:37 PM   cls_loss = 0.0
12/13 09:47:37 PM   global_step = 2099
12/13 09:47:37 PM   loss = 4.46686216990153
12/13 09:47:37 PM   rep_loss = 0.9738163550694784
12/13 09:47:37 PM ***** Save model *****
12/13 09:47:46 PM ***** Running evaluation *****
12/13 09:47:46 PM   Epoch = 4 iter 2149 step
12/13 09:47:46 PM   Num examples = 277
12/13 09:47:46 PM   Batch size = 32
12/13 09:47:46 PM ***** Eval results *****
12/13 09:47:46 PM   att_loss = 3.554863933416513
12/13 09:47:46 PM   cls_loss = 0.0
12/13 09:47:46 PM   global_step = 2149
12/13 09:47:46 PM   loss = 4.531719336142907
12/13 09:47:46 PM   rep_loss = 0.9768554008924044
12/13 09:47:46 PM ***** Save model *****
12/13 09:47:55 PM ***** Running evaluation *****
12/13 09:

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:49:15 PM ***** Running evaluation *****
12/13 09:49:15 PM   Epoch = 5 iter 2649 step
12/13 09:49:15 PM   Num examples = 277
12/13 09:49:15 PM   Batch size = 32
12/13 09:49:15 PM ***** Eval results *****
12/13 09:49:15 PM   att_loss = 3.528121769428253
12/13 09:49:15 PM   cls_loss = 0.0
12/13 09:49:15 PM   global_step = 2649
12/13 09:49:15 PM   loss = 4.493611774661324
12/13 09:49:15 PM   rep_loss = 0.965489998459816
12/13 09:49:15 PM ***** Save model *****
12/13 09:49:24 PM ***** Running evaluation *****
12/13 09:49:24 PM   Epoch = 5 iter 2699 step
12/13 09:49:24 PM   Num examples = 277
12/13 09:49:24 PM   Batch size = 32
12/13 09:49:24 PM ***** Eval results *****
12/13 09:49:24 PM   att_loss = 3.5319980180009884
12/13 09:49:24 PM   cls_loss = 0.0
12/13 09:49:24 PM   global_step = 2699
12/13 09:49:24 PM   loss = 4.497966076465363
12/13 09:49:24 PM   rep_loss = 0.9659680508552714
12/13 09:49:24 PM ***** Save model *****
12/13 09:49:32 PM ***** Running evaluation *****
12/13 09:

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:50:43 PM ***** Running evaluation *****
12/13 09:50:43 PM   Epoch = 6 iter 3149 step
12/13 09:50:43 PM   Num examples = 277
12/13 09:50:43 PM   Batch size = 32
12/13 09:50:43 PM ***** Eval results *****
12/13 09:50:43 PM   att_loss = 3.4012620552726416
12/13 09:50:43 PM   cls_loss = 0.0
12/13 09:50:43 PM   global_step = 3149
12/13 09:50:43 PM   loss = 4.3478950313899825
12/13 09:50:43 PM   rep_loss = 0.9466330020324044
12/13 09:50:43 PM ***** Save model *****
12/13 09:50:52 PM ***** Running evaluation *****
12/13 09:50:52 PM   Epoch = 6 iter 3199 step
12/13 09:50:52 PM   Num examples = 277
12/13 09:50:52 PM   Batch size = 32
12/13 09:50:52 PM ***** Eval results *****
12/13 09:50:52 PM   att_loss = 3.4322082506467217
12/13 09:50:52 PM   cls_loss = 0.0
12/13 09:50:52 PM   global_step = 3199
12/13 09:50:52 PM   loss = 4.382360507364142
12/13 09:50:52 PM   rep_loss = 0.9501522640659384
12/13 09:50:52 PM ***** Save model *****
12/13 09:51:00 PM ***** Running evaluation *****
12/13 

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:52:11 PM ***** Running evaluation *****
12/13 09:52:11 PM   Epoch = 7 iter 3649 step
12/13 09:52:11 PM   Num examples = 277
12/13 09:52:11 PM   Batch size = 32
12/13 09:52:11 PM ***** Eval results *****
12/13 09:52:11 PM   att_loss = 2.983149766921997
12/13 09:52:11 PM   cls_loss = 0.0
12/13 09:52:11 PM   global_step = 3649
12/13 09:52:11 PM   loss = 3.8926455974578857
12/13 09:52:11 PM   rep_loss = 0.9094957709312439
12/13 09:52:11 PM ***** Save model *****
12/13 09:52:20 PM ***** Running evaluation *****
12/13 09:52:20 PM   Epoch = 7 iter 3699 step
12/13 09:52:20 PM   Num examples = 277
12/13 09:52:20 PM   Batch size = 32
12/13 09:52:20 PM ***** Eval results *****
12/13 09:52:20 PM   att_loss = 3.4402474669309764
12/13 09:52:20 PM   cls_loss = 0.0
12/13 09:52:20 PM   global_step = 3699
12/13 09:52:20 PM   loss = 4.387459461505596
12/13 09:52:20 PM   rep_loss = 0.947212007183295
12/13 09:52:20 PM ***** Save model *****
12/13 09:52:29 PM ***** Running evaluation *****
12/13 09

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:54:12 PM ***** Running evaluation *****
12/13 09:54:12 PM   Epoch = 8 iter 4199 step
12/13 09:54:12 PM   Num examples = 277
12/13 09:54:12 PM   Batch size = 32
12/13 09:54:12 PM ***** Eval results *****
12/13 09:54:12 PM   att_loss = 3.4695959552641837
12/13 09:54:12 PM   cls_loss = 0.0
12/13 09:54:12 PM   global_step = 4199
12/13 09:54:12 PM   loss = 4.413750040915705
12/13 09:54:12 PM   rep_loss = 0.9441540395059893
12/13 09:54:12 PM ***** Save model *****
12/13 09:54:28 PM ***** Running evaluation *****
12/13 09:54:28 PM   Epoch = 8 iter 4249 step
12/13 09:54:28 PM   Num examples = 277
12/13 09:54:28 PM   Batch size = 32
12/13 09:54:28 PM ***** Eval results *****
12/13 09:54:28 PM   att_loss = 3.455381284525365
12/13 09:54:28 PM   cls_loss = 0.0
12/13 09:54:28 PM   global_step = 4249
12/13 09:54:28 PM   loss = 4.398703204260932
12/13 09:54:28 PM   rep_loss = 0.943321892508754
12/13 09:54:28 PM ***** Save model *****
12/13 09:54:44 PM ***** Running evaluation *****
12/13 09:

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 09:56:54 PM ***** Running evaluation *****
12/13 09:56:54 PM   Epoch = 9 iter 4699 step
12/13 09:56:54 PM   Num examples = 277
12/13 09:56:54 PM   Batch size = 32
12/13 09:56:54 PM ***** Eval results *****
12/13 09:56:54 PM   att_loss = 3.5398056745529174
12/13 09:56:54 PM   cls_loss = 0.0
12/13 09:56:54 PM   global_step = 4699
12/13 09:56:54 PM   loss = 4.493295907974243
12/13 09:56:54 PM   rep_loss = 0.9534902334213257
12/13 09:56:54 PM ***** Save model *****
12/13 09:57:10 PM ***** Running evaluation *****
12/13 09:57:10 PM   Epoch = 9 iter 4749 step
12/13 09:57:10 PM   Num examples = 277
12/13 09:57:10 PM   Batch size = 32
12/13 09:57:10 PM ***** Eval results *****
12/13 09:57:10 PM   att_loss = 3.487754400571187
12/13 09:57:10 PM   cls_loss = 0.0
12/13 09:57:10 PM   global_step = 4749
12/13 09:57:10 PM   loss = 4.433496018250783
12/13 09:57:10 PM   rep_loss = 0.945741613705953
12/13 09:57:10 PM ***** Save model *****
12/13 09:57:26 PM ***** Running evaluation *****
12/13 09:

In [8]:
# Stage 2 run. Same launch procedure as stage 1, with the stage-2 options:
# overwrite sys.argv, then call the script's main() without parameters.
arguments = ["task_distill_soumyajit.py", *getArguments(task_distill_step2)]
sys.argv = arguments

print(f"Arguments passed\n{arguments}")
task_distill_soumyajit.main()

Arguments passed
['task_distill_soumyajit.py', '--teacher_model=models/models--JeremiahZ--bert-base-uncased-rte', '--student_model=models/TinyBERT-RTE-(fast)-(0.0)/step1', '--data_dir=data/glue_data/RTE', '--task_name=rte', '--output_dir=models/TinyBERT-RTE-(fast)-(0.0)/step2', '--learning_rate=3e-05', '--eval_step=100', '--max_seq_length=128', '--train_batch_size=32', '--num_train_epochs=10', '--do_lower_case', '--pred_distill', '--aug_train', '--aug_mode=fast', '--fast_aug=0.0']
12/13 09:59:40 PM The args: Namespace(data_dir='data/glue_data/RTE', teacher_model='models/models--JeremiahZ--bert-base-uncased-rte', student_model='models/TinyBERT-RTE-(fast)-(0.0)/step1', task_name='rte', output_dir='models/TinyBERT-RTE-(fast)-(0.0)/step2', cache_dir='', max_seq_length=128, do_eval=False, do_lower_case=True, train_batch_size=32, eval_batch_size=32, learning_rate=3e-05, weight_decay=0.0001, num_train_epochs=10.0, warmup_proportion=0.1, no_cuda=False, seed=42, gradient_accumulation_steps=1, a

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:00:26 PM ***** Running evaluation *****
12/13 10:00:26 PM   Epoch = 0 iter 99 step
12/13 10:00:26 PM   Num examples = 277
12/13 10:00:26 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:00:26 PM ***** Eval results *****
12/13 10:00:26 PM   acc = 0.4729241877256318
12/13 10:00:26 PM   att_loss = 0.0
12/13 10:00:26 PM   cls_loss = 0.32762846561393355
12/13 10:00:26 PM   eval_loss = 0.7414980398284065
12/13 10:00:26 PM   global_step = 99
12/13 10:00:26 PM   loss = 0.32762846561393355
12/13 10:00:26 PM   rep_loss = 0.0
12/13 10:00:26 PM ***** Save model *****
12/13 10:00:57 PM ***** Running evaluation *****
12/13 10:00:57 PM   Epoch = 0 iter 199 step
12/13 10:00:57 PM   Num examples = 277
12/13 10:00:57 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:00:58 PM ***** Eval results *****
12/13 10:00:58 PM   acc = 0.5884476534296029
12/13 10:00:58 PM   att_loss = 0.0
12/13 10:00:58 PM   cls_loss = 0.31278495078709856
12/13 10:00:58 PM   eval_loss = 0.6623678472306993
12/13 10:00:58 PM   global_step = 199
12/13 10:00:58 PM   loss = 0.31278495078709856
12/13 10:00:58 PM   rep_loss = 0.0
12/13 10:00:58 PM ***** Save model *****
12/13 10:01:29 PM ***** Running evaluation *****
12/13 10:01:29 PM   Epoch = 0 iter 299 step
12/13 10:01:29 PM   Num examples = 277
12/13 10:01:29 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:01:29 PM ***** Eval results *****
12/13 10:01:29 PM   acc = 0.6064981949458483
12/13 10:01:29 PM   att_loss = 0.0
12/13 10:01:29 PM   cls_loss = 0.30581487281665354
12/13 10:01:29 PM   eval_loss = 0.6807696421941122
12/13 10:01:29 PM   global_step = 299
12/13 10:01:29 PM   loss = 0.30581487281665354
12/13 10:01:29 PM   rep_loss = 0.0
12/13 10:01:29 PM ***** Save model *****
12/13 10:02:01 PM ***** Running evaluation *****
12/13 10:02:01 PM   Epoch = 0 iter 399 step
12/13 10:02:01 PM   Num examples = 277
12/13 10:02:01 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:02:01 PM ***** Eval results *****
12/13 10:02:01 PM   acc = 0.5631768953068592
12/13 10:02:01 PM   att_loss = 0.0
12/13 10:02:01 PM   cls_loss = 0.30213425758488494
12/13 10:02:01 PM   eval_loss = 0.6963117652469211
12/13 10:02:01 PM   global_step = 399
12/13 10:02:01 PM   loss = 0.30213425758488494
12/13 10:02:01 PM   rep_loss = 0.0
12/13 10:02:32 PM ***** Running evaluation *****
12/13 10:02:32 PM   Epoch = 0 iter 499 step
12/13 10:02:32 PM   Num examples = 277
12/13 10:02:32 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:02:33 PM ***** Eval results *****
12/13 10:02:33 PM   acc = 0.6353790613718412
12/13 10:02:33 PM   att_loss = 0.0
12/13 10:02:33 PM   cls_loss = 0.30014806430421037
12/13 10:02:33 PM   eval_loss = 0.6559031208356222
12/13 10:02:33 PM   global_step = 499
12/13 10:02:33 PM   loss = 0.30014806430421037
12/13 10:02:33 PM   rep_loss = 0.0
12/13 10:02:33 PM ***** Save model *****


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:03:04 PM ***** Running evaluation *****
12/13 10:03:04 PM   Epoch = 1 iter 599 step
12/13 10:03:04 PM   Num examples = 277
12/13 10:03:04 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:03:05 PM ***** Eval results *****
12/13 10:03:05 PM   acc = 0.51985559566787
12/13 10:03:05 PM   att_loss = 0.0
12/13 10:03:05 PM   cls_loss = 0.2893962630858788
12/13 10:03:05 PM   eval_loss = 0.7076286806000603
12/13 10:03:05 PM   global_step = 599
12/13 10:03:05 PM   loss = 0.2893962630858788
12/13 10:03:05 PM   rep_loss = 0.0
12/13 10:03:36 PM ***** Running evaluation *****
12/13 10:03:36 PM   Epoch = 1 iter 699 step
12/13 10:03:36 PM   Num examples = 277
12/13 10:03:36 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:03:36 PM ***** Eval results *****
12/13 10:03:36 PM   acc = 0.51985559566787
12/13 10:03:36 PM   att_loss = 0.0
12/13 10:03:36 PM   cls_loss = 0.28885915306177035
12/13 10:03:36 PM   eval_loss = 0.7150765988561842
12/13 10:03:36 PM   global_step = 699
12/13 10:03:36 PM   loss = 0.28885915306177035
12/13 10:03:36 PM   rep_loss = 0.0
12/13 10:04:07 PM ***** Running evaluation *****
12/13 10:04:07 PM   Epoch = 1 iter 799 step
12/13 10:04:07 PM   Num examples = 277
12/13 10:04:07 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:04:08 PM ***** Eval results *****
12/13 10:04:08 PM   acc = 0.5523465703971119
12/13 10:04:08 PM   att_loss = 0.0
12/13 10:04:08 PM   cls_loss = 0.28947863915412547
12/13 10:04:08 PM   eval_loss = 0.703427255153656
12/13 10:04:08 PM   global_step = 799
12/13 10:04:08 PM   loss = 0.28947863915412547
12/13 10:04:08 PM   rep_loss = 0.0
12/13 10:04:39 PM ***** Running evaluation *****
12/13 10:04:39 PM   Epoch = 1 iter 899 step
12/13 10:04:39 PM   Num examples = 277
12/13 10:04:39 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:04:39 PM ***** Eval results *****
12/13 10:04:39 PM   acc = 0.6245487364620939
12/13 10:04:39 PM   att_loss = 0.0
12/13 10:04:39 PM   cls_loss = 0.28898238284247263
12/13 10:04:39 PM   eval_loss = 0.6651495297749838
12/13 10:04:39 PM   global_step = 899
12/13 10:04:39 PM   loss = 0.28898238284247263
12/13 10:04:39 PM   rep_loss = 0.0
12/13 10:05:10 PM ***** Running evaluation *****
12/13 10:05:10 PM   Epoch = 1 iter 999 step
12/13 10:05:10 PM   Num examples = 277
12/13 10:05:10 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:05:11 PM ***** Eval results *****
12/13 10:05:11 PM   acc = 0.6245487364620939
12/13 10:05:11 PM   att_loss = 0.0
12/13 10:05:11 PM   cls_loss = 0.28868269103590916
12/13 10:05:11 PM   eval_loss = 0.6663139793607924
12/13 10:05:11 PM   global_step = 999
12/13 10:05:11 PM   loss = 0.28868269103590916
12/13 10:05:11 PM   rep_loss = 0.0


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:05:42 PM ***** Running evaluation *****
12/13 10:05:42 PM   Epoch = 2 iter 1099 step
12/13 10:05:42 PM   Num examples = 277
12/13 10:05:42 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:05:42 PM ***** Eval results *****
12/13 10:05:42 PM   acc = 0.628158844765343
12/13 10:05:42 PM   att_loss = 0.0
12/13 10:05:42 PM   cls_loss = 0.28775946874367564
12/13 10:05:42 PM   eval_loss = 0.666837043232388
12/13 10:05:42 PM   global_step = 1099
12/13 10:05:42 PM   loss = 0.28775946874367564
12/13 10:05:42 PM   rep_loss = 0.0
12/13 10:06:13 PM ***** Running evaluation *****
12/13 10:06:13 PM   Epoch = 2 iter 1199 step
12/13 10:06:13 PM   Num examples = 277
12/13 10:06:13 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:06:13 PM ***** Eval results *****
12/13 10:06:13 PM   acc = 0.6064981949458483
12/13 10:06:13 PM   att_loss = 0.0
12/13 10:06:13 PM   cls_loss = 0.2866336272400656
12/13 10:06:13 PM   eval_loss = 0.6728384362326728
12/13 10:06:13 PM   global_step = 1199
12/13 10:06:13 PM   loss = 0.2866336272400656
12/13 10:06:13 PM   rep_loss = 0.0
12/13 10:06:45 PM ***** Running evaluation *****
12/13 10:06:45 PM   Epoch = 2 iter 1299 step
12/13 10:06:45 PM   Num examples = 277
12/13 10:06:45 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:06:45 PM ***** Eval results *****
12/13 10:06:45 PM   acc = 0.5992779783393501
12/13 10:06:45 PM   att_loss = 0.0
12/13 10:06:45 PM   cls_loss = 0.28664724766512323
12/13 10:06:45 PM   eval_loss = 0.6758838229709201
12/13 10:06:45 PM   global_step = 1299
12/13 10:06:45 PM   loss = 0.28664724766512323
12/13 10:06:45 PM   rep_loss = 0.0
12/13 10:07:16 PM ***** Running evaluation *****
12/13 10:07:16 PM   Epoch = 2 iter 1399 step
12/13 10:07:16 PM   Num examples = 277
12/13 10:07:16 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:07:16 PM ***** Eval results *****
12/13 10:07:16 PM   acc = 0.6028880866425993
12/13 10:07:16 PM   att_loss = 0.0
12/13 10:07:16 PM   cls_loss = 0.2872332617515275
12/13 10:07:16 PM   eval_loss = 0.6738345159424676
12/13 10:07:16 PM   global_step = 1399
12/13 10:07:16 PM   loss = 0.2872332617515275
12/13 10:07:16 PM   rep_loss = 0.0
12/13 10:07:47 PM ***** Running evaluation *****
12/13 10:07:47 PM   Epoch = 2 iter 1499 step
12/13 10:07:47 PM   Num examples = 277
12/13 10:07:47 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:07:48 PM ***** Eval results *****
12/13 10:07:48 PM   acc = 0.6064981949458483
12/13 10:07:48 PM   att_loss = 0.0
12/13 10:07:48 PM   cls_loss = 0.2872839778726941
12/13 10:07:48 PM   eval_loss = 0.663815995057424
12/13 10:07:48 PM   global_step = 1499
12/13 10:07:48 PM   loss = 0.2872839778726941
12/13 10:07:48 PM   rep_loss = 0.0


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:08:19 PM ***** Running evaluation *****
12/13 10:08:19 PM   Epoch = 3 iter 1599 step
12/13 10:08:19 PM   Num examples = 277
12/13 10:08:19 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:08:19 PM ***** Eval results *****
12/13 10:08:19 PM   acc = 0.5992779783393501
12/13 10:08:19 PM   att_loss = 0.0
12/13 10:08:19 PM   cls_loss = 0.28683295100927353
12/13 10:08:19 PM   eval_loss = 0.6705364651150174
12/13 10:08:19 PM   global_step = 1599
12/13 10:08:19 PM   loss = 0.28683295100927353
12/13 10:08:19 PM   rep_loss = 0.0
12/13 10:08:51 PM ***** Running evaluation *****
12/13 10:08:51 PM   Epoch = 3 iter 1699 step
12/13 10:08:51 PM   Num examples = 277
12/13 10:08:51 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:08:51 PM ***** Eval results *****
12/13 10:08:51 PM   acc = 0.5884476534296029
12/13 10:08:51 PM   att_loss = 0.0
12/13 10:08:51 PM   cls_loss = 0.2860168621820562
12/13 10:08:51 PM   eval_loss = 0.6786398622724745
12/13 10:08:51 PM   global_step = 1699
12/13 10:08:51 PM   loss = 0.2860168621820562
12/13 10:08:51 PM   rep_loss = 0.0
12/13 10:09:22 PM ***** Running evaluation *****
12/13 10:09:22 PM   Epoch = 3 iter 1799 step
12/13 10:09:22 PM   Num examples = 277
12/13 10:09:22 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:09:22 PM ***** Eval results *****
12/13 10:09:22 PM   acc = 0.5667870036101083
12/13 10:09:22 PM   att_loss = 0.0
12/13 10:09:22 PM   cls_loss = 0.2869815884505288
12/13 10:09:22 PM   eval_loss = 0.6809747285313077
12/13 10:09:23 PM   global_step = 1799
12/13 10:09:23 PM   loss = 0.2869815884505288
12/13 10:09:23 PM   rep_loss = 0.0
12/13 10:09:54 PM ***** Running evaluation *****
12/13 10:09:54 PM   Epoch = 3 iter 1899 step
12/13 10:09:54 PM   Num examples = 277
12/13 10:09:54 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:09:54 PM ***** Eval results *****
12/13 10:09:54 PM   acc = 0.5776173285198556
12/13 10:09:54 PM   att_loss = 0.0
12/13 10:09:54 PM   cls_loss = 0.2871870557289748
12/13 10:09:54 PM   eval_loss = 0.7030147910118103
12/13 10:09:54 PM   global_step = 1899
12/13 10:09:54 PM   loss = 0.2871870557289748
12/13 10:09:54 PM   rep_loss = 0.0
12/13 10:10:25 PM ***** Running evaluation *****
12/13 10:10:25 PM   Epoch = 3 iter 1999 step
12/13 10:10:25 PM   Num examples = 277
12/13 10:10:25 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:10:26 PM ***** Eval results *****
12/13 10:10:26 PM   acc = 0.5812274368231047
12/13 10:10:26 PM   att_loss = 0.0
12/13 10:10:26 PM   cls_loss = 0.2870134929314666
12/13 10:10:26 PM   eval_loss = 0.6873999966515435
12/13 10:10:26 PM   global_step = 1999
12/13 10:10:26 PM   loss = 0.2870134929314666
12/13 10:10:26 PM   rep_loss = 0.0


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:10:57 PM ***** Running evaluation *****
12/13 10:10:57 PM   Epoch = 4 iter 2099 step
12/13 10:10:57 PM   Num examples = 277
12/13 10:10:57 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:10:57 PM ***** Eval results *****
12/13 10:10:57 PM   acc = 0.5848375451263538
12/13 10:10:57 PM   att_loss = 0.0
12/13 10:10:57 PM   cls_loss = 0.2878644029299418
12/13 10:10:57 PM   eval_loss = 0.6818020807372199
12/13 10:10:57 PM   global_step = 2099
12/13 10:10:57 PM   loss = 0.2878644029299418
12/13 10:10:57 PM   rep_loss = 0.0
12/13 10:11:28 PM ***** Running evaluation *****
12/13 10:11:28 PM   Epoch = 4 iter 2199 step
12/13 10:11:28 PM   Num examples = 277
12/13 10:11:28 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:11:28 PM ***** Eval results *****
12/13 10:11:28 PM   acc = 0.5631768953068592
12/13 10:11:28 PM   att_loss = 0.0
12/13 10:11:28 PM   cls_loss = 0.28725927679435065
12/13 10:11:28 PM   eval_loss = 0.688748644457923
12/13 10:11:28 PM   global_step = 2199
12/13 10:11:28 PM   loss = 0.28725927679435065
12/13 10:11:28 PM   rep_loss = 0.0
12/13 10:11:59 PM ***** Running evaluation *****
12/13 10:11:59 PM   Epoch = 4 iter 2299 step
12/13 10:11:59 PM   Num examples = 277
12/13 10:11:59 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:11:59 PM ***** Eval results *****
12/13 10:11:59 PM   acc = 0.5956678700361011
12/13 10:11:59 PM   att_loss = 0.0
12/13 10:11:59 PM   cls_loss = 0.2863697478937548
12/13 10:11:59 PM   eval_loss = 0.6812773413128324
12/13 10:11:59 PM   global_step = 2299
12/13 10:11:59 PM   loss = 0.2863697478937548
12/13 10:11:59 PM   rep_loss = 0.0
12/13 10:12:30 PM ***** Running evaluation *****
12/13 10:12:30 PM   Epoch = 4 iter 2399 step
12/13 10:12:30 PM   Num examples = 277
12/13 10:12:30 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:12:31 PM ***** Eval results *****
12/13 10:12:31 PM   acc = 0.5667870036101083
12/13 10:12:31 PM   att_loss = 0.0
12/13 10:12:31 PM   cls_loss = 0.2864716949917021
12/13 10:12:31 PM   eval_loss = 0.6898648341496786
12/13 10:12:31 PM   global_step = 2399
12/13 10:12:31 PM   loss = 0.2864716949917021
12/13 10:12:31 PM   rep_loss = 0.0
12/13 10:13:02 PM ***** Running evaluation *****
12/13 10:13:02 PM   Epoch = 4 iter 2499 step
12/13 10:13:02 PM   Num examples = 277
12/13 10:13:02 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:13:02 PM ***** Eval results *****
12/13 10:13:02 PM   acc = 0.5595667870036101
12/13 10:13:02 PM   att_loss = 0.0
12/13 10:13:02 PM   cls_loss = 0.28677529987082423
12/13 10:13:02 PM   eval_loss = 0.7060231500201755
12/13 10:13:02 PM   global_step = 2499
12/13 10:13:02 PM   loss = 0.28677529987082423
12/13 10:13:02 PM   rep_loss = 0.0
12/13 10:13:33 PM ***** Running evaluation *****
12/13 10:13:33 PM   Epoch = 4 iter 2599 step
12/13 10:13:33 PM   Num examples = 277
12/13 10:13:33 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:13:34 PM ***** Eval results *****
12/13 10:13:34 PM   acc = 0.5848375451263538
12/13 10:13:34 PM   att_loss = 0.0
12/13 10:13:34 PM   cls_loss = 0.2867804189908852
12/13 10:13:34 PM   eval_loss = 0.6900798479715983
12/13 10:13:34 PM   global_step = 2599
12/13 10:13:34 PM   loss = 0.2867804189908852
12/13 10:13:34 PM   rep_loss = 0.0


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:14:05 PM ***** Running evaluation *****
12/13 10:14:05 PM   Epoch = 5 iter 2699 step
12/13 10:14:05 PM   Num examples = 277
12/13 10:14:05 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:14:05 PM ***** Eval results *****
12/13 10:14:05 PM   acc = 0.5884476534296029
12/13 10:14:05 PM   att_loss = 0.0
12/13 10:14:05 PM   cls_loss = 0.28900363090190484
12/13 10:14:05 PM   eval_loss = 0.680994345082177
12/13 10:14:05 PM   global_step = 2699
12/13 10:14:05 PM   loss = 0.28900363090190484
12/13 10:14:05 PM   rep_loss = 0.0
12/13 10:14:36 PM ***** Running evaluation *****
12/13 10:14:36 PM   Epoch = 5 iter 2799 step
12/13 10:14:36 PM   Num examples = 277
12/13 10:14:36 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:14:37 PM ***** Eval results *****
12/13 10:14:37 PM   acc = 0.5776173285198556
12/13 10:14:37 PM   att_loss = 0.0
12/13 10:14:37 PM   cls_loss = 0.2873195496570204
12/13 10:14:37 PM   eval_loss = 0.6816014779938592
12/13 10:14:37 PM   global_step = 2799
12/13 10:14:37 PM   loss = 0.2873195496570204
12/13 10:14:37 PM   rep_loss = 0.0
12/13 10:15:07 PM ***** Running evaluation *****
12/13 10:15:07 PM   Epoch = 5 iter 2899 step
12/13 10:15:07 PM   Num examples = 277
12/13 10:15:07 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:15:08 PM ***** Eval results *****
12/13 10:15:08 PM   acc = 0.5667870036101083
12/13 10:15:08 PM   att_loss = 0.0
12/13 10:15:08 PM   cls_loss = 0.2869632046948485
12/13 10:15:08 PM   eval_loss = 0.694237642818027
12/13 10:15:08 PM   global_step = 2899
12/13 10:15:08 PM   loss = 0.2869632046948485
12/13 10:15:08 PM   rep_loss = 0.0
12/13 10:15:39 PM ***** Running evaluation *****
12/13 10:15:39 PM   Epoch = 5 iter 2999 step
12/13 10:15:39 PM   Num examples = 277
12/13 10:15:39 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:15:39 PM ***** Eval results *****
12/13 10:15:39 PM   acc = 0.5740072202166066
12/13 10:15:39 PM   att_loss = 0.0
12/13 10:15:39 PM   cls_loss = 0.28691305556726937
12/13 10:15:39 PM   eval_loss = 0.6881507635116577
12/13 10:15:39 PM   global_step = 2999
12/13 10:15:39 PM   loss = 0.28691305556726937
12/13 10:15:39 PM   rep_loss = 0.0
12/13 10:16:10 PM ***** Running evaluation *****
12/13 10:16:10 PM   Epoch = 5 iter 3099 step
12/13 10:16:10 PM   Num examples = 277
12/13 10:16:10 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:16:10 PM ***** Eval results *****
12/13 10:16:10 PM   acc = 0.5776173285198556
12/13 10:16:10 PM   att_loss = 0.0
12/13 10:16:10 PM   cls_loss = 0.28670610279327463
12/13 10:16:10 PM   eval_loss = 0.6868116127120124
12/13 10:16:10 PM   global_step = 3099
12/13 10:16:10 PM   loss = 0.28670610279327463
12/13 10:16:10 PM   rep_loss = 0.0


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:16:42 PM ***** Running evaluation *****
12/13 10:16:42 PM   Epoch = 6 iter 3199 step
12/13 10:16:42 PM   Num examples = 277
12/13 10:16:42 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:16:42 PM ***** Eval results *****
12/13 10:16:42 PM   acc = 0.5776173285198556
12/13 10:16:42 PM   att_loss = 0.0
12/13 10:16:42 PM   cls_loss = 0.28572449700473107
12/13 10:16:42 PM   eval_loss = 0.6951386663648818
12/13 10:16:42 PM   global_step = 3199
12/13 10:16:42 PM   loss = 0.28572449700473107
12/13 10:16:42 PM   rep_loss = 0.0
12/13 10:17:13 PM ***** Running evaluation *****
12/13 10:17:13 PM   Epoch = 6 iter 3299 step
12/13 10:17:13 PM   Num examples = 277
12/13 10:17:13 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:17:13 PM ***** Eval results *****
12/13 10:17:13 PM   acc = 0.5812274368231047
12/13 10:17:13 PM   att_loss = 0.0
12/13 10:17:13 PM   cls_loss = 0.28593895166595545
12/13 10:17:13 PM   eval_loss = 0.6814950704574585
12/13 10:17:13 PM   global_step = 3299
12/13 10:17:13 PM   loss = 0.28593895166595545
12/13 10:17:13 PM   rep_loss = 0.0
12/13 10:17:45 PM ***** Running evaluation *****
12/13 10:17:45 PM   Epoch = 6 iter 3399 step
12/13 10:17:45 PM   Num examples = 277
12/13 10:17:45 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:17:45 PM ***** Eval results *****
12/13 10:17:45 PM   acc = 0.5776173285198556
12/13 10:17:45 PM   att_loss = 0.0
12/13 10:17:45 PM   cls_loss = 0.2851940543005318
12/13 10:17:45 PM   eval_loss = 0.6951433883772956
12/13 10:17:45 PM   global_step = 3399
12/13 10:17:45 PM   loss = 0.2851940543005318
12/13 10:17:45 PM   rep_loss = 0.0
12/13 10:18:16 PM ***** Running evaluation *****
12/13 10:18:16 PM   Epoch = 6 iter 3499 step
12/13 10:18:16 PM   Num examples = 277
12/13 10:18:16 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:18:16 PM ***** Eval results *****
12/13 10:18:16 PM   acc = 0.5812274368231047
12/13 10:18:16 PM   att_loss = 0.0
12/13 10:18:16 PM   cls_loss = 0.2857994171953073
12/13 10:18:16 PM   eval_loss = 0.6803915765550401
12/13 10:18:16 PM   global_step = 3499
12/13 10:18:16 PM   loss = 0.2857994171953073
12/13 10:18:16 PM   rep_loss = 0.0
12/13 10:18:47 PM ***** Running evaluation *****
12/13 10:18:47 PM   Epoch = 6 iter 3599 step
12/13 10:18:47 PM   Num examples = 277
12/13 10:18:47 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:18:48 PM ***** Eval results *****
12/13 10:18:48 PM   acc = 0.5451263537906137
12/13 10:18:48 PM   att_loss = 0.0
12/13 10:18:48 PM   cls_loss = 0.2860961846897012
12/13 10:18:48 PM   eval_loss = 0.6999462246894836
12/13 10:18:48 PM   global_step = 3599
12/13 10:18:48 PM   loss = 0.2860961846897012
12/13 10:18:48 PM   rep_loss = 0.0


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:19:19 PM ***** Running evaluation *****
12/13 10:19:19 PM   Epoch = 7 iter 3699 step
12/13 10:19:19 PM   Num examples = 277
12/13 10:19:19 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:19:19 PM ***** Eval results *****
12/13 10:19:19 PM   acc = 0.5342960288808665
12/13 10:19:19 PM   att_loss = 0.0
12/13 10:19:19 PM   cls_loss = 0.2846665451159844
12/13 10:19:19 PM   eval_loss = 0.7045755386352539
12/13 10:19:19 PM   global_step = 3699
12/13 10:19:19 PM   loss = 0.2846665451159844
12/13 10:19:19 PM   rep_loss = 0.0
12/13 10:19:50 PM ***** Running evaluation *****
12/13 10:19:50 PM   Epoch = 7 iter 3799 step
12/13 10:19:50 PM   Num examples = 277
12/13 10:19:50 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:19:50 PM ***** Eval results *****
12/13 10:19:50 PM   acc = 0.5812274368231047
12/13 10:19:50 PM   att_loss = 0.0
12/13 10:19:50 PM   cls_loss = 0.28579004677502734
12/13 10:19:50 PM   eval_loss = 0.6809346477190653
12/13 10:19:50 PM   global_step = 3799
12/13 10:19:50 PM   loss = 0.28579004677502734
12/13 10:19:50 PM   rep_loss = 0.0
12/13 10:20:21 PM ***** Running evaluation *****
12/13 10:20:21 PM   Epoch = 7 iter 3899 step
12/13 10:20:21 PM   Num examples = 277
12/13 10:20:21 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:20:22 PM ***** Eval results *****
12/13 10:20:22 PM   acc = 0.5812274368231047
12/13 10:20:22 PM   att_loss = 0.0
12/13 10:20:22 PM   cls_loss = 0.2854333540040349
12/13 10:20:22 PM   eval_loss = 0.6873681412802802
12/13 10:20:22 PM   global_step = 3899
12/13 10:20:22 PM   loss = 0.2854333540040349
12/13 10:20:22 PM   rep_loss = 0.0
12/13 10:20:53 PM ***** Running evaluation *****
12/13 10:20:53 PM   Epoch = 7 iter 3999 step
12/13 10:20:53 PM   Num examples = 277
12/13 10:20:53 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:20:53 PM ***** Eval results *****
12/13 10:20:53 PM   acc = 0.5595667870036101
12/13 10:20:53 PM   att_loss = 0.0
12/13 10:20:53 PM   cls_loss = 0.2858089101924138
12/13 10:20:53 PM   eval_loss = 0.6906983455022176
12/13 10:20:53 PM   global_step = 3999
12/13 10:20:53 PM   loss = 0.2858089101924138
12/13 10:20:53 PM   rep_loss = 0.0
12/13 10:21:24 PM ***** Running evaluation *****
12/13 10:21:24 PM   Epoch = 7 iter 4099 step
12/13 10:21:24 PM   Num examples = 277
12/13 10:21:24 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:21:24 PM ***** Eval results *****
12/13 10:21:24 PM   acc = 0.5667870036101083
12/13 10:21:24 PM   att_loss = 0.0
12/13 10:21:24 PM   cls_loss = 0.28639178624195333
12/13 10:21:24 PM   eval_loss = 0.6883480350176493
12/13 10:21:24 PM   global_step = 4099
12/13 10:21:24 PM   loss = 0.28639178624195333
12/13 10:21:24 PM   rep_loss = 0.0


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:21:55 PM ***** Running evaluation *****
12/13 10:21:55 PM   Epoch = 8 iter 4199 step
12/13 10:21:55 PM   Num examples = 277
12/13 10:21:55 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:21:55 PM ***** Eval results *****
12/13 10:21:55 PM   acc = 0.5631768953068592
12/13 10:21:55 PM   att_loss = 0.0
12/13 10:21:55 PM   cls_loss = 0.28602526745488566
12/13 10:21:55 PM   eval_loss = 0.6888710856437683
12/13 10:21:55 PM   global_step = 4199
12/13 10:21:55 PM   loss = 0.28602526745488566
12/13 10:21:55 PM   rep_loss = 0.0
12/13 10:22:26 PM ***** Running evaluation *****
12/13 10:22:26 PM   Epoch = 8 iter 4299 step
12/13 10:22:26 PM   Num examples = 277
12/13 10:22:26 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:22:27 PM ***** Eval results *****
12/13 10:22:27 PM   acc = 0.5523465703971119
12/13 10:22:27 PM   att_loss = 0.0
12/13 10:22:27 PM   cls_loss = 0.28574439450984696
12/13 10:22:27 PM   eval_loss = 0.6927235722541809
12/13 10:22:27 PM   global_step = 4299
12/13 10:22:27 PM   loss = 0.28574439450984696
12/13 10:22:27 PM   rep_loss = 0.0
12/13 10:22:58 PM ***** Running evaluation *****
12/13 10:22:58 PM   Epoch = 8 iter 4399 step
12/13 10:22:58 PM   Num examples = 277
12/13 10:22:58 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:22:58 PM ***** Eval results *****
12/13 10:22:58 PM   acc = 0.5776173285198556
12/13 10:22:58 PM   att_loss = 0.0
12/13 10:22:58 PM   cls_loss = 0.28509473103981514
12/13 10:22:58 PM   eval_loss = 0.6890822649002075
12/13 10:22:58 PM   global_step = 4399
12/13 10:22:58 PM   loss = 0.28509473103981514
12/13 10:22:58 PM   rep_loss = 0.0
12/13 10:23:29 PM ***** Running evaluation *****
12/13 10:23:29 PM   Epoch = 8 iter 4499 step
12/13 10:23:29 PM   Num examples = 277
12/13 10:23:29 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:23:30 PM ***** Eval results *****
12/13 10:23:30 PM   acc = 0.5631768953068592
12/13 10:23:30 PM   att_loss = 0.0
12/13 10:23:30 PM   cls_loss = 0.285532381509366
12/13 10:23:30 PM   eval_loss = 0.6893674665027194
12/13 10:23:30 PM   global_step = 4499
12/13 10:23:30 PM   loss = 0.285532381509366
12/13 10:23:30 PM   rep_loss = 0.0
12/13 10:24:01 PM ***** Running evaluation *****
12/13 10:24:01 PM   Epoch = 8 iter 4599 step
12/13 10:24:01 PM   Num examples = 277
12/13 10:24:01 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:24:01 PM ***** Eval results *****
12/13 10:24:01 PM   acc = 0.5740072202166066
12/13 10:24:01 PM   att_loss = 0.0
12/13 10:24:01 PM   cls_loss = 0.28574344152779146
12/13 10:24:01 PM   eval_loss = 0.6930854519208273
12/13 10:24:01 PM   global_step = 4599
12/13 10:24:01 PM   loss = 0.28574344152779146
12/13 10:24:01 PM   rep_loss = 0.0


Iteration:   0%|          | 0/522 [00:00<?, ?it/s]

12/13 10:24:32 PM ***** Running evaluation *****
12/13 10:24:32 PM   Epoch = 9 iter 4699 step
12/13 10:24:32 PM   Num examples = 277
12/13 10:24:32 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:24:33 PM ***** Eval results *****
12/13 10:24:33 PM   acc = 0.5667870036101083
12/13 10:24:33 PM   att_loss = 0.0
12/13 10:24:33 PM   cls_loss = 0.29126897156238557
12/13 10:24:33 PM   eval_loss = 0.6895361012882657
12/13 10:24:33 PM   global_step = 4699
12/13 10:24:33 PM   loss = 0.29126897156238557
12/13 10:24:33 PM   rep_loss = 0.0
12/13 10:25:04 PM ***** Running evaluation *****
12/13 10:25:04 PM   Epoch = 9 iter 4799 step
12/13 10:25:04 PM   Num examples = 277
12/13 10:25:04 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:25:04 PM ***** Eval results *****
12/13 10:25:04 PM   acc = 0.5667870036101083
12/13 10:25:04 PM   att_loss = 0.0
12/13 10:25:04 PM   cls_loss = 0.2853957149115476
12/13 10:25:04 PM   eval_loss = 0.6889904472563002
12/13 10:25:04 PM   global_step = 4799
12/13 10:25:04 PM   loss = 0.2853957149115476
12/13 10:25:04 PM   rep_loss = 0.0
12/13 10:25:35 PM ***** Running evaluation *****
12/13 10:25:35 PM   Epoch = 9 iter 4899 step
12/13 10:25:35 PM   Num examples = 277
12/13 10:25:35 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:25:35 PM ***** Eval results *****
12/13 10:25:35 PM   acc = 0.555956678700361
12/13 10:25:35 PM   att_loss = 0.0
12/13 10:25:35 PM   cls_loss = 0.286812533225332
12/13 10:25:35 PM   eval_loss = 0.698199106587304
12/13 10:25:35 PM   global_step = 4899
12/13 10:25:35 PM   loss = 0.286812533225332
12/13 10:25:35 PM   rep_loss = 0.0
12/13 10:26:06 PM ***** Running evaluation *****
12/13 10:26:06 PM   Epoch = 9 iter 4999 step
12/13 10:26:06 PM   Num examples = 277
12/13 10:26:06 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:26:07 PM ***** Eval results *****
12/13 10:26:07 PM   acc = 0.5740072202166066
12/13 10:26:07 PM   att_loss = 0.0
12/13 10:26:07 PM   cls_loss = 0.286456486390483
12/13 10:26:07 PM   eval_loss = 0.6889664861891005
12/13 10:26:07 PM   global_step = 4999
12/13 10:26:07 PM   loss = 0.286456486390483
12/13 10:26:07 PM   rep_loss = 0.0
12/13 10:26:38 PM ***** Running evaluation *****
12/13 10:26:38 PM   Epoch = 9 iter 5099 step
12/13 10:26:38 PM   Num examples = 277
12/13 10:26:38 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:26:38 PM ***** Eval results *****
12/13 10:26:38 PM   acc = 0.5631768953068592
12/13 10:26:38 PM   att_loss = 0.0
12/13 10:26:38 PM   cls_loss = 0.28596234670499476
12/13 10:26:38 PM   eval_loss = 0.692317869928148
12/13 10:26:38 PM   global_step = 5099
12/13 10:26:38 PM   loss = 0.28596234670499476
12/13 10:26:38 PM   rep_loss = 0.0
12/13 10:27:09 PM ***** Running evaluation *****
12/13 10:27:09 PM   Epoch = 9 iter 5199 step
12/13 10:27:09 PM   Num examples = 277
12/13 10:27:09 PM   Batch size = 32


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:27:10 PM ***** Eval results *****
12/13 10:27:10 PM   acc = 0.5703971119133574
12/13 10:27:10 PM   att_loss = 0.0
12/13 10:27:10 PM   cls_loss = 0.2860311024329242
12/13 10:27:10 PM   eval_loss = 0.6890967157151964
12/13 10:27:10 PM   global_step = 5199
12/13 10:27:10 PM   loss = 0.2860311024329242
12/13 10:27:10 PM   rep_loss = 0.0


In [9]:
# Evaluation run: build a command-line style argument vector from the
# `evaluation` settings and invoke the distillation script's entry point.
# The script name is placed first, matching the usual sys.argv[0] slot.
arguments = ["task_distill_soumyajit.py"] + getArguments(evaluation)

print(f"Arguments passed\n{arguments}")

# main() picks up its options from the process-wide sys.argv (it logs the
# resulting Namespace). Override sys.argv only for the duration of the call
# and always restore it afterwards, so that an exception inside main() or any
# later cell does not keep seeing this run's arguments in the kernel.
_saved_argv = sys.argv
sys.argv = arguments
try:
    task_distill_soumyajit.main()
finally:
    sys.argv = _saved_argv

Arguments passed
['task_distill_soumyajit.py', '--student_model=models/TinyBERT-RTE-(fast)-(0.0)/step2', '--data_dir=data/glue_data/RTE', '--task_name=rte', '--output_dir=models/TinyBERT-RTE-(fast)-(0.0)/eval', '--eval_batch_size=32', '--max_seq_length=128', '--do_lower_case', '--do_eval']
12/13 10:27:13 PM The args: Namespace(data_dir='data/glue_data/RTE', teacher_model=None, student_model='models/TinyBERT-RTE-(fast)-(0.0)/step2', task_name='rte', output_dir='models/TinyBERT-RTE-(fast)-(0.0)/eval', cache_dir='', max_seq_length=128, do_eval=True, do_lower_case=True, train_batch_size=32, eval_batch_size=32, learning_rate=5e-05, weight_decay=0.0001, num_train_epochs=3.0, warmup_proportion=0.1, no_cuda=False, seed=42, gradient_accumulation_steps=1, aug_train=False, aug_mode='slow', fast_aug=0.0, eval_step=50, pred_distill=False, data_url='', temperature=1.0)
12/13 10:27:13 PM device: cuda n_gpu: 1
12/13 10:27:13 PM Writing example 0 of 277
12/13 10:27:13 PM *** Example ***
12/13 10:27:13 

Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

12/13 10:27:14 PM ***** Eval results *****
12/13 10:27:14 PM   acc = 0.6353790613718412
12/13 10:27:14 PM   eval_loss = 0.6559031208356222


In [11]:
# Completion marker: prints a fixed string once this cell is reached.
# NOTE(review): presumably used to confirm the preceding training/evaluation
# cells finished when the notebook is run unattended — confirm.
print("DONE")

DONE
