## Define new problem type and data reading function

We'll use the IMDB movie-review dataset as an example.

In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
cd ../

/data3/yjp/bert-multitask-learning


In [3]:
from bert_multitask_learning import (get_or_make_label_encoder, FullTokenizer, 
                                     create_single_problem_generator, train_bert_multitask, 
                                     eval_bert_multitask, DynamicBatchSizeParams, TRAIN, EVAL, PREDICT)
import pickle

In [4]:
# Maps each new problem name to its problem type; 'cls' is the type used for
# the IMDB problem registered below.
new_problem_type = dict(imdb_cls='cls')

def imdb_cls(params, mode):
    """Data-reading function for the ``imdb_cls`` problem.

    Loads the Keras IMDB dataset, converts the integer-encoded reviews of the
    split selected by ``mode`` back into lists of word strings, and hands them
    to the multitask pipeline.

    Args:
        params: Parameter object. ``params.vocab_file`` is read here to build
            the tokenizer; the object itself is forwarded to
            ``get_or_make_label_encoder`` and
            ``create_single_problem_generator``.
        mode: One of ``TRAIN``, ``EVAL`` or ``PREDICT``. ``TRAIN`` selects the
            training split; every other mode selects the test split.

    Returns:
        For ``PREDICT``: the tuple ``(input_list, target_list, label_encoder)``.
        Otherwise: whatever ``create_single_problem_generator`` returns for
        the selected split.
    """
    tokenizer = FullTokenizer(vocab_file=params.vocab_file)

    # get data
    (train_data, train_labels), (test_data, test_labels) = keras.datasets.imdb.load_data(num_words=10000)

    # The label arrays are NumPy arrays, for which `+` is element-wise
    # addition (yielding 0/1/2), not concatenation. Convert to lists first so
    # the label encoder only ever sees the real labels.
    all_labels = list(train_labels) + list(test_labels)
    label_encoder = get_or_make_label_encoder(params, 'imdb_cls', mode, all_labels)

    # Rebuild the id -> word table. Ids in the loaded data are shifted by
    # `index_from` relative to get_word_index(); ids 0, 1 and 2 are given the
    # placeholder tokens below.
    index_from = 3
    word_to_id = {word: (idx + index_from)
                  for word, idx in keras.datasets.imdb.get_word_index().items()}
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2
    id_to_word = {idx: word for word, idx in word_to_id.items()}

    # Pick the split first so that only the split actually used is decoded.
    if mode == TRAIN:
        encoded_inputs, target_list = train_data, train_labels
    else:
        encoded_inputs, target_list = test_data, test_labels

    input_list = [[id_to_word[i] for i in sentence] for sentence in encoded_inputs]

    if mode == PREDICT:
        return input_list, target_list, label_encoder

    return create_single_problem_generator('imdb_cls', input_list, target_list, label_encoder, params, tokenizer, mode)

# Maps each new problem name to the function that reads its data.
new_problem_process_fn_dict = dict(imdb_cls=imdb_cls)
    

## Train Model

Please make sure you're using the correct checkpoint to initialize the model.

In [5]:
# Run configuration; the model is initialized from the checkpoint set here.
params = DynamicBatchSizeParams()
params.init_checkpoint = 'models/cased_L-12_H-768_A-12'

# DEBUG verbosity logs variable initialization and example inputs, which makes
# the cell output very long.
tf.logging.set_verbosity(tf.logging.DEBUG)

# Kept as the last expression of the cell so the returned estimator is displayed.
train_bert_multitask(
    problem='imdb_cls',
    num_gpus=1,
    num_epochs=10,
    params=params,
    problem_type_dict=new_problem_type,
    processing_fn_dict=new_problem_process_fn_dict,
)

Adding new problem imdb_cls, problem type: cls
INFO:tensorflow:Device is available but not used by distribute strategy: /device:CPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_CPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:1
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:2
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:3
INFO:tensorflow:Device is available but not used by distribute strategy: /device:GPU:1
INFO:tensorflow:Device is available but not used by distribute strategy: /device:GPU:2
INFO:tensorflow:Device is available but not used by distribute strategy: /device:GPU:3
INFO:tensorflow:Configured nccl all-reduce.
INFO:tensorflow:Initializing RunConfig with distribution strategies.
INFO:tensorflow:Not using Distribute 

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:batch_all_reduce invoked for batches size = 201 with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
DEBUG:tensorflow:Initialize variable bert/embeddings/LayerNorm/beta:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/embeddings/LayerNorm/beta
DEBUG:tensorflow:Initialize variable bert/embeddings/LayerNorm/gamma:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/embeddings/LayerNorm/gamma
DEBUG:tensorflow:Initialize variable bert/embeddings/position_embeddings:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/embeddings/position_embeddings
DEBUG:tensorflow:Initialize variable bert/embeddings/token_type_embeddings:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/embeddings/token_type_embeddings
DEBUG:tensorflow:Initialize variable bert/embeddings/word_embeddings:0 from checkpoint models/cased_L-12_H-768_A-12 with

DEBUG:tensorflow:Initialize variable bert/encoder/layer_10/attention/self/query/kernel:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_10/attention/self/query/kernel
DEBUG:tensorflow:Initialize variable bert/encoder/layer_10/attention/self/value/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_10/attention/self/value/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_10/attention/self/value/kernel:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_10/attention/self/value/kernel
DEBUG:tensorflow:Initialize variable bert/encoder/layer_10/intermediate/dense/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_10/intermediate/dense/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_10/intermediate/dense/kernel:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_10/intermediate/dense/kernel
DEBUG:tensorflow:Initialize variable bert/encoder/layer_10/output/Layer

DEBUG:tensorflow:Initialize variable bert/encoder/layer_3/attention/self/key/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_3/attention/self/key/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_3/attention/self/key/kernel:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_3/attention/self/key/kernel
DEBUG:tensorflow:Initialize variable bert/encoder/layer_3/attention/self/query/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_3/attention/self/query/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_3/attention/self/query/kernel:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_3/attention/self/query/kernel
DEBUG:tensorflow:Initialize variable bert/encoder/layer_3/attention/self/value/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_3/attention/self/value/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_3/attention/self/value/kernel

DEBUG:tensorflow:Initialize variable bert/encoder/layer_6/attention/output/LayerNorm/gamma:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_6/attention/output/LayerNorm/gamma
DEBUG:tensorflow:Initialize variable bert/encoder/layer_6/attention/output/dense/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_6/attention/output/dense/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_6/attention/output/dense/kernel:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_6/attention/output/dense/kernel
DEBUG:tensorflow:Initialize variable bert/encoder/layer_6/attention/self/key/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_6/attention/self/key/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_6/attention/self/key/kernel:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_6/attention/self/key/kernel
DEBUG:tensorflow:Initialize variable bert/encoder/layer_6/atten

DEBUG:tensorflow:Initialize variable bert/encoder/layer_8/output/dense/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_8/output/dense/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_8/output/dense/kernel:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_8/output/dense/kernel
DEBUG:tensorflow:Initialize variable bert/encoder/layer_9/attention/output/LayerNorm/beta:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_9/attention/output/LayerNorm/beta
DEBUG:tensorflow:Initialize variable bert/encoder/layer_9/attention/output/LayerNorm/gamma:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_9/attention/output/LayerNorm/gamma
DEBUG:tensorflow:Initialize variable bert/encoder/layer_9/attention/output/dense/bias:0 from checkpoint models/cased_L-12_H-768_A-12 with bert/encoder/layer_9/attention/output/dense/bias
DEBUG:tensorflow:Initialize variable bert/encoder/layer_9/attention/output/dense/ke

DEBUG:tensorflow:*** Example ***
DEBUG:tensorflow:tokens: [CLS] < ##star ##t ##> big hair big b ##oo ##bs bad music and a giant safety pin these are the words to best describe this terrible movie i love ch ##ees ##y horror movies and i ##' ##ve seen hundreds but this had got to be on of the worst ever made the plot is paper thin and ridiculous the acting is an a ##bo ##mination the script is completely laugh ##able the best is the end show ##down with the cop and how he worked out who the killer is it ##' ##s just so damn terribly written the clothes are sick ##ening and funny in equal < ##unk ##> the hair is big lots of b ##oo ##bs < ##unk ##> men wear those cut < ##unk ##> shirts that show off their < ##unk ##> sick ##ening that men actually wore them and the music is just < ##unk ##> trash that plays over and over again in almost every scene there is trash ##y music b ##oo ##bs and < ##unk ##> taking away bodies and the gym still doesn ##' ##t close for < ##unk ##> all joking aside 

DEBUG:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
DEBUG:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

DEBUG:tensorflow:input_ids: 101 133 10058 1204 28144 1992 1716 1992 171 5658 4832 2213 1390 1105 170 4994 3429 10473 1292 1132 1103 1734 1106 1436 5594 1142 6434 2523 178 1567 22572 8870 1183 5367 5558 1105 178 28131 2707 1562 5229 1133 1142 1125 1400 1106 1129 1113 1104 1103 4997 1518 1189 1103 4928 1110 2526 4240 1105 9944 1103 3176 1110 1126 170 4043 9204 1103 5444 1110 2423 4046 1895 1103 1436 1110 1103 1322 1437 5455 1114 1103 9947 1105 1293 1119 1589 1149 1150 1103 7010 1110 1122 28131 1116 1198 1177 4477 18049 1637 1103 3459 1132 4809 4777 1105 6276 1107 4463 133 12660 28144 1103 1716 1110 1992 7424 1104 171 5658 4832 133 12660 28144 1441 4330 1343 2195 133 12660 28144 11710 1115 1437 1228 1147 133 12660 28144 4809 4777 1115 1441 2140 4307 1172 1105 1103 1390 1110 1198 133 12660 28144 13151 1115 2399 1166 1105 1166 1254 1107 1593 1451 2741 1175 1110 13151 1183 1390 171 5658 4832 1105 133 12660 28144 1781 1283 3470 1105 1103 10759 1253 2144 28131 1204 1601 1111 133 12660 28144 11

DEBUG:tensorflow:imdb_cls_label_ids: 2
DEBUG:tensorflow:imdb_cls_label: 1
DEBUG:tensorflow:*** Example ***
DEBUG:tensorflow:tokens: [CLS] < ##star ##t ##> worst mistake of my life br br i picked this movie up at target for 5 because i figured hey it ##' ##s sand ##ler i can get some cheap laughs i was wrong completely wrong mid way through the film all three of my friends were asleep and i was still suffering worst plot worst script worst movie i have ever seen i wanted to hit my head up against a wall for an hour then i ##' ##d stop and you know why because it felt damn good upon b ##ashi ##ng my head in i stuck that damn movie in the < ##unk ##> and watched it burn and that felt better than anything else i ##' ##ve ever done it took am ##eric ##an ps ##ych ##o army of darkness and kill bill just to get over that crap i hate you sand ##ler for actually going through with this and ruin ##ing a whole day of my life [SEP]
DEBUG:tensorflow:input_ids: 101 133 10058 1204 28144 4997 6223 110

DEBUG:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
DEBUG:tensorflow:imdb_cls_label_ids: 2
DEBUG:tensorflow:imdb_cls_label: 1
DEBUG:tensorflow:*** Example ***
DEBUG:tensorflow:tokens: [CLS] < ##star ##t ##> big hair big b ##oo ##bs bad music and a giant safety pin these are the words to best describe this terrible movie i love ch ##ees ##y horror movies and i ##' ##ve seen hundreds but this had got to be on of the worst ever made the plot is paper thin and ridiculous the acting is an a ##bo ##mination the script is com

DEBUG:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
DEBUG:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

DEBUG:tensorflow:input_ids: 101 133 10058 1204 28144 1142 1273 1108 1198 8431 9616 2450 19335 1642 2447 2490 28131 1116 1541 10891 1103 1226 1152 1307 1105 1128 1180 1198 5403 1217 1175 11580 3740 133 12660 28144 1110 1126 6929 2811 1105 1208 1103 1269 1217 1900 133 12660 28144 1401 1338 1121 1103 1269 188 11627 2944 2248 1112 1991 1177 178 3097 1103 1864 1175 1108 170 1842 3797 1114 1142 1273 1103 20787 2340 13570 2032 1103 1273 1127 1632 1122 1108 1198 8431 1177 1277 1115 178 3306 1103 1273 1112 1770 1112 1122 1108 1308 1111 133 12660 28144 1105 1156 18029 1122 1106 2490 1106 2824 1105 1103 4689 5339 1108 6929 1541 6104 1120 1103 1322 1122 1108 1177 6782 1105 1128 1221 1184 1152 1474 1191 1128 5354 1120 170 1273 1122 1538 1138 1151 1363 1105 1142 5397 1108 1145 133 12660 28144 1106 1103 1160 1376 2298 28131 1116 1115 1307 1103 133 12660 28144 1104 18570 1389 1105 185 18318 1152 1127 1198 8431 1482 1132 1510 1286 1149 1104 1103 133 12660 28144 2190 178 1341 1272 1103 2940 1115 1505 11

DEBUG:tensorflow:input_ids: 101 133 10058 1204 28144 1103 133 12660 28144 133 12660 28144 1120 25514 1103 2361 3271 1242 1201 1170 1103 1856 178 1169 1253 1267 1107 1139 133 12660 28144 2552 1126 9808 5141 1139 1910 28131 1116 1534 1231 7854 1979 1103 2321 1104 133 12660 28144 1131 2228 1103 2650 1435 3534 1123 7615 1110 1115 1104 1126 2552 7737 1141 1106 1103 1958 1113 1103 133 12660 28144 3208 1324 170 2837 1137 1177 1121 1187 1131 2491 9304 9304 1104 1736 1122 2171 1242 1201 1196 1131 1108 1255 1133 1128 2010 28131 1204 3319 1121 1103 1236 1131 3301 1122 1103 1269 1642 1110 1500 1107 6668 1103 2251 1105 133 12660 28144 1104 188 18982 1931 1112 178 6352 1122 1114 170 1910 1141 1480 1107 133 12660 28144 170 1469 2195 1107 1106 1660 1117 1683 1103 6145 1598 1106 5134 1159 9304 9304 2801 2085 1205 1176 1142 1561 1226 1104 1412 1217 1150 2144 28131 1204 2676 1103 2801 1412 2153 1500 1366 1165 1195 1127 1482 1152 1561 1412 10062 1362 1105 1112 1195 4328 2214 1152 2654 1253 2867 1112 7670 

INFO:tensorflow:loss = 0.008170923, step = 3700 (55.181 sec)
INFO:tensorflow:imdb_cls_loss = 0.008170923, learning_rate = 1.0527394e-05, total_training_steps = 7812 (55.189 sec)
INFO:tensorflow:global_step/sec: 1.81638
INFO:tensorflow:loss = 0.08452615, step = 3800 (55.060 sec)
INFO:tensorflow:imdb_cls_loss = 0.08452615, learning_rate = 1.02713775e-05, total_training_steps = 7812 (55.060 sec)
INFO:tensorflow:global_step/sec: 1.80878
INFO:tensorflow:loss = 0.0048134914, step = 3900 (55.273 sec)
INFO:tensorflow:imdb_cls_loss = 0.0048134914, learning_rate = 1.0015361e-05, total_training_steps = 7812 (55.274 sec)
INFO:tensorflow:global_step/sec: 1.81287
INFO:tensorflow:loss = 0.0016526552, step = 4000 (55.177 sec)
INFO:tensorflow:imdb_cls_loss = 0.0016526552, learning_rate = 9.759345e-06, total_training_steps = 7812 (55.175 sec)
DEBUG:tensorflow:*** Example ***
DEBUG:tensorflow:tokens: [CLS] < ##star ##t ##> this film was just brilliant casting location scenery story direction everyone ##'

DEBUG:tensorflow:input_ids: 101 133 10058 1204 28144 1142 1144 1106 1129 1141 1104 1103 4997 2441 1104 1103 3281 1165 1139 2053 178 1127 2903 1142 1273 1217 1103 4010 3703 1122 1108 5850 1120 1195 1198 2068 2542 1103 1148 1544 1126 2396 1114 1412 19946 6893 1103 1837 1120 1293 2213 1122 1541 1108 1103 1832 1104 1103 1159 2490 1950 1107 1103 4041 1198 1408 2520 1106 1296 1168 2128 1137 2412 6675 1154 1147 3618 13433 1115 1152 2140 3004 1948 1152 1125 133 12660 28144 1684 1106 2824 1142 7216 2165 9107 1111 170 1273 1122 1538 1138 1350 1176 170 1632 1911 1113 2526 1133 1113 1273 1122 2736 1176 1185 1141 1107 1103 1273 1144 170 9956 1184 1110 1280 1113 11074 3176 11074 13452 178 1169 28131 1204 1243 1506 1293 133 12660 28144 1142 1110 1106 2824 3277 3739 1126 2396 170 2113 1104 1240 1297 102
DEBUG:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

INFO:tensorflow:global_step/sec: 1.7186
INFO:tensorflow:loss = 0.002199422, step = 4200 (58.194 sec)
INFO:tensorflow:imdb_cls_loss = 0.002199422, learning_rate = 9.247311e-06, total_training_steps = 7812 (58.189 sec)
INFO:tensorflow:global_step/sec: 1.81065
INFO:tensorflow:loss = 0.005737393, step = 4300 (55.240 sec)
INFO:tensorflow:imdb_cls_loss = 0.005737393, learning_rate = 8.991295e-06, total_training_steps = 7812 (55.245 sec)
INFO:tensorflow:global_step/sec: 1.81954
INFO:tensorflow:loss = 0.004064858, step = 4400 (54.957 sec)
INFO:tensorflow:imdb_cls_loss = 0.004064858, learning_rate = 8.735278e-06, total_training_steps = 7812 (54.957 sec)
INFO:tensorflow:global_step/sec: 1.82059
INFO:tensorflow:loss = 0.4254811, step = 4500 (54.926 sec)
INFO:tensorflow:imdb_cls_loss = 0.4254811, learning_rate = 8.479263e-06, total_training_steps = 7812 (54.921 sec)
INFO:tensorflow:global_step/sec: 1.817
INFO:tensorflow:loss = 0.1292316, step = 4600 (55.015 sec)
INFO:tensorflow:imdb_cls_loss = 0.1

DEBUG:tensorflow:input_ids: 101 133 10058 1204 28144 1992 1716 1992 171 5658 4832 2213 1390 1105 170 4994 3429 10473 1292 1132 1103 1734 1106 1436 5594 1142 6434 2523 178 1567 22572 8870 1183 5367 5558 1105 178 28131 2707 1562 5229 1133 1142 1125 1400 1106 1129 1113 1104 1103 4997 1518 1189 1103 4928 1110 2526 4240 1105 9944 1103 3176 1110 1126 170 4043 9204 1103 5444 1110 2423 4046 1895 1103 1436 1110 1103 1322 1437 5455 1114 1103 9947 1105 1293 1119 1589 1149 1150 1103 7010 1110 1122 28131 1116 1198 1177 4477 18049 1637 1103 3459 1132 4809 4777 1105 6276 1107 4463 133 12660 28144 1103 1716 1110 1992 7424 1104 171 5658 4832 133 12660 28144 1441 4330 1343 2195 133 12660 28144 11710 1115 1437 1228 1147 133 12660 28144 4809 4777 1115 1441 2140 4307 1172 1105 1103 1390 1110 1198 133 12660 28144 13151 1115 2399 1166 1105 1166 1254 1107 1593 1451 2741 1175 1110 13151 1183 1390 171 5658 4832 1105 133 12660 28144 1781 1283 3470 1105 1103 10759 1253 2144 28131 1204 1601 1111 133 12660 28144 11

DEBUG:tensorflow:imdb_cls_label_ids: 2
DEBUG:tensorflow:imdb_cls_label: 1
DEBUG:tensorflow:*** Example ***
DEBUG:tensorflow:tokens: [CLS] < ##star ##t ##> worst mistake of my life br br i picked this movie up at target for 5 because i figured hey it ##' ##s sand ##ler i can get some cheap laughs i was wrong completely wrong mid way through the film all three of my friends were asleep and i was still suffering worst plot worst script worst movie i have ever seen i wanted to hit my head up against a wall for an hour then i ##' ##d stop and you know why because it felt damn good upon b ##ashi ##ng my head in i stuck that damn movie in the < ##unk ##> and watched it burn and that felt better than anything else i ##' ##ve ever done it took am ##eric ##an ps ##ych ##o army of darkness and kill bill just to get over that crap i hate you sand ##ler for actually going through with this and ruin ##ing a whole day of my life [SEP]
DEBUG:tensorflow:input_ids: 101 133 10058 1204 28144 4997 6223 110

DEBUG:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
DEBUG:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

DEBUG:tensorflow:input_ids: 101 133 10058 1204 28144 1103 133 12660 28144 133 12660 28144 1120 25514 1103 2361 3271 1242 1201 1170 1103 1856 178 1169 1253 1267 1107 1139 133 12660 28144 2552 1126 9808 5141 1139 1910 28131 1116 1534 1231 7854 1979 1103 2321 1104 133 12660 28144 1131 2228 1103 2650 1435 3534 1123 7615 1110 1115 1104 1126 2552 7737 1141 1106 1103 1958 1113 1103 133 12660 28144 3208 1324 170 2837 1137 1177 1121 1187 1131 2491 9304 9304 1104 1736 1122 2171 1242 1201 1196 1131 1108 1255 1133 1128 2010 28131 1204 3319 1121 1103 1236 1131 3301 1122 1103 1269 1642 1110 1500 1107 6668 1103 2251 1105 133 12660 28144 1104 188 18982 1931 1112 178 6352 1122 1114 170 1910 1141 1480 1107 133 12660 28144 170 1469 2195 1107 1106 1660 1117 1683 1103 6145 1598 1106 5134 1159 9304 9304 2801 2085 1205 1176 1142 1561 1226 1104 1412 1217 1150 2144 28131 1204 2676 1103 2801 1412 2153 1500 1366 1165 1195 1127 1482 1152 1561 1412 10062 1362 1105 1112 1195 4328 2214 1152 2654 1253 2867 1112 7670 

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x7fcf88779be0>

## Evaluate Model


In [6]:
# Evaluate the trained model with the 'acc' scheme, reusing the `params`
# object from the training cell, then print the returned metrics.
eval_metrics = eval_bert_multitask(
    problem='imdb_cls',
    num_gpus=1,
    params=params,
    eval_scheme='acc',
    problem_type_dict=new_problem_type,
    processing_fn_dict=new_problem_process_fn_dict,
)
print(eval_metrics)

Params problem assigned. Problem list: ['imdb_cls']
INFO:tensorflow:Device is available but not used by distribute strategy: /device:CPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_CPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:1
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:2
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:3
INFO:tensorflow:Device is available but not used by distribute strategy: /device:GPU:1
INFO:tensorflow:Device is available but not used by distribute strategy: /device:GPU:2
INFO:tensorflow:Device is available but not used by distribute strategy: /device:GPU:3
INFO:tensorflow:Configured nccl all-reduce.
INFO:tensorflow:Initializing RunConfig with distribution strategies.
INFO:tensorflow:Not using Distri

Processing Inputs: 100%|██████████| 25000/25000 [04:53<00:00, 85.21it/s] 


{'imdb_cls_Accuracy': 0.91272, 'imdb_cls_Accuracy Per Sequence': 0.91272}
