In [1]:
import tensorflow as tf
from bert import run_classifier,modeling
from datetime import datetime

In [2]:
DATA_DIR = "./DATA_DIR"
OUTPUT_DIR = "./OUTPUT_DIR"
VOCAB_FILE = "./BERT_BASE_DIR/vocab.txt"
BERT_CONFIG_FILE = "./BERT_BASE_DIR/bert_config.json"
INIT_CHECKPOINT = "./BERT_BASE_DIR/bert_model.ckpt"
TRAIN_FILE = "./DATA_DIR/train.tf_record"
TEST_FILE = "./DATA_DIR/test.tf_record"
MAX_SEQ_LENGTH = 128
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 10.0
WARMUP_PROPORTION = 0.1
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 50
NUM_TRAIN_STEPS = 12184
NUM_WARMUP_STEPS = 1218

In [6]:
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    bert_config = modeling.BertConfig.from_json_file(BERT_CONFIG_FILE)
    if MAX_SEQ_LENGTH > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model was only trained up to sequence length %d"
            % (MAX_SEQ_LENGTH, bert_config.max_position_embeddings))
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=None,
        master=None,
        model_dir=OUTPUT_DIR,
        save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=SAVE_SUMMARY_STEPS,
            num_shards=8,
            per_host_input_for_training=is_per_host))

    model_fn = run_classifier.model_fn_builder(
        bert_config=bert_config,
        num_labels=7,
        init_checkpoint=INIT_CHECKPOINT,
        learning_rate=LEARNING_RATE,
        num_train_steps=NUM_TRAIN_STEPS,
        num_warmup_steps=NUM_WARMUP_STEPS,
        use_tpu=False,
        use_one_hot_embeddings=False)

    estimator = tf.contrib.tpu.TPUEstimator(use_tpu=False,
                                            model_fn=model_fn,
                                            config=run_config,
                                            train_batch_size=BATCH_SIZE,
                                            eval_batch_size=8,
                                            predict_batch_size=8)

    train_input_fn = run_classifier.file_based_input_fn_builder(
        input_file=TRAIN_FILE,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=True)
    print("=============== begin to train ===============")
    current_time = datetime.now()
    estimator.train(input_fn=train_input_fn, max_steps=NUM_TRAIN_STEPS)
    print("=============== end of train ===============")
    print("Training took time ", datetime.now() - current_time)

In [7]:
tf.app.run()

INFO:tensorflow:Using config: {'_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 1, '_service': None, '_evaluation_master': '', '_master': '', '_task_type': 'worker', '_device_fn': None, '_num_ps_replicas': 0, '_task_id': 0, '_model_dir': './OUTPUT_DIR', '_save_summary_steps': 100, '_global_id_in_cluster': 0, '_tf_random_seed': None, '_tpu_config': TPUConfig(iterations_per_loop=50, num_shards=8, computation_shape=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None), '_save_checkpoints_secs': None, '_is_chief': True, '_train_distribute': None, '_save_checkpoints_steps': 500, '_session_config': None, '_cluster': None, '_log_step_count_steps': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fca8dba0860>, '_keep_checkpoint_max': 5}
INFO:tensorflow:_TPUContext: eval_on_tpu True
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running train on CPU
INFO:tensorflow:*** Features ***
INFO:tensorflow:  name 

INFO:tensorflow:Initialize variable bert/encoder/layer_10/attention/output/LayerNorm/beta:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_10/attention/output/LayerNorm/beta
INFO:tensorflow:Initialize variable bert/encoder/layer_10/attention/output/LayerNorm/gamma:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_10/attention/output/LayerNorm/gamma
INFO:tensorflow:Initialize variable bert/encoder/layer_10/attention/output/dense/bias:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_10/attention/output/dense/bias
INFO:tensorflow:Initialize variable bert/encoder/layer_10/attention/output/dense/kernel:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_10/attention/output/dense/kernel
INFO:tensorflow:Initialize variable bert/encoder/layer_10/attention/self/key/bias:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_10/attention/self/key/bias
INFO:tensorflow:Initialize vari

INFO:tensorflow:Initialize variable bert/encoder/layer_2/output/LayerNorm/beta:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_2/output/LayerNorm/beta
INFO:tensorflow:Initialize variable bert/encoder/layer_2/output/LayerNorm/gamma:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_2/output/LayerNorm/gamma
INFO:tensorflow:Initialize variable bert/encoder/layer_2/output/dense/bias:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_2/output/dense/bias
INFO:tensorflow:Initialize variable bert/encoder/layer_2/output/dense/kernel:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_2/output/dense/kernel
INFO:tensorflow:Initialize variable bert/encoder/layer_3/attention/output/LayerNorm/beta:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_3/attention/output/LayerNorm/beta
INFO:tensorflow:Initialize variable bert/encoder/layer_3/attention/output/LayerNorm/gamma:0 from checkpoi

INFO:tensorflow:Initialize variable bert/encoder/layer_5/attention/self/value/kernel:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_5/attention/self/value/kernel
INFO:tensorflow:Initialize variable bert/encoder/layer_5/intermediate/dense/bias:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_5/intermediate/dense/bias
INFO:tensorflow:Initialize variable bert/encoder/layer_5/intermediate/dense/kernel:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_5/intermediate/dense/kernel
INFO:tensorflow:Initialize variable bert/encoder/layer_5/output/LayerNorm/beta:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_5/output/LayerNorm/beta
INFO:tensorflow:Initialize variable bert/encoder/layer_5/output/LayerNorm/gamma:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_5/output/LayerNorm/gamma
INFO:tensorflow:Initialize variable bert/encoder/layer_5/output/dense/bias:0 from checkpo

INFO:tensorflow:Initialize variable bert/encoder/layer_8/attention/self/query/bias:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_8/attention/self/query/bias
INFO:tensorflow:Initialize variable bert/encoder/layer_8/attention/self/query/kernel:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_8/attention/self/query/kernel
INFO:tensorflow:Initialize variable bert/encoder/layer_8/attention/self/value/bias:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_8/attention/self/value/bias
INFO:tensorflow:Initialize variable bert/encoder/layer_8/attention/self/value/kernel:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_8/attention/self/value/kernel
INFO:tensorflow:Initialize variable bert/encoder/layer_8/intermediate/dense/bias:0 from checkpoint ./BERT_BASE_DIR/bert_model.ckpt with bert/encoder/layer_8/intermediate/dense/bias
INFO:tensorflow:Initialize variable bert/encoder/layer_8/intermediate/d

INFO:tensorflow:  name = bert/encoder/layer_1/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  nam

INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKP

INFO:tensorflow:  name = bert/encoder/layer_10/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
