In [1]:
import os
import sys
import collections
import csv
import pandas as pd
import numpy as np
import tensorflow as tf
import pandas as pd
import numpy as np
import time

print(tf.__version__)

# BERT files
os.listdir("../bert-master")
sys.path.insert(0, '../bert-master')

from run_classifier import *
import modeling
import optimization
import tokenization

1.15.0



In [27]:
data_dir = './data'
output_dir = './output'

if not os.path.exists(data_dir):
    os.makedirs(data_dir)
    
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

In [21]:
train=pd.read_csv('../input/train.csv')
test=pd.read_csv('../input/test.csv')

# remove new lines etc.

train['comment_text'] = train['comment_text'].replace({r'\s+$': '', r'^\s+': ''}, regex=True).replace(r'\n',  ' ', regex=True)
test['comment_text'] = test['comment_text'].replace({r'\s+$': '', r'^\s+': ''}, regex=True).replace(r'\n',  ' ', regex=True)

# force train into cola format, test is fine as it is

train['dummy_1'] = 'meh'
train['dummy_2'] = '*'

train = train[['dummy_1','target','dummy_2','comment_text']]
train['target'] = np.where(train['target']>=0.5,1,0)

train = train.sample(frac=0.01)
test = test.sample(frac=0.01)

# export as tab seperated

train.to_csv(data_dir + '/train.tsv', sep='\t', index=False, header=False)
test.to_csv(data_dir + '/test.tsv', sep='\t', index=False, header=True)

In [28]:
task_name = 'cola'
bert_config_file = '../input/bert_config.json'
vocab_file = '../input/vocab.txt'
init_checkpoint = '../input/bert_model.ckpt'
do_lower_case = True
max_seq_length = 72
do_train = True
do_eval = False
do_predict = False
train_batch_size = 32
eval_batch_size = 32
predict_batch_size = 32
learning_rate = 2e-5 
num_train_epochs = 1.0
warmup_proportion = 0.1
use_tpu = False
master = None
save_checkpoints_steps = 99999999 # <----- don't want to save any checkpoints
iterations_per_loop = 1000
num_tpu_cores = 8
tpu_cluster_resolver = None

In [29]:
start = time.time()
print("--------------------------------------------------------")
print("Starting training ...")
print("--------------------------------------------------------")

bert_config = modeling.BertConfig.from_json_file(bert_config_file)

processor = ColaProcessor()
label_list = processor.get_labels()

tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

tpu_cluster_resolver = None
is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

run_config = tf.contrib.tpu.RunConfig(
  cluster=tpu_cluster_resolver,
  master=master,
  model_dir=output_dir,
  save_checkpoints_steps=save_checkpoints_steps,
  tpu_config=tf.contrib.tpu.TPUConfig(
      iterations_per_loop=iterations_per_loop,
      num_shards=num_tpu_cores,
      per_host_input_for_training=is_per_host))

train_examples = processor.get_train_examples(data_dir)
num_train_steps = int(len(train_examples) / train_batch_size * num_train_epochs)
num_warmup_steps = int(num_train_steps * warmup_proportion)

model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=init_checkpoint,
      learning_rate=learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=use_tpu,
      use_one_hot_embeddings=use_tpu)

estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=train_batch_size)
      
      
train_file = os.path.join(output_dir, "train.tf_record")

file_based_convert_examples_to_features(
    train_examples, label_list, max_seq_length, tokenizer, train_file)

tf.logging.info("***** Running training *****")
tf.logging.info("  Num examples = %d", len(train_examples))
tf.logging.info("  Batch size = %d", train_batch_size)
tf.logging.info("  Num steps = %d", num_train_steps)

train_input_fn = file_based_input_fn_builder(
    input_file=train_file,
    seq_length=max_seq_length,
    is_training=True,
    drop_remainder=True)
    
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

end = time.time()
print("--------------------------------------------------------")
print("Training complete in ", end - start, " seconds")
print("--------------------------------------------------------")

--------------------------------------------------------
Starting training ...
--------------------------------------------------------
INFO:tensorflow:Using config: {'_model_dir': './output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 99999999, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000029BABEA0488>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_

INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/gamma:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (128, 128), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/key/kernel:0, shape = (128, 128), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/key/bias:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/value/kernel:0, shape = (128, 128), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/value/bias:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/output/dense/kernel:0, shape = (128, 128), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder

INFO:tensorflow:examples/sec: 242.501
INFO:tensorflow:global_step/sec: 7.81502
INFO:tensorflow:examples/sec: 250.081
INFO:tensorflow:global_step/sec: 7.67489
INFO:tensorflow:examples/sec: 245.596
INFO:tensorflow:global_step/sec: 8.33596
INFO:tensorflow:examples/sec: 266.751
INFO:tensorflow:global_step/sec: 8.19935
INFO:tensorflow:examples/sec: 262.379
INFO:tensorflow:global_step/sec: 8.13266
INFO:tensorflow:examples/sec: 260.245
INFO:tensorflow:global_step/sec: 8.54975
INFO:tensorflow:examples/sec: 273.592
INFO:tensorflow:global_step/sec: 7.75441
INFO:tensorflow:examples/sec: 248.141
INFO:tensorflow:global_step/sec: 7.87652
INFO:tensorflow:examples/sec: 252.049
INFO:tensorflow:global_step/sec: 8.06709
INFO:tensorflow:examples/sec: 258.147
INFO:tensorflow:global_step/sec: 7.97835
INFO:tensorflow:examples/sec: 255.307
INFO:tensorflow:global_step/sec: 7.93905
INFO:tensorflow:examples/sec: 254.05
INFO:tensorflow:global_step/sec: 7.3016
INFO:tensorflow:examples/sec: 233.651
INFO:tensorflow:

INFO:tensorflow:examples/sec: 247.262
INFO:tensorflow:global_step/sec: 7.75443
INFO:tensorflow:examples/sec: 248.142
INFO:tensorflow:global_step/sec: 7.81499
INFO:tensorflow:examples/sec: 250.08
INFO:tensorflow:global_step/sec: 7.81502
INFO:tensorflow:examples/sec: 250.081
INFO:tensorflow:global_step/sec: 8.13266
INFO:tensorflow:examples/sec: 260.245
INFO:tensorflow:global_step/sec: 7.40978
INFO:tensorflow:examples/sec: 237.113
INFO:tensorflow:global_step/sec: 7.93905
INFO:tensorflow:examples/sec: 254.05
INFO:tensorflow:global_step/sec: 8.1993
INFO:tensorflow:examples/sec: 262.378
INFO:tensorflow:global_step/sec: 7.96382
INFO:tensorflow:examples/sec: 254.842
INFO:tensorflow:global_step/sec: 8.13267
INFO:tensorflow:examples/sec: 260.246
INFO:tensorflow:global_step/sec: 7.93905
INFO:tensorflow:examples/sec: 254.05
INFO:tensorflow:global_step/sec: 8.13267
INFO:tensorflow:examples/sec: 260.246
INFO:tensorflow:global_step/sec: 8.13269
INFO:tensorflow:examples/sec: 260.246
INFO:tensorflow:gl

INFO:tensorflow:examples/sec: 242.502
INFO:tensorflow:global_step/sec: 7.87654
INFO:tensorflow:examples/sec: 252.049
INFO:tensorflow:global_step/sec: 7.97314
INFO:tensorflow:examples/sec: 255.141
INFO:tensorflow:global_step/sec: 8.19933
INFO:tensorflow:examples/sec: 262.379
INFO:tensorflow:global_step/sec: 7.87654
INFO:tensorflow:examples/sec: 252.049
INFO:tensorflow:global_step/sec: 7.69474
INFO:tensorflow:examples/sec: 246.232
INFO:tensorflow:global_step/sec: 7.57818
INFO:tensorflow:examples/sec: 242.502
INFO:tensorflow:global_step/sec: 7.46506
INFO:tensorflow:examples/sec: 238.882
INFO:tensorflow:global_step/sec: 7.57818
INFO:tensorflow:examples/sec: 242.502
INFO:tensorflow:global_step/sec: 7.81499
INFO:tensorflow:examples/sec: 250.08
INFO:tensorflow:global_step/sec: 7.61349
INFO:tensorflow:examples/sec: 243.632
INFO:tensorflow:global_step/sec: 8.19933
INFO:tensorflow:examples/sec: 262.379
INFO:tensorflow:global_step/sec: 8.0671
INFO:tensorflow:examples/sec: 258.147
INFO:tensorflow:

INFO:tensorflow:examples/sec: 262.379
INFO:tensorflow:global_step/sec: 8.3137
INFO:tensorflow:examples/sec: 266.038
INFO:tensorflow:global_step/sec: 8.40603
INFO:tensorflow:examples/sec: 268.993
INFO:tensorflow:global_step/sec: 8.26711
INFO:tensorflow:examples/sec: 264.548
INFO:tensorflow:global_step/sec: 7.81499
INFO:tensorflow:examples/sec: 250.08
INFO:tensorflow:global_step/sec: 8.13147
INFO:tensorflow:examples/sec: 260.207
INFO:tensorflow:global_step/sec: 8.00255
INFO:tensorflow:examples/sec: 256.082
INFO:tensorflow:global_step/sec: 8.19932
INFO:tensorflow:examples/sec: 262.378
INFO:tensorflow:global_step/sec: 8.06709
INFO:tensorflow:examples/sec: 258.147
INFO:tensorflow:global_step/sec: 8.13269
INFO:tensorflow:examples/sec: 260.246
INFO:tensorflow:global_step/sec: 8.10456
INFO:tensorflow:examples/sec: 259.346
INFO:tensorflow:global_step/sec: 8.00255
INFO:tensorflow:examples/sec: 256.082
INFO:tensorflow:global_step/sec: 8.2671
INFO:tensorflow:examples/sec: 264.547
INFO:tensorflow:g

INFO:tensorflow:global_step/sec: 8.24134
INFO:tensorflow:examples/sec: 263.723
INFO:tensorflow:global_step/sec: 8.0671
INFO:tensorflow:examples/sec: 258.147
INFO:tensorflow:global_step/sec: 7.75442
INFO:tensorflow:examples/sec: 248.141
INFO:tensorflow:global_step/sec: 7.52119
INFO:tensorflow:examples/sec: 240.678
INFO:tensorflow:global_step/sec: 8.40606
INFO:tensorflow:examples/sec: 268.994
INFO:tensorflow:global_step/sec: 7.87651
INFO:tensorflow:examples/sec: 252.048
INFO:tensorflow:global_step/sec: 7.75438
INFO:tensorflow:examples/sec: 248.14
INFO:tensorflow:global_step/sec: 7.35532
INFO:tensorflow:examples/sec: 235.37
INFO:tensorflow:global_step/sec: 7.73356
INFO:tensorflow:examples/sec: 247.474
INFO:tensorflow:global_step/sec: 7.815
INFO:tensorflow:examples/sec: 250.08
INFO:tensorflow:global_step/sec: 7.636
INFO:tensorflow:examples/sec: 244.352
INFO:tensorflow:global_step/sec: 7.87655
INFO:tensorflow:examples/sec: 252.05
INFO:tensorflow:global_step/sec: 8.26707
INFO:tensorflow:exam

INFO:tensorflow:examples/sec: 262.379
INFO:tensorflow:global_step/sec: 8.10339
INFO:tensorflow:examples/sec: 259.308
INFO:tensorflow:Saving checkpoints for 564 into ./output\model.ckpt.
INFO:tensorflow:Loss for final step: 0.31889284.
INFO:tensorflow:training_loop marked as finished
--------------------------------------------------------
Training complete in  141.7216010093689  seconds
--------------------------------------------------------


In [30]:
def file_based_input_fn_builder(input_file, seq_length, is_training,
                                drop_remainder):
  """Creates an `input_fn` closure to be passed to TPUEstimator."""

  name_to_features = {
      "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
      "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
      "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
      "label_ids": tf.FixedLenFeature([], tf.int64),
      "is_real_example": tf.FixedLenFeature([], tf.int64),
  }

  def _decode_record(record, name_to_features):
    """Decodes a record to a TensorFlow example."""
    example = tf.parse_single_example(record, name_to_features)

    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
    for name in list(example.keys()):
      t = example[name]
      if t.dtype == tf.int64:
        t = tf.to_int32(t)
      example[name] = t

    return example

  def input_fn(params):
    """The actual input function."""
    
    #batch_size = params["batch_size"]
    batch_size = 64 # <----- hardcoded batch_size added here 
    
    # For training, we want a lot of parallel reading and shuffling.
    # For eval, we want no shuffling and parallel reading doesn't matter.
    d = tf.data.TFRecordDataset(input_file)
    if is_training:
      d = d.repeat()
      d = d.shuffle(buffer_size=100)

    d = d.apply(
        tf.contrib.data.map_and_batch(
            lambda record: _decode_record(record, name_to_features),
            batch_size=batch_size,
            drop_remainder=drop_remainder))

    return d

  return input_fn

In [31]:
start = time.time()
print("--------------------------------------------------------")
print("Starting inference ...")
print("--------------------------------------------------------")

predict_examples = processor.get_test_examples(data_dir)
num_actual_predict_examples = len(predict_examples)

predict_file = os.path.join(output_dir, "predict.tf_record")

file_based_convert_examples_to_features(predict_examples, label_list,
                                        max_seq_length, tokenizer,
                                        predict_file)

tf.logging.info("***** Running prediction*****")
tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                len(predict_examples), num_actual_predict_examples,
                len(predict_examples) - num_actual_predict_examples)
tf.logging.info("  Batch size = %d", predict_batch_size)

predict_drop_remainder = True if use_tpu else False
predict_input_fn = file_based_input_fn_builder(
    input_file=predict_file,
    seq_length=max_seq_length,
    is_training=False,
    drop_remainder=predict_drop_remainder)

result = estimator.predict(input_fn=predict_input_fn)

output_predict_file = os.path.join(output_dir, "test_results.tsv")

with tf.gfile.GFile(output_predict_file, "w") as writer:
    num_written_lines = 0
    tf.logging.info("***** Predict results *****")
    for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
            break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
        
end = time.time()
print("--------------------------------------------------------")
print("Inference complete in ", end - start, " seconds")
print("--------------------------------------------------------")

--------------------------------------------------------
Starting inference ...
--------------------------------------------------------
INFO:tensorflow:Writing example 0 of 973
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: test-1
INFO:tensorflow:tokens: [CLS] " the " " fence " " as you called it was put up in the 12 - 13th century because , in part , the church got tired of losing property and sheep to the oldest son of married priests whose parishes had grown in land and livestock through the sale of ind ##ul ##gence ##s . after the priest died much of the land and goods went to [SEP]
INFO:tensorflow:input_ids: 101 1000 1996 1000 1000 8638 1000 1000 2004 2017 2170 2009 2001 2404 2039 1999 1996 2260 1011 6122 2301 2138 1010 1999 2112 1010 1996 2277 2288 5458 1997 3974 3200 1998 8351 2000 1996 4587 2365 1997 2496 8656 3005 11600 2018 4961 1999 2455 1998 11468 2083 1996 5096 1997 27427 5313 17905 2015 1012 2044 1996 5011 2351 2172 1997 1996 2455 1998 5350 2253 2000 102
INFO:tens

INFO:tensorflow:  name = bert/encoder/layer_0/output/LayerNorm/beta:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/LayerNorm/gamma:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/query/kernel:0, shape = (128, 128), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/query/bias:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/key/kernel:0, shape = (128, 128), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/key/bias:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/value/kernel:0, shape = (128, 128), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/value/bias:0, shape = (128,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/output/dense/kernel:0, shape = (128, 128), *INIT_FROM_CKPT*
INFO:tensorfl