Useful reading:  
The illustrated transformer: http://jalammar.github.io/illustrated-transformer/  
Attention is all you need: https://arxiv.org/pdf/1706.03762.pdf  
BERT: https://arxiv.org/pdf/1810.04805.pdf  
A Visual Guide to Using BERT for the First Time: http://jalammar.github.io/a-visual-guide-to-using-bert-for-the-first-time/  
Tensorflow hub: https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2

In [21]:
import pandas as pd
from bert import tokenization
import tensorflow_hub as hub
import tensorflow as tf
import numpy as np
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping

In [12]:
train_df = pd.read_csv("nlp-getting-started/train.csv")
test_df = pd.read_csv("nlp-getting-started/test.csv")

In [13]:
bert_layer = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2", trainable = True)
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)

In [17]:
def encode_text(text_input):
    for text in text_input:
        input_ids = []
        sequence_ids = []
        attention_mask_ids = []
        input_tokens = tokenizer.tokenize(text)
        input_tokens = ['[CLS]'] + input_tokens + ['[SEP]']
        padding_len = max_len - len(input_tokens)
        token_ids = tokenizer.convert_tokens_to_ids(input_tokens)
        token_ids += [0] * padding_len
        attention_mask = [1] * len(input_tokens) + [0] * padding_len
        sequence = [1] * max_len

        attention_mask_ids.append(attention_mask)
        sequence_ids.append(sequence)
        input_ids.append(token_ids)
    
        sequence_ids = np.array(sequence_ids)
        input_ids = np.array(input_ids)
        attention_mask_ids = np.array(attention_mask_ids)
        return input_ids, attention_mask_ids, sequence_ids

In [18]:
max_len = 512
labels = np.array(train_df.target)
train_input = encode_text(train_df.text)
test_input = encode_text(test_df.text)

In [21]:
max_seq_length = 512
input_tokens_ids = Input(shape=(max_seq_length,), dtype=tf.int32, name="input_word_ids")
attention_mask_ids= Input(shape=(max_seq_length,), dtype=tf.int32, name="input_mask")  
sequence_ids = Input(shape=(max_seq_length,), dtype=tf.int32, name="segment_ids")
inputs = [input_tokens_ids, attention_mask_ids, sequence_ids]
pooled_output, sequence_output = bert_layer(inputs)
outputs = Dense(1, activation = 'sigmoid')(sequence_output[:, 0, :])
model = Model(inputs=inputs, outputs=outputs)
opt = keras.optimizers.Adam(learning_rate = 0.01)
model.compile(optimizer = opt, loss = 'binary_crossentropy', metric = 'accuracy')

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_word_ids (InputLayer)     [(None, 512)]        0                                            
__________________________________________________________________________________________________
input_mask (InputLayer)         [(None, 512)]        0                                            
__________________________________________________________________________________________________
segment_ids (InputLayer)        [(None, 512)]        0                                            
__________________________________________________________________________________________________
keras_layer_1 (KerasLayer)      [(None, 768), (None, 109482241   input_word_ids[0][0]             
                                                                 input_mask[0][0]           

In [23]:
checkpoint_file_path = "model_checkpoint_weights/ weights.{epoch:02d}-{val_loss:.2f}.h5"
model_checkpoint = ModelCheckpoint(filepath = checkpoint_file_path, 
                                   save_weights_only = True,
                                   monitor = 'val_acc', 
                                   mode = 'max',
                                   save_best_only = True,
                                   verbose = True)
early_stopping = EarlyStopping(monitor="val_loss",
                                min_delta=0.01,
                                patience=3,
                                verbose=1,
                                mode="auto",
                                baseline=None,
                                restore_best_weights=True)

In [None]:
model.fit(input_data, labels, epochs = 10, batch_size = 32, callbacks = [model_checkpoint, early_stoppping], validation_split = 0.3)

Train on 7613 samples


In [None]:
weight_file = ""
model.load_weights(weight_file)
pred = model.predict(test_input)

In [20]:
kaggle_submission = pd.DataFrame(columns = ['target'])
kaggle_submission['target'] = pred.round().astype(int)
kaggle_submission.to_csv('kaggle_submission.csv', index = True)

NameError: name 'pred' is not defined