In [1]:
import sys
import random
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras

!git clone https://github.com/kpe/bert-for-tf2.git
!pip install -r bert-for-tf2/requirements.txt
!pip install sentencepiece

sys.path.append("bert-for-tf2/")

import bert
from bert.model import BertModelLayer
from bert.loader import params_from_pretrained_ckpt, load_stock_weights
from bert.tokenization.bert_tokenization import FullTokenizer

from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

Cloning into 'bert-for-tf2'...
remote: Enumerating objects: 118, done.[K
remote: Counting objects: 100% (118/118), done.[K
remote: Compressing objects: 100% (82/82), done.[K
remote: Total 997 (delta 49), reused 78 (delta 24), pack-reused 879[K
Receiving objects: 100% (997/997), 295.08 KiB | 443.00 KiB/s, done.
Resolving deltas: 100% (559/559), done.
Collecting py-params>=0.9.6
  Downloading py-params-0.10.2.tar.gz (7.4 kB)
Collecting params-flow>=0.8.0
  Downloading params-flow-0.8.2.tar.gz (22 kB)
Building wheels for collected packages: py-params, params-flow
  Building wheel for py-params (setup.py) ... [?25ldone
[?25h  Created wheel for py-params: filename=py_params-0.10.2-py3-none-any.whl size=7911 sha256=31f0fa6a9526bcb46f30ba5b931f4c3fb5e0b599d114c618b605524b9debe8c3
  Stored in directory: /Users/kejinglin/Library/Caches/pip/wheels/ac/26/e9/df16869ccbd4abf517f1ff3be9a2c7ee5c5980fc87eea04fb1
  Building wheel for params-flow (setup.py) ... [?25ldone
[?25h  Created wheel for

In [2]:
# Load the labelled tweets; the `text` and `sentiment` columns are read below.
# NOTE(review): absolute, user-specific path - consider a configurable data directory.
df_train = pd.read_csv("/Users/kejinglin/Desktop/personal/apple-twitter-sentiment-texts.csv")

data = df_train['text'].values
# Shift the labels by +1 so that a -1 label becomes 0: with -1 among the
# labels the loss could come out as NaN.
labels = df_train['sentiment'].values + 1

# Fixed seed so that the train/validation partition is identical on every
# "Restart Kernel -> Run All" (the original split was unseeded).
SPLIT_SEED = 42
x_train_text, x_valid_text, y_train, y_valid = train_test_split(
    data, labels, test_size=0.10, shuffle=True, random_state=SPLIT_SEED
)

# Preview the raw frame; it has to be the last expression of the cell to be rendered.
df_train.head()

In [6]:
# Length every token-id sequence is padded to; also passed to the model as its input length.
SEQ_LEN = 128
# Number of output units of the classifier's final softmax layer.
CLASS = 3
# Directory of the pre-trained BERT files; 'vocab.txt' and 'bert_model.ckpt' are read from it.
# NOTE(review): absolute, user-specific path - consider making this configurable.
MODEL_PATH = '/Users/kejinglin/Desktop/personal/uncased_L-12_H-768_A-12/'

In [7]:
# MODEL_PATH points at an *uncased* BERT checkpoint, whose vocabulary is
# lower-case, so the tokenizer has to lower-case its input too.
tokenizer = FullTokenizer(MODEL_PATH + 'vocab.txt', do_lower_case=True)


def tokenize_texts(texts, tokenizer, max_seq_len):
    """Tokenize each text and wrap it in ``[CLS]`` ... ``[SEP]``.

    Args:
        texts: iterable of raw texts; every item is passed through ``str()``.
        tokenizer: object exposing ``tokenize(text) -> list of tokens``.
        max_seq_len: maximum number of tokens per row, markers included.

    Returns:
        A list with one token list per input text, each at most
        ``max_seq_len`` tokens long.
    """
    # Two positions are reserved for the [CLS] and [SEP] markers; longer
    # texts are truncated so that every row fits into max_seq_len.
    body_len = max(max_seq_len - 2, 0)
    return [
        ["[CLS]"] + tokenizer.tokenize(str(text))[:body_len] + ["[SEP]"]
        for text in texts
    ]


def tokens_to_padded_ids(token_rows, tokenizer, max_seq_len, pad_id=0):
    """Convert token lists to a ``(len(token_rows), max_seq_len)`` id matrix.

    Args:
        token_rows: list of token lists, e.g. the result of ``tokenize_texts``.
        tokenizer: object exposing ``convert_tokens_to_ids(tokens)``.
        max_seq_len: width of the returned matrix.
        pad_id: id used to right-pad rows shorter than ``max_seq_len``.

    Returns:
        An ``int32`` numpy array with one right-padded row per token list.
    """
    padded_rows = []
    for tokens in token_rows:
        ids = list(tokenizer.convert_tokens_to_ids(tokens))
        padded_rows.append(ids + [pad_id] * (max_seq_len - len(ids)))
    # The reshape keeps the result two-dimensional even when there are no rows.
    return np.array(padded_rows, dtype=np.int32).reshape(-1, max_seq_len)


train_tokens = tokenize_texts(x_train_text, tokenizer, SEQ_LEN)
train_token_ids = tokens_to_padded_ids(train_tokens, tokenizer, SEQ_LEN)

valid_tokens = tokenize_texts(x_valid_text, tokenizer, SEQ_LEN)
valid_token_ids = tokens_to_padded_ids(valid_tokens, tokenizer, SEQ_LEN)

x_train = train_token_ids
x_valid = valid_token_ids

In [8]:
# Derive the layer parameters from the pre-trained checkpoint directory and
# create the BERT layer from them; the checkpoint weights are loaded later,
# once the model has been created.
bert_params = params_from_pretrained_ckpt(MODEL_PATH)
bert_layer = BertModelLayer.from_params(bert_params, name="bert")
# NOTE(review): this runs before the layer is wired into a model or built;
# confirm against the bert-for-tf2 docs whether it should be called after build.
bert_layer.apply_adapter_freeze()

def create_model(max_seq_length, classes):
    """Assemble the classifier on top of the module-level ``bert_layer``.

    Token ids are fed through BERT, the output at sequence position 0 is
    sliced out, and a dropout -> 64-unit ReLU dense -> dropout -> softmax
    head turns it into per-class probabilities.

    Args:
        max_seq_length: number of token ids per input row.
        classes: number of units of the final softmax layer.

    Returns:
        The uncompiled ``Model`` mapping ``input_ids`` to class probabilities.
    """
    token_ids = Input(shape=(max_seq_length,), dtype='int32', name='input_ids')
    sequence_output = bert_layer(token_ids)

    # Keep only position 0 along the sequence axis.
    first_position = Lambda(lambda hidden: hidden[:, 0, :])(sequence_output)

    # Classification head.
    head = Dropout(0.3)(first_position)
    head = Dense(64, activation=tf.nn.relu)(head)
    head = Dropout(0.3)(head)
    probabilities = Dense(classes, activation='softmax')(head)

    return Model(token_ids, probabilities)

# Instantiate the classifier for SEQ_LEN-long inputs and CLASS output units.
model = create_model(SEQ_LEN, CLASS)
model.build(input_shape=(None, SEQ_LEN))

# Load the pre-trained checkpoint into the BERT layer; this runs only after
# the model containing the layer has been created and built.
load_stock_weights(bert_layer, MODEL_PATH+"bert_model.ckpt")

def flatten_layers(root_layer):
    """Yield ``root_layer`` and everything nested beneath it, parents first.

    Args:
        root_layer: object whose ``_layers`` attribute lists its direct
            children; children are traversed recursively the same way.

    Yields:
        Every visited object that is a ``tf.keras.layers.Layer``. Objects
        that are not layers are skipped, but their children are still
        traversed.
    """
    # The model is assembled from `tensorflow.keras` classes, so the check
    # must use that package's Layer. Testing against the standalone `keras`
    # package can match nothing when the two are distinct installations,
    # which would make callers silently iterate over zero layers.
    if isinstance(root_layer, tf.keras.layers.Layer):
        yield root_layer
    for layer in root_layer._layers:
        yield from flatten_layers(layer)

# Within BERT, leave only the layers with these names trainable and freeze
# every other layer that flatten_layers yields.
TRAINABLE_LAYER_NAMES = {"LayerNorm", "adapter-down", "adapter-up"}
for layer in flatten_layers(bert_layer):
    layer.trainable = layer.name in TRAINABLE_LAYER_NAMES

# Applied after the loop above, so the embeddings layer ends up frozen
# regardless of what the loop set on it.
bert_layer.embeddings_layer.trainable = False

# The labels are integer class ids, hence the *sparse* categorical loss.
# `learning_rate` replaces the deprecated `lr` argument of Adam.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(learning_rate=0.00001),
              metrics=['accuracy'])

# summary() prints the table itself; wrapping it in print() would add a stray "None".
model.summary()

Done loading 196 BERT weights from: /Users/kejinglin/Desktop/personal/uncased_L-12_H-768_A-12/bert_model.ckpt into <bert.model.BertModelLayer object at 0x7fbe479b8070> (prefix:bert). Count of weights not found in the checkpoint was: [0]. Count of weights with mismatched shape: [0]
Unused weights from checkpoint: 
	bert/embeddings/token_type_embeddings
	bert/pooler/dense/bias
	bert/pooler/dense/kernel
	cls/predictions/output_bias
	cls/predictions/transform/LayerNorm/beta
	cls/predictions/transform/LayerNorm/gamma
	cls/predictions/transform/dense/bias
	cls/predictions/transform/dense/kernel
	cls/seq_relationship/output_bias
	cls/seq_relationship/output_weights
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_ids (InputLayer)       [(None, 128)]             0         
_________________________________________________________________
bert (BertModelLayer)        (None, 128, 768)         

In [9]:
checkpointName = "bert_fine-tuning.ckpt"

# Write the model weights (not the full model) to `checkpointName` during
# training, logging each save.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpointName,
    verbose=1,
    save_weights_only=True,
)

# Train for 4 epochs in batches of 16, evaluating on the validation split.
history = model.fit(
    x_train,
    y_train,
    batch_size=16,
    epochs=4,
    callbacks=[cp_callback],
    validation_data=(x_valid, y_valid),
    verbose=1,
)

Epoch 1/4

Epoch 00001: saving model to bert_fine-tuning.ckpt
Epoch 2/4

Epoch 00002: saving model to bert_fine-tuning.ckpt
Epoch 3/4

Epoch 00003: saving model to bert_fine-tuning.ckpt
Epoch 4/4

Epoch 00004: saving model to bert_fine-tuning.ckpt


In [None]:
# `x_test` is not defined anywhere in this notebook, so the original call
# raised NameError on a fresh kernel. The only held-out data created here is
# the validation split, so predict on that.
model.predict(x_valid)