A text similarity classification example for ELECTRA.  
The data is AFQMC from https://github.com/CLUEbenchmark/CLUE.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [1]:
%tensorflow_version 2.x

import os
import warnings
import time
import json
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras

os.chdir('./drive/My Drive/Python/Research/bert')
warnings.filterwarnings('ignore')
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

import mymodels as mm

In [2]:
MODEL = 'electra'
MODE = 'cls'
MAXLEN = 128
CATE = 2
DROP = 0.5
LRATE = 2e-5
BATCH = 16
EPOCH = 3
VOCAB = 'models/electra_small_ch/vocab.txt'
CONFIG = 'models/electra_small_ch/electra_config.json'
CKPT = 'models/electra_small_ch/electra_small'

In [3]:
def file_loading(file):
  reader1 = open(file, 'r', encoding='utf-8').readlines()
  return [json.loads(i1.strip()) for i1 in reader1]


def data_processing(data, tokenizer, maxlen, batch, training, key):
  text1, seg1, mask1, label1 = [], [], [], []
  
  for i1 in data:
    text2, seg2, mask2 = tokenizer.encoding(i1['sentence1'], i1['sentence2'], maxlen)
    text1.append(text2)
    seg1.append(seg2)
    mask1.append(mask2)
    label1.append(int(i1[key]))

  text1, seg1, mask1, label1 = np.array(text1), np.array(seg1), np.array(mask1), np.array(label1)
  data1 = tf.data.Dataset.from_tensor_slices((text1, seg1, mask1, label1))
  return data1.shuffle(len(text1)).batch(batch) if training else data1.batch(batch)


tokenizer_1 = mm.Tokenizer()
tokenizer_1.loading(VOCAB)
training_1 = file_loading('tasks/datasets/afqmc/train.json')
dev_1 = file_loading('tasks/datasets/afqmc/dev.json')
training_2 = data_processing(training_1, tokenizer_1, MAXLEN, BATCH, True, 'label')
dev_2 = data_processing(dev_1, tokenizer_1, MAXLEN, BATCH, False, 'label')
print(training_1[0])

{'sentence1': '蚂蚁借呗等额还款可以换成先息后本吗', 'sentence2': '借呗有先息到期还本吗', 'label': '0'}


In [4]:
class MyModel(keras.Model):
  def __init__(self, model, mode, config, drop, category):
    super(MyModel, self).__init__()
    self.bert = mm.BERT(config, model, mode)
    self.drop = keras.layers.Dropout(drop)
    self.dense = keras.layers.Dense(category, activation='softmax')

  def propagating(self, text, segment, mask, training):
    x1 = self.bert.propagating(text, segment, mask, training)
    return self.dense(self.drop(x1, training=training))


model_1 = MyModel(MODEL, MODE, CONFIG, DROP, CATE)
model_1.bert.loading(CKPT)
function_1 = keras.losses.SparseCategoricalCrossentropy()
optimizer_1 = mm.AdamWV2(EPOCH*(int(len(training_1)/BATCH)+1), LRATE)

loss_1 = tf.keras.metrics.Mean(name='training_loss')
acc_1 = tf.keras.metrics.SparseCategoricalAccuracy(name='training_accuracy')
acc_2 = tf.keras.metrics.SparseCategoricalAccuracy(name='dev_accuracy')

In [5]:
@tf.function
def step_training(text, segment, mask, y):
  with tf.GradientTape() as tape_1:
    pred_1 = model_1.propagating(text, segment, mask, True)
    value_1 = function_1(y, pred_1)

  grad_1 = tape_1.gradient(value_1, model_1.trainable_variables)
  grad_1, _ = tf.clip_by_global_norm(grad_1, 1.0)
  optimizer_1.apply_gradients(zip(grad_1, model_1.trainable_variables))
  loss_1(value_1)
  acc_1(y, pred_1)


@tf.function
def step_evaluating(text, segment, mask, y):
  pred_1 = model_1.propagating(text, segment, mask, False)
  acc_2(y, pred_1)


temp_1 = 'Training loss is {:.4f}, accuracy is {:.4f}.'
temp_2 = 'Dev accuracy is {:.4f}, and epoch cost is {:.4f}.'
count_1 = 0

for e_1 in range(EPOCH):
  print('Epoch {} running.'.format(e_1+1))
  time_0 = time.time()

  for x_1, x_2, x_3, y_1 in training_2:
    time_1, count_1 = time.time(), count_1+1
    step_training(x_1, x_2, x_3, y_1)

    if count_1 % 500 == 0:
      print(temp_1.format(float(loss_1.result()), float(acc_1.result())))

  for x_1, x_2, x_3, y_1 in dev_2:
    step_evaluating(x_1, x_2, x_3, y_1)

  print(temp_2.format(float(acc_2.result()), time.time()-time_0))
  print('**********')
  acc_1.reset_states()
  acc_2.reset_states()

Epoch 1 running.
Training loss is 0.8222, accuracy is 0.5970.
Training loss is 0.7478, accuracy is 0.6211.
Training loss is 0.7083, accuracy is 0.6389.
Training loss is 0.6856, accuracy is 0.6458.
Dev accuracy is 0.6872, and epoch cost is 171.9196.
**********
Epoch 2 running.
Training loss is 0.6642, accuracy is 0.6880.
Training loss is 0.6496, accuracy is 0.6844.
Training loss is 0.6377, accuracy is 0.6869.
Training loss is 0.6276, accuracy is 0.6879.
Dev accuracy is 0.7004, and epoch cost is 153.7921.
**********
Epoch 3 running.
Training loss is 0.6206, accuracy is 0.6971.
Training loss is 0.6129, accuracy is 0.6982.
Training loss is 0.6056, accuracy is 0.7043.
Training loss is 0.5994, accuracy is 0.7065.
Dev accuracy is 0.7027, and epoch cost is 154.0852.
**********
