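A simple text-correction example using ELECTRA on Google Colab: the pretrained discriminator head scores each character of a sentence and flags the ones that were likely replaced.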


In [None]:
# Attach Google Drive to the Colab filesystem; the project directory entered by
# the setup cell below lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [1]:
%tensorflow_version 2.x

import os
import warnings
import tensorflow as tf
import tensorflow.keras as keras

# Enter the project directory so that `mymodels` and the relative `models/...`
# paths resolve. The path is absolute (Drive is mounted at /content/drive) so
# the cell can be re-run: a relative './drive/...' only resolves while the
# kernel is still in its starting directory and fails on a second execution.
os.chdir('/content/drive/My Drive/Python/Research/bert')
# Silences every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

import mymodels as mm

In [2]:
# Model identifier handed to mm.BERT together with the config file.
MODEL = 'electra'
# Unit count of the first dense layer in the discriminator head.
DIM = 768
# Length argument passed to the tokenizer's `encoding` call.
MAXLEN = 128

# Pretrained model files; paths are relative to the working directory.
MODEL_DIR = 'models/electra_base_ch'
VOCAB = MODEL_DIR + '/vocab.txt'
CONFIG = MODEL_DIR + '/electra_config.json'
CKPT = MODEL_DIR + '/electra_base'

In [3]:
class MyModel(keras.layers.Layer):
  """ELECTRA discriminator: an `mm.BERT` encoder plus a two-layer prediction head.

  The head is `Dense(dim, gelu)` followed by `Dense(1, sigmoid)`, so the output
  holds one value in (0, 1) per encoder output position.

  Args:
    model: Model identifier forwarded to `mm.BERT`.
    config: Path of the model config file forwarded to `mm.BERT`.
    dim: Number of units of the first head layer.
    ckpt: Checkpoint path the encoder and head weights are read from.
  """

  def __init__(self, model, config, dim, ckpt):
    super().__init__()
    self.ckpt = ckpt
    self.bert = mm.BERT(config, model)
    # The layer names double as the checkpoint variable prefixes used by
    # `loading`, so they must match the names stored in the checkpoint.
    self.name1 = 'discriminator_predictions/dense'
    self.name2 = 'discriminator_predictions/dense_1'
    self.dense1 = keras.layers.Dense(dim, activation=mm.gelu_activating, name=self.name1)
    self.dense2 = keras.layers.Dense(1, activation='sigmoid', name=self.name2)

  def loading(self):
    """Restores the encoder and the prediction head from `self.ckpt`."""
    self.bert.loading(self.ckpt)

    # Dense layers create their variables lazily on first call, so run one
    # throwaway forward pass before there is anything to assign to.
    _ = self.propagating(tf.ones((2, 2)), tf.zeros((2, 2)), tf.zeros((2, 2)), False)

    # Select the head variables from the layers themselves rather than by
    # position in `self.weights`, whose ordering depends on what the encoder
    # registers (e.g. non-trainable variables are listed last).
    head_vars = self.dense1.weights + self.dense2.weights
    # Variable names look like '<layer name>/kernel:0'; the checkpoint key is
    # the part before the ':' suffix.
    ckpt_keys = [var.name.split(':')[0] for var in head_vars]
    ckpt_values = [tf.train.load_variable(self.ckpt, key) for key in ckpt_keys]
    keras.backend.batch_set_value(zip(head_vars, ckpt_values))

  def propagating(self, text, segment, mask, training=False):
    """Runs the encoder and the prediction head.

    Args:
      text: Token-id batch forwarded to the encoder.
      segment: Segment-id batch forwarded to the encoder.
      mask: Mask batch forwarded to the encoder.
      training: Training flag forwarded to the encoder.

    Returns:
      The sigmoid output of the head; its last axis has size 1.
    """
    # The fourth positional argument (False) is defined by mm.BERT.propagating,
    # which is not visible here -- NOTE(review): confirm its meaning.
    encoded = self.bert.propagating(text, segment, mask, False, training)
    hidden = self.dense1(encoded)
    return self.dense2(hidden)


# Tokenizer backed by the pretrained vocabulary file.
tokenizer_1 = mm.Tokenizer()
tokenizer_1.loading(VOCAB)

# Discriminator model; `loading` restores its weights from the checkpoint.
model_1 = MyModel(model=MODEL, config=CONFIG, dim=DIM, ckpt=CKPT)
model_1.loading()

In [4]:
# Probability above which a character is reported as replaced.
REPLACED_THRESHOLD = 0.5

sentence_1 = '今天天气真差，阳光明妹，风和日立，天朗气青，适合出游。'
text_1, seg_1, mask_1 = tokenizer_1.encoding(sentence_1, None, MAXLEN)

# The inputs are wrapped in a list to form a batch of one; squeeze then drops
# the size-1 axes so that `p_1` holds one probability per encoded position.
p_1 = tf.squeeze(
    model_1.propagating(tf.constant([text_1]), tf.constant([seg_1]), tf.constant([mask_1])))

# Convert to NumPy once instead of slicing the tensor per character. Position 0
# is skipped -- presumably the leading [CLS] token; confirm against
# mm.Tokenizer.encoding. This also assumes one encoded position per character.
probs_1 = p_1.numpy()[1:]

# zip stops at the shorter sequence, so a sentence with more characters than
# scored positions cannot index past the end of the predictions.
for char_1, prob_1 in zip(sentence_1, probs_1):
  print(char_1, '[REPLACED!!!]' if prob_1 > REPLACED_THRESHOLD else '[ORIGINAL]', prob_1)

if len(probs_1) < len(sentence_1):
  print('[NOTE] only the first', len(probs_1), 'characters were scored.')

今 [ORIGINAL] 0.0034534659
天 [ORIGINAL] 0.002484034
天 [ORIGINAL] 0.000903035
气 [ORIGINAL] 0.0005843059
真 [ORIGINAL] 0.02739149
差 [REPLACED!!!] 0.7542808
， [ORIGINAL] 0.005225526
阳 [ORIGINAL] 0.0033263383
光 [ORIGINAL] 0.006835327
明 [ORIGINAL] 0.28822166
妹 [REPLACED!!!] 0.8502968
， [ORIGINAL] 0.0009310552
风 [ORIGINAL] 0.00020707023
和 [ORIGINAL] 0.00063004077
日 [ORIGINAL] 0.0022070203
立 [REPLACED!!!] 0.664864
， [ORIGINAL] 0.0008412881
天 [ORIGINAL] 0.0006401022
朗 [ORIGINAL] 0.0011243327
气 [ORIGINAL] 0.004930166
青 [REPLACED!!!] 0.7770749
， [ORIGINAL] 0.0021017483
适 [ORIGINAL] 7.511114e-05
合 [ORIGINAL] 0.007640966
出 [ORIGINAL] 0.006268836
游 [ORIGINAL] 0.0064419443
。 [ORIGINAL] 0.015129656
