# BERT embedding 层抄写 (未验证)

目的

- 理解 Keras 层的编写方式
- 理解 BERT 的构成
- 培养修改、编辑层的能力

In [10]:
import keras
import keras.backend as K
from keras_pos_embd import PositionEmbedding

In [11]:
class TokenEmbedding(keras.layers.Embedding):
    """Embedding layer that returns its weight matrix alongside the lookup result."""

    def compute_output_shape(self, input_shape):
        """Return the shapes of both outputs.

        The first entry is the shape reported by the parent ``Embedding``;
        the second is the shape of the weight matrix,
        ``(input_dim, output_dim)``.
        """
        embedded_shape = super(TokenEmbedding, self).compute_output_shape(input_shape)
        weights_shape = (self.input_dim, self.output_dim)
        return [embedded_shape, weights_shape]

    def compute_mask(self, inputs, mask=None):
        """Return one mask per output; the weight-matrix output has no mask."""
        embedded_mask = super(TokenEmbedding, self).compute_mask(inputs, mask)
        return [embedded_mask, None]

    def call(self, inputs):
        """Return ``[embedded_inputs, embedding_weights]``."""
        embedded = super(TokenEmbedding, self).call(inputs)
        # K.identity returns a tensor with the same content as its argument,
        # so the weights are exposed as a layer output tensor.
        weights = K.identity(self.embeddings)
        return [embedded, weights]

In [14]:
def get_embedding(inputs, token_num, pos_num, embed_dim, dropout_rate=0.1, trainable=True):
    """Build the BERT input embedding: token + segment + position.

    See: https://arxiv.org/pdf/1810.04805.pdf

    :param inputs: Input layers. ``inputs[0]`` is used as the token input and
        ``inputs[1]`` as the segment input (the BERT network has three inputs:
        [token_input, segment_input, masked_input]).
    :param token_num: Number of tokens (usually the vocabulary size).
    :param pos_num: Number of positions (maximum sequence length, e.g. 512).
    :param embed_dim: Embedding dimension (length of each token vector).
    :param dropout_rate: Dropout rate. Accepted for interface compatibility;
        this function does not currently apply dropout.
    :param trainable: Whether the embedding layers are trainable.
    :return: Tuple ``(embed_layer, embed_weights)``: the embedded tensor and
        the token-embedding weight tensor.
    """
    # TokenEmbedding returns [embedded tokens, embedding weights].
    token_embed, embed_weights = TokenEmbedding(
        input_dim=token_num,
        output_dim=embed_dim,
        mask_zero=True,
        trainable=trainable,
        name="Embedding-Token",
    )(inputs[0])
    # Segment embedding has exactly two possible ids (input_dim=2).
    segment_embed = keras.layers.Embedding(
        input_dim=2,
        output_dim=embed_dim,
        trainable=trainable,
        name="Embedding-Segment",
    )(inputs[1])

    # Sum the token and segment embeddings.
    embed_layer = keras.layers.Add(name="Embedding-Token-Segment")(
        [token_embed, segment_embed]
    )
    # Add the position embedding on top of the summed embeddings.
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=embed_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)
    return embed_layer, embed_weights

In [15]:
class EmbeddingSimilarity(keras.layers.Layer):
    """Compute the similarity between features and the token embeddings.

    The layer takes two inputs, ``[features, embedding_weights]``, multiplies
    the features by the transposed embedding matrix, adds a bias and applies
    softmax.
    """

    def __init__(self,
                 initializer='zeros',
                 regularizer=None,
                 constraint=None,
                 **kwargs):
        """Initialize the layer.

        :param initializer: Initializer for bias.
        :param regularizer: Regularizer for bias.
        :param constraint: Constraint for bias.
        :param kwargs: Arguments for parent class.
        """
        super(EmbeddingSimilarity, self).__init__(**kwargs)
        self.supports_masking = True
        self.initializer = keras.initializers.get(initializer)
        self.regularizer = keras.regularizers.get(regularizer)
        self.constraint = keras.constraints.get(constraint)
        self.bias = None

    def get_config(self):
        """Return the layer configuration, including the bias settings."""
        config = {
            'initializer': keras.initializers.serialize(self.initializer),
            'regularizer': keras.regularizers.serialize(self.regularizer),
            'constraint': keras.constraints.serialize(self.constraint),
        }
        base_config = super(EmbeddingSimilarity, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def build(self, input_shape):
        """Create the bias, one entry per row of the embedding matrix.

        :param input_shape: ``[feature_shape, embedding_shape]``.
        """
        # The shape must be a 1-tuple; a bare int in parentheses is not a tuple.
        self.bias = self.add_weight(
            shape=(int(input_shape[1][0]),),
            initializer=self.initializer,
            regularizer=self.regularizer,
            constraint=self.constraint,
            name='bias',
        )
        super(EmbeddingSimilarity, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        """Return the output shape.

        For inputs such as ``[(None, 512, 768), (30000, 768)]`` the output
        shape is ``(None, 512, 30000)``.
        """
        return input_shape[0][:2] + (input_shape[1][0],)

    def compute_mask(self, inputs, mask=None):
        """Propagate the mask of the feature input only."""
        if mask is None:
            return None
        return mask[0]

    # Keep the original misspelled name available for any existing caller.
    comput_mask = compute_mask

    def call(self, inputs, mask=None, **kwargs):
        """Return ``softmax(features . embeddings^T + bias)``."""
        # inputs is [features, embedding_weights],
        # e.g. [(None, 512, 768), (30000, 768)].
        inputs, embeddings = inputs
        # (None, 512, 768) x (768, 30000) -> (None, 512, 30000)
        outputs = K.bias_add(K.dot(inputs, K.transpose(embeddings)), self.bias)
        return keras.activations.softmax(outputs)

# Embedding

- 来自: keras_embed_sim -> embeddings
- keras_embed_sim是一个库

In [2]:
import keras
import keras.backend as K

Using TensorFlow backend.


In [3]:
class EmbeddingRet(keras.layers.Embedding):
    """Embedding layer that also returns its weight matrix as a second output."""

    def compute_output_shape(self, input_shape):
        """Return ``[embedded_shape, (input_dim, output_dim)]``."""
        return [
            super(EmbeddingRet, self).compute_output_shape(input_shape),
            (self.input_dim, self.output_dim),
        ]

    def compute_mask(self, inputs, mask=None):
        """Return one mask per output; the weight-matrix output has no mask."""
        return [
            super(EmbeddingRet, self).compute_mask(inputs, mask),
            None,
        ]

    def call(self, inputs):
        """Return ``[embedded_inputs, embedding_weights]``."""
        return [
            super(EmbeddingRet, self).call(inputs),
            K.identity(self.embeddings),
        ]

In [6]:
class EmbeddingSim(keras.layers.Layer):
    """Similarity between features and an embedding matrix, followed by softmax.

    The layer takes two inputs, ``[features, embedding_weights]``.
    """

    def __init__(self,
                 use_bias=True,
                 initializer='zeros',
                 regularizer=None,
                 constraint=None,
                 stop_gradient=False,
                 **kwargs
                 ):
        """Initialize the layer.

        :param use_bias: Whether to use bias term.
        :param initializer: Initializer for bias.
        :param regularizer: Regularizer for bias.
        :param constraint: Constraint for bias.
        :param stop_gradient: Whether to stop gradient for input embedding.
        :param kwargs: Arguments for parent class.
        """
        super(EmbeddingSim, self).__init__(**kwargs)
        self.supports_masking = True
        self.use_bias = use_bias
        self.initializer = keras.initializers.get(initializer)
        self.regularizer = keras.regularizers.get(regularizer)
        self.constraint = keras.constraints.get(constraint)
        self.stop_gradient = stop_gradient
        self.bias = None

    def get_config(self):
        """Return the layer configuration, including the bias settings."""
        config = {
            'use_bias': self.use_bias,
            'initializer': keras.initializers.serialize(self.initializer),
            'regularizer': keras.regularizers.serialize(self.regularizer),
            'constraint': keras.constraints.serialize(self.constraint),
            'stop_gradient': self.stop_gradient,
        }
        base_config = super(EmbeddingSim, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def build(self, input_shape):
        """Create the bias (one entry per embedding row) when ``use_bias`` is set.

        :param input_shape: ``[feature_shape, embedding_shape]``.
        """
        # Test the flag, not self.bias: self.bias is still None at this point.
        if self.use_bias:
            embed_shape = input_shape[1]
            token_num = int(embed_shape[0])
            self.bias = self.add_weight(
                shape=(token_num,),
                initializer=self.initializer,
                regularizer=self.regularizer,
                constraint=self.constraint,
                name='bias',
            )
        super(EmbeddingSim, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        """Replace the last feature dimension with the number of embedding rows."""
        feature_shape, embed_shape = input_shape
        token_num = embed_shape[0]
        return feature_shape[:-1] + (token_num,)

    def compute_mask(self, inputs, mask=None):
        """Propagate the mask of the feature input only."""
        if mask is None:
            return None
        return mask[0]

    def call(self, inputs, mask=None, **kwargs):
        """Return ``softmax(features . embeddings^T [+ bias])``."""
        inputs, embeddings = inputs
        if self.stop_gradient:
            # Treat the embedding matrix as a constant for this layer.
            embeddings = K.stop_gradient(embeddings)
        outputs = K.dot(inputs, K.transpose(embeddings))
        if self.use_bias:
            outputs = K.bias_add(outputs, self.bias)
        return keras.activations.softmax(outputs)

# keras Embedding
- 源码中的Embedding

- 略有修改(主要是import的原因)

In [1]:
import keras
import keras.backend as K

Using TensorFlow backend.


In [4]:
class Embedding(keras.layers.Layer):
    """Turn integer indices into dense vectors of size ``output_dim``.

    A hand-copied version of the Keras ``Embedding`` layer, adjusted to use
    the ``keras.*`` namespaces for initializers, regularizers and constraints.
    """

    def __init__(self,
                 input_dim,
                 output_dim,
                 embeddings_initializer='uniform',
                 embeddings_regularizer=None,
                 activity_regularizer=None,
                 embeddings_constraint=None,
                 mask_zero=False,
                 input_length=None,
                 **kwargs
                 ):
        """Initialize the layer.

        :param input_dim: Number of rows of the embedding matrix.
        :param output_dim: Number of columns of the embedding matrix.
        :param embeddings_initializer: Initializer for the embedding matrix.
        :param embeddings_regularizer: Regularizer for the embedding matrix.
        :param activity_regularizer: Regularizer stored on the layer as
            ``activity_regularizer``.
        :param embeddings_constraint: Constraint for the embedding matrix.
        :param mask_zero: Whether index 0 is masked out.
        :param input_length: Length of the input sequences, if fixed.
        :param kwargs: Arguments for parent class.
        """
        # Accept the earlier misspelled keyword names so existing keyword
        # call sites keep working.
        if 'activaity_regularizer' in kwargs:
            activity_regularizer = kwargs.pop('activaity_regularizer')
        if 'embedding_constranint' in kwargs:
            embeddings_constraint = kwargs.pop('embedding_constranint')

        if 'input_shape' not in kwargs:
            if input_length:
                kwargs['input_shape'] = (input_length,)
            else:
                kwargs['input_shape'] = (None,)
        super(Embedding, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.output_dim = output_dim

        self.embeddings_initializer = keras.initializers.get(embeddings_initializer)
        self.embeddings_regularizer = keras.regularizers.get(embeddings_regularizer)
        self.activity_regularizer = keras.regularizers.get(activity_regularizer)
        self.embeddings_constraint = keras.constraints.get(embeddings_constraint)
        self.mask_zero = mask_zero
        # Masking is supported exactly when index 0 is reserved as padding.
        self.supports_masking = mask_zero
        self.input_length = input_length

    def build(self, input_shape):
        """Create the ``(input_dim, output_dim)`` embedding matrix."""
        self.embeddings = self.add_weight(
            shape=(self.input_dim, self.output_dim),
            initializer=self.embeddings_initializer,
            name='embeddings',
            regularizer=self.embeddings_regularizer,
            constraint=self.embeddings_constraint,
            dtype=self.dtype
        )
        self.built = True

    def compute_mask(self, inputs, mask=None):
        """Return a mask that is False where the input index is 0.

        Returns ``None`` when ``mask_zero`` is disabled.
        """
        if not self.mask_zero:
            return None
        output_mask = K.not_equal(inputs, 0)
        return output_mask

    def compute_output_shape(self, input_shape):
        """Return the output shape: the input shape plus a trailing ``output_dim``.

        When ``input_length`` is set it is checked against the non-batch
        dimensions of ``input_shape``; entries of ``input_length`` that are
        ``None`` are filled in from ``input_shape``.

        :raises ValueError: If ``input_length`` does not match ``input_shape``.
        """
        if self.input_length is None:
            return input_shape + (self.output_dim,)

        # input_length can be tuple if input is 3D or higher
        if isinstance(self.input_length, (list, tuple)):
            in_lens = list(self.input_length)
        else:
            in_lens = [self.input_length]
        if len(in_lens) != len(input_shape) - 1:
            raise ValueError('"input_length" is %s, but received input has shape %s' %
                             (str(self.input_length), str(input_shape)))
        for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])):
            if s1 is not None and s2 is not None and s1 != s2:
                raise ValueError('"input_length" is %s, but received input has shape %s' %
                                 (str(self.input_length), str(input_shape)))
            elif s1 is None:
                in_lens[i] = s2
        return (input_shape[0],) + tuple(in_lens) + (self.output_dim,)

    def call(self, inputs):
        """Gather the embedding rows selected by ``inputs`` (cast to int32)."""
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')
        out = K.gather(self.embeddings, inputs)
        return out

    def get_config(self):
        """Return the layer configuration merged with the parent's."""
        config = {'input_dim': self.input_dim,
                  'output_dim': self.output_dim,
                  'embeddings_initializer': keras.initializers.serialize(self.embeddings_initializer),
                  'embeddings_regularizer': keras.regularizers.serialize(self.embeddings_regularizer),
                  'activity_regularizer': keras.regularizers.serialize(self.activity_regularizer),
                  'embeddings_constraint': keras.constraints.serialize(self.embeddings_constraint),
                  'mask_zero': self.mask_zero,
                  'input_length': self.input_length}
        base_config = super(Embedding, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
        