# LayerNormalization

- from: keras_layer_normalization -> layer_normalization


In [3]:
import keras
import keras.backend as K

Using TensorFlow backend.


In [4]:
class LayerNormalization(keras.layers.Layer):
    """Layer normalization applied over the last axis of the input.

    See: [Layer Normalization](https://arxiv.org/pdf/1607.06450.pdf)
    """

    def __init__(self,
                 center=True,
                 scale=True,
                 epsilon=None,
                 gamma_initializer='ones',
                 beta_initializer='zeros',
                 gamma_regularizer=None,
                 beta_regularizer=None,
                 gamma_constraint=None,
                 beta_constraint=None,
                 **kwargs):
        """Configure the normalization layer.

        :param center: Add an offset parameter if it is True.
        :param scale: Add a scale parameter if it is True.
        :param epsilon: Epsilon for calculating variance. Defaults to
            ``K.epsilon() * K.epsilon()`` when None.
        :param gamma_initializer: Initializer for the gamma weight.
        :param beta_initializer: Initializer for the beta weight.
        :param gamma_regularizer: Optional regularizer for the gamma weight.
        :param beta_regularizer: Optional regularizer for the beta weight.
        :param gamma_constraint: Optional constraint for the gamma weight.
        :param beta_constraint: Optional constraint for the beta weight.
        :param kwargs: Forwarded to ``keras.layers.Layer.__init__``.
        """
        super(LayerNormalization, self).__init__(**kwargs)
        self.supports_masking = True
        self.center = center
        self.scale = scale
        if epsilon is None:
            # Square the backend epsilon to get a much smaller default
            # than the backend's own value.
            epsilon = K.epsilon() * K.epsilon()
        self.epsilon = epsilon
        self.gamma_initializer = keras.initializers.get(gamma_initializer)
        self.beta_initializer = keras.initializers.get(beta_initializer)
        self.gamma_regularizer = keras.regularizers.get(gamma_regularizer)
        self.beta_regularizer = keras.regularizers.get(beta_regularizer)
        self.gamma_constraint = keras.constraints.get(gamma_constraint)
        self.beta_constraint = keras.constraints.get(beta_constraint)
        # The weights are created in build(); they stay None when the
        # corresponding scale/center flag is disabled.
        self.gamma, self.beta = None, None

    def get_config(self):
        """Return the layer configuration merged with the base layer config.

        :return: Dict holding the constructor arguments in serialized form.
        """
        config = {
            'center': self.center,
            'scale': self.scale,
            'epsilon': self.epsilon,
            'gamma_initializer': keras.initializers.serialize(self.gamma_initializer),
            'beta_initializer': keras.initializers.serialize(self.beta_initializer),
            'gamma_regularizer': keras.regularizers.serialize(self.gamma_regularizer),
            'beta_regularizer': keras.regularizers.serialize(self.beta_regularizer),
            'gamma_constraint': keras.constraints.serialize(self.gamma_constraint),
            'beta_constraint': keras.constraints.serialize(self.beta_constraint),
        }
        base_config = super(LayerNormalization, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` unchanged.

        :param input_shape: Shape of the layer input.
        :return: The same shape.
        """
        return input_shape

    def compute_mask(self, inputs, input_mask=None):
        """Pass the incoming mask through unchanged.

        :param inputs: Layer inputs (unused).
        :param input_mask: Mask attached to the inputs, if any.
        :return: ``input_mask`` as received.
        """
        return input_mask

    def build(self, input_shape):
        """Create the gamma and beta weights, sized by the last input axis.

        :param input_shape: Shape of the layer input.
        """
        # Keep only the last dimension so each feature gets one parameter.
        shape = input_shape[-1:]
        if self.scale:
            self.gamma = self.add_weight(
                shape=shape,
                initializer=self.gamma_initializer,
                regularizer=self.gamma_regularizer,
                constraint=self.gamma_constraint,
                name='gamma',
            )
        if self.center:
            self.beta = self.add_weight(
                shape=shape,
                initializer=self.beta_initializer,
                regularizer=self.beta_regularizer,
                constraint=self.beta_constraint,
                name='beta',
            )
        super(LayerNormalization, self).build(input_shape)

    def call(self, inputs, training=None):
        """Normalize ``inputs`` along the last axis, then scale and shift.

        :param inputs: Input tensor.
        :param training: Unused; accepted for Keras API compatibility.
        :return: Tensor with the same shape as ``inputs``.
        """
        mean = K.mean(inputs, axis=-1, keepdims=True)  # mean
        variance = K.mean(K.square(inputs - mean), axis=-1, keepdims=True)  # variance
        # Epsilon keeps the square root (and the division below) away from zero.
        std = K.sqrt(variance + self.epsilon)  # standard deviation
        outputs = (inputs - mean) / std  # normalized output
        if self.scale:
            outputs *= self.gamma  # scale
        if self.center:
            outputs += self.beta  # offset
        return outputs
        

In [7]:
# Show the backend epsilon value (the recorded output of this cell is 1e-07).
K.epsilon()

1e-07

In [34]:
import tensorflow as tf

# Mean: demonstrate K.mean over the last axis of a 2x2x2 tensor.
# The constant is built from Python ints, and the recorded output is int32,
# so each mean appears truncated to an integer (e.g. mean([1, 2]) shows as 1).

a = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]

x = tf.constant(a)

"""
[[[1],
  [3]],
 [[5],
  [7]]]
"""
K.mean(x, axis=-1, keepdims=True)

<tf.Tensor: id=145, shape=(2, 2, 1), dtype=int32, numpy=
array([[[1],
        [3]],

       [[5],
        [7]]], dtype=int32)>