In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Show the NumPy and TensorFlow versions this notebook is running against.
np.__version__, tf.__version__

('1.18.5', '2.3.0')

参考  
https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/keras/layers/BatchNormalization

<img src="./images/bn_eq.png" alt="drawing" width="750"/>

#### 最简形式

```
m = 3, each x feature is a scalar value
```

In [3]:
# Toy batch: m = 3 samples, each holding a single scalar feature.
# np.float64 is spelled out because the `np.float` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24; np.float64 is the same dtype and
# works on every NumPy version.
xs = np.array([[1.],
               [2.],
               [3.]], dtype=np.float64)
m, _ = xs.shape  # m = number of rows, i.e. the batch size

xs, m

(array([[1.],
        [2.],
        [3.]]),
 3)

In [4]:
# Batch mean: accumulate the first m samples one by one, then divide by m.
miu_b = np.zeros_like(xs[0])
for sample in xs[:m]:
    miu_b += sample
miu_b /= m
miu_b

array([2.])

In [5]:
# Biased batch variance: average of the squared deviations from miu_b.
sig_b2 = np.zeros_like(xs[0])
for sample in xs[:m]:
    deviation = sample - miu_b
    sig_b2 += deviation ** 2
sig_b2 /= m
sig_b2

array([0.66666667])

In [6]:
# epsilon is `threshold` only where the variance falls below `threshold`
# (guarding the division that follows) and 0 everywhere else.
threshold = 1e-8
variance_too_small = sig_b2 < threshold
epsilon = np.where(variance_too_small, threshold, 0)
epsilon

array([0.])

In [7]:
# Standardise: subtract the batch mean, divide by the (guarded) batch std.
std_b = (sig_b2 + epsilon) ** 0.5
xshat = (xs - miu_b) / std_b
xshat

array([[-1.22474487],
       [ 0.        ],
       [ 1.22474487]])

#### 封装为函数
```
满足多个特征
```

In [8]:
def normal_equation(Xs, threshold=1e-8):
    """
    Batch-normalise ``Xs`` along axis 0 (the batch axis).

    Every position of the trailing axes is treated as an independent
    feature: its mean and biased variance (divide by ``m``) are computed
    over the ``m = Xs.shape[0]`` samples and used to standardise the data.

    Parameters
    ----------
    Xs : array_like
        Input data with the batch on axis 0. Lists are accepted, and
        integer/boolean input is promoted to float64 before any arithmetic.
    threshold : float, optional
        Features whose variance is below this value get ``threshold`` added
        to the variance before the square root, so that (near-)constant
        features do not divide by zero. Other features get no epsilon.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``Xs`` holding the normalised values.

    Raises
    ------
    ValueError
        If ``Xs`` is 0-dimensional or has no samples along axis 0.
    """
    Xs = np.asarray(Xs)
    if not np.issubdtype(Xs.dtype, np.inexact):
        # Integer input would otherwise produce integer statistics; promote
        # once up front so the mean/variance are computed in floating point.
        Xs = Xs.astype(np.float64)
    if Xs.ndim == 0 or Xs.shape[0] == 0:
        raise ValueError("Xs must have a non-empty batch axis (axis 0)")

    # Per-feature statistics, reduced over the batch axis only.
    miu_b = Xs.mean(axis=0)
    sig_b2 = np.mean((Xs - miu_b) ** 2, axis=0)

    # Only (near-)zero variances receive the stabilising term.
    epsilon = np.where(sig_b2 < threshold, threshold, 0)
    Xshat = (Xs - miu_b) / (sig_b2 + epsilon) ** 0.5
    return Xshat

In [9]:
# 2-D case: 3 samples (rows) x 4 features (columns) on very different scales.
Xs = np.array([
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
])
normal_equation(Xs)

array([[-1.22474487, -1.22474487, -1.22474487, -1.22474487],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.22474487,  1.22474487,  1.22474487,  1.22474487]])

In [10]:
# 3-D case: a batch of two identical (3, 4) samples. The batch is axis 0,
# so each of the 3 x 4 positions sees two equal values (zero variance).
sample = [
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
]
Xs = np.array([sample, sample])
Xs.shape, normal_equation(Xs)

((2, 3, 4),
 array([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],
 
        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]))

In [11]:
# Batch of size 1: with a single sample on axis 0 every position has zero
# variance, so the threshold guard applies everywhere.
Xs = np.array([[
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
]])
Xs.shape, normal_equation(Xs)

((1, 3, 4),
 array([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]))

#### 使用模块

```
tf.keras.layers.BatchNormalization(
    axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True,
    beta_initializer='zeros', gamma_initializer='ones',
    moving_mean_initializer='zeros', moving_variance_initializer='ones',
    beta_regularizer=None, gamma_regularizer=None, beta_constraint=None,
    gamma_constraint=None, renorm=False, renorm_clipping=None, renorm_momentum=0.99,
    fused=None, trainable=True, virtual_batch_size=None, adjustment=None, name=None,
    **kwargs
)
```

In [12]:
# Keras counterpart of the scalar-feature example. epsilon=0 mirrors the
# NumPy computation, which adds no epsilon when the variance is non-zero.
model = tf.keras.Sequential([
    tf.keras.layers.BatchNormalization(epsilon=0, scale=False)
])
# np.float64 replaces the `np.float` alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
xs = np.array([[1.],
               [2.],
               [3.]], dtype=np.float64)
tf_xs = tf.convert_to_tensor(xs)
# training=True makes the layer normalise with the statistics of this batch.
model(tf_xs, training=True)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
array([[-1.2247448],
       [ 0.       ],
       [ 1.2247448]], dtype=float32)>

In [13]:
# Same check with 4 features per sample; epsilon=0 mirrors the NumPy
# computation, which adds no epsilon when the variance is non-zero.
Xs = np.array([
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
])
tf_xs = tf.convert_to_tensor(Xs)

bn_layer = tf.keras.layers.BatchNormalization(epsilon=0, scale=False)
model = tf.keras.Sequential([bn_layer])
model(tf_xs, training=True)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[-1.2247449, -1.2247448, -1.2247449, -1.2247448],
       [ 0.       ,  0.       ,  0.       ,  0.       ],
       [ 1.224745 ,  1.224745 ,  1.2247448,  1.2247448]], dtype=float32)>

In [14]:
# axis=(1, 2): both trailing axes are declared as feature axes, leaving only
# axis 0 as the batch. The two samples are identical, so every centred value
# is 0 -- the same result normal_equation gives for this input.
sample = [
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
]
Xs = np.array([sample, sample])
print(Xs.shape)
tf_xs = tf.convert_to_tensor(Xs)

bn_layer = tf.keras.layers.BatchNormalization(axis=(1, 2), scale=False)
model = tf.keras.Sequential([bn_layer])
model(tf_xs, training=True)

(2, 3, 4)


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]], dtype=float32)>

#### 区别

In [15]:
# axis=-1: only the last axis is the feature axis.
# NOTE(review): epsilon is left at its default here (0.001 in the signature
# quoted earlier), unlike the epsilon=0 cells; presumably that is why the
# small-variance features come out slightly below +/-1.2247.
sample = [
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
]
Xs = np.array([sample, sample])
print(Xs.shape)
tf_xs = tf.convert_to_tensor(Xs)

bn_layer = tf.keras.layers.BatchNormalization(axis=-1, scale=False)
model = tf.keras.Sequential([bn_layer])
model(tf_xs, training=True)

(2, 3, 4)


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[-1.2238274, -1.2247357, -1.2247448, -1.2247448],
        [ 0.       ,  0.       ,  0.       ,  0.       ],
        [ 1.2238274,  1.2247355,  1.2247448,  1.2247448]],

       [[-1.2238274, -1.2247357, -1.2247448, -1.2247448],
        [ 0.       ,  0.       ,  0.       ,  0.       ],
        [ 1.2238274,  1.2247355,  1.2247448,  1.2247448]]], dtype=float32)>

In [16]:
# NumPy equivalent of axis=-1: fold every non-feature axis into the batch
# axis, normalise the resulting 2-D matrix, then restore the original shape.
sample = [
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
]
Xs = np.array([sample, sample])

Xs2 = Xs.reshape(-1, 4)
normalized_2d = normal_equation(Xs2)
normalized_2d.reshape(Xs.shape)

array([[[-1.22474487, -1.22474487, -1.22474487, -1.22474487],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 1.22474487,  1.22474487,  1.22474487,  1.22474487]],

       [[-1.22474487, -1.22474487, -1.22474487, -1.22474487],
        [ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 1.22474487,  1.22474487,  1.22474487,  1.22474487]]])

In [17]:
# Inspect the flattened (batch, feature) matrix that was normalised above.
Xs2

array([[1.e+00, 1.e+01, 1.e+02, 1.e+03],
       [2.e+00, 2.e+01, 2.e+02, 2.e+03],
       [3.e+00, 3.e+01, 3.e+02, 3.e+03],
       [1.e+00, 1.e+01, 1.e+02, 1.e+03],
       [2.e+00, 2.e+01, 2.e+02, 2.e+03],
       [3.e+00, 3.e+01, 3.e+02, 3.e+03]])

#### 结论

```
tf.keras.layers.BatchNormalization 的 axis 参数指定的是特征轴；其余各轴都会被合并（展平）为 batch 维度，均值和方差在这些轴上计算

```

In [18]:
# LayerNormalization with axis=(1, 2): the same (2, 3, 4) input as in the
# BatchNormalization cells, normalised by a different layer for comparison.
sample = [
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
]
Xs = np.array([sample, sample])
print(Xs.shape)
tf_xs = tf.convert_to_tensor(Xs)

ln_layer = tf.keras.layers.LayerNormalization(axis=(1, 2), scale=False)
model = tf.keras.Sequential([ln_layer])
model(tf_xs, training=True)

(2, 3, 4)


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[-0.5945305 , -0.58488077, -0.48838347,  0.4765894 ],
        [-0.5934583 , -0.57415885, -0.38116428,  1.5487815 ],
        [-0.5923861 , -0.5634369 , -0.27394506,  2.6209736 ]],

       [[-0.5945305 , -0.58488077, -0.48838347,  0.4765894 ],
        [-0.5934583 , -0.57415885, -0.38116428,  1.5487815 ],
        [-0.5923861 , -0.5634369 , -0.27394506,  2.6209736 ]]],
      dtype=float32)>

In [19]:
# NumPy cross-check: standardise one (3, 4) sample with the mean and biased
# variance taken over all 12 of its values.
tmparr = np.array([
    [1.0, 10.0, 100.0, 1000.0],
    [2.0, 20.0, 200.0, 2000.0],
    [3.0, 30.0, 300.0, 3000.0],
])

miu = tmparr.mean()
deviation = tmparr - miu
sig = np.mean(deviation ** 2)
deviation / sig ** 0.5

array([[-0.59453051, -0.58488078, -0.48838349,  0.47658938],
       [-0.59345832, -0.57415886, -0.38116428,  1.54878146],
       [-0.59238612, -0.56343694, -0.27394508,  2.62097354]])