In [1]:
# -*-conding: utf8 -*-
# !/usr/bin/python
# Author: Selvaria

# 批量归一化层

import tensorflow as tf
import numpy as np
print(tf.__version__)

2.1.0


In [4]:
# 5.10.1.1. 对全连接层做批量归一化

import numpy as np

#这里要传入的是numpy格式的矩阵而非tf格式的tensor
def batch_norm(is_training,X, gamma, beta, moving_mean, moving_var, eps, momentum):
    # 判断是当前模式是训练模式还是预测模式
    if not is_training:
        # 如果是在预测模式下，直接使用传入的移动平均所得的均值和方差
        X_hat = (X - moving_mean) / np.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # 使用全连接层的情况，计算特征维上的均值和方差
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # 使用二维卷积层的情况，计算通道维上（axis=1）的均值和方差。这里我们需要保持X的形状以便后面可以做广播运算
            # 但是tf是把通道数放在最后一个轴上的
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        # 训练模式下用当前的均值和方差做标准化
        X_hat = (X - mean) / np.sqrt(var + eps)
        # 更新移动平均的均值和方差
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # 拉伸和偏移
    return Y, moving_mean, moving_var

In [11]:
#

# X = tf.random.uniform((2,4)) 
X = np.random.uniform(0,5,size=(2,4,2)) 
print(X)
print(X.shape)
print(X.mean(axis=0))
print(X.mean(axis=(0, 1, 2)))

[[[2.65484187 1.24216043]
  [4.13411042 3.52088189]
  [1.92389752 4.99148122]
  [0.47979447 3.7273825 ]]

 [[0.43150668 2.68431344]
  [0.3172725  1.29193138]
  [0.25944173 3.9302805 ]
  [0.46150003 4.55554984]]]
(2, 4, 2)
[[1.54317427 1.96323694]
 [2.22569146 2.40640663]
 [1.09166962 4.46088086]
 [0.47064725 4.14146617]]
2.2878966511115326


In [15]:
# tf实现

net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Conv2D(filters=6,kernel_size=5, input_shape=(28, 28, 1)))
net.add(tf.keras.layers.BatchNormalization())
net.add(tf.keras.layers.Activation('sigmoid'))
net.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
net.add(tf.keras.layers.Conv2D(filters=16,kernel_size=5))
net.add(tf.keras.layers.BatchNormalization())
net.add(tf.keras.layers.Activation('sigmoid'))
net.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(120))
net.add(tf.keras.layers.BatchNormalization())
net.add(tf.keras.layers.Activation('sigmoid'))
net.add(tf.keras.layers.Dense(84))
net.add(tf.keras.layers.BatchNormalization())
net.add(tf.keras.layers.Activation('sigmoid'))
net.add(tf.keras.layers.Dense(10,activation='sigmoid'))

net.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 24, 24, 6)         156       
_________________________________________________________________
batch_normalization_12 (Batc (None, 24, 24, 6)         24        
_________________________________________________________________
activation_12 (Activation)   (None, 24, 24, 6)         0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 12, 12, 6)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 8, 8, 16)          2416      
_________________________________________________________________
batch_normalization_13 (Batc (None, 8, 8, 16)          64        
_________________________________________________________________
activation_13 (Activation)   (None, 8, 8, 16)         

In [22]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape((60000, 28, 28, 1)).astype('float32') / 255
x_test = x_test.reshape((10000, 28, 28, 1)).astype('float32') / 255

net.compile(loss='sparse_categorical_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(),
              metrics=['accuracy'])
history = net.fit(x_train, y_train,
                    batch_size=64,
                    epochs=5,
                    validation_split=0.2)
test_scores = net.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

Train on 48000 samples, validate on 12000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
10000/10000 - 0s - loss: 0.1163 - accuracy: 0.9711
Test loss: 0.1162910664813593
Test accuracy: 0.9711


In [17]:
# tf实现

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(filters=6,kernel_size=5, input_shape=(28, 28, 1), activation='sigmoid'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
model.add(tf.keras.layers.Conv2D(filters=16,kernel_size=5, activation='sigmoid'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(120, activation='sigmoid'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(84, activation='sigmoid'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(10,activation='sigmoid'))

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 24, 24, 6)         156       
_________________________________________________________________
batch_normalization_16 (Batc (None, 24, 24, 6)         24        
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 12, 12, 6)         0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 8, 8, 16)          2416      
_________________________________________________________________
batch_normalization_17 (Batc (None, 8, 8, 16)          64        
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 4, 4, 16)          0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 256)              

In [23]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(),
              metrics=['accuracy'])
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=5,
                    validation_split=0.2)
test_scores = model.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

Train on 48000 samples, validate on 12000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
10000/10000 - 0s - loss: 0.0489 - accuracy: 0.9866
Test loss: 0.04888061691381736
Test accuracy: 0.9866
