# Batch Normalization
Batch Normalization(批归一化)可以加快神经网络的收敛速度

In [7]:
import torch
from torch import nn

In [8]:
# Toy batch: 5 samples (rows) x 4 features (columns), filled with 0..19 as float32.
data = torch.arange(20, dtype=torch.float32).view(5, 4)
data

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])

# 手动实现 Batch Normalization

In [9]:
# Manual batch normalization: every feature column is centred and scaled with
# statistics taken over the batch dimension (dim 0).
# Batch norm uses the biased variance estimate, hence unbiased=False.
batch_mean = data.mean(dim=0, keepdim=True)
batch_var = data.var(dim=0, unbiased=False, keepdim=True)
y = (data - batch_mean) / batch_var.sqrt()
y

tensor([[-1.4142, -1.4142, -1.4142, -1.4142],
        [-0.7071, -0.7071, -0.7071, -0.7071],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.7071,  0.7071,  0.7071,  0.7071],
        [ 1.4142,  1.4142,  1.4142,  1.4142]])

# torch验证

In [10]:
# BatchNorm1d over 4 features. eps is set to 0 so the layer's output can be
# compared directly with the manual formula, which adds no epsilon to the variance.
Batch_Norm = nn.BatchNorm1d(num_features=4, eps=0)

In [11]:
# Run the PyTorch layer on the same batch. The input has to be a floating-point
# tensor matching the layer's parameter dtype (float32 by default), which is why
# `data` was created with dtype=torch.float32.
bn_result = Batch_Norm(data)

# Make the verification explicit instead of comparing printouts by eye: the layer
# output must agree with the manual result `y` up to float32 rounding noise
# (the printed differences are on the order of 1e-8).
assert torch.allclose(bn_result.detach(), y, atol=1e-6), "BatchNorm1d output differs from the manual result"
bn_result

tensor([[-1.4142e+00, -1.4142e+00, -1.4142e+00, -1.4142e+00],
        [-7.0711e-01, -7.0711e-01, -7.0711e-01, -7.0711e-01],
        [ 0.0000e+00,  4.4703e-08, -2.9802e-08,  1.4901e-08],
        [ 7.0711e-01,  7.0711e-01,  7.0711e-01,  7.0711e-01],
        [ 1.4142e+00,  1.4142e+00,  1.4142e+00,  1.4142e+00]],
       grad_fn=<NativeBatchNormBackward0>)

In [12]:
# Per-feature mean over the batch dimension (dim 0); keepdim keeps the result as shape (1, 4).
mean = data.mean(dim=0, keepdim=True)
mean

tensor([[ 8.,  9., 10., 11.]])

In [13]:
# Per-feature biased variance (divides by N) over the batch dimension (dim 0).
var = data.var(dim=0, unbiased=False, keepdim=True)
var

tensor([[32., 32., 32., 32.]])

# BatchNorm2d

In [14]:
# 4-D toy input of shape (2, 3, 2, 2) filled with 0..23 as float32.
data_1 = torch.arange(24, dtype=torch.float32).view(2, 3, 2, 2)
data_1

tensor([[[[ 0.,  1.],
          [ 2.,  3.]],

         [[ 4.,  5.],
          [ 6.,  7.]],

         [[ 8.,  9.],
          [10., 11.]]],


        [[[12., 13.],
          [14., 15.]],

         [[16., 17.],
          [18., 19.]],

         [[20., 21.],
          [22., 23.]]]])

In [15]:
# Manual 2-D batch normalization: statistics are computed separately for each
# index of dim 1 by reducing over dims 0, 2 and 3 (batch, height and width in
# the (N, C, H, W) layout that nn.BatchNorm2d expects).
# The variance is the biased estimate (unbiased=False).
reduce_dims = [0, 2, 3]
channel_mean = data_1.mean(dim=reduce_dims, keepdim=True)
channel_var = data_1.var(dim=reduce_dims, unbiased=False, keepdim=True)
y_2d = (data_1 - channel_mean) / channel_var.sqrt()
y_2d

tensor([[[[-1.2288, -1.0650],
          [-0.9012, -0.7373]],

         [[-1.2288, -1.0650],
          [-0.9012, -0.7373]],

         [[-1.2288, -1.0650],
          [-0.9012, -0.7373]]],


        [[[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]],

         [[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]],

         [[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]]]])

In [16]:
# BatchNorm2d layer for inputs with 3 channels; all other arguments keep their defaults.
Batch_Norm2d = nn.BatchNorm2d(num_features=3)

In [17]:
# Run the PyTorch layer on the 4-D input.
bn_2d = Batch_Norm2d(data_1)

# Make the comparison explicit instead of reading the two printouts side by side.
# The layer was built with its default eps while the manual `y_2d` uses none, so
# the results agree only up to a small tolerance rather than exactly.
assert torch.allclose(bn_2d.detach(), y_2d, atol=1e-5), "BatchNorm2d output differs from the manual result"
bn_2d

tensor([[[[-1.2288, -1.0650],
          [-0.9012, -0.7373]],

         [[-1.2288, -1.0650],
          [-0.9012, -0.7373]],

         [[-1.2288, -1.0650],
          [-0.9012, -0.7373]]],


        [[[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]],

         [[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]],

         [[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]]]], grad_fn=<NativeBatchNormBackward0>)

# Summary
Batch Normalization 是对每个特征(通道)分别在整个 batch 上计算均值和方差,再进行标准化。
特别是对于形状为 (2,3,2,2) 的 4 维张量(N, C, H, W),
即对 3 个通道中的每个通道,在 N、H、W 三个维度上求均值和(有偏)方差。
例如第一个通道的均值:(0+1+2+3+12+13+14+15)/8 = 7.5

In [18]:
# Per-channel mean: reduce over dims 0, 2 and 3, keeping the result as shape (1, 3, 1, 1).
mean = data_1.mean(dim=(0, 2, 3), keepdim=True)
mean

tensor([[[[ 7.5000]],

         [[11.5000]],

         [[15.5000]]]])

In [16]:
# Manual 2-D batch normalization with per-channel statistics: mean and biased
# variance are reduced over dims 0, 2 and 3, then the input is centred and scaled.
# NOTE(review): this cell repeats an earlier cell of the notebook (same code, same output).
stat_dims = [0, 2, 3]
mu = data_1.mean(dim=stat_dims, keepdim=True)
sigma = data_1.var(dim=stat_dims, unbiased=False, keepdim=True).sqrt()
y_2d = (data_1 - mu) / sigma
y_2d

tensor([[[[-1.2288, -1.0650],
          [-0.9012, -0.7373]],

         [[-1.2288, -1.0650],
          [-0.9012, -0.7373]],

         [[-1.2288, -1.0650],
          [-0.9012, -0.7373]]],


        [[[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]],

         [[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]],

         [[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]]]])

In [18]:
# Fresh BatchNorm2d layer for 3-channel input, default settings otherwise.
# NOTE(review): this cell repeats an earlier cell of the notebook.
Batch_Norm2d = nn.BatchNorm2d(num_features=3)

In [19]:
# Apply the BatchNorm2d layer to the (2, 3, 2, 2) input; the displayed values can be
# compared with the manually computed `y_2d` shown in the preceding output.
# NOTE(review): this cell repeats an earlier cell of the notebook (same code, same output).
bn_2d = Batch_Norm2d(data_1)
bn_2d

tensor([[[[-1.2288, -1.0650],
          [-0.9012, -0.7373]],

         [[-1.2288, -1.0650],
          [-0.9012, -0.7373]],

         [[-1.2288, -1.0650],
          [-0.9012, -0.7373]]],


        [[[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]],

         [[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]],

         [[ 0.7373,  0.9012],
          [ 1.0650,  1.2288]]]], grad_fn=<NativeBatchNormBackward0>)

# Summary
Batch Normalization 是对每个特征(通道)分别在整个 batch 上计算均值和方差,再进行标准化。
特别是对于形状为 (2,3,2,2) 的 4 维张量(N, C, H, W),
即对 3 个通道中的每个通道,在 N、H、W 三个维度上求均值和(有偏)方差。
例如第一个通道的均值:(0+1+2+3+12+13+14+15)/8 = 7.5

In [20]:
# Per-channel mean of the 4-D input: reduce over dims 0, 2 and 3 and keep the
# reduced dims so the result has shape (1, 3, 1, 1).
# NOTE(review): this cell repeats an earlier cell of the notebook.
mean = data_1.mean(dim=(0, 2, 3), keepdim=True)
mean

tensor([[[[ 7.5000]],

         [[11.5000]],

         [[15.5000]]]])