In [1]:
# import transformers
import torch
from torch import nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# epsilon
eps = 1e-8

# 1.二维数据的情况

In [3]:
# 定义一个随机二维输入
batch_size = 3
feature_num = 4
torch.manual_seed(0)  # 设置随机种子，方便复现
inputs = torch.randn(batch_size, feature_num)
print('二维输入:\n', inputs)

二维输入:
 tensor([[ 1.5410, -0.2934, -2.1788,  0.5684],
        [-1.0845, -1.3986,  0.4033,  0.8380],
        [-0.7193, -0.4033, -0.5966,  0.1820]])


## 1.1.torch batchnorm

In [4]:
# torch自带的batchnorm
torch_bn = nn.BatchNorm1d(num_features=feature_num, affine=True)  # 注意完整的batchnorm要包括仿射变换

# 仿射变化初始化的weigh=1，bias=0，相当于没有进行变换，看不出效果
# 手动改成别的值，用于对比包含仿射变换的效果
torch.manual_seed(1)  # 设置随机种子，方便复现
torch_bn.weight = nn.Parameter(torch_bn.weight * torch.randn(feature_num))
torch_bn.bias = nn.Parameter(torch_bn.bias + torch.randn(feature_num))
print('weight:\n', torch_bn.weight)
print('bias:\n', torch_bn.bias, '\n')

# 结果
torch_normed = torch_bn(inputs)
print('torch bn结果:\n', torch_normed)

weight:
 Parameter containing:
tensor([0.6614, 0.2669, 0.0617, 0.6213], requires_grad=True)
bias:
 Parameter containing:
tensor([-0.4519, -0.1661, -1.5228,  0.3817], requires_grad=True) 

torch bn结果:
 tensor([[ 0.4756,  0.0513, -1.6033,  0.4715],
        [-1.0197, -0.5421, -1.4535,  1.0937],
        [-0.8117, -0.0077, -1.5115, -0.4202]],
       grad_fn=<NativeBatchNormBackward0>)


## 1.2.manual batchnorm

In [5]:
# 手动bn

# 计算均值和标准差
mean = torch.mean(inputs, dim=0, keepdim=True)
print('均值:\n', mean)
std = torch.std(inputs, dim=0, keepdim=True, unbiased=False)
print('标准差:\n', std, '\n')

manual_normed = (inputs - mean) / (std + eps) * torch_bn.weight + torch_bn.bias
print('手动bn结果:\n', manual_normed)

# 手动操作和torch自带操作有点误差，<1e-4
isclose = torch.isclose(torch_normed, manual_normed, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

均值:
 tensor([[-0.0876, -0.6985, -0.7907,  0.5295]])
标准差:
 tensor([[1.1612, 0.4971, 1.0630, 0.2692]]) 

手动bn结果:
 tensor([[ 0.4756,  0.0514, -1.6033,  0.4715],
        [-1.0197, -0.5421, -1.4535,  1.0937],
        [-0.8117, -0.0077, -1.5115, -0.4202]], grad_fn=<AddBackward0>)
验证结果:
 tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## 1.3.torch layernorm

In [6]:
# torch自带的layernorm
torch_ln = nn.LayerNorm(normalized_shape=feature_num, elementwise_affine=True)  # 注意完整的layernorm要包括仿射变换

# 仿射变化初始化的weigh=1，bias=0，相当于没有进行变换，看不出效果
# 手动改成别的值，用于对比包含仿射变换的效果
torch.manual_seed(2)  # 设置随机种子，方便复现
torch_ln.weight = nn.Parameter(torch_ln.weight * torch.randn(feature_num))
torch_ln.bias = nn.Parameter(torch_ln.bias + torch.randn(feature_num))
print('weight:\n', torch_ln.weight)
print('bias:\n', torch_ln.bias, '\n')

# 结果
torch_normed = torch_ln(inputs)
print('torch ln结果:\n', torch_normed)

weight:
 Parameter containing:
tensor([ 0.3923, -0.2236, -0.3195, -1.2050], requires_grad=True)
bias:
 Parameter containing:
tensor([ 1.0445, -0.6332,  0.5731,  0.5409], requires_grad=True) 

torch ln结果:
 tensor([[ 1.5120, -0.6001,  1.0604, -0.0392],
        [ 0.7249, -0.3772,  0.3331, -0.9155],
        [ 0.6645, -0.6209,  0.7693, -1.4324]],
       grad_fn=<NativeLayerNormBackward0>)


## 1.4.manual layernorm

In [7]:
# 手动ln

# 计算均值
mean = torch.mean(inputs, dim=1, keepdim=True)
print('均值:\n', mean)
std = torch.std(inputs, dim=1, keepdim=True, unbiased=False)
print('标准差:\n', std, '\n')

manual_normed = (inputs - mean) / (std + eps) * torch_ln.weight + torch_ln.bias
print('手动ln结果:\n', manual_normed)

# 手动操作和torch自带操作有点误差，<1e-4
isclose = torch.isclose(torch_normed, manual_normed, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

均值:
 tensor([[-0.0907],
        [-0.3104],
        [-0.3843]])
标准差:
 tensor([[1.3691],
        [0.9502],
        [0.3458]]) 

手动ln结果:
 tensor([[ 1.5120, -0.6001,  1.0604, -0.0392],
        [ 0.7249, -0.3772,  0.3331, -0.9155],
        [ 0.6645, -0.6209,  0.7693, -1.4325]], grad_fn=<AddBackward0>)
验证结果:
 tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


# 2.CV数据的情况

In [8]:
# 定义一个随机四维输入，[N,C,H,W]
batch_size = 2
channel = 2
height = 2
width = 3
torch.manual_seed(3)  # 设置随机种子，方便复现
inputs = torch.randn(batch_size, channel, height, width)
print('四维输入:\n', inputs)

四维输入:
 tensor([[[[-0.0766,  0.3599, -0.7820],
          [ 0.0715,  0.6648, -0.2868]],

         [[ 1.6206, -1.5967,  0.4046],
          [ 0.6113,  0.7604, -0.0336]]],


        [[[-0.3448,  0.4937, -0.0776],
          [-1.8054,  0.4851,  0.2052]],

         [[ 0.3384,  1.3528,  0.3736],
          [ 0.0134,  0.7737, -0.1092]]]])


## 2.1.torch batchnorm

In [9]:
# torch自带的batchnorm
torch_bn = nn.BatchNorm2d(num_features=channel, affine=True)  # 注意完整的batchnorm要包括仿射变换

# 仿射变化初始化的weigh=1，bias=0，相当于没有进行变换，看不出效果
# 手动改成别的值，用于对比包含仿射变换的效果
torch.manual_seed(4)  # 设置随机种子，方便复现
torch_bn.weight = nn.Parameter(torch_bn.weight * torch.randn(channel))
torch_bn.bias = nn.Parameter(torch_bn.bias + torch.randn(channel))
print('weight:\n', torch_bn.weight)
print('bias:\n', torch_bn.bias, '\n')

# 结果
torch_normed = torch_bn(inputs)
print('torch bn结果:\n', torch_normed)

weight:
 Parameter containing:
tensor([-1.6053,  0.2325], requires_grad=True)
bias:
 Parameter containing:
tensor([2.2399, 0.8473], requires_grad=True) 

torch bn结果:
 tensor([[[[2.2043, 1.1275, 3.9442],
          [1.8388, 0.3753, 2.7226]],

         [[1.2185, 0.2591, 0.8559],
          [0.9175, 0.9620, 0.7252]]],


        [[[2.8658, 0.7975, 2.2066],
          [6.4684, 0.8186, 1.5090]],

         [[0.8362, 1.1387, 0.8467],
          [0.7392, 0.9660, 0.7027]]]], grad_fn=<NativeBatchNormBackward0>)


## 2.2.manual batchnorm

In [10]:
# 手动bn

manual_normed = []
# 每个channel分别处理
for c in range(channel):
    # 计算均值和标准差
    mean = torch.mean(inputs[:, c, :, :])
    std = torch.std(inputs[:, c, :, :], unbiased=False)
    normed = (inputs[:, c, :, :] - mean) / (std + eps) * torch_bn.weight[c] + torch_bn.bias[c]
    normed = normed.unsqueeze(1)
    manual_normed.append(normed)
manual_normed = torch.cat(manual_normed, 1)
print('手动bn结果:\n', manual_normed)

# 手动操作和torch自带操作有点误差，<1e-4
isclose = torch.isclose(torch_normed, manual_normed, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

手动bn结果:
 tensor([[[[2.2043, 1.1275, 3.9442],
          [1.8388, 0.3752, 2.7226]],

         [[1.2185, 0.2591, 0.8559],
          [0.9175, 0.9620, 0.7252]]],


        [[[2.8658, 0.7975, 2.2066],
          [6.4685, 0.8186, 1.5089]],

         [[0.8362, 1.1387, 0.8467],
          [0.7392, 0.9660, 0.7027]]]], grad_fn=<CatBackward0>)
验证结果:
 tensor([[[[True, True, True],
          [True, True, True]],

         [[True, True, True],
          [True, True, True]]],


        [[[True, True, True],
          [True, True, True]],

         [[True, True, True],
          [True, True, True]]]])


## 2.3.torch layernorm

In [11]:
# torch自带的layernorm
torch_ln = nn.LayerNorm(
    normalized_shape=[channel, height, width], 
    elementwise_affine=True
)  # 注意完整的layernorm要包括仿射变换

# 仿射变化初始化的weigh=1，bias=0，相当于没有进行变换，看不出效果
# 手动改成别的值，用于对比包含仿射变换的效果
torch.manual_seed(5)  # 设置随机种子，方便复现
torch_ln.weight = nn.Parameter(torch_ln.weight * torch.randn(channel, height, width))
torch_ln.bias = nn.Parameter(torch_ln.bias + torch.randn(channel, height, width))
print('weight:\n', torch_ln.weight)
print('bias:\n', torch_ln.bias, '\n')

# 结果
torch_normed = torch_ln(inputs)
print('torch ln结果:\n', torch_normed)

weight:
 Parameter containing:
tensor([[[-0.4868, -0.6038, -0.5581],
         [ 0.6675, -0.1974,  1.9428]],

        [[-1.4017, -0.7626,  0.6312],
         [-0.8991, -0.5578,  0.6907]]], requires_grad=True)
bias:
 Parameter containing:
tensor([[[ 0.2225, -0.6662,  0.6846],
         [ 0.5740, -0.5829,  0.7679]],

        [[ 0.0571, -1.1894, -0.5659],
         [-0.8327,  0.9014,  0.2116]]], requires_grad=True) 

torch ln结果:
 tensor([[[[ 0.3594, -0.8338,  1.3456],
          [ 0.5128, -0.7147, -0.3012]],

         [[-2.5939,  0.5089, -0.3546],
          [-1.3715,  0.4607,  0.0553]]],


        [[[ 0.5477, -0.9583,  0.8526],
          [-1.2112, -0.6760,  0.9378]],

         [[-0.3219, -2.4580, -0.3647],
          [-0.6744,  0.4171, -0.0264]]]], grad_fn=<NativeLayerNormBackward0>)


## 2.4.manual layernorm

In [12]:
# 手动ln

manual_normed = []
# 每个channel分别处理
for b in range(batch_size):
    # 计算均值和标准差
    mean = torch.mean(inputs[b, :, :, :])
    std = torch.std(inputs[b, :, :, :], unbiased=False)
    normed = (inputs[b, :, :, :] - mean) / (std + eps) * torch_ln.weight + torch_ln.bias
    normed = normed.unsqueeze(0)
    manual_normed.append(normed)
manual_normed = torch.cat(manual_normed, 0)
print('手动ln结果:\n', manual_normed)

# 手动操作和torch自带操作有点误差，<1e-4
isclose = torch.isclose(torch_normed, manual_normed, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

手动ln结果:
 tensor([[[[ 0.3594, -0.8338,  1.3456],
          [ 0.5128, -0.7147, -0.3012]],

         [[-2.5939,  0.5090, -0.3546],
          [-1.3715,  0.4607,  0.0553]]],


        [[[ 0.5477, -0.9583,  0.8527],
          [-1.2112, -0.6760,  0.9378]],

         [[-0.3219, -2.4581, -0.3647],
          [-0.6744,  0.4171, -0.0264]]]], grad_fn=<CatBackward0>)
验证结果:
 tensor([[[[True, True, True],
          [True, True, True]],

         [[True, True, True],
          [True, True, True]]],


        [[[True, True, True],
          [True, True, True]],

         [[True, True, True],
          [True, True, True]]]])


## 2.5.torch instancenorm

In [13]:
# torch自带的instancenorm
torch_in = nn.InstanceNorm2d(num_features=channel, affine=True)  # 注意完整的instancenorm要包括仿射变换

# 仿射变化初始化的weigh=1，bias=0，相当于没有进行变换，看不出效果
# 手动改成别的值，用于对比包含仿射变换的效果
torch.manual_seed(5)  # 设置随机种子，方便复现
torch_in.weight = nn.Parameter(torch_in.weight * torch.randn(channel))
torch_in.bias = nn.Parameter(torch_in.bias + torch.randn(channel))
print('weight:\n', torch_in.weight)
print('bias:\n', torch_in.bias, '\n')

# 结果
torch_normed = torch_in(inputs)
print('torch in结果:\n', torch_normed)

weight:
 Parameter containing:
tensor([-0.4868, -0.6038], requires_grad=True)
bias:
 Parameter containing:
tensor([-0.5581,  0.6675], requires_grad=True) 

torch in结果:
 tensor([[[[-0.4858, -0.9466,  0.2587],
          [-0.6423, -1.2685, -0.2640]],

         [[-0.1489,  1.8317,  0.5997],
          [ 0.4724,  0.3807,  0.8695]]],


        [[[-0.4525, -0.9706, -0.6177],
          [ 0.4499, -0.9653, -0.7924]],

         [[ 0.8137, -0.4360,  0.7704],
          [ 1.2142,  0.2775,  1.3653]]]], grad_fn=<ViewBackward0>)


## 2.6.manual instancenorm

In [14]:
# 手动in

mean = torch.mean(inputs, dim=(2, 3),keepdim=True)
std = torch.std(inputs, dim=(2, 3), keepdim=True, unbiased=False)
in_weight = torch_in.weight.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
in_bias = torch_in.bias.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
manual_normed = (inputs - mean) / (std + eps) * in_weight + in_bias
print('手动in结果:\n', manual_normed)

# 手动操作和torch自带操作有点误差，<1e-4
isclose = torch.isclose(torch_normed, manual_normed, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

手动in结果:
 tensor([[[[-0.4858, -0.9466,  0.2587],
          [-0.6423, -1.2686, -0.2640]],

         [[-0.1489,  1.8317,  0.5997],
          [ 0.4724,  0.3807,  0.8695]]],


        [[[-0.4525, -0.9706, -0.6177],
          [ 0.4499, -0.9653, -0.7924]],

         [[ 0.8138, -0.4360,  0.7704],
          [ 1.2142,  0.2775,  1.3653]]]], grad_fn=<AddBackward0>)
验证结果:
 tensor([[[[True, True, True],
          [True, True, True]],

         [[True, True, True],
          [True, True, True]]],


        [[[True, True, True],
          [True, True, True]],

         [[True, True, True],
          [True, True, True]]]])


# 3.NLP数据的情况

In [15]:
# 定义一个随机三维输入，[N,S,H]
batch_size = 2
seq_len = 3
hidden_size = 4
torch.manual_seed(6)  # 设置随机种子，方便复现
inputs = torch.randn(batch_size, seq_len, hidden_size)
print('三维输入:\n', inputs)

三维输入:
 tensor([[[-1.2113,  0.6304, -1.4713, -1.3352],
         [-0.4897,  0.1317,  0.3295,  0.3264],
         [ 1.0322,  0.8266,  0.1186, -0.6232]],

        [[-0.3106,  0.0627,  0.8672, -0.0738],
         [-0.9251,  0.5594, -0.6340, -1.8015],
         [ 0.6142,  1.0554, -0.7899,  0.2525]]])


## 3.1.torch batchnorm

In [16]:
# torch自带的batchnorm
torch_bn = nn.BatchNorm1d(num_features=hidden_size, affine=True)  # 注意完整的batchnorm要包括仿射变换

# 仿射变化初始化的weigh=1，bias=0，相当于没有进行变换，看不出效果
# 手动改成别的值，用于对比包含仿射变换的效果
torch.manual_seed(7)  # 设置随机种子，方便复现
torch_bn.weight = nn.Parameter(torch_bn.weight * torch.randn(hidden_size))
torch_bn.bias = nn.Parameter(torch_bn.bias + torch.randn(hidden_size))
print('weight:\n', torch_bn.weight)
print('bias:\n', torch_bn.bias, '\n')

# # 结果
torch_normed = torch_bn(inputs.transpose(1, 2)).transpose(1, 2)
print('torch bn结果:\n', torch_normed)

weight:
 Parameter containing:
tensor([-0.1468,  0.7861,  0.9468, -1.1143], requires_grad=True)
bias:
 Parameter containing:
tensor([ 1.6908, -0.8948, -0.3556,  1.2324], requires_grad=True) 

torch bn结果:
 tensor([[[ 1.8740, -0.7037, -1.8222,  2.3385],
         [ 1.7413, -1.8119,  0.3641,  0.0200],
         [ 1.4615, -0.2676,  0.1081,  1.3450]],

        [[ 1.7084, -1.9653,  1.0169,  0.5785],
         [ 1.8213, -0.8614, -0.8056,  2.9892],
         [ 1.5383,  0.2409, -0.9949,  0.1231]]], grad_fn=<TransposeBackward0>)


## 3.2.manual batchnorm

In [17]:
# 手动bn

# 计算均值
mean = torch.mean(inputs, dim=(0, 1) , keepdim=True)
print('均值:\n', mean)
std = torch.std(inputs, dim=(0, 1), keepdim=True, unbiased=False)
print('标准差:\n', std, '\n')

manual_normed = (inputs - mean) / (std + eps) * torch_bn.weight + torch_bn.bias
print('手动bn结果:\n', manual_normed)

# 手动操作和torch自带操作有点误差，<1e-4
isclose = torch.isclose(torch_normed, manual_normed, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

均值:
 tensor([[[-0.2151,  0.5444, -0.2633, -0.5424]]])
标准差:
 tensor([[[0.7984, 0.3537, 0.7799, 0.7986]]]) 

手动bn结果:
 tensor([[[ 1.8740, -0.7037, -1.8222,  2.3385],
         [ 1.7413, -1.8119,  0.3641,  0.0200],
         [ 1.4615, -0.2676,  0.1081,  1.3450]],

        [[ 1.7084, -1.9653,  1.0169,  0.5785],
         [ 1.8213, -0.8614, -0.8056,  2.9892],
         [ 1.5383,  0.2409, -0.9950,  0.1231]]], grad_fn=<AddBackward0>)
验证结果:
 tensor([[[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]],

        [[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]]])


## 3.3.torch layernorm

In [18]:
# torch自带的layernorm
torch_ln = nn.LayerNorm(normalized_shape=hidden_size, elementwise_affine=True)  # 注意完整的layernorm要包括仿射变换

# 仿射变化初始化的weigh=1，bias=0，相当于没有进行变换，看不出效果
# 手动改成别的值，用于对比包含仿射变换的效果
torch.manual_seed(8)  # 设置随机种子，方便复现
torch_ln.weight = nn.Parameter(torch_ln.weight * torch.randn(hidden_size))
torch_ln.bias = nn.Parameter(torch_ln.bias + torch.randn(hidden_size))
print('weight:\n', torch_ln.weight)
print('bias:\n', torch_ln.bias, '\n')

# 结果
torch_normed = torch_ln(inputs)
print('torch ln结果:\n', torch_normed)

weight:
 Parameter containing:
tensor([ 0.2713, -1.2729,  0.5027,  0.4181], requires_grad=True)
bias:
 Parameter containing:
tensor([-0.6394, -0.6608, -0.1433, -0.1043], requires_grad=True) 

torch ln结果:
 tensor([[[-0.7547, -2.8528, -0.5092, -0.3423],
         [-1.0957, -0.8780,  0.2388,  0.2097],
         [-0.3502, -1.6158, -0.3133, -0.7224]],

        [[-0.9134, -0.4490,  0.6868, -0.3029],
         [-0.7116, -2.5589, -0.1039, -0.6493],
         [-0.5076, -2.1031, -0.9346, -0.1230]]],
       grad_fn=<NativeLayerNormBackward0>)


## 3.4.manual layernorm

In [19]:
# 手动ln

# 计算均值
mean = torch.mean(inputs, dim=2, keepdim=True)
print('均值:\n', mean)
std = torch.std(inputs, dim=2, keepdim=True, unbiased=False)
print('标准差:\n', std, '\n')

manual_normed = (inputs - mean) / (std + eps) * torch_ln.weight + torch_ln.bias
print('手动ln结果:\n', manual_normed)

# 手动操作和torch自带操作有点误差，<1e-4
isclose = torch.isclose(torch_normed, manual_normed, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

均值:
 tensor([[[-0.8469],
         [ 0.0745],
         [ 0.3386]],

        [[ 0.1364],
         [-0.7003],
         [ 0.2831]]])
标准差:
 tensor([[[0.8578],
         [0.3354],
         [0.6505]],

        [[0.4426],
         [0.8448],
         [0.6816]]]) 

手动ln结果:
 tensor([[[-0.7547, -2.8528, -0.5092, -0.3423],
         [-1.0957, -0.8780,  0.2388,  0.2097],
         [-0.3502, -1.6158, -0.3133, -0.7224]],

        [[-0.9134, -0.4490,  0.6868, -0.3029],
         [-0.7116, -2.5590, -0.1039, -0.6493],
         [-0.5076, -2.1031, -0.9347, -0.1230]]], grad_fn=<AddBackward0>)
验证结果:
 tensor([[[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]],

        [[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]]])


## 3.5.torch instancenorm

In [20]:
# torch自带的instancenorm
torch_in = nn.InstanceNorm1d(num_features=seq_len, affine=True)  # 注意完整的instancenorm要包括仿射变换

# 仿射变化初始化的weigh=1，bias=0，相当于没有进行变换，看不出效果
# 手动改成别的值，用于对比包含仿射变换的效果
torch.manual_seed(9)  # 设置随机种子，方便复现
torch_in.weight = nn.Parameter(torch_in.weight * torch.randn(seq_len))
torch_in.bias = nn.Parameter(torch_in.bias + torch.randn(seq_len))
print('weight:\n', torch_in.weight)
print('bias:\n', torch_in.bias, '\n')

# 结果
torch_normed = torch_in(inputs)
print('torch in结果:\n', torch_normed)

weight:
 Parameter containing:
tensor([ 0.0447,  1.9112, -0.2310], requires_grad=True)
bias:
 Parameter containing:
tensor([0.3459, 1.3180, 0.3696], requires_grad=True) 

torch in结果:
 tensor([[[ 0.3269,  0.4230,  0.3134,  0.3205],
         [-1.8964,  1.6442,  2.7709,  2.7534],
         [ 0.1233,  0.1963,  0.4477,  0.7112]],

        [[ 0.3008,  0.3385,  0.4198,  0.3247],
         [ 0.8094,  4.1681,  1.4680, -1.1734],
         [ 0.2574,  0.1079,  0.7333,  0.3800]]], grad_fn=<ViewBackward0>)


## 3.6.manual instancenorm

In [21]:
# 手动in

mean = torch.mean(inputs, dim=2, keepdim=True)
std = torch.std(inputs, dim=2, keepdim=True, unbiased=False)
in_weight = torch_in.weight.unsqueeze(0).unsqueeze(-1)
in_bias = torch_in.bias.unsqueeze(0).unsqueeze(-1)
manual_normed = (inputs - mean) / (std + eps) * in_weight + in_bias
print('手动in结果:\n', manual_normed)

# 手动操作和torch自带操作有点误差，<1e-4
isclose = torch.isclose(torch_normed, manual_normed, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

手动in结果:
 tensor([[[ 0.3269,  0.4230,  0.3134,  0.3205],
         [-1.8965,  1.6442,  2.7710,  2.7535],
         [ 0.1233,  0.1963,  0.4477,  0.7112]],

        [[ 0.3008,  0.3385,  0.4198,  0.3247],
         [ 0.8094,  4.1681,  1.4680, -1.1734],
         [ 0.2574,  0.1079,  0.7333,  0.3800]]], grad_fn=<AddBackward0>)
验证结果:
 tensor([[[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]],

        [[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]]])


# 能否变回去

## 二维的情况

In [22]:
# 定义一个随机二维输入
batch_size = 3
feature_num = 4
torch.manual_seed(0)  # 设置随机种子，方便复现
inputs = torch.randn(batch_size, feature_num)
print('二维输入:\n', inputs)

# 计算均值和标准差
mean = torch.mean(inputs, dim=0, keepdim=True)
# print('均值:\n', mean)
std = torch.std(inputs, dim=0, keepdim=True, unbiased=False)
# print('标准差:\n', std, '\n')

# torch自带的batchnorm
torch_bn = nn.BatchNorm1d(num_features=feature_num, affine=True)

# 把仿射变换的缩放和平移替换为标准差和均值
torch_bn.weight = nn.Parameter(std)
torch_bn.bias =  nn.Parameter(mean)
# print('weight:\n', torch_bn.weight)
# print('bias:\n', torch_bn.bias, '\n')

# 结果
torch_normed = torch_bn(inputs)
print('torch bn结果:\n', torch_normed)

isclose = torch.isclose(torch_normed, inputs, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

二维输入:
 tensor([[ 1.5410, -0.2934, -2.1788,  0.5684],
        [-1.0845, -1.3986,  0.4033,  0.8380],
        [-0.7193, -0.4033, -0.5966,  0.1820]])
torch bn结果:
 tensor([[ 1.5410, -0.2934, -2.1788,  0.5684],
        [-1.0845, -1.3986,  0.4033,  0.8380],
        [-0.7193, -0.4033, -0.5966,  0.1821]],
       grad_fn=<NativeBatchNormBackward0>)
验证结果:
 tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


In [23]:
print('二维输入:\n', inputs)

# 计算均值和标准差
mean = torch.mean(inputs, dim=1, keepdim=True)
# print('均值:\n', mean)
std = torch.std(inputs, dim=1, keepdim=True, unbiased=False)
# print('标准差:\n', std, '\n')

# torch自带的layernorm
torch_ln = nn.LayerNorm(normalized_shape=feature_num, elementwise_affine=True)  # 注意完整的layernorm要包括仿射变换

# 把仿射变换的缩放和平移替换为标准差和均值
torch_bn.weight = nn.Parameter(std)
torch_bn.bias =  nn.Parameter(mean)
# print('weight:\n', torch_bn.weight)
# print('bias:\n', torch_bn.bias, '\n')

# 结果
torch_normed = torch_ln(inputs)
print('torch ln结果:\n', torch_normed)

isclose = torch.isclose(torch_normed, inputs, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

二维输入:
 tensor([[ 1.5410, -0.2934, -2.1788,  0.5684],
        [-1.0845, -1.3986,  0.4033,  0.8380],
        [-0.7193, -0.4033, -0.5966,  0.1820]])
torch ln结果:
 tensor([[ 1.1918, -0.1481, -1.5251,  0.4814],
        [-0.8146, -1.1451,  0.7512,  1.2086],
        [-0.9685, -0.0551, -0.6140,  1.6376]],
       grad_fn=<NativeLayerNormBackward0>)
验证结果:
 tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


## CV数据的情况

In [24]:
# 定义一个随机四维输入，[N,C,H,W]
batch_size = 2
channel = 2
height = 2
width = 3
torch.manual_seed(3)  # 设置随机种子，方便复现
inputs = torch.randn(batch_size, channel, height, width)
print('四维输入:\n', inputs)

# 计算均值和标准差
mean = torch.mean(inputs, dim=(0, 2, 3), keepdim=True)
print('均值:\n', mean)
std = torch.std(inputs, dim=(0, 2, 3), keepdim=True, unbiased=False)
print('标准差:\n', std, '\n')

# torch自带的batchnorm
torch_bn = nn.BatchNorm2d(num_features=channel, affine=True)  # 注意完整的batchnorm要包括仿射变换

# 把仿射变换的缩放和平移替换为标准差和均值
torch_bn.weight = nn.Parameter(std)
torch_bn.bias =  nn.Parameter(mean)
print('weight:\n', torch_bn.weight)
print('bias:\n', torch_bn.bias, '\n')

# 结果
torch_normed = torch_bn(inputs)
print('torch bn结果:\n', torch_normed)

isclose = torch.isclose(torch_normed, inputs, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

四维输入:
 tensor([[[[-0.0766,  0.3599, -0.7820],
          [ 0.0715,  0.6648, -0.2868]],

         [[ 1.6206, -1.5967,  0.4046],
          [ 0.6113,  0.7604, -0.0336]]],


        [[[-0.3448,  0.4937, -0.0776],
          [-1.8054,  0.4851,  0.2052]],

         [[ 0.3384,  1.3528,  0.3736],
          [ 0.0134,  0.7737, -0.1092]]]])
均值:
 tensor([[[[-0.0911]],

         [[ 0.3758]]]])
标准差:
 tensor([[[[0.6508]],

         [[0.7796]]]]) 

weight:
 Parameter containing:
tensor([[[[0.6508]],

         [[0.7796]]]], requires_grad=True)
bias:
 Parameter containing:
tensor([[[[-0.0911]],

         [[ 0.3758]]]], requires_grad=True) 

torch bn结果:
 tensor([[[[-0.0766,  0.3599, -0.7820],
          [ 0.0715,  0.6648, -0.2868]],

         [[ 1.6205, -1.5967,  0.4046],
          [ 0.6113,  0.7604, -0.0336]]],


        [[[-0.3448,  0.4936, -0.0776],
          [-1.8053,  0.4851,  0.2052]],

         [[ 0.3384,  1.3527,  0.3736],
          [ 0.0134,  0.7737, -0.1092]]]], grad_fn=<NativeBatchNormBackward0>)

## NLP数据的情况

In [25]:
# 定义一个随机三维输入，[N,S,H]
batch_size = 2
seq_len = 3
hidden_size = 4
inputs = torch.randn(batch_size, seq_len, hidden_size)
print('三维输入:\n', inputs)

# 计算均值
mean = torch.mean(inputs, dim=(0, 1) , keepdim=True)
print('均值:\n', mean)
std = torch.std(inputs, dim=(0, 1), keepdim=True, unbiased=False)
print('标准差:\n', std, '\n')

# torch自带的batchnorm
torch_bn = nn.BatchNorm1d(num_features=hidden_size, affine=True)  # 注意完整的batchnorm要包括仿射变换

# # 把仿射变换的缩放和平移替换为标准差和均值
torch_bn.weight = nn.Parameter(torch.squeeze(std))
torch_bn.bias =  nn.Parameter(torch.squeeze(mean))
print('weight:\n', torch_bn.weight)
print('bias:\n', torch_bn.bias, '\n')

# 结果
torch_normed = torch_bn(inputs.transpose(1, 2)).transpose(1, 2)
print('torch bn结果:\n', torch_normed)

isclose = torch.isclose(torch_normed, inputs, rtol=1e-4, atol=1e-4)
print('验证结果:\n', isclose)

三维输入:
 tensor([[[-1.1963,  1.0280,  0.0719, -0.1845],
         [-1.5159,  0.9125,  0.2539, -0.6924],
         [-0.0752, -0.4233,  0.4217, -0.2576]],

        [[-1.5835,  1.3960, -1.0319,  1.1391],
         [ 0.5125, -0.0198, -1.1216, -0.4891],
         [-0.6336, -0.7893, -0.8977, -1.8876]]])
均值:
 tensor([[[-0.7487,  0.3507, -0.3840, -0.3953]]])
标准差:
 tensor([[[0.7685, 0.8065, 0.6444, 0.8890]]]) 

weight:
 Parameter containing:
tensor([0.7685, 0.8065, 0.6444, 0.8890], requires_grad=True)
bias:
 Parameter containing:
tensor([-0.7487,  0.3507, -0.3840, -0.3953], requires_grad=True) 

torch bn结果:
 tensor([[[-1.1963,  1.0280,  0.0719, -0.1845],
         [-1.5159,  0.9125,  0.2539, -0.6924],
         [-0.0752, -0.4233,  0.4217, -0.2576]],

        [[-1.5835,  1.3960, -1.0319,  1.1391],
         [ 0.5125, -0.0198, -1.1216, -0.4891],
         [-0.6336, -0.7893, -0.8977, -1.8876]]], grad_fn=<TransposeBackward0>)
验证结果:
 tensor([[[True, True, True, True],
         [True, True, True, True],
      