![norm区别](images/norm.jpg)

In [1]:
import  torch
import torch.nn as nn

# BatchNorm

## 1d

In [2]:
input1d = torch.rand([1, 16, 8])

In [3]:
class BatchNorm1d(nn.Module):
    def __init__(self, num_features, eps=1e-5, affine=True, momentum=0.1):
        super(BatchNorm1d, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.momentum = momentum
        if self.affine:
            self.gamma = nn.Parameter(torch.ones(num_features))
            self.beta = nn.Parameter(torch.zeros(num_features))
        self.running_mean = torch.zeros(num_features)
        self.running_var = torch.ones(num_features)

    def forward(self, x):
        if self.training:
            mean = x.mean(dim=(0, 2), keepdim=True)
            var = x.var(dim=(0, 2), keepdim=True)

            self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean
            self.running_var = (1 - self.momentum) * self.running_var + self.momentum * var

            x = (x - mean) / torch.sqrt(var + self.eps)
        else:
            x = (x - self.running_mean) / torch.sqrt(self.running_var + self.eps)
            
        if self.affine:
            gamma = self.gamma.view(-1, self.num_features, 1)
            beta = self.beta.view(-1, self.num_features, 1)
            x = x * gamma + beta 
        else:
            x = x

        return x
        

In [4]:
model = BatchNorm1d(16)
output1d = model(input1d)
print(output1d)

tensor([[[ 0.5960,  0.8313,  0.6152, -1.4633,  0.4611,  0.9951, -1.2995,
          -0.7359],
         [-0.9752,  1.1073,  0.8393, -1.2079,  0.2049,  1.2339, -1.0320,
          -0.1703],
         [-0.7298, -1.0172, -1.0992,  1.4411,  0.4301, -0.3318,  1.3597,
          -0.0529],
         [ 0.3910,  0.8435,  1.3500, -1.4165, -1.2262,  0.0171,  0.6535,
          -0.6125],
         [ 1.2534, -0.8185,  0.4294,  1.4548, -0.1749, -0.6165, -0.1008,
          -1.4270],
         [-0.7613, -0.8938,  0.8515,  0.1755,  0.8813, -0.2646, -1.4117,
           1.4232],
         [ 1.8254, -0.7065, -1.0122,  0.1860,  0.1590,  0.5990,  0.2404,
          -1.2911],
         [-0.5346, -1.2030, -0.8009,  0.8029,  0.9485,  0.9422,  0.9668,
          -1.1218],
         [-1.4419,  1.2935,  0.6110,  0.9958, -1.0204,  0.0730, -0.8488,
           0.3379],
         [-0.6304, -0.1720,  0.2143, -1.8982,  1.4405,  0.9035,  0.1749,
          -0.0328],
         [ 0.6675, -1.5449,  0.7047, -0.1570, -1.5521,  0.8093,  0.592

## 2d

In [5]:
input2d = torch.rand([1, 16, 8, 8])

In [6]:
mean = input2d.mean(dim=(0, 2, 3), keepdim=True)
print(mean.shape)
std = input2d.std(dim=(0, 2, 3), keepdim=True)
print(std.shape)

torch.Size([1, 16, 1, 1])
torch.Size([1, 16, 1, 1])


In [7]:
eps = 1e-5
output2d = (input2d - mean) / (std + eps)

In [8]:
gamma = nn.Parameter(torch.ones(1))
beta = nn.Parameter(torch.zeros(1))

In [9]:
output2d = gamma * output2d + beta
print(output2d.shape)
print(output2d)

torch.Size([1, 16, 8, 8])
tensor([[[[-0.7997, -0.7983,  0.6877,  ...,  0.4897, -0.3405,  1.0869],
          [ 0.2796,  0.3648, -0.7188,  ..., -1.5560, -0.5061, -0.7525],
          [ 1.2428, -0.2352,  0.8057,  ..., -0.2242,  0.2509,  1.2705],
          ...,
          [-1.6327, -1.2960,  0.4697,  ..., -0.7320,  1.5242, -0.6756],
          [ 1.2305, -1.2716, -0.9035,  ..., -1.6069, -0.5505,  1.2934],
          [ 0.2192, -0.2585, -1.8619,  ...,  1.3145, -0.8481, -0.5181]],

         [[ 0.3205, -0.9347,  0.2362,  ..., -0.0274,  0.4405, -0.6519],
          [ 0.2510,  0.6514, -1.3852,  ...,  1.5202,  0.5433,  0.0304],
          [ 1.6673,  0.4863,  1.6395,  ...,  1.3789, -0.2588, -0.4868],
          ...,
          [ 1.4345, -0.8684, -0.3006,  ..., -0.6162,  0.5256,  0.8682],
          [ 0.2931,  1.0929, -0.9162,  ...,  1.0609,  1.5486,  0.0683],
          [-1.6798, -0.9895, -1.6714,  ...,  1.3219,  0.8478, -0.6009]],

         [[-0.9378, -1.7379,  0.3574,  ...,  1.2035, -0.7857,  1.1044],
    

In [10]:
class BatchNorm2d(nn.Module):
    def __init__(self, num_features, eps=1e-5, affine=True, momentum=0.1):
        super(BatchNorm2d, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.momentum = momentum
        if affine:
            self.gamma = nn.Parameter(torch.ones(num_features))
            self.beta = nn.Parameter(torch.zeros(num_features))
        self.running_mean = torch.zeros(num_features)
        self.running_var = torch.ones(num_features)

    def forward(self, x):
        if self.training:
            mean = x.mean(dim=(0, 2, 3), keepdim=True)
            var = x.var(dim=(0, 2, 3), keepdim=True)

            self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean
            self.running_var = (1 - self.momentum) * self.running_var + self.momentum * var

            x = (x - mean) / torch.sqrt(var + self.eps)
        else:
            x = (x - self.running_mean) / torch.sqrt(self.running_var + self.eps)
        
        if self.affine:
            gamma = self.gamma.view(-1, self.num_features, 1, 1)
            beta = self.beta.view(-1, self.num_features, 1, 1)
            x = x * gamma + beta
        else:
            x = x

        return  x

In [11]:
model = BatchNorm2d(16)
output2d = model(input2d)
print(output2d.shape)

torch.Size([1, 16, 8, 8])


## LayerNorm

## 1d

In [12]:
class LayerNorm1D(nn.Module):
    def __init__(self, num_features, eps=1e-5):
        super(LayerNorm1D, self).__init__()
        self.eps = eps
        self.num_features = num_features
        self.gamma = nn.Parameter(torch.ones(num_features))  # 缩放参数
        self.beta = nn.Parameter(torch.zeros(num_features))  # 偏移参数

    def forward(self, x):
        # 计算输入x的均值和方差
        mean = x.mean(dim=(1, 2), keepdim=True)
        var = x.var(dim=(1, 2), keepdim=True, unbiased=False)

        # 归一化
        x_normalized = (x - mean) / torch.sqrt(var + self.eps)
        
        gamma = self.gamma.view(-1, self.num_features, 1)
        beta = self.beta.view(-1, self.num_features, 1)
        
        # 缩放和偏移
        y = gamma * x_normalized + beta
        
        return y

In [13]:
input1d = torch.randn([1, 16, 8])
model = LayerNorm1D(16)
output1d = model(input1d)
print(output1d.shape)

torch.Size([1, 16, 8])


## 2d

In [14]:
class LayerNorm2D(nn.Module):
    def __init__(self, num_features, eps=1e-5):
        super(LayerNorm2D, self).__init__()
        self.eps = eps
        self.num_features = num_features
        self.gamma = nn.Parameter(torch.ones(num_features))  # 缩放参数
        self.beta = nn.Parameter(torch.zeros(num_features))  # 偏移参数

    def forward(self, x):
        # 计算输入x的均值和方差
        mean = x.mean(dim=(1, 2, 3), keepdim=True)
        var = x.var(dim=(1, 2, 3), keepdim=True, unbiased=False)

        # 归一化
        x_normalized = (x - mean) / torch.sqrt(var + self.eps)
        
        gamma = self.gamma.view(-1, self.num_features, 1, 1)
        beta = self.beta.view(-1, self.num_features, 1, 1)
        
        # 缩放和偏移
        y = gamma * x_normalized + beta
        
        return y

In [15]:
input2d = torch.randn([1, 16, 8, 8])
model = LayerNorm2D(16)
output2d = model(input2d)
print(output2d.shape)

torch.Size([1, 16, 8, 8])


## InstanceNorm

In [16]:
class InstanceNorm2d(nn.Module):
    def __init__(self, num_features, eps=1e-5, affine=True, momentum=0.1):
        super(InstanceNorm2d, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.gamma = nn.Parameter(torch.ones(num_features))
        self.beta = nn.Parameter(torch.zeros(num_features))

    def forward(self, x):
        mean = x.mean(dim=(2, 3), keepdim=True)
        var = x.var(dim=(2, 3), keepdim=True)

        x = (x - mean) / torch.sqrt(var + self.eps)
        
        gamma = self.gamma.view(-1, self.num_features, 1, 1)
        beta = self.beta.view(-1, self.num_features, 1, 1)

        return x * gamma + beta if self.affine else x

In [17]:
input2d = torch.randn([1, 16, 8, 8])
model = InstanceNorm2d(16)
output2d = model(input2d)
print(output2d.shape)

torch.Size([1, 16, 8, 8])


# WeightNorm

In [18]:
model = nn.Embedding(10, 20)
model.weight.shape

torch.Size([10, 20])

# GroupNorm

In [ ]:
class GroupNorm(nn.Module):
    def __init__(self, num_channels, num_groups, eps=1e-5, affine=True):
        super(GroupNorm, self).__init__()
        self.num_groups = num_groups
        self.eps = eps
        self.affine = affine
        if self.affine:
            self.gamma = nn.Parameter(torch.ones(1, num_channels, 1, 1))
            self.beta = nn.Parameter(torch.zeros(1, num_channels, 1, 1))
        
    def forward(self, x):
        N, C, H, W = x.shape
        # 确保通道数能被组数整除
        assert C % self.num_groups == 0, 'num_channels must be divisible by num_groups'
        
        # 重塑x以便将通道分组，形状为(N, G, C // G, H, W)
        x = x.view(N, self.num_groups, C // self.num_groups, H, W)
        # 计算每组的均值和方差
        mean = x.mean(dim=(2, 3, 4), keepdim=True)
        var = x.var(dim=(2, 3, 4), keepdim=True, unbiased=False)
        
        # 归一化
        x = (x - mean) / torch.sqrt(var + self.eps)
        
        # 改变形状回(N, C, H, W)
        x = x.view(N, C, H, W)
        return x * self.gamma + self.beta if self.affine else x
