In [10]:
import torch
import torch.nn as nn

# ---- Tensor geometry -------------------------------------------------------
BATCH_SIZE = 2
CHANNEL_IN = 6
CHANNEL_OUT = 3
HEIGHT_IN = 8
WIDTH_IN = 8

# ---- Convolution hyper-parameters -------------------------------------------
KERNEL_SIZE = 3
PADDING = 0
STRIDE = 1
GROUP = 3

# Spatial size of the convolution output (dilation 1):
#     out = floor((in + 2 * padding - kernel) / stride) + 1
# Integer floor division keeps the result an exact int instead of going
# through float division followed by int() truncation.
HEIGHT_OUT = (HEIGHT_IN + 2 * PADDING - KERNEL_SIZE) // STRIDE + 1
WIDTH_OUT = (WIDTH_IN + 2 * PADDING - KERNEL_SIZE) // STRIDE + 1

# ---- Batch-norm parameters ----------------------------------------------------
# RUNNING_MEAN and RUNNING_VAR need one entry per output channel (CHANNEL_OUT).
# The active values match the per-channel mean and biased variance of the
# convolution output printed further down in this notebook.
# Alternative presets kept from the original; presumably the statistics of an
# already-normalised input (mean = beta, var = gamma ** 2) -- TODO confirm:
# RUNNING_MEAN = [0.2, 0.2, 0.2]
# RUNNING_VAR = [0.25, 0.25, 0.25]
RUNNING_MEAN = [ 82, 227, 444]
RUNNING_VAR = [ 945, 3780, 8505]
gamma = 0.5  # batch-norm scale, written into BatchNorm2d.weight later on
beta = 0.2  # batch-norm shift, written into BatchNorm2d.bias later on
momentum = 0  # passed to nn.BatchNorm2d as its running-statistics momentum

In [6]:
# Deterministic input: every element equals (row index + channel index),
# built by broadcasting two index ranges over a zero tensor.
x = (
    torch.zeros(BATCH_SIZE, CHANNEL_IN, HEIGHT_IN, WIDTH_IN)
    + torch.arange(HEIGHT_IN).view(1, 1, HEIGHT_IN, 1)
    + torch.arange(CHANNEL_IN).view(1, CHANNEL_IN, 1, 1)
)

# With grouped convolution each filter only sees CHANNEL_IN / GROUP channels.
KERNEL_CHANNEL = int(CHANNEL_IN / GROUP)

# Deterministic weights: every element equals (kernel column + output channel).
kernel = (
    torch.zeros((CHANNEL_OUT, KERNEL_CHANNEL, KERNEL_SIZE, KERNEL_SIZE))
    + torch.arange(KERNEL_SIZE).view(1, 1, 1, KERNEL_SIZE)
    + torch.arange(CHANNEL_OUT).view(CHANNEL_OUT, 1, 1, 1)
)

# Deterministic bias: 10, 11, 12, ... one value per output channel.
bias = torch.zeros((CHANNEL_OUT,)) + torch.arange(CHANNEL_OUT) + 10

# Grouped convolution layer whose parameters are replaced by the tensors above.
conv = torch.nn.Conv2d(
    in_channels=CHANNEL_IN,
    out_channels=CHANNEL_OUT,
    kernel_size=KERNEL_SIZE,
    stride=STRIDE,
    padding=PADDING,
    groups=GROUP,
    bias=True,
)
conv.weight.data = kernel
conv.bias.data = bias

afterConv = conv(x)

print(afterConv)

tensor([[[[ 37.,  37.,  37.,  37.,  37.,  37.],
          [ 55.,  55.,  55.,  55.,  55.,  55.],
          [ 73.,  73.,  73.,  73.,  73.,  73.],
          [ 91.,  91.,  91.,  91.,  91.,  91.],
          [109., 109., 109., 109., 109., 109.],
          [127., 127., 127., 127., 127., 127.]],

         [[137., 137., 137., 137., 137., 137.],
          [173., 173., 173., 173., 173., 173.],
          [209., 209., 209., 209., 209., 209.],
          [245., 245., 245., 245., 245., 245.],
          [281., 281., 281., 281., 281., 281.],
          [317., 317., 317., 317., 317., 317.]],

         [[309., 309., 309., 309., 309., 309.],
          [363., 363., 363., 363., 363., 363.],
          [417., 417., 417., 417., 417., 417.],
          [471., 471., 471., 471., 471., 471.],
          [525., 525., 525., 525., 525., 525.],
          [579., 579., 579., 579., 579., 579.]]],


        [[[ 37.,  37.,  37.,  37.,  37.,  37.],
          [ 55.,  55.,  55.,  55.,  55.,  55.],
          [ 73.,  73.,  73.,  73

In [7]:
import math
import numpy as np
import torch


def my_batch_norm_2d_detail(features, eps=1e-5, gamma=0.5, beta=0.2):
    """Hand-written batch normalisation over a 4-D ``(n, c, h, w)`` input.

    For every channel the mean and the biased variance (divided by
    ``n * h * w``, not ``n * h * w - 1``) are computed over the batch and both
    spatial axes. Each element is then mapped to
    ``(x - mean) / sqrt(var + eps) * gamma + beta``.

    The input is NOT modified: the result is written into a copy, so the
    caller can keep using the original tensor afterwards (for example to feed
    the same data into ``nn.BatchNorm2d`` for comparison).

    Args:
        features: 4-D ``torch.Tensor`` (a ``numpy.ndarray`` also works)
            indexed as ``[batch, channel, row, column]``.
        eps: Constant added to the variance before the square root.
        gamma: Scale applied after normalisation (same value for all channels).
        beta: Shift applied after normalisation (same value for all channels).

    Returns:
        A tuple ``(normalized, running_mean, running_var)``:
        ``normalized`` has the same shape and container type as ``features``;
        ``running_mean`` and ``running_var`` are NumPy arrays of length ``c``
        holding the per-channel batch mean and biased batch variance. Despite
        their names, no momentum / moving average is applied to them.
    """
    n, c, h, w = features.shape
    running_mean = np.zeros((c))
    running_var = np.zeros((c))
    count = n * h * w  # number of elements that contribute to one channel

    # Normalise into a copy so the caller's data is left untouched.
    if isinstance(features, torch.Tensor):
        normalized = features.clone()
    else:
        normalized = features.copy()

    for ci in range(c):  # handle every channel separately
        channel = features[:, ci, :, :]

        # Mean over the batch and both spatial axes of this channel.
        mean = channel.sum() / count
        running_mean[ci] = float(mean)

        # Biased variance: mean of the squared deviations.
        var = ((channel - mean) ** 2).sum() / count
        running_var[ci] = float(var)

        # math.sqrt yields a plain Python float, so the scale below is a
        # constant factor applied to every element of the channel.
        normalized[:, ci, :, :] = (
            (channel - mean) / math.sqrt(var + eps)) * gamma + beta

    return normalized, running_mean, running_var


if __name__ == "__main__":
    # Run the hand-written batch norm on the convolution output, then show the
    # normalised tensor followed by the per-channel mean and variance arrays.
    bn_result = my_batch_norm_2d_detail(afterConv)
    my_bn_output, running_mean, running_var = bn_result

    print(my_bn_output, running_mean, running_var, sep="\n")

tensor([[[[-0.5319, -0.5319, -0.5319, -0.5319, -0.5319, -0.5319],
          [-0.2392, -0.2392, -0.2392, -0.2392, -0.2392, -0.2392],
          [ 0.0536,  0.0536,  0.0536,  0.0536,  0.0536,  0.0536],
          [ 0.3464,  0.3464,  0.3464,  0.3464,  0.3464,  0.3464],
          [ 0.6392,  0.6392,  0.6392,  0.6392,  0.6392,  0.6392],
          [ 0.9319,  0.9319,  0.9319,  0.9319,  0.9319,  0.9319]],

         [[-0.5319, -0.5319, -0.5319, -0.5319, -0.5319, -0.5319],
          [-0.2392, -0.2392, -0.2392, -0.2392, -0.2392, -0.2392],
          [ 0.0536,  0.0536,  0.0536,  0.0536,  0.0536,  0.0536],
          [ 0.3464,  0.3464,  0.3464,  0.3464,  0.3464,  0.3464],
          [ 0.6392,  0.6392,  0.6392,  0.6392,  0.6392,  0.6392],
          [ 0.9319,  0.9319,  0.9319,  0.9319,  0.9319,  0.9319]],

         [[-0.5319, -0.5319, -0.5319, -0.5319, -0.5319, -0.5319],
          [-0.2392, -0.2392, -0.2392, -0.2392, -0.2392, -0.2392],
          [ 0.0536,  0.0536,  0.0536,  0.0536,  0.0536,  0.0536],
      

In [11]:
# Reference result from PyTorch's own BatchNorm2d layer, configured with the
# notebook's preset running statistics, scale (gamma) and shift (beta).
batch_norm = nn.BatchNorm2d(num_features=CHANNEL_OUT, momentum=momentum)

# Replace the running-statistics buffers with the preset per-channel values.
preset_mean = torch.tensor(RUNNING_MEAN, dtype=torch.float)
preset_var = torch.tensor(RUNNING_VAR, dtype=torch.float)
batch_norm.running_mean = preset_mean
batch_norm.running_var = preset_var

# Same scale / shift for every channel.
batch_norm.weight.data.fill_(gamma)
batch_norm.bias.data.fill_(beta)

afterBN = batch_norm(afterConv)

# Show the running statistics after the forward pass, then the output itself.
print(
    "BN output:",
    batch_norm.running_mean,
    batch_norm.running_var,
    afterBN.shape,
    afterBN,
    sep="\n",
)

BN output:
tensor([ 82., 227., 444.])
tensor([ 945., 3780., 8505.])
torch.Size([2, 3, 6, 6])
tensor([[[[-0.5319, -0.5319, -0.5319, -0.5319, -0.5319, -0.5319],
          [-0.2391, -0.2391, -0.2391, -0.2391, -0.2391, -0.2391],
          [ 0.0536,  0.0536,  0.0536,  0.0536,  0.0536,  0.0536],
          [ 0.3464,  0.3464,  0.3464,  0.3464,  0.3464,  0.3464],
          [ 0.6391,  0.6391,  0.6391,  0.6391,  0.6391,  0.6391],
          [ 0.9319,  0.9319,  0.9319,  0.9319,  0.9319,  0.9319]],

         [[-0.5319, -0.5319, -0.5319, -0.5319, -0.5319, -0.5319],
          [-0.2391, -0.2391, -0.2391, -0.2391, -0.2391, -0.2391],
          [ 0.0536,  0.0536,  0.0536,  0.0536,  0.0536,  0.0536],
          [ 0.3464,  0.3464,  0.3464,  0.3464,  0.3464,  0.3464],
          [ 0.6391,  0.6391,  0.6391,  0.6391,  0.6391,  0.6391],
          [ 0.9319,  0.9319,  0.9319,  0.9319,  0.9319,  0.9319]],

         [[-0.5319, -0.5319, -0.5319, -0.5319, -0.5319, -0.5319],
          [-0.2391, -0.2391, -0.2391, -0.2391

In [12]:
# Final stage of the conv -> batch-norm -> ReLU pipeline: clamp every negative
# batch-norm output to zero and leave positive values unchanged.
relu = torch.nn.ReLU(inplace=False)
out = relu(afterBN)

print(out)

tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.0536, 0.0536, 0.0536, 0.0536, 0.0536, 0.0536],
          [0.3464, 0.3464, 0.3464, 0.3464, 0.3464, 0.3464],
          [0.6391, 0.6391, 0.6391, 0.6391, 0.6391, 0.6391],
          [0.9319, 0.9319, 0.9319, 0.9319, 0.9319, 0.9319]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.0536, 0.0536, 0.0536, 0.0536, 0.0536, 0.0536],
          [0.3464, 0.3464, 0.3464, 0.3464, 0.3464, 0.3464],
          [0.6391, 0.6391, 0.6391, 0.6391, 0.6391, 0.6391],
          [0.9319, 0.9319, 0.9319, 0.9319, 0.9319, 0.9319]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.0536, 0.0536, 0.0536, 0.0536, 0.0536, 0.0536],
          [0.3464, 0.3464, 0.3464, 0.3464, 0.3464, 0.3464],
          [0.6391, 0.6391, 0.6391, 0