# Batch Normalization

In [3]:
import torch
import torch.nn as nn
import numpy as np

class CustomBatchNorm2d(nn.Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.weight = nn.Parameter(torch.ones(num_features))
        self.bias = nn.Parameter(torch.zeros(num_features))
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))

    def forward(self, input):
        if self.training:
            mean = input.mean([0, 2, 3])
            var = input.var([0, 2, 3], unbiased=False)

            with torch.no_grad():
                self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean
                self.running_var = (1 - self.momentum) * self.running_var + self.momentum * var
        else:
            mean = self.running_mean
            var = self.running_var

        input_normalized = (input - mean[None, :, None, None]) / torch.sqrt(var[None, :, None, None] + self.eps)
        return self.weight[None, :, None, None] * input_normalized + self.bias[None, :, None, None]

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Create a random input tensor
batch_size, channels, height, width = 4, 3, 2, 2
x = torch.randn(batch_size, channels, height, width)

# Create custom BatchNorm2d layer
custom_bn = CustomBatchNorm2d(channels, eps=1e-5, momentum=0.1)

# Create PyTorch's BatchNorm2d layer
torch_bn = nn.BatchNorm2d(channels, eps=1e-5, momentum=0.1)

# Ensure both are in training mode
custom_bn.train()
torch_bn.train()

# Forward pass
custom_output = custom_bn(x)
torch_output = torch_bn(x)

print("Input shape:", x.shape)
print("\nInput:")
print(x)

print("\nCustom BatchNorm output:")
print(custom_output)

print("\nPyTorch BatchNorm output:")
print(torch_output)

print("\nDifference between custom and PyTorch implementations:")
print(torch.abs(custom_output - torch_output).max().item())

print("\nCustom BatchNorm running mean:")
print(custom_bn.running_mean)

print("\nPyTorch BatchNorm running mean:")
print(torch_bn.running_mean)

print("\nCustom BatchNorm running variance:")
print(custom_bn.running_var)

print("\nPyTorch BatchNorm running variance:")
print(torch_bn.running_var)

# Test in eval mode
custom_bn.eval()
torch_bn.eval()

custom_eval_output = custom_bn(x)
torch_eval_output = torch_bn(x)

print("\nDifference between custom and PyTorch implementations (eval mode):")
print(torch.abs(custom_eval_output - torch_eval_output).max().item())

Input shape: torch.Size([4, 3, 2, 2])

Input:
tensor([[[[ 1.9269,  1.4873],
          [ 0.9007, -2.1055]],

         [[ 0.6784, -1.2345],
          [-0.0431, -1.6047]],

         [[-0.7521,  1.6487],
          [-0.3925, -1.4036]]],


        [[[-0.7279, -0.5594],
          [-0.7688,  0.7624]],

         [[ 1.6423, -0.1596],
          [-0.4974,  0.4396]],

         [[-0.7581,  1.0783],
          [ 0.8008,  1.6806]]],


        [[[ 1.2791,  1.2964],
          [ 0.6105,  1.3347]],

         [[-0.2316,  0.0418],
          [-0.2516,  0.8599]],

         [[-1.3847, -0.8712],
          [-0.2234,  1.7174]]],


        [[[ 0.3189, -0.4245],
          [ 0.3057, -0.7746]],

         [[-1.5576,  0.9956],
          [-0.8798, -0.6011]],

         [[-1.2742,  2.1228],
          [-1.2347, -0.4879]]]])

Custom BatchNorm output:
tensor([[[[ 1.5237e+00,  1.1110e+00],
          [ 5.6032e-01, -2.2619e+00]],

         [[ 9.2737e-01, -1.2135e+00],
          [ 1.1991e-01, -1.6278e+00]],

         [[-6.2543e-0

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Create a random tensor
batch_size = 4
channels = 8
height = 4
width = 4
x = torch.randn(batch_size, channels, height, width)

print("Input tensor shape:", x.shape)

# Define normalization layers
num_groups = 4  # for Group Normalization
batch_norm = nn.BatchNorm2d(channels)
group_norm = nn.GroupNorm(num_groups, channels)

# Apply normalizations
batch_norm_output = batch_norm(x)
group_norm_output = group_norm(x)

print("\nBatch Normalization output shape:", batch_norm_output.shape)
print("Group Normalization output shape:", group_norm_output.shape)

# Manual calculation for Group Normalization
def manual_group_norm(x, num_groups, eps=1e-5):
    batch_size, channels, height, width = x.shape
    x = x.view(batch_size, num_groups, -1)

    mean = x.mean(dim=2, keepdim=True)
    var = x.var(dim=2, keepdim=True)
    x = (x - mean) / torch.sqrt(var + eps)

    return x.view(batch_size, channels, height, width)

manual_group_norm_output = manual_group_norm(x, num_groups)

print("\nManual Group Normalization output shape:", manual_group_norm_output.shape)

# Compare results
print("\nMax difference between PyTorch and manual Group Norm:")
print(torch.max(torch.abs(group_norm_output - manual_group_norm_output)))

# Visualize a slice of the data
slice_idx = 0
print(f"\nOriginal data (first channel, first sample):\n{x[slice_idx, 0]}")
print(f"\nBatch Normalized data (first channel, first sample):\n{batch_norm_output[slice_idx, 0]}")
print(f"\nGroup Normalized data (first channel, first sample):\n{group_norm_output[slice_idx, 0]}")

Input tensor shape: torch.Size([4, 8, 4, 4])

Batch Normalization output shape: torch.Size([4, 8, 4, 4])
Group Normalization output shape: torch.Size([4, 8, 4, 4])

Manual Group Normalization output shape: torch.Size([4, 8, 4, 4])

Max difference between PyTorch and manual Group Norm:
tensor(0.0471, grad_fn=<MaxBackward1>)

Original data (first channel, first sample):
tensor([[-0.9138, -0.6581,  0.0780,  0.5258],
        [-0.4880,  1.1914, -0.8140, -0.7360],
        [-1.4032,  0.0360, -0.0635,  0.6756],
        [-0.0978,  1.8446, -1.1845,  1.3835]])

Batch Normalized data (first channel, first sample):
tensor([[-0.7922, -0.5238,  0.2491,  0.7192],
        [-0.3452,  1.4179, -0.6874, -0.6055],
        [-1.3061,  0.2050,  0.1005,  0.8765],
        [ 0.0645,  2.1037, -1.0764,  1.6197]], grad_fn=<SelectBackward0>)

Group Normalized data (first channel, first sample):
tensor([[-1.0120, -0.7520, -0.0033,  0.4520],
        [-0.5789,  1.1288, -0.9105, -0.8311],
        [-1.5097, -0.0461, -0.14