In [20]:
import numpy as np
import copy
from Layers import FullyConnected, Flatten
class L2Loss:
    """Sum-of-squared-differences loss between a prediction and its labels."""

    def __init__(self):
        # Prediction cached by forward() so backward() can reuse it.
        self.input_tensor = None

    def forward(self, input_tensor, label_tensor):
        """Cache the prediction and return the summed squared error.

        Args:
            input_tensor: Predicted values.
            label_tensor: Target values, broadcastable against the prediction.

        Returns:
            The sum over all elements of (input_tensor - label_tensor) ** 2.
        """
        self.input_tensor = input_tensor
        residual = input_tensor - label_tensor
        return np.square(residual).sum()

    def backward(self, label_tensor):
        """Return the gradient of the loss w.r.t. the cached prediction.

        Args:
            label_tensor: Target values used in the matching forward() call.

        Returns:
            2 * (input_tensor - label_tensor), element-wise.
        """
        difference = np.subtract(self.input_tensor, label_tensor)
        return 2 * difference
class BatchNormalization:
    """
    Batch Normalization layer.

    Normalizes every channel of the input to zero mean and unit variance and
    then applies a per-channel scale (``weights``, gamma) and shift (``bias``,
    beta). During training the statistics of the current batch are used and
    folded into moving averages; during testing the moving averages are used.
    """

    def __init__(self, channels):
        """
        Constructor.

        Args:
            channels (int): Number of channels in the input tensor.
        """
        self.channels = channels
        self.trainable = True
        self.testing_phase = False
        self.moving_mean = None
        self.moving_var = None
        self.moving_avg_decay = 0.8  # alpha

        # Initialize scale and shift so the layer starts as a pure normalizer.
        self.weights = np.ones(self.channels)  # gamma
        self.bias = np.zeros(self.channels)  # beta

    def forward(self, input_tensor):
        """
        Performs the forward pass of the Batch Normalization layer.

        In the training phase the batch mean/variance are used and the moving
        averages are updated. In the testing phase the moving averages are
        used; if no training pass has populated them yet, the statistics of
        the given batch are used instead (and the moving averages are left
        untouched) rather than failing on the missing values.

        Args:
            input_tensor (array/tensor): Either a 2-D tensor whose last axis
                has ``channels`` entries, or a 4-D tensor with the channel
                axis at position 1.

        Returns:
            array/tensor: The normalized, scaled and shifted tensor, in the
            same layout as the input.
        """
        epsilon = 1e-15
        is_conv = input_tensor.ndim == 4

        if is_conv:
            # Work on a (samples, channels) view so axis 0 spans everything
            # that belongs to one channel.
            input_tensor = self.reformat(input_tensor)

        self.input_tensor = input_tensor

        if self.testing_phase and self.moving_mean is not None:
            self.mean = self.moving_mean
            self.var = self.moving_var
        else:
            self.mean = np.mean(input_tensor, axis=0)
            self.var = np.var(input_tensor, axis=0)
            # Only training passes may change the running statistics.
            if not self.testing_phase:
                self._update_moving_statistics()

        self.input_tensor_hat = (input_tensor - self.mean) / np.sqrt(self.var + epsilon)
        output_tensor = self.weights * self.input_tensor_hat + self.bias

        if is_conv:
            output_tensor = self.reformat(output_tensor)

        return output_tensor

    def _update_moving_statistics(self):
        """Fold the current batch mean/variance into the moving averages."""
        if self.moving_mean is None:
            # First training batch: seed the averages with independent copies.
            self.moving_mean = self.mean.copy()
            self.moving_var = self.var.copy()
            return

        decay = self.moving_avg_decay
        self.moving_mean = self.moving_mean * decay + self.mean * (1 - decay)
        self.moving_var = self.moving_var * decay + self.var * (1 - decay)

    def reformat(self, input_tensor):
        """
        Converts between the 4-D image layout and the 2-D per-channel layout.

        A 4-D tensor of shape (B, H, M, N), with H the channel axis, is
        flattened to (B * M * N, H) and its shape is remembered. Any other
        tensor is treated as such a flattened tensor and is restored to the
        most recently remembered 4-D shape.

        Args:
            input_tensor (array/tensor): Tensor to convert.

        Returns:
            array/tensor: The tensor in the opposite layout.
        """
        if input_tensor.ndim == 4:
            self.reformat_shape = input_tensor.shape
            B, H, M, N = input_tensor.shape
            # (B, H, M, N) -> (B, H, M*N) -> (B, M*N, H) -> (B*M*N, H)
            flat = input_tensor.reshape(B, H, M * N).transpose(0, 2, 1)
            return flat.reshape(B * M * N, H)

        # Inverse direction; relies on the shape stored by the last 4-D call.
        B, H, M, N = self.reformat_shape
        restored = input_tensor.reshape(B, M * N, H).transpose(0, 2, 1)
        return restored.reshape(B, H, M, N)

In [21]:
# Test fixture: sizes of the synthetic data set.
batch_size = 200
channels = 2
input_shape = (channels, 3, 3)
# Number of scalar features per sample once the image is flattened.
input_size = np.prod(input_shape)

# Fixed seed so the random tensors below are reproducible; the order of the
# np.random calls from here on determines the generated values.
np.random.seed(0)
# Flat input of shape (batch_size, input_size) with non-negative entries.
input_tensor = np.abs(np.random.random((input_size, batch_size))).T
# Image-like input of shape (batch_size, channels, 3, 3) with values in [-1, 1).
input_tensor_conv = np.random.uniform(-1, 1, (batch_size, *input_shape))

categories = 5
# One-hot labels of shape (batch_size, categories): one random class per sample.
label_tensor = np.zeros([categories, batch_size]).T
for i in range(batch_size):
    label_tensor[i, np.random.randint(0, categories)] = 1

# Layer stack; the leading None is presumably a placeholder for the layer
# under test - TODO confirm against the code that consumes `layers`.
layers = list()
layers.append(None)
layers.append(Flatten.Flatten())
layers.append(FullyConnected.FullyConnected(input_size, categories))
layers.append(L2Loss())

# (rows, columns) = (input_shape[1], channels * product of remaining dims).
plot_shape = (input_shape[1], input_shape[0] * np.prod(input_shape[2:]))

In [4]:
def _channel_moments(tensor, channels):
    tensor = np.transpose(tensor, (0, *range(2, tensor.ndim), 1))
    tensor = tensor.reshape(-1, channels)
    mean = np.mean(tensor, axis=0)
    var = np.var(tensor, axis=0)
    return mean, var

In [28]:
# Create the batch-normalization layer with one gamma/beta entry per channel.
layer = BatchNormalization(channels)

In [29]:
# Run a forward pass on the 4-D input of shape (batch_size, channels, 3, 3).
output = layer.forward(input_tensor_conv)

ValueError: operands could not be broadcast together with shapes (2,) (200,2,3,3) 

In [30]:
# Inspect the rank of the convolutional input (forward() branches on ndim == 4).
input_tensor_conv.ndim

4