In [None]:
import numpy as np

### Batch Normalization:
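- step-1: Calculate the batch mean: `mean = (1 / N) * sum(x_i)`
- step-2: Subtract the mean from each input: `x_i - mean`
- step-3: Calculate the variance: `var = (1 / N) * sum((x_i - mean)^2)`
- step-4: Normalize the input: `x_hat = (x_i - mean) / sqrt(var + eps)`
- step-5: Apply the scale and shift: `out = gamma * x_hat + beta`
- During training, cache the intermediate values for backpropagation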

### forward propagation

In [55]:
# a simple mini-batch implementation using numpy 
def batch_normalization_forward(x, gamma, beta, running_mean, running_var, eps=1e-8, training=True, momentum=0.9):
    """Batch-normalization forward pass over a mini-batch.

    In training mode the input is normalized with the statistics of the
    current batch (taken along axis 0), then scaled by ``gamma`` and shifted
    by ``beta``. The running statistics are updated as an exponential moving
    average::

        running = momentum * running + (1 - momentum) * batch_statistic

    so ``momentum`` (0 <= momentum <= 1) controls how much of the previous
    running value is kept and how much the current batch contributes.
    In inference mode the running statistics replace the batch statistics.

    Args:
        x: Input array; statistics are computed along axis 0.
        gamma: Scale applied element-wise to the normalized input.
        beta: Shift added after scaling.
        running_mean: Running mean. When it is a NumPy array whose shape
            matches the batch mean, it is updated IN PLACE during training so
            the caller sees the new value on the next call.
        running_var: Running variance, updated in place like ``running_mean``.
        eps: Small constant added to the variance before the square root.
        training: Use batch statistics (True) or running statistics (False).
        momentum: Weight of the previous running value in the moving average.

    Returns:
        Tuple ``(out, cache)``. ``cache`` is
        ``(xhat, gamma, xmu, istd, std, var, eps)`` in training mode and
        ``None`` in inference mode.
    """
    if training:
        mu = np.mean(x, axis=0)            # Step 1: batch mean
        xmu = x - mu                       # Step 2: center the input
        var = np.mean(xmu ** 2, axis=0)    # Step 3: (biased) batch variance
        std = np.sqrt(var + eps)           # Step 4: standard deviation
        istd = 1.0 / std                   # Step 5: inverse standard deviation
        xhat = xmu * istd                  # Step 6: normalize
        out = xhat * gamma + beta          # Step 7: element-wise scale and shift

        # Exponential moving average of the batch statistics. The variance
        # update must blend in the batch variance `var`, not the running
        # variance itself.
        new_running_mean = momentum * running_mean + (1 - momentum) * mu
        new_running_var = momentum * running_var + (1 - momentum) * var

        # Write the result back into the caller's arrays: rebinding the local
        # names would be lost on return because only (out, cache) is returned.
        for running, updated in ((running_mean, new_running_mean),
                                 (running_var, new_running_var)):
            if isinstance(running, np.ndarray) and running.shape == np.shape(updated):
                running[...] = updated

        # Intermediate values needed by the backward pass.
        cache = (xhat, gamma, xmu, istd, std, var, eps)
    else:
        xhat = (x - running_mean) / np.sqrt(running_var + eps)
        out = gamma * xhat + beta
        cache = None

    return out, cache

def calcute_mean(a):
    """Return the mean and standard deviation of ``a`` along axis 0.

    Returns:
        Tuple ``(mean, std)``, each reduced over the first axis.
    """
    column_mean = np.mean(a, axis=0)
    column_std = np.std(a, axis=0)
    return column_mean, column_std

# Demo: build the activations of a small two-layer network and check that the
# forward pass normalizes them as expected.
# Seed the global RNG so the random inputs/weights below are reproducible.
np.random.seed(100)
N, D1, D2, D3 = 200, 50, 60, 3  # batch size, input dim, hidden dim, output dim
X = np.random.randn(N, D1)
W1 = np.random.randn(D1, D2)
W2 = np.random.randn(D2, D3)
W1Xo= X.dot(W1)  # first-layer pre-activations, shape (N, D2)

# ReLU on the first layer, then the second linear layer -> shape (N, D3)
a = np.maximum(0, W1Xo).dot(W2)

# Per-feature statistics of the un-normalized activations
a_mean, a_asd=calcute_mean(a)
print(f"Before normalized:\n mean={a_mean} std:{a_asd}")


gamma = np.ones(D3)           # Scale parameter
beta = np.zeros((D3,))           # Shift parameter

# Initialize running mean and variance as zeros
running_mean = np.zeros(D3)
running_var = np.zeros(D3)


# Training-mode forward pass (training=True is the default) with gamma=1, beta=0
out, cache= batch_normalization_forward(a, gamma, beta, running_mean, running_var)
a_mean, a_sd=calcute_mean(out)
print(f'After batch normalization mean close to (beta=0) and std near to (gamma=1)\n mean:{a_mean} std:{a_sd}')

# Non-trivial scale and shift, one value per output feature
gamma = np.asarray([1.0, 2.0, 3.0])
beta = np.asarray([11.0, 12.0, 13.0])

# Perform batch normalization forward pass again with the new gamma and beta
out, cache = batch_normalization_forward(a, gamma, beta, running_mean, running_var)

# Now means should be close to beta and stds close to gamma
a_mean, a_sd=calcute_mean(out)
print(f'After batch normalization mean should close to beta=(',beta,')and std should close to gamam (gamma=', gamma,')\n (mean:', a_mean, 'std:',a_sd, ') ')

Before normalized:
 mean=[-10.33110021 -13.35263687  14.88236006] std:[28.55721837 30.99596294 34.26431573]
After batch normalization mean close to (beta=0) and std near to (gamma=1)
 mean:[ 2.03725925e-16  2.30648833e-16 -8.88178420e-18] std:[1. 1. 1.]
After batch normalization mean should close to beta=( [11. 12. 13.] )and std should close to gamam (gamma= [1. 2. 3.] )
 (mean: [11. 12. 13.] std: [1. 2. 3.] ) 


In [None]:
### Batch Normalization (Back Propagation)

In [1]:
import numpy as np

class BatchNormalization:
    """Batch-normalization layer with a learnable scale and shift.

    Attributes:
        eps: Small constant added to the variance before the square root.
        momentum: Weight of the previous running value in the moving average
            ``running = momentum * running + (1 - momentum) * batch``.
        running_mean, running_var: Moving averages of the batch statistics,
            used by ``forward`` when ``training=False``.
        gamma, beta: Scale and shift parameters, one per input feature.
        dgamma, dbeta: Gradients of the loss w.r.t. ``gamma`` and ``beta``,
            filled in by ``back`` (``None`` until then).
        cache: ``(x_normalized, batch_mean, batch_var)`` from the most recent
            training-mode forward pass, or ``None``.
    """

    def __init__(self, input_size, eps=1e-8, momentum=0.9) -> None:
        self.eps = eps
        self.momentum = momentum
        self.running_mean = np.zeros(input_size)
        self.running_var = np.zeros(input_size)
        self.gamma = np.ones(input_size)
        self.beta = np.zeros(input_size)

        # Parameter gradients produced by back().
        self.dgamma = None
        self.dbeta = None

        self.cache = None

    def forward(self, X, training=True):
        """Normalize ``X`` along axis 0, then scale by gamma and shift by beta.

        Training mode uses the batch statistics, updates the running
        statistics and stores the cache for ``back``. Inference mode uses the
        running statistics and leaves the cache untouched.
        """
        if training:
            batch_mean = np.mean(X, axis=0)
            batch_var = np.var(X, axis=0)
            x_normalized = (X - batch_mean) / np.sqrt(batch_var + self.eps)
            x_out = x_normalized * self.gamma + self.beta

            # Blend the PREVIOUS running value with the batch statistic;
            # mixing the batch statistic with itself would simply overwrite
            # the running average with the latest batch.
            self.running_mean = (self.momentum * self.running_mean
                                 + (1 - self.momentum) * batch_mean)
            self.running_var = (self.momentum * self.running_var
                                + (1 - self.momentum) * batch_var)

            self.cache = (x_normalized, batch_mean, batch_var)
        else:
            x_normalized = (X - self.running_mean) / np.sqrt(self.running_var + self.eps)
            x_out = x_normalized * self.gamma + self.beta
        return x_out

    def back(self, d_out):
        """Backward pass for the most recent training-mode ``forward`` call.

        Args:
            d_out: Gradient of the loss w.r.t. the layer output, same shape
                as the output of ``forward``.

        Returns:
            Gradient of the loss w.r.t. the layer input. The parameter
            gradients are stored on ``self.dgamma`` and ``self.dbeta``.

        Raises:
            RuntimeError: If ``forward`` has not been run in training mode.
        """
        if self.cache is None:
            raise RuntimeError("back() requires a prior forward(X, training=True) call")
        x_normalized, _, batch_var = self.cache

        batch_size = d_out.shape[0]
        inv_std = 1.0 / np.sqrt(batch_var + self.eps)

        # out = gamma * x_hat + beta
        self.dbeta = np.sum(d_out, axis=0)
        self.dgamma = np.sum(d_out * x_normalized, axis=0)

        # Gradient w.r.t. x_hat, then through the mean/variance normalization
        # (the batch mean and variance both depend on every sample).
        d_xhat = d_out * self.gamma
        d_x = (inv_std / batch_size) * (
            batch_size * d_xhat
            - np.sum(d_xhat, axis=0)
            - x_normalized * np.sum(d_xhat * x_normalized, axis=0)
        )
        return d_x
        




In [16]:
import numpy as np
# Fix the seed so the sampled integers are the same on every run.
np.random.seed(100)
a = np.random.randint(5, size=(2, 4))  # 2x4 array of integers drawn from [0, 5)
print(f"shape:{a.ndim}\n array:\n{a}")

# Rows index the samples in the batch (N), columns index the input features (M).
N = a.shape[0]
M = a.shape[1]
print(f"R:{N} M:{M}")

shape:2
 array:
[[0 0 3 0]
 [2 4 2 2]]
R:2 M:4
