# Weight Initialization

In [1]:
import numpy as np

In [19]:
a=(4,2)
print(np.random.rand(*a))
print(np.random.rand(4,2))

[[0.65812853 0.00545102]
 [0.6862947  0.85389954]
 [0.14421876 0.50489621]
 [0.09314335 0.70847665]]
[[0.64343498 0.07536501]
 [0.78783789 0.79926634]
 [0.88237086 0.94832781]
 [0.41434246 0.79713029]]


In [11]:
shape = (2, 3)  # Shape of the weight matrix for a given layer

def initialize_zeros(shape):   # Zero Initialization
    return np.zeros(shape)

def initialize_random(shape):  # Random Initialization
    print(np.random.randn(shape))
    return np.random.randn(*shape)

def initialize_xavier(shape):  # Xavier Initialization
    fan_in, fan_out = shape[0], shape[1]
    variance = np.sqrt(2.0 / (fan_in + fan_out))
    return np.random.randn(*shape) * variance

def initialize_he(shape):  # He Initialization
    fan_in = shape[0]
    variance = np.sqrt(2.0 / fan_in)
    return np.random.randn(*shape) * variance

def initialize_lecun(shape):  # LeCun Initialization
    fan_in = shape[0]
    variance = np.sqrt(1.0 / fan_in)
    return np.random.randn(*shape) * variance

In [12]:
print("Zero Initialization: \n", initialize_zeros(shape))
print("\nRandom Initialization: \n", initialize_random(shape))
print("\nXavier Initialization: \n", initialize_xavier(shape))
print("\nHe Initialization: \n", initialize_he(shape))
print("\nLeCun Initialization: \n", initialize_lecun(shape))

Zero Initialization: 
 [[0. 0. 0.]
 [0. 0. 0.]]


TypeError: 'tuple' object cannot be interpreted as an integer

# Dropout

In [5]:
def dropout_forward(X, dropout_rate):
    mask = np.random.rand(*X.shape) < (1 - dropout_rate)
    out = X * mask / (1 - dropout_rate)
    cache = (mask, dropout_rate)
    return out, cache

def dropout_backward(dout, cache):
    mask, dropout_rate = cache
    dX = dout * mask / (1 - dropout_rate)
    return dX

In [6]:
# Forward pass with dropout
dropout_rate = 0.2
X = np.random.randn(4, 5)  # Example hidden layer output
out, cache = dropout_forward(X, dropout_rate)

In [7]:
# Backward pass with dropout
dout = np.random.randn(*out.shape)  # Example gradient from subsequent layer
dX = dropout_backward(dout, cache)

In [8]:
print("Original X:\n", X)
print("\nOutput with dropout:\n", out)
print("\nGradient after dropout:\n", dX)

Original X:
 [[ 0.25201914  0.80854546 -1.34124056  0.86687993  0.46476839]
 [-0.30579249  0.42610705 -0.45343857  0.74199622 -0.81874628]
 [ 0.53047232 -0.64510749  1.43688164  0.67406    -1.00536273]
 [-0.17507648 -0.24368014  1.37282937  0.36368145 -0.20829586]]

Output with dropout:
 [[ 0.31502392  1.01068183 -1.6765507   1.08359991  0.58096049]
 [-0.          0.53263382 -0.56679821  0.92749528 -1.02343285]
 [ 0.6630904  -0.80638437  1.79610205  0.842575   -1.25670341]
 [-0.2188456  -0.30460018  1.71603671  0.45460181 -0.26036982]]

Gradient after dropout:
 [[ 0.98511863 -0.13150842 -2.34711316 -0.17806453  2.09773349]
 [-0.          0.66754021  0.18147485 -0.40298255  0.8095372 ]
 [ 0.89662527  0.07915891 -1.62586999 -0.70468984  0.02585771]
 [ 1.19908708  0.51825097 -0.41474305  0.49836445 -0.56691434]]


# Batch Normalization

In [9]:
class BatchNormalization:
    def __init__(self, epsilon=1e-8):
        self.epsilon = epsilon
        self.gamma = None
        self.beta = None
        self.mean = None
        self.var = None
        self.x_normalized = None

    def forward(self, x, training=True):
        if self.mean is None:
            self.mean = np.mean(x, axis=0)
            self.var = np.var(x, axis=0)

        if training:
            x_normalized = (x - self.mean) / np.sqrt(self.var + self.epsilon)
            self.x_normalized = x_normalized

            if self.gamma is None:
                self.gamma = np.ones_like(x[0])
                self.beta = np.zeros_like(x[0])

            out = self.gamma * x_normalized + self.beta
        else:
            x_normalized = (x - self.mean) / np.sqrt(self.var + self.epsilon)
            out = self.gamma * x_normalized + self.beta

        return out

    def backward(self, dout):
        dx_normalized = dout * self.gamma
        dx = (1.0 / len(dout)) * (1.0 / np.sqrt(self.var + self.epsilon)) * (
                len(dout) * dx_normalized - np.sum(dx_normalized, axis=0)
                - self.x_normalized * np.sum(dx_normalized * self.x_normalized, axis=0))
        dgamma = np.sum(dout * self.x_normalized, axis=0)
        dbeta = np.sum(dout, axis=0)

        self.gamma -= dgamma
        self.beta -= dbeta

        return dx

In [10]:
# Create an instance of BatchNormalization
bn = BatchNormalization()

# Assume we have an input tensor x and its gradient dout
x = np.random.randn(100, 10)  # Example input
dout = np.random.randn(100, 10)  # Example gradient from subsequent layer

# Forward pass
out = bn.forward(x, training=True)

# Backward pass
dx = bn.backward(dout)