In [1]:
import numpy as np 

In [2]:
# Seed the legacy global NumPy RNG so every random draw below is repeatable.
np.random.seed(42)

# Experiment dimensions: rows in the input batch, inputs feeding each unit,
# and number of output units in the layer.
samples, fan_in, fan_out = 1000, 1000, 50

# Standard-normal input batch of shape (samples, fan_in).
X = np.random.randn(samples, fan_in)



In [6]:
def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise.

    Args:
        x: Scalar or array-like input accepted by ``np.tanh``.

    Returns:
        The result of ``np.tanh(x)``.
    """
    squashed = np.tanh(x)
    return squashed

def tanh_deriv(x):
    """Derivative of tanh evaluated at the pre-activation ``x``.

    Relies on the identity d/dx tanh(x) = 1 - tanh(x)^2, so ``x`` is the
    value fed *into* tanh, not the activation that comes out of it.

    Args:
        x: Scalar or array-like input accepted by ``np.tanh``.

    Returns:
        ``1 - tanh(x)**2``, computed element-wise.
    """
    squashed = np.tanh(x)
    return 1.0 - squashed ** 2

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Args:
        x: Scalar or array-like input accepted by ``np.maximum``.

    Returns:
        ``np.maximum(0, x)``.
    """
    floor = 0
    return np.maximum(floor, x)

def relu_deriv(x):
    """Derivative of ReLU evaluated at the pre-activation ``x``.

    The comparison goes through ``np.greater`` rather than the bare ``>``
    operator so that plain Python numbers and lists are accepted as well as
    arrays: ``x > 0`` on a Python scalar yields a built-in ``bool``, which
    has no ``.astype``, and on a list it raises ``TypeError``.

    Args:
        x: Scalar, sequence, or array of pre-activation values.

    Returns:
        Float result shaped like ``x``: 1.0 where ``x > 0`` and 0.0
        elsewhere (so the value at exactly 0 is 0.0). Scalar input gives a
        NumPy float scalar.
    """
    return np.greater(x, 0).astype(float)





In [7]:
# Candidate weight matrices, each of shape (fan_in, fan_out), keyed by the
# label used when reporting. Insertion order is the reporting order, and the
# two random entries draw from the global NumPy RNG in the order written
# here, so reordering them changes the sampled values.
inits = {}
inits["Zero Init"] = np.zeros((fan_in, fan_out))
# Constant fills: every weight is the same number, so all fan_out columns of
# X @ W are identical to one another.
inits["Small Init"] = np.full((fan_in, fan_out), 1e-4)
inits["Large Init"] = np.full((fan_in, fan_out), 1.0)
# Zero-mean Gaussian weights scaled by sqrt(1 / fan_in).
inits["Xavier Init (tanh)"] = np.sqrt(1 / fan_in) * np.random.randn(fan_in, fan_out)
# Zero-mean Gaussian weights scaled by sqrt(2 / fan_in).
inits["He Init (ReLU)"] = np.sqrt(2 / fan_in) * np.random.randn(fan_in, fan_out)

In [9]:
# For each initialisation, push X through one bias-free linear layer and
# report the mean/std of the pre-activations, the activations, and the
# activation's local derivative.
for name, W in inits.items():
    z = X @ W

    # The label picks the nonlinearity: ReLU when the label mentions it,
    # tanh for every other entry.
    if "ReLU" not in name:
        a, deriv = tanh(z), tanh_deriv(z)
    else:
        a, deriv = relu(z), relu_deriv(z)

    # One print call, one line per argument; the trailing "" produces the
    # blank separator line after each report.
    print(
        f"--- {name} ---",
        f"z mean: {z.mean():.4f}, z std: {z.std():.4f}",
        f"activation mean: {a.mean():.4f}, activation std: {a.std():.4f}",
        f"gradient mean: {deriv.mean():.4f}, gradient std: {deriv.std():.4f}",
        "",
        sep="\n",
    )

--- Zero Init ---
z mean: 0.0000, z std: 0.0000
activation mean: 0.0000, activation std: 0.0000
gradient mean: 1.0000, gradient std: 0.0000

--- Small Init ---
z mean: -0.0000, z std: 0.0031
activation mean: -0.0000, activation std: 0.0031
gradient mean: 1.0000, gradient std: 0.0000

--- Large Init ---
z mean: -0.2433, z std: 30.9538
activation mean: -0.0322, activation std: 0.9865
gradient mean: 0.0257, gradient std: 0.1225

--- Xavier Init (tanh) ---
z mean: 0.0007, z std: 1.0017
activation mean: -0.0002, activation std: 0.6291
gradient mean: 0.6042, gradient std: 0.3131

--- He Init (ReLU) ---
z mean: 0.0021, z std: 1.4219
activation mean: 0.5669, activation std: 0.8331
gradient mean: 0.5002, gradient std: 0.5000

