In [8]:
import numpy as np
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    floor = 0
    return np.maximum(floor, x)
def relu_derivative(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere.

    Parameters
    ----------
    x : array_like or scalar
        Pre-activation values.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Float mask with the same shape as ``x``.
    """
    # np.greater (unlike the bare ``>`` operator) always yields a NumPy
    # boolean, so ``.astype`` is available for plain Python numbers and
    # lists as well as for arrays.
    return np.greater(x, 0).astype(float)
def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: pass positive values through, scale the rest by ``alpha``."""
    is_positive = x > 0
    leaked = alpha * x
    return np.where(is_positive, x, leaked)
def leaky_relu_derivative(x, alpha=0.01):
    """Slope of leaky ReLU: 1 where ``x > 0``, otherwise ``alpha``."""
    slope_positive, slope_negative = 1, alpha
    return np.where(x > 0, slope_positive, slope_negative)
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    Evaluating ``exp(-x)`` directly overflows for large negative ``x``.
    Here the exponential is only ever taken of a non-positive number, so
    it stays in ``(0, 1]`` and cannot overflow.

    Parameters
    ----------
    x : array_like or scalar
        Input values.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Values in ``[0, 1]`` with the same shape as ``x``.
    """
    decay = np.exp(-np.abs(x))  # exp(-|x|): always within (0, 1]
    # For x >= 0 this is the textbook form; for x < 0 the algebraically
    # equivalent exp(x) / (1 + exp(x)) is used instead.
    out = np.where(np.greater_equal(x, 0), 1 / (1 + decay), decay / (1 + decay))
    # np.where returns a 0-d array for scalar input; indexing with an empty
    # tuple turns that into a NumPy scalar and leaves real arrays unchanged.
    return out[()]
def sigmoid_derivative(x):
    """Derivative of the logistic function, evaluated at pre-activation ``x``."""
    activated = sigmoid(x)
    complement = 1 - activated
    return activated * complement
def tanh(x):
    """Hyperbolic tangent activation (delegates to ``np.tanh``)."""
    squashed = np.tanh(x)
    return squashed
def tanh_derivative(x):
    """Derivative of tanh at ``x``: one minus the squared tanh value."""
    t = np.tanh(x)
    return 1 - t ** 2
def softmax(x, axis=1):
    """Softmax along ``axis``, shifted by the maximum for numerical stability.

    Parameters
    ----------
    x : array_like
        Input scores.
    axis : int, optional
        Axis along which the outputs sum to one. Defaults to 1 (one
        distribution per row of a 2-D batch), which is the behaviour this
        function had before the parameter existed. Pass ``axis=0`` for 1-D
        input or for column-wise normalisation.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to one.
    """
    x = np.asarray(x)
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # every exponent <= 0, so np.exp cannot overflow.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / np.sum(e_x, axis=axis, keepdims=True)
def swish(x):
    """Swish activation: the input multiplied by its own sigmoid gate."""
    gate = sigmoid(x)
    return x * gate
def swish_derivative(x):
    """Derivative of swish at ``x``, expressed through the sigmoid gate."""
    gate = sigmoid(x)
    gated_input = x * gate
    return gate + gated_input * (1 - gate)
# Registry mapping an activation name to its (function, derivative) pair.
activations = {
    "relu": (relu, relu_derivative),
    "leaky_relu": (leaky_relu, leaky_relu_derivative),
    "sigmoid": (sigmoid, sigmoid_derivative),
    # "sig" is accepted as a short alias for "sigmoid".
    "sig": (sigmoid, sigmoid_derivative),
    "tanh": (tanh, tanh_derivative),
    "swish": (swish, swish_derivative),
    # No element-wise derivative function is defined for softmax, so its
    # derivative slot is None; callers must handle that case explicitly.
    "softmax": (softmax, None),
}
# --- Interactive configuration ----------------------------------------------
# The prompt lists the exact keys of `activations` so every advertised name
# actually selects the activation it names.
act_choice = input("Choose activation (relu, leaky_relu, sigmoid, tanh, swish, softmax): ").strip().lower()
epochs = int(input("Number of epochs: "))

if act_choice not in activations:
    # Same fallback as before (ReLU), but reported instead of happening silently.
    print(f"Unknown activation {act_choice!r}; falling back to relu.")
    act_choice = "relu"
activation, activation_derivative = activations[act_choice]


def _hidden_backward(d_a, z, a):
    """Turn dLoss/d(activation output) into dLoss/d(pre-activation).

    d_a : gradient w.r.t. the hidden layer's output ``a``
    z   : the layer's pre-activation
    a   : the layer's output, i.e. ``activation(z)``
    """
    if activation_derivative is None:
        # The registry stores None for softmax, whose outputs within a row
        # depend on every input of that row. Apply the row-wise softmax
        # Jacobian-vector product instead of an element-wise derivative.
        return a * (d_a - np.sum(d_a * a, axis=1, keepdims=True))
    return d_a * activation_derivative(z)


# --- XOR data set -----------------------------------------------------------
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[1],[1],[0]])

# --- Parameters: 2 -> 4 -> 4 -> 1 network, seeded for reproducibility -------
np.random.seed(42)
W1 = np.random.randn(2, 4)
b1 = np.zeros((1, 4))
W2 = np.random.randn(4, 4)
b2 = np.zeros((1, 4))
W3 = np.random.randn(4, 1)
b3 = np.zeros((1, 1))
lr = 0.59  # learning rate

for epoch in range(epochs):
    # Forward pass through the two hidden layers.
    z1 = X.dot(W1) + b1
    a1 = activation(z1)

    z2 = a1.dot(W2) + b2
    a2 = activation(z2)

    # Output layer: always sigmoid. There is a single output unit, so a
    # softmax over it would be constantly 1.0, and the backward pass below
    # differentiates a sigmoid output.
    z3 = a2.dot(W3) + b3
    a3 = sigmoid(z3)
    loss = np.mean((y - a3)**2)

    # Backward pass. d_a3 is the gradient of 0.5 * sum((y - a3)**2); it
    # differs from the gradient of the reported mean-squared loss only by a
    # constant factor, which effectively rescales lr.
    d_a3 = -(y - a3)
    d_z3 = d_a3 * sigmoid_derivative(z3)
    dW3 = a2.T.dot(d_z3)
    db3 = np.sum(d_z3, axis=0, keepdims=True)

    d_a2 = d_z3.dot(W3.T)
    d_z2 = _hidden_backward(d_a2, z2, a2)
    dW2 = a1.T.dot(d_z2)
    db2 = np.sum(d_z2, axis=0, keepdims=True)

    d_a1 = d_z2.dot(W2.T)
    d_z1 = _hidden_backward(d_a1, z1, a1)
    dW1 = X.T.dot(d_z1)
    db1 = np.sum(d_z1, axis=0, keepdims=True)

    # Plain gradient-descent update; all gradients above were computed from
    # the pre-update weights.
    W3 -= lr * dW3
    b3 -= lr * db3
    W2 -= lr * dW2
    b2 -= lr * db2
    W1 -= lr * dW1
    b1 -= lr * db1

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.2f}")
    print("Updated W1:", W1)
    print("Updated W2:", W2)
    print("Updated W3:", W3)
    print("-"*50)


Choose activation (relu, leaky_relu, sig, tanh, swish, softmax):  swish
Number of epochs:  5


Epoch 1/5, Loss: 0.26
Updated W1: [[ 0.51537752 -0.11390824  0.67478805  1.51221133]
 [-0.24212984 -0.22524013  1.55901307  0.81771142]]
Updated W2: [[-0.47755669  0.54283854 -0.47345262 -0.46642902]
 [ 0.24026927 -1.91316844 -1.72161829 -0.56191802]
 [-0.97033434  0.31019343 -0.9372278  -1.41570426]
 [ 1.47875914 -0.22941512  0.01586603 -1.42903952]]
Updated W3: [[-0.52120353]
 [ 0.10809215]
 [-1.15806113]
 [ 0.34960697]]
--------------------------------------------------
Epoch 2/5, Loss: 0.25
Updated W1: [[ 0.53218905 -0.0927852   0.70023662  1.50576619]
 [-0.24919108 -0.21613608  1.54421925  0.86461404]]
Updated W2: [[-0.48509068  0.54314993 -0.48222493 -0.46718349]
 [ 0.23861198 -1.91309279 -1.71894352 -0.5615942 ]
 [-0.93138879  0.3068365  -0.96686383 -1.41923023]
 [ 1.49392813 -0.23250632 -0.03123094 -1.43316188]]
Updated W3: [[-0.50697125]
 [ 0.10663308]
 [-1.17033876]
 [ 0.32655326]]
--------------------------------------------------
Epoch 3/5, Loss: 0.24
Updated W1: [[ 0.54754