In [180]:
import numpy as np

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    The textbook formula calls ``exp(-z)``, which overflows (and makes NumPy
    emit a ``RuntimeWarning``) once ``z`` is a large negative number. Here the
    exponential is only ever taken of a non-positive value, so the result is
    the same but no overflow can occur.

    Parameters
    ----------
    z : scalar or array_like
        Real-valued pre-activation(s). Non-float input (ints, lists of ints,
        ...) is converted to float first.

    Returns
    -------
    NumPy scalar or numpy.ndarray
        Element-wise sigmoid of ``z`` with the same shape as ``z``; a NumPy
        scalar when ``z`` is a scalar.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    e = np.exp(-np.abs(z))  # argument is <= 0, so e is at most 1 and cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically the same value.
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    return out[()]  # unwrap a 0-d result to a scalar; n-d arrays pass through

def sigmoid_deriv(z):
    """Derivative of the logistic sigmoid evaluated at ``z``.

    Uses the identity sigma'(z) = sigma(z) * (1 - sigma(z)), so the sigmoid
    is evaluated once and reused.
    """
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement


In [181]:
class MLP:
    """Binary classifier: one linear hidden layer feeding a sigmoid output unit.

    The hidden layer deliberately has no non-linearity (its activation is the
    identity), so the only non-linear step is the final sigmoid. Training is
    per-sample stochastic gradient descent using the ``y_pred - y`` output
    error (the gradient of binary cross-entropy through a sigmoid).

    Parameters
    ----------
    hidden_units : int, default 2
        Number of units in the hidden layer.
    lr : float, default 0.01
        Learning rate applied to every weight update.
    epochs : int, default 20
        Number of passes; each pass performs ``n`` single-sample updates.

    Attributes
    ----------
    W1 : numpy.ndarray of shape (hidden_units, d + 1), or None before ``fit``
        Input-to-hidden weights; column 0 multiplies the bias input.
    W2 : numpy.ndarray of shape (1, hidden_units + 1), or None before ``fit``
        Hidden-to-output weights; column 0 multiplies the bias unit.
    """

    def __init__(self, hidden_units=2, lr=0.01, epochs=20):
        self.hidden_units = hidden_units
        self.lr = lr
        self.epochs = epochs
        # Both weight matrices are created by fit().
        self.W1 = None
        self.W2 = None

    # -------- Activation --------
    def _sigmoid(self, z):
        """Element-wise logistic sigmoid that never exponentiates a positive number."""
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))  # argument is <= 0, so this cannot overflow
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- same value, no overflow.
        return np.where(z >= 0, 1 / (1 + e), e / (1 + e))

    # -------- Training --------
    def fit(self, X, y):
        """Train the network with per-sample SGD.

        Parameters
        ----------
        X : array_like of shape (n, d)
            Training inputs, one sample per row.
        y : array_like of length n
            Target labels; ``y[r]`` is compared against the sigmoid output for
            row ``r``.

        Returns
        -------
        MLP
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``y`` does not have one label per row of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        n, d = X.shape
        if y.shape[0] != n:
            raise ValueError(f"X has {n} rows but y has {y.shape[0]} labels")
        h = self.hidden_units

        # Xavier-style initialisation: zero-mean Gaussian scaled by sqrt(1 / fan_in).
        self.W1 = np.random.randn(h, d + 1) * np.sqrt(1 / d)
        self.W2 = np.random.randn(1, h + 1) * np.sqrt(1 / h)

        for _ in range(self.epochs):
            for _ in range(n):
                # Sample a row uniformly at random (with replacement).
                r = np.random.randint(0, n)

                # ---- Forward ----
                x = np.insert(X[r], 0, 1)      # prepend bias input -> (d + 1,)
                z1 = self.W1 @ x               # (h,)
                a1 = z1                        # LINEAR hidden
                # a1 is 1-D, so the bias unit is inserted without an axis
                # argument (axis=1 is out of bounds for a 1-D array).
                a1b = np.insert(a1, 0, 1)      # (h + 1,)
                z2 = self.W2 @ a1b             # (1,)
                y_pred = self._sigmoid(z2)

                # ---- Backward ----
                error = y_pred - y[r]          # correct for sigmoid + BCE

                dW2 = error * a1b              # (h + 1,), broadcasts onto W2's single row
                # Back-propagate through W2, skipping its bias column.
                d_hidden = (self.W2[:, 1:].T * error).flatten()
                dW1 = d_hidden[:, None] @ x[None, :]   # outer product -> (h, d + 1)

                # ---- Update ----
                self.W2 -= self.lr * dW2
                self.W1 -= self.lr * dW1

        return self

    # -------- Prediction --------
    def predict(self, X, threshold=0.5):
        """Predict a 0/1 label for every row of ``X``.

        Parameters
        ----------
        X : array_like of shape (n, d)
            Inputs with the same number of features used in ``fit``.
        threshold : float, default 0.5
            A row is labelled 1 when its sigmoid output is ``>= threshold``.

        Returns
        -------
        numpy.ndarray of shape (n,)
            Integer array of 0s and 1s.

        Raises
        ------
        RuntimeError
            If called before ``fit`` has created the weights.
        """
        if self.W1 is None or self.W2 is None:
            raise RuntimeError("MLP.predict() called before fit()")

        X = np.asarray(X)
        ones = np.ones((X.shape[0], 1))

        # Same forward pass as fit(), applied to all rows at once.
        Xb = np.hstack([ones, X])              # (n, d + 1)
        a1 = Xb @ self.W1.T                    # (n, h), LINEAR hidden
        a1b = np.hstack([ones, a1])            # (n, h + 1)
        z2 = (a1b @ self.W2.T).ravel()         # (n,)

        return (self._sigmoid(z2) >= threshold).astype(int)


In [182]:
# import numpy as np 
# np.random.seed(40)
# x = np.random.randint(1, 4 , (2,2))
# n, d = X.shape
# h = 2
# W1 = np.random.randn(h, d + 1) * np.sqrt(1 / d)
# W2 = np.random.randn(1, h + 1) * np.sqrt(1 / h)
# x = np.insert(x , 0 ,1, axis= 1  )
# z1 = W1 @ x
# a1 = z1                        # LINEAR hidden
# a1b = np.insert(a1, 0, 1 , axis=1)
# z2 = W2 @ a1b
# y_pred = sigmoid(z2)

In [183]:
# NOTE(review): numpy is already imported in the first cell; this import is a duplicate.
import numpy as np 
# Seed NumPy's global RNG so the randint/randn draws in the following cells are repeatable.
np.random.seed(2)

In [184]:
# Toy design matrix: 2 samples x 2 features, integer entries drawn from {1, 2, 3}.
X = np.random.randint(1, 4, size=(2, 2))
X

array([[1, 2],
       [1, 3]], dtype=int32)

In [185]:
# Index of the sample used for this hand-worked pass.
# NOTE(review): randint(1, 2) samples from the half-open range [1, 2), so r is
# always 1; MLP.fit draws its index with randint(0, n) instead.
r = np.random.randint(1,2)
r

1

In [186]:
# n = number of samples, d = number of features (both read from X);
# h = hidden-layer width chosen for this walkthrough.
n, d = X.shape
h = 2
n, d, h

(2, 2, 2)

In [187]:
# Same scaled-Gaussian initialisation as MLP.fit: standard-normal draws
# multiplied by sqrt(1 / fan_in). The extra "+ 1" column holds the bias weight.
W1 = np.sqrt(1 / d) * np.random.randn(h, d + 1)
W2 = np.sqrt(1 / h) * np.random.randn(1, h + 1)
W1, W2

(array([[-0.01595568,  0.46842133, -0.62755019],
        [-0.32590013, -0.49061966,  0.25671908]]),
 array([[ 1.6147905 , -0.18033908,  0.39190957]]))

In [188]:
# Take sample r and prepend a constant 1 so column 0 of W1 acts as the bias.
x = np.insert(X[r] , 0 ,1)
x

array([1, 1, 3], dtype=int32)

In [189]:
# Hidden pre-activation: each row of W1 dotted with the bias-augmented input.
z1 = W1 @ x
z1

array([-1.43018493, -0.04636256])

In [190]:
# Linear (identity) hidden activation, as in MLP.fit: a1 is just another name for z1.
a1 = z1 
a1

array([-1.43018493, -0.04636256])

In [191]:
# Prepend the bias unit for the output layer. a1 is 1-D, so np.insert is
# called without an axis argument.
a1b = np.insert(a1, 0, 1 )
a1b

array([ 1.        , -1.43018493, -0.04636256])

In [192]:
# Output pre-activation: W2 has a single row, so the result has one element.
z2 = W2 @ a1b
z2

array([1.85453881])

In [193]:
# Squash the output pre-activation through the sigmoid to get the prediction.
y_pred = sigmoid(z2)
y_pred

array([0.86465913])

In [194]:
# The forward pass is done; now define the target labels y (one per row of X).
y = np.array([1,0])

In [195]:
# Output error for sample r; MLP.fit uses this same quantity as the gradient
# with respect to z2 (sigmoid output + binary cross-entropy).
error = y_pred - y[r]
error

array([0.86465913])

In [196]:
# Gradient for W2: the output error scaled by each entry of a1b (bias entry first).
# NOTE(review): spelled dw2 here but dW2 in MLP.fit.
dw2 = error * a1b
dw2 

array([ 0.86465913, -1.23662246, -0.04008781])

In [206]:
# Error pushed back through W2 with its bias column (column 0) dropped; shape (1, h).
W2[:, 1:]* error

array([[-0.15593183,  0.33886819]])

In [217]:
# Error signal at the hidden units: the non-bias W2 weights, transposed to a
# column, scaled by the output error, then flattened to a 1-D vector of length h.
d_hidden = (W2[:, 1:].T * error).flatten()
d_hidden

array([-0.15593183,  0.33886819])

In [218]:
# Understanding step: the un-flattened product is already a column, and
# indexing the flattened d_hidden with [:, None] rebuilds that same column
# shape, so the final comparison of the two shapes is True.
W2[:, 1:].T * error, d_hidden[:, None] ,(W2[:, 1:].T * error ).shape == d_hidden[:, None].shape

(array([[-0.15593183],
        [ 0.33886819]]),
 array([[-0.15593183],
        [ 0.33886819]]),
 True)

In [175]:
# Gradient for W1 as an outer product: (h, 1) column of hidden errors times
# (1, d + 1) row of inputs gives one row per hidden unit, one column per input.
# This relies on d_hidden being the flattened 1-D vector.
dW1 = d_hidden[:, None] @ x[None, :]
dW1

array([[-0.15593183, -0.15593183, -0.4677955 ],
       [ 0.33886819,  0.33886819,  1.01660456]])

In [176]:
# Left factor of the dW1 outer product: d_hidden viewed as a column.
d_hidden[:, None]

array([[-0.15593183],
       [ 0.33886819]])

In [177]:
 # Right factor of the dW1 outer product: x viewed as a single row.
 x[None, :]

array([[1, 1, 3]], dtype=int32)

In [178]:
 # NOTE(review): same expression as the previous cell (x as a single row); duplicate.
 x[None , :]

array([[1, 1, 3]], dtype=int32)

In [203]:
# NOTE(review): this rebinds d_hidden to the un-flattened (h, 1) column. The dW1
# cell indexes d_hidden[:, None] and expects the flattened 1-D version, so
# re-running it after this cell would produce a 3-D result instead of (h, d + 1).
d_hidden = (W2[:, 1:].T * error) 
d_hidden 

array([[-0.15593183],
       [ 0.33886819]])

In [204]:
# Re-display the bias-augmented input vector for reference.
x

array([1, 1, 3], dtype=int32)

In [179]:
# x = np.random.randint(1, 4, (2, 2))
# np.concat((np.ones((x.shape[0], 1)), x), axis=1)