In [5]:
import numpy as np

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or array_like of real numbers
        Pre-activation value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of ``z``: a scalar for scalar input, otherwise an
        array with the same shape as ``z``.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer/bool input: promote so the arithmetic below is done in floats.
        z = z.astype(float)

    # exp(-|z|) lies in (0, 1], so it cannot overflow. The naive form calls
    # exp(-z), which overflows (and warns) once z is below roughly -709.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same value.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return out[()] if out.ndim == 0 else out

def sigmoid_deriv(z):
    """Derivative of the logistic function at ``z``.

    Uses the identity sigma'(z) = sigma(z) * (1 - sigma(z)), so only one call
    to ``sigmoid`` is needed.
    """
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement

def mlp(X, y, lr=0.01, epochs=20, hidden_units=2):
    """Train a one-hidden-layer network (sigmoid hidden, sigmoid output) by SGD.

    Every epoch performs ``n`` single-sample updates; each sample is drawn
    uniformly at random *with replacement* through ``np.random.randint``, so
    call ``np.random.seed`` beforehand for reproducible weights. The update is
    the gradient of the squared error ``0.5 * (y_pred - y) ** 2``.

    Parameters
    ----------
    X : array_like, shape (n, d)
        Training inputs, one sample per row.
    y : array_like, length n
        Targets; presumably 0/1 labels since the output is a sigmoid.
    lr : float
        Learning rate.
    epochs : int
        Number of passes of ``n`` updates each.
    hidden_units : int
        Width of the hidden layer (previously hard-coded to 2). Must be >= 1.

    Returns
    -------
    W1 : ndarray, shape (hidden_units, d + 1)
        Hidden-layer weights; column 0 is the bias.
    W2 : ndarray, shape (1, hidden_units + 1)
        Output-layer weights; column 0 is the bias.
    """
    # NOTE(review): another cell in this notebook defines a function with the
    # same name (linear hidden layer); whichever cell ran last is in effect.
    X = np.asarray(X)
    y = np.asarray(y)
    n, d = X.shape
    h = hidden_units

    # Xavier-style init: standard normal scaled by sqrt(1 / fan_in), where the
    # fan-in does not count the bias column.
    W1 = np.random.randn(h, d + 1) * np.sqrt(1 / d)
    W2 = np.random.randn(1, h + 1) * np.sqrt(1 / h)

    for _epoch in range(epochs):
        for _step in range(n):
            r = np.random.randint(0, n)

            # ---- Forward ----
            x = np.insert(X[r], 0, 1)          # (d+1,)  leading 1 = bias input
            z1 = W1 @ x                        # (h,)
            a1 = sigmoid(z1)
            a1b = np.insert(a1, 0, 1)          # (h+1,)  leading 1 = bias input
            z2 = W2 @ a1b                      # (1,)
            y_pred = sigmoid(z2)

            # ---- Backward ----
            error = y_pred - y[r]
            out_slope = sigmoid_deriv(z2)      # computed once, used twice below

            dW2 = error * out_slope * a1b      # (h+1,)
            # Back-propagate through W2 without its bias column. This must use
            # the pre-update W2, so both gradients are formed before the update.
            d_hidden = (W2[:, 1:].T * error * out_slope).flatten()
            dW1 = (d_hidden * sigmoid_deriv(z1))[:, None] @ x[None, :]

            # ---- Update ----
            W2 -= lr * dW2                     # (h+1,) broadcasts onto (1, h+1)
            W1 -= lr * dW1

    return W1, W2


In [4]:
def mlp(X, y, lr=0.01, epochs=20, hidden_units=2):
    """Train a network with a *linear* hidden layer and a sigmoid output by SGD.

    Every epoch performs ``n`` single-sample updates; each sample is drawn
    uniformly at random *with replacement* through ``np.random.randint``, so
    call ``np.random.seed`` beforehand for reproducible weights.

    The update multiplies the residual ``y_pred - y`` by the sigmoid slope at
    the output, i.e. it is the gradient of the squared error
    ``0.5 * (y_pred - y) ** 2``. It is not the binary-cross-entropy gradient,
    which would use the residual alone.

    Parameters
    ----------
    X : array_like, shape (n, d)
        Training inputs, one sample per row.
    y : array_like, length n
        Targets; presumably 0/1 labels since the output is a sigmoid.
    lr : float
        Learning rate.
    epochs : int
        Number of passes of ``n`` updates each.
    hidden_units : int
        Width of the hidden layer (previously hard-coded to 2). Must be >= 1.

    Returns
    -------
    W1 : ndarray, shape (hidden_units, d + 1)
        Hidden-layer weights; column 0 is the bias.
    W2 : ndarray, shape (1, hidden_units + 1)
        Output-layer weights; column 0 is the bias.
    """
    # NOTE(review): `sigmoid` / `sigmoid_deriv` are not defined in this cell and
    # must already exist in the kernel. Another cell also defines a function
    # named `mlp` (sigmoid hidden layer); whichever cell ran last is in effect.
    X = np.asarray(X)
    y = np.asarray(y)
    n, d = X.shape
    h = hidden_units

    # Xavier-style init: standard normal scaled by sqrt(1 / fan_in), where the
    # fan-in does not count the bias column.
    W1 = np.random.randn(h, d + 1) * np.sqrt(1 / d)
    W2 = np.random.randn(1, h + 1) * np.sqrt(1 / h)

    for _epoch in range(epochs):
        for _step in range(n):
            r = np.random.randint(0, n)

            # -------- Forward pass --------
            x = np.insert(X[r], 0, 1)      # (d+1,)  leading 1 = bias input
            z1 = W1 @ x                    # (h,)
            a1 = z1                        # LINEAR activation
            a1b = np.insert(a1, 0, 1)      # (h+1,)  leading 1 = bias input
            z2 = W2 @ a1b                  # (1,)
            y_pred = sigmoid(z2)

            # -------- Backprop --------
            error = y_pred - y[r]          # residual
            out_slope = sigmoid_deriv(z2)  # computed once, used twice below

            dW2 = error * out_slope * a1b  # (h+1,)

            # Back-propagate through W2 without its bias column. This must use
            # the pre-update W2, so both gradients are formed before the update.
            d_hidden = (W2[:, 1:].T * error * out_slope).flatten()
            dW1 = d_hidden[:, None] @ x[None, :]   # linear hidden: slope is 1

            # -------- Update --------
            W2 -= lr * dW2                 # (h+1,) broadcasts onto (1, h+1)
            W1 -= lr * dW1

    return W1, W2


In [6]:
def predict(X, W1, W2, threshold=0.5, hidden_activation="linear"):
    """Classify every row of ``X`` with a trained one-hidden-layer network.

    The forward pass is evaluated for all rows at once rather than row by row.

    Parameters
    ----------
    X : array_like, shape (n, d)
        Inputs, one sample per row. A 1-D array is treated as ``n`` samples of
        a single feature.
    W1 : ndarray, shape (h, d + 1)
        Hidden-layer weights; column 0 is the bias.
    W2 : ndarray, shape (1, h + 1)
        Output-layer weights; column 0 is the bias.
    threshold : float
        A sample is labelled 1 when its sigmoid output is ``>= threshold``.
    hidden_activation : {"linear", "sigmoid"}
        Activation applied to the hidden layer. ``"linear"`` (the default)
        reproduces the original behaviour; ``"sigmoid"`` matches weights
        trained with a sigmoid hidden layer.

    Returns
    -------
    ndarray of int, shape (n,)
        Predicted 0/1 labels.

    Raises
    ------
    ValueError
        If ``hidden_activation`` is not one of the supported names.
    """
    if hidden_activation not in ("linear", "sigmoid"):
        raise ValueError(
            "hidden_activation must be 'linear' or 'sigmoid', got %r" % (hidden_activation,)
        )

    def _logistic(z):
        # exp(-|z|) is in (0, 1], so very negative z cannot overflow the way
        # 1 / (1 + exp(-z)) does.
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X[:, None]
    bias = np.ones((X.shape[0], 1))

    # ---- Forward pass only ----
    z1 = np.hstack([bias, X]) @ np.asarray(W1).T        # (n, h)
    a1 = _logistic(z1) if hidden_activation == "sigmoid" else z1
    z2 = np.hstack([bias, a1]) @ np.ravel(W2)           # (n,)
    y_hat = _logistic(z2)

    return (y_hat >= threshold).astype(int)


In [7]:


class MLP:
    """Binary classifier: one linear hidden layer feeding a single sigmoid output.

    Training is per-sample SGD on the squared error ``0.5 * (y_pred - y) ** 2``.

    Parameters
    ----------
    hidden_units : int
        Width of the hidden layer (>= 1).
    lr : float
        Learning rate.
    epochs : int
        Number of passes; each pass performs ``n`` single-sample updates.
    random_state : int or None
        Seed for weight initialisation and sample selection. ``None`` (the
        default) draws from the global ``np.random`` stream, as before.

    Attributes
    ----------
    W1 : ndarray, shape (hidden_units, d + 1), or None before ``fit``
        Hidden-layer weights; column 0 is the bias.
    W2 : ndarray, shape (1, hidden_units + 1), or None before ``fit``
        Output-layer weights; column 0 is the bias.
    """

    def __init__(self, hidden_units=2, lr=0.01, epochs=20, random_state=None):
        self.hidden_units = hidden_units
        self.lr = lr
        self.epochs = epochs
        self.random_state = random_state
        self.W1 = None
        self.W2 = None

    # -------- Activation --------
    def _sigmoid(self, z):
        """Logistic function, evaluated without overflow for very negative ``z``."""
        z = np.asarray(z, dtype=float)
        # exp(-|z|) is in (0, 1]; the naive exp(-z) overflows for z << 0.
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def _sigmoid_deriv(self, z):
        """Slope of the logistic function: sigma(z) * (1 - sigma(z))."""
        s = self._sigmoid(z)
        return s * (1 - s)

    # -------- Training --------
    def fit(self, X, y):
        """Fit the weights by SGD and return ``self``.

        Parameters
        ----------
        X : array_like, shape (n, d)
            Training inputs, one sample per row.
        y : array_like, length n
            Targets; presumably 0/1 labels since the output is a sigmoid.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        n, d = X.shape
        h = self.hidden_units

        # A seeded RandomState exposes the same randn/randint calls as the
        # np.random module, so both cases share the code below.
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)

        # Xavier-style init: standard normal scaled by sqrt(1 / fan_in), where
        # the fan-in does not count the bias column.
        self.W1 = rng.randn(h, d + 1) * np.sqrt(1 / d)
        self.W2 = rng.randn(1, h + 1) * np.sqrt(1 / h)

        for _epoch in range(self.epochs):
            for _step in range(n):
                # Samples are drawn with replacement, not swept in order.
                r = rng.randint(0, n)

                # ---- Forward ----
                x = np.insert(X[r], 0, 1)     # (d+1,)  leading 1 = bias input
                z1 = self.W1 @ x              # (h,)
                a1 = z1                       # LINEAR hidden
                a1b = np.insert(a1, 0, 1)     # (h+1,)  leading 1 = bias input
                z2 = self.W2 @ a1b            # (1,)
                y_pred = self._sigmoid(z2)

                # ---- Backward ----
                # Residual times the output slope is the squared-error gradient
                # (a cross-entropy gradient would use the residual alone).
                error = y_pred - y[r]
                out_slope = self._sigmoid_deriv(z2)   # computed once, used twice

                dW2 = error * out_slope * a1b
                # Uses the pre-update W2, without its bias column.
                d_hidden = (self.W2[:, 1:].T * error * out_slope).flatten()
                dW1 = d_hidden[:, None] @ x[None, :]  # linear hidden: slope is 1

                # ---- Update ----
                self.W2 -= self.lr * dW2      # (h+1,) broadcasts onto (1, h+1)
                self.W1 -= self.lr * dW1

        return self

    # -------- Prediction --------
    def predict(self, X, threshold=0.5):
        """Return 0/1 labels for every row of ``X``.

        Parameters
        ----------
        X : array_like, shape (n, d)
            Inputs, one sample per row. A 1-D array is treated as ``n`` samples
            of a single feature.
        threshold : float
            A sample is labelled 1 when its sigmoid output is ``>= threshold``.

        Returns
        -------
        ndarray of int, shape (n,)

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if self.W1 is None or self.W2 is None:
            raise ValueError("MLP is not fitted yet; call fit(X, y) before predict().")

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X[:, None]
        bias = np.ones((X.shape[0], 1))

        # Whole-batch forward pass: prepend the bias column at each layer.
        hidden = np.hstack([bias, X]) @ self.W1.T               # (n, h), linear
        z2 = np.hstack([bias, hidden]) @ np.ravel(self.W2)      # (n,)

        return (self._sigmoid(z2) >= threshold).astype(int)
