In [34]:
import numpy as np

class BinaryClassifier: 
    def __init__(self, layer_sizes, mu=0.0, sigma=0.01): 
        self.layer_sizes = layer_sizes 
        self.mu = mu
        self.sigma = sigma 
        self.weights = []
        self.biases  = []
        
        # Generate start matrix for weights and biases 
        for i in range(len(self.layer_sizes) - 1):
            n_in = self.layer_sizes[i]           
            n_out = self.layer_sizes[i + 1]      
            self.weights.append(np.random.normal(mu, sigma, size=(n_out, n_in)))
            self.biases.append(np.random.normal(mu, sigma, size=(n_out, 1)))

        self.A_list = None 
        self.Z_list = None 
        self.loss_history = []
    
    # -- Activation function --- 
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def sigmoid_prime(self, z): 
        return self.sigmoid(z) * (1 - self.sigmoid(z))
    
    # --- loss function ---
    def bce_loss(self, y, a, eps=1e-12):
        # y, a: (1, m)
        a = np.clip(a, a_min=eps, a_max=1-eps)  # Ensures that a never becomes 0
        m = y.shape[1]  # Number of inputs 
        return -1/m * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))

    # --- Feedforward--- 
    def feedforward(self, X):
        A = X           # A^(0)
        A_list = [X]    # stores activations
        Z_list = []     # stores pre-activations
        
        for l in range(len(weights)): 
            Z = self.weights[l] @ A + self.biases[l]
            A = sigmoid(Z)
            Z_list.append(Z)
            A_list.append(A)
            
            self.A_list, self.Z_list =A_list, Z_list
        return A
    
    # --- Backward propagation --- 
    def backward(self, y): 
        A_list, Z_list = self.A_list, self.Z_list
        m = y.shape[1]

        # BP1: Compute delta for output layer 
        delta_L = A_list[-1] - y

        # Store deltas backwards 
        deltas = [delta_L]

        # BP2: Backpropagate trough hidden layers 
        for l in range(len(self.weights) - 1, 0, -1): 
            delta_next = deltas[0]  # Most recently computed delta (from layer l+1)
            z = Z_list[l - 1]       # Pre-activation at layer l
            delta = (self.weights[l].T @ delta_next) * self.sigmoid_prime(z)
            deltas.insert(0, delta) # Insert at the beginning

        # Store gradients
        nable_weights, nabla_biases = [], []

        for l in range(len(self.weights)):
            nable_weights.append((deltas[l] @ A_list[l].T) / m)                 # matches weights[l]
            nabla_biases.append(np.sum(deltas[l], axis=1, keepdims=True) / m)  # matches biases[l]
        return nable_weights, nabla_biases
    
    # --- update --- 
    def step(self, nabla_w, nabla_b, lr):
        for l in range(len(self.weights)):
            self.weights[l] -= lr * nabla_w[l]
            self.biases[l]  -= lr * nabla_b[l]
    
    # --- training --- 
    def fit(self, X, y, lr=1.0, epochs=1000, verbose=100): 
        for epoch in range(epochs): 
            yhat = self.feedforward(X)
            loss = self.bce_loss(y, yhat)
            self.loss_history.append(loss)
            
            nabla_w, nabla_b = self.backward(y)
            self.step(nabla_w, nabla_b, lr)
            if verbose is not None and epoch % verbose == 0:
                print(epoch, loss)
        return self
    
    def predict_proba(self, X): 
        return self.feedforward(X)
    
    def predict(self, X, threshold=0.5):
        return (self.predict_proba(X) > threshold).astype(int)


In [35]:
X = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
y = np.array([[0, 1, 1, 0]])

net = BinaryClassifier([2, 3, 1])
net.fit(X, y, lr=1.0, epochs=5000, verbose=500)

probs = net.predict_proba(X)
preds = net.predict(X)

print("probs:", probs)
print("preds:", preds)
print("true :", y)

0 0.6932973185277789
500 0.6931471805989539
1000 0.6931471805984111
1500 0.6931471805978672
2000 0.693147180597322
2500 0.6931471805967759
3000 0.6931471805962288
3500 0.6931471805956804
4000 0.6931471805951309
4500 0.6931471805945804
probs: [[0.49998965 0.49999819 0.50000181 0.50001035]]
preds: [[0 0 1 1]]
true : [[0 1 1 0]]
