In [3]:
# single hidden-layer Neural Network Classifier
import sys
sys.path.append('..')
import numpy as np
from activation.ReLU import ReLU #for activation of neurons 
from activation.Sigmoid import Sigmoid # for final prediction "Output"

In [4]:
def initialize_params(n_features, n_hidden, n_output):
    """Create the starting weights and biases for the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by
    sqrt(2 / fan_in) (He initialization, suited to ReLU); biases start at zero.

    Returns:
        (W1, b1, W2, b2) with shapes (n_hidden, n_features), (n_hidden, 1),
        (n_output, n_hidden) and (n_output, 1).
    """
    # Hidden layer: fan-in is the number of input features.
    hidden_scale = np.sqrt(2. / n_features)
    hidden_weights = np.random.randn(n_hidden, n_features) * hidden_scale
    hidden_bias = np.zeros((n_hidden, 1))

    # Output layer: fan-in is the number of hidden units.
    output_scale = np.sqrt(2. / n_hidden)
    output_weights = np.random.randn(n_output, n_hidden) * output_scale
    output_bias = np.zeros((n_output, 1))

    return hidden_weights, hidden_bias, output_weights, output_bias


In [23]:
def forward_propagation(X, W1, b1, W2, b2):
    """Run one forward pass through the hidden layer and the output layer.

    Returns:
        (Z1, A1, Z2, A2): the hidden pre-activation, the hidden activation
        (ReLU.forward of Z1), the output pre-activation, and the final
        output (Sigmoid.forward of Z2).
    """
    # Hidden layer: affine transform followed by ReLU.
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = ReLU.forward(hidden_pre)

    # Output layer: affine transform followed by sigmoid.
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = Sigmoid.forward(output_pre)

    return hidden_pre, hidden_act, output_pre, output_act


In [24]:
def back_propagation(expected, A2, Z1, A1, W1, W2, X):
    """Compute the gradients of the loss with respect to both layers' parameters.

    The output-layer error is taken as ``A2 - expected``, which is the
    simplified form for a sigmoid output combined with cross-entropy loss.
    Gradients are averaged over the m columns of X.

    Returns:
        (dW1, db1, dW2, db2), to be used for the gradient-descent update.
    """
    m = X.shape[1]
    scale = 1 / m

    # Output layer.
    output_error = A2 - expected
    grad_W2 = scale * np.dot(output_error, A1.T)
    grad_b2 = scale * np.sum(output_error, axis=1, keepdims=True)

    # Hidden layer: push the error back through W2, then through the ReLU.
    hidden_act_error = np.dot(W2.T, output_error)
    hidden_error = hidden_act_error * ReLU.derivative(Z1)
    grad_W1 = scale * np.dot(hidden_error, X.T)
    grad_b1 = scale * np.sum(hidden_error, axis=1, keepdims=True)

    return grad_W1, grad_b1, grad_W2, grad_b2


In [25]:
def update_params(W1, W2, b1, b2, dW1, dW2, db1, db2, eta):
    """Apply one gradient-descent step to every parameter.

    Each parameter becomes ``param - eta * gradient``.

    Note the ordering: arguments arrive as (W1, W2, b1, b2, ...) but the
    result is returned as (W1, b1, W2, b2).
    """
    def _descend(param, grad):
        # Move against the gradient, scaled by the learning rate.
        return param - eta * grad

    return (
        _descend(W1, dW1),
        _descend(b1, db1),
        _descend(W2, dW2),
        _descend(b2, db2),
    )
    
    

In [32]:
def CrossEntropyLoss(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    y_true: (1, m) true labels (0 or 1)
    y_pred: (1, m) predicted probabilities from sigmoid

    Returns the summed per-sample loss divided by m = y_true.shape[1].
    """
    eps = 1e-8  # small offset so log() is never evaluated at exactly 0
    n_samples = y_true.shape[1]

    positive_term = y_true * np.log(y_pred + eps)
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)

    return -np.sum(positive_term + negative_term) / n_samples

In [33]:
""" 
Mini-batch gradient descent that periodically reports the cross-entropy loss on the full dataset

"""
def mini_batch_gradient_descent(X, Y, W1, b1, W2, b2, eta = 0.01, epochs = 1000, batch_size = 128):
    """Train the network with mini-batch gradient descent.

    Every epoch the sample columns of X and Y are shuffled with one shared
    permutation, split into consecutive batches of at most ``batch_size``
    columns, and the parameters are updated once per batch. Every 100 epochs
    (starting at epoch 0) the cross-entropy loss on the full X/Y is computed,
    recorded and printed.

    Parameters:
        X: 2-D input array with one sample per column.
        Y: 2-D label array with one sample per column, aligned with X.
        W1, b1, W2, b2: initial parameters of the two layers.
        eta: learning rate passed to ``update_params``.
        epochs: number of passes over the data.
        batch_size: maximum number of columns per mini-batch.

    Returns:
        (W1, b1, W2, b2, loss_history) where ``loss_history`` is the list of
        losses recorded at each reporting epoch.
    """
    m = X.shape[1]
    # Created here so the function works on a fresh kernel instead of relying
    # on a list that happens to exist in the notebook's global namespace.
    loss_history = []

    for epoch in range(epochs):
        # Use one permutation for both arrays so every sample column keeps
        # its matching label column after shuffling.
        permutation = np.random.permutation(m)
        X_perm = X[:, permutation]
        Y_perm = Y[:, permutation]

        for i in range(0, m, batch_size):
            X_batch = X_perm[:, i:i + batch_size]
            Y_batch = Y_perm[:, i:i + batch_size]

            Z1, A1, Z2, A2 = forward_propagation(X_batch, W1, b1, W2, b2)  # feed-forward
            dW1, db1, dW2, db2 = back_propagation(Y_batch, A2, Z1, A1, W1, W2, X_batch)  # back-propagate
            W1, b1, W2, b2 = update_params(W1, W2, b1, b2, dW1, dW2, db1, db2, eta)  # apply the batch update

        # Report the cross-entropy loss on the full dataset every 100 epochs.
        if epoch % 100 == 0:
            _, _, _, A2_full = forward_propagation(X, W1, b1, W2, b2)
            # The loss function defined in this file is named CrossEntropyLoss.
            loss = CrossEntropyLoss(Y, A2_full)
            loss_history.append(loss)
            print(f"Epoch {epoch:4d}, Loss: {loss:.4f}")

    return W1, b1, W2, b2, loss_history
    

In [None]:
"""
Called after training the model to actually predict class labels.
"""

def predict(X, W1, b1, W2, b2):
    """Return hard 0/1 predictions for X.

    Runs a forward pass and marks every output strictly greater than 0.5
    as 1 and everything else as 0 (integer array).
    """
    forward_result = forward_propagation(X, W1, b1, W2, b2)
    probabilities = forward_result[3]  # A2, the sigmoid output
    is_positive = probabilities > 0.5
    return is_positive.astype(int)
