In [1]:
# Cell 1: Imports
import numpy as np

In [2]:


# Cell 2: Activation Functions
def relu(Z):
    return np.maximum(0, Z)

def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def relu_derivative(Z):
    return Z > 0

def sigmoid_derivative(Z):
    return sigmoid(Z) * (1 - sigmoid(Z))

def softmax(Z):
    return np.exp(Z) / np.sum(np.exp(Z), axis=1, keepdims=True)

In [3]:


# Cell 3: Data Preprocessing
def to_one_hot(y, num_classes):
    """
    Convert class labels to one-hot encoded vectors
    y: labels (can be 1D array, column vector, or row vector)
    num_classes: number of classes
    """
    y = np.array(y).reshape(-1)
    m = len(y)
    one_hot = np.zeros((m, num_classes))
    
    if num_classes == 1:
        return y.reshape(-1, 1)
    
    one_hot[np.arange(m), y.astype(int)] = 1
    return one_hot

In [4]:

# Cell 4: Neural Network Core Functions
def initialize_parameters(layers):
    parameters = {}
    for i in range(1, len(layers)):
        parameters[f'W{i}'] = np.random.randn(layers[i], layers[i - 1]) * 0.01
        parameters[f'b{i}'] = np.zeros((layers[i], 1))
    return parameters

In [5]:


def forward_propagation(X, parameters, layers):
    cache = {"A0": X}
    A = X
    for i in range(1, len(layers)-1):
        Z = np.dot(parameters[f'W{i}'], A.T) + parameters[f'b{i}']
        A = relu(Z.T)
        cache[f"Z{i}"], cache[f"A{i}"] = Z, A
    Z = np.dot(parameters[f'W{len(layers)-1}'], A.T) + parameters[f'b{len(layers)-1}']
    A = sigmoid(Z.T)
    cache[f"Z{len(layers)-1}"], cache[f"A{len(layers)-1}"] = Z, A
    return A, cache

In [6]:


def compute_loss(y_true, y_pred):
    m = y_true.shape[0]
    loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return loss

In [7]:


def back_propagation(y, parameters, cache, layers):
    grads = {}
    m = y.shape[0]
    L = len(layers) - 1
    
    if len(y.shape) == 1 or y.shape[1] == 1:
        y = to_one_hot(y, layers[-1])
    
    A = cache[f"A{L}"]
    dZ = A - y
    
    for i in reversed(range(1, L+1)):
        dW = (1/m) * np.dot(dZ.T, cache[f"A{i-1}"])
        db = (1/m) * np.sum(dZ, axis=0).reshape(-1, 1)
        
        grads[f"dW{i}"], grads[f"db{i}"] = dW, db
        
        if i > 1:
            dA = np.dot(dZ, parameters[f"W{i}"])
            dZ = dA * relu_derivative(cache[f"Z{i-1}"].T)
    
    return grads

In [8]:


def update_parameters(parameters, grads, learning_rate):
    """
    Update network parameters using gradients
    """
    for i in range(1, len(parameters) // 2 + 1):
        parameters[f"W{i}"] -= learning_rate * grads[f"dW{i}"]
        parameters[f"b{i}"] -= learning_rate * grads[f"db{i}"]
    return parameters

In [9]:
 #Cell 5: Training Function
def train(X, y, layers, learning_rate=0.1, epochs=1000):
    parameters = initialize_parameters(layers)
    for epoch in range(epochs):
        y_pred, cache = forward_propagation(X, parameters, layers)
        loss = compute_loss(y, y_pred)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")
        grads = back_propagation(y, parameters, cache, layers)
        parameters = update_parameters(parameters, grads, learning_rate)
    return parameters

In [10]:

#

# Cell 6: Prediction Function
def predict(X, parameters, layers):
    y_pred, _ = forward_propagation(X, parameters, layers)
    return y_pred > 0.5

# Cell 7: Example Usage

In [11]:


# Define example data
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Shape: (4, 2)
y = np.array([[0], [1], [1], [1]])  # Shape: (4, 1)

# Define and train the network
layers = [2, 3, 4, 4, 1]
parameters = train(X, y, layers)

# Make predictions
predictions = predict(X, parameters, layers)
print("Predictions:", predictions)

Epoch 0, Loss: 0.6931471537880713
Epoch 100, Loss: 0.5641017845788526
Epoch 200, Loss: 0.5623718511098156
Epoch 300, Loss: 0.5623359302024674
Epoch 400, Loss: 0.5623351338422893
Epoch 500, Loss: 0.5623351158129316
Epoch 600, Loss: 0.562335115473
Epoch 700, Loss: 0.562335115207304
Epoch 800, Loss: 0.5623351149941209
Epoch 900, Loss: 0.5623351150504285
Predictions: [[ True]
 [ True]
 [ True]
 [ True]]
