In [1]:
import numpy as np

# Fix the global RNG state so the initial weights are identical on every run
np.random.seed(42)

# Layer sizes: 3 inputs -> 2 hidden units -> 1 output
input_neurons, hidden_neurons, output_neurons = 3, 2, 1

# Hidden layer parameters: small random weights, zero biases (one bias per unit)
W1 = 0.01 * np.random.randn(hidden_neurons, input_neurons)
b1 = np.zeros(shape=(hidden_neurons, 1))

# Output layer parameters; W2 is drawn after W1 from the same RNG stream
W2 = 0.01 * np.random.randn(output_neurons, hidden_neurons)
b2 = np.zeros(shape=(output_neurons, 1))


In [10]:
# Dump the freshly initialised parameters, one labelled section per array.
# The adjacent f-string literals are concatenated into a single message.
print(
    f"Weights in the input - hidden network: \n  {W1} \n"
    f" Bias in the input - hidden network: \n  {b1}  \n"
    f" Weights in the hidden - output network: \n {W2} \n"
    f" Bias in the hidden - output network: \n  {b2} "
)

Weights in the input - hidden network: 
  [[ 0.00496714 -0.00138264  0.00647689]
 [ 0.0152303  -0.00234153 -0.00234137]] 
 Bias in the input - hidden network: 
  [[0.]
 [0.]]  
 Weights in the hidden - output network: 
 [[0.01579213 0.00767435]] 
 Bias in the hidden - output network: 
  [[0.]] 


In [11]:
def sigmoid(z):
    """
    Numerically stable logistic sigmoid, 1 / (1 + exp(-z)), applied element-wise.

    Parameters:
    - z: Scalar or array-like of pre-activation values

    Returns:
    - Sigmoid of z, with the same shape as z
    """
    z = np.asarray(z)
    # exp(-|z|) never exceeds 1, so it cannot overflow the way exp(-z) does
    # for large-magnitude negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    # Both are the same function; only the exponent's sign differs.
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

def forward_pass(X, W1, b1, W2, b2):
    """
    Pushes the inputs through the hidden layer and then the output layer,
    applying a sigmoid after each linear step.

    Parameters:
    - X: Input data (shape: input_neurons x number_of_examples)
    - W1, b1: Weights and bias of the input-to-hidden layer
    - W2, b2: Weights and bias of the hidden-to-output layer

    Returns:
    - A2: Activations of the output layer
    - cache: Dictionary with the intermediate values "Z1", "A1", "Z2", "A2"
    """
    # Input -> hidden
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = sigmoid(hidden_pre)

    # Hidden -> output
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    # Keep every intermediate so the backward pass can reuse them
    return output_act, dict(Z1=hidden_pre, A1=hidden_act, Z2=output_pre, A2=output_act)


In [13]:
def backward_pass(X, Y, cache, W1, W2):
    """
    Performs the backpropagation step to compute gradients of the loss function
    with respect to weights and biases.

    Parameters:
    - X: Input data (shape: input_neurons x number_of_examples)
    - Y: True labels (shape: output_neurons x number_of_examples)
    - cache: Dictionary of forward-pass values; only "A1" and "A2" are read
    - W1: Weights for input-to-hidden layer (not used by the computation;
          kept so the call signature stays unchanged)
    - W2: Weights for hidden-to-output layer

    Returns:
    - gradients: Dictionary containing gradients for W1, b1, W2, b2
                 under the keys "dW1", "db1", "dW2", "db2"
    """

    m = X.shape[1]  # Number of examples in the batch
    A1, A2 = cache["A1"], cache["A2"]  # Activations from hidden and output layers

    # Gradients for the output layer
    dZ2 = A2 - Y  # Derivative of the loss with respect to Z2 (output layer pre-activation)
    dW2 = np.dot(dZ2, A1.T) / m  # Gradient of loss with respect to W2
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m  # Gradient of loss with respect to b2

    # Gradients for the hidden layer.
    # A1 * (1 - A1) is the sigmoid derivative written in terms of its output,
    # which is why the pre-activation Z1 is not needed here.
    dZ1 = np.dot(W2.T, dZ2) * A1 * (1 - A1)  # Backpropagate the error to the hidden layer
    dW1 = np.dot(dZ1, X.T) / m  # Gradient of loss with respect to W1
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m  # Gradient of loss with respect to b1

    # Store the gradients in a dictionary
    gradients = {
        "dW1": dW1,  # Gradient for W1
        "db1": db1,  # Gradient for b1
        "dW2": dW2,  # Gradient for W2
        "db2": db2   # Gradient for b2
    }

    return gradients

In [14]:
def update_parameters(W1, b1, W2, b2, gradients, learning_rate):
    """
    Applies one gradient-descent step and returns the updated parameters.

    The arrays passed in are left untouched; new arrays are returned instead,
    so re-running a cell never starts from silently modified weights.
    Callers must use the returned values.

    Parameters:
    - W1, b1: Weights and bias of the input-to-hidden layer
    - W2, b2: Weights and bias of the hidden-to-output layer
    - gradients: Dictionary with the keys "dW1", "db1", "dW2", "db2"
    - learning_rate: Step size multiplying each gradient

    Returns:
    - W1, b1, W2, b2: The parameters after the update
    """
    # Plain subtraction (not `-=`) builds new arrays rather than writing into
    # the caller's.
    W1 = W1 - learning_rate * gradients["dW1"]
    b1 = b1 - learning_rate * gradients["db1"]
    W2 = W2 - learning_rate * gradients["dW2"]
    b2 = b2 - learning_rate * gradients["db2"]
    return W1, b1, W2, b2


In [None]:
def train(X, Y, W1, b1, W2, b2, iterations, learning_rate):
    """
    Trains the network with batch gradient descent.

    Each iteration runs forward_pass, backward_pass and update_parameters on
    the full batch. Every 100th iteration the binary cross-entropy loss is
    printed.

    Parameters:
    - X: Input data (shape: input_neurons x number_of_examples)
    - Y: True labels (shape: output_neurons x number_of_examples)
    - W1, b1: Initial weights and bias of the input-to-hidden layer
    - W2, b2: Initial weights and bias of the hidden-to-output layer
    - iterations: Number of gradient-descent steps
    - learning_rate: Step size for each update

    Returns:
    - W1, b1, W2, b2: The parameters after the last update
    """
    eps = 1e-15  # Keeps the arguments of log() strictly inside (0, 1)

    for i in range(iterations):
        # Forward pass
        A2, cache = forward_pass(X, W1, b1, W2, b2)

        # The loss is only reported, never used for the update, so it is
        # computed only on the iterations that print it.
        if i % 100 == 0:
            # A saturated output (exactly 0 or 1) would make log() return -inf
            # and turn the loss into inf/nan; clipping keeps it finite.
            A2_safe = np.clip(A2, eps, 1 - eps)
            loss = -np.mean(Y * np.log(A2_safe) + (1 - Y) * np.log(1 - A2_safe))
            print(f"Iteration {i}, Loss: {loss}")

        # Backward pass
        gradients = backward_pass(X, Y, cache, W1, W2)

        # Update parameters
        W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, gradients, learning_rate)

    return W1, b1, W2, b2


In [15]:
import numpy as np

def sigmoid(z):
    """
    Sigmoid activation function, 1 / (1 + exp(-z)), in a numerically stable form.

    Parameters:
    - z: Scalar or array-like of pre-activation values

    Returns:
    - Sigmoid of z, with the same shape as z
    """
    z = np.asarray(z)
    # exp(-|z|) never exceeds 1, so it cannot overflow the way exp(-z) does
    # for large-magnitude negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    # Both are the same function; only the exponent's sign differs.
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

def sigmoid_derivative(a):
    """
    Derivative of the sigmoid, expressed through the sigmoid's output.

    Parameters:
    - a: Sigmoid activation value(s), i.e. the result of sigmoid(z)

    Returns:
    - a * (1 - a), computed element-wise
    """
    complement = 1 - a
    return a * complement

def neural_network(X, Y, input_neurons, hidden_neurons, output_neurons, iterations, learning_rate, seed=42):
    """
    Implements a complete neural network with one hidden layer from scratch.

    The network uses sigmoid activations in both layers and is trained with
    full-batch gradient descent. The binary cross-entropy loss is printed
    every 100 iterations.

    Parameters:
    - X: Input data (shape: input_neurons x number_of_examples)
    - Y: True labels (shape: output_neurons x number_of_examples)
    - input_neurons: Number of input features
    - hidden_neurons: Number of neurons in the hidden layer
    - output_neurons: Number of output neurons
    - iterations: Number of training iterations
    - learning_rate: Learning rate for gradient descent
    - seed: Seed for the weight initialisation (default 42, the value that
            was previously hard-coded)

    Returns:
    - predictions: Final predictions after training
    """
    # A private generator draws the same stream as np.random.seed(seed)
    # followed by np.random.randn, without resetting the global NumPy RNG.
    rng = np.random.RandomState(seed)
    W1 = rng.randn(hidden_neurons, input_neurons) * 0.01
    b1 = np.zeros((hidden_neurons, 1))
    W2 = rng.randn(output_neurons, hidden_neurons) * 0.01
    b2 = np.zeros((output_neurons, 1))

    def _forward(inputs):
        """Runs the forward pass with the current parameters; returns (A1, A2)."""
        A1 = sigmoid(np.dot(W1, inputs) + b1)  # Hidden layer activation
        A2 = sigmoid(np.dot(W2, A1) + b2)      # Output layer activation
        return A1, A2

    m = X.shape[1]  # Number of examples (constant across iterations)
    eps = 1e-15     # Keeps the arguments of log() strictly inside (0, 1)

    # Training loop
    for i in range(iterations):
        # ======= Forward Pass =======
        A1, A2 = _forward(X)

        # ======= Report Loss =======
        # The loss does not feed into the gradients below, so it is computed
        # only on the iterations that print it.
        if i % 100 == 0:
            # Clipping prevents log(0) = -inf when an output saturates
            A2_safe = np.clip(A2, eps, 1 - eps)
            loss = -np.mean(Y * np.log(A2_safe) + (1 - Y) * np.log(1 - A2_safe))  # Binary cross-entropy loss
            print(f"Iteration {i}, Loss: {loss}")

        # ======= Backpropagation =======
        dZ2 = A2 - Y  # Derivative of loss with respect to Z2
        dW2 = np.dot(dZ2, A1.T) / m  # Gradient for W2
        db2 = np.sum(dZ2, axis=1, keepdims=True) / m  # Gradient for b2

        dZ1 = np.dot(W2.T, dZ2) * sigmoid_derivative(A1)  # Derivative of loss with respect to Z1
        dW1 = np.dot(dZ1, X.T) / m  # Gradient for W1
        db1 = np.sum(dZ1, axis=1, keepdims=True) / m  # Gradient for b1

        # ======= Update Parameters =======
        # In-place updates are safe here: the arrays were created inside this call.
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

    # ======= Make Predictions =======
    _, predictions = _forward(X)  # Final forward pass with the trained parameters

    return predictions


In [40]:
# Toy dataset: each column is one example (3 features x 4 examples)
X = np.array([
    [0, 1, 1, 0],
    [1, 1, 0, 0],
    [0, 0, 1, 1],
])
# Binary target for each of the 4 examples
Y = np.array([[0, 1, 1, 0]])

# Layer sizes and optimisation settings
input_neurons, hidden_neurons, output_neurons = 3, 2, 1
iterations, learning_rate = 1000, 0.5

# Fit the network and collect its output probabilities on the training inputs
predictions = neural_network(
    X, Y, input_neurons, hidden_neurons, output_neurons, iterations, learning_rate
)

# Show the raw output probabilities
print("Final Predictions:", predictions)

# Measure the model's accuracy on the training examples (in percent)
predicted_classes = (predictions > 0.5).astype(int)  # Threshold probabilities at 0.5
accuracy = (predicted_classes == Y).mean() * 100
print(accuracy)



Iteration 0, Loss: 0.6931522695910078
Iteration 100, Loss: 0.6912882521817083
Iteration 200, Loss: 0.5411508721854664
Iteration 300, Loss: 0.12652797324040055
Iteration 400, Loss: 0.05122404947767995
Iteration 500, Loss: 0.030458245616755843
Iteration 600, Loss: 0.021333893293188477
Iteration 700, Loss: 0.016305106985533308
Iteration 800, Loss: 0.01314784354605019
Iteration 900, Loss: 0.010991588515036073
Final Predictions: [[0.01197406 0.99320957 0.99320977 0.01197433]]
100.0
