In [1]:
#import libraries
import numpy as np


In [13]:
# Network hyperparameters

# Layer widths of the 8-3-8 network, in order: input, hidden, output
input_size, hidden_size, output_size = 8, 3, 8

# Step size applied to every gradient descent update
learning_rate = 0.01


In [15]:
# Initialize Weights and Biases

# Draw the initial weights from a seeded generator so that a fresh
# "Restart Kernel -> Run All" always starts from the same parameters and the
# training run is reproducible.
SEED = 42
rng = np.random.default_rng(SEED)

# Small random weights (standard normal scaled by 0.01); biases start at zero.
W1 = rng.standard_normal((hidden_size, input_size)) * 0.01   # input -> hidden weights, shape (hidden_size, input_size)
b1 = np.zeros((hidden_size, 1))                              # hidden-layer bias, column vector

W2 = rng.standard_normal((output_size, hidden_size)) * 0.01  # hidden -> output weights, shape (output_size, hidden_size)
b2 = np.zeros((output_size, 1))                              # output-layer bias, column vector


In [17]:
# Define Activation Functions
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    The value is evaluated as exp(-log(1 + exp(-z))) through
    ``np.logaddexp``.  The naive form overflows inside ``np.exp`` for large
    negative ``z`` and emits a RuntimeWarning; this form does not.

    Parameters
    ----------
    z : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``z``, same shape as ``z``, with values in [0, 1].
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), computed
    # without ever forming exp(-z) for large -z.
    return np.exp(-np.logaddexp(0, -z))

def sigmoid_derivative(z):
    """Derivative of the sigmoid with respect to its input, evaluated at z.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).  The
    sigmoid is evaluated once and reused for both factors instead of being
    recomputed.

    Parameters
    ----------
    z : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    Same shape as ``z``.
    """
    s = sigmoid(z)
    return s * (1 - s)


We'll use the sigmoid function as the activation for both the hidden and output layers; its derivative is needed during backpropagation.

In [20]:
# Forward Propagation Function
def forward_propagation(X):
    """Run one forward pass through the hidden and output layers.

    Reads the module-level parameters W1, b1, W2 and b2.

    Returns the tuple (Z1, A1, Z2, A2): the pre-activation and the sigmoid
    activation of the hidden layer, followed by those of the output layer.
    """
    # Hidden layer: affine map of the input, then sigmoid
    hidden_pre = W1.dot(X) + b1
    hidden_act = sigmoid(hidden_pre)

    # Output layer: affine map of the hidden activations, then sigmoid
    output_pre = W2.dot(hidden_act) + b2
    output_act = sigmoid(output_pre)

    return hidden_pre, hidden_act, output_pre, output_act


This function computes the activations for the hidden and output layers.

In [23]:
# Backward Propagation Function

def backward_propagation(X, Y, Z1, A1, Z2, A2, lr=None):
    """Compute the gradients for the given example(s) and take one descent step.

    The module-level parameters W1, b1, W2 and b2 are updated in place.

    Parameters
    ----------
    X, Y : numpy.ndarray
        Input and target, one column per example (``train`` passes single
        ``(n, 1)`` columns).
    Z1, A1, Z2, A2 : numpy.ndarray
        Values returned by ``forward_propagation(X)``.  ``Z2`` is not used
        by this function.
    lr : float, optional
        Step size for this update.  When omitted, the module-level
        ``learning_rate`` is used.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2)``, the gradients that were applied, so they
        can be inspected or checked numerically.

    Notes
    -----
    The output-layer delta is ``A2 - Y``.  That is the gradient with respect
    to Z2 of the binary cross-entropy loss under a sigmoid output; for a
    squared-error loss the delta would also carry a ``sigmoid_derivative(Z2)``
    factor.
    """
    global W1, b1, W2, b2  # Augmented assignment below rebinds these names

    step = learning_rate if lr is None else lr

    # Output layer: delta and parameter gradients (summed over columns)
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T)
    db2 = np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer: propagate the delta back through W2 *before* W2 is updated
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * sigmoid_derivative(Z1)
    dW1 = np.dot(dZ1, X.T)
    db1 = np.sum(dZ1, axis=1, keepdims=True)

    # Gradient descent update, in place on the module-level arrays
    W1 -= step * dW1
    b1 -= step * db1
    W2 -= step * dW2
    b2 -= step * db2

    return dW1, db1, dW2, db2


This function calculates the gradients for each weight and bias and performs a gradient descent update.

In [26]:
# Training set: every one-hot vector of length input_size (a single 1, the rest 0s)
X_train = np.identity(input_size)  # row i is the i-th unit vector
Y_train = np.copy(X_train)         # targets equal the inputs, held in an independent array


Create the 8 unique training examples and their corresponding labels (targets), which are the same as the inputs.

In [29]:
# Training Function 
def train(X_train, Y_train, epochs=10000, print_every=1000):
    """Train the network with per-example (online) gradient descent.

    For every epoch, each (input, target) row pair is reshaped to a column,
    pushed through ``forward_propagation`` and then handed to
    ``backward_propagation``, which updates the weights immediately.

    Parameters
    ----------
    X_train, Y_train : numpy.ndarray
        Inputs and targets, one example per row.
    epochs : int
        Number of passes over the training set.
    print_every : int
        Print the cost every ``print_every`` epochs, starting at epoch 0.
        Pass 0 (or None) to disable printing.

    Returns
    -------
    list
        Average per-example cost of each epoch, in order.  The cost is half
        the sum of squared errors, taken from the forward pass made before
        each example's update.  It is a monitoring value only; the weight
        updates are determined entirely by ``backward_propagation``.
    """
    cost_history = []
    n_examples = len(X_train)

    for epoch in range(epochs):
        cost = 0
        for X, Y in zip(X_train, Y_train):
            # Rows become column vectors for the matrix products
            X = X.reshape(-1, 1)
            Y = Y.reshape(-1, 1)

            Z1, A1, Z2, A2 = forward_propagation(X)

            # Half the sum of squared errors for this example
            cost += np.sum((A2 - Y) ** 2) / 2

            backward_propagation(X, Y, Z1, A1, Z2, A2)

        avg_cost = cost / n_examples
        cost_history.append(avg_cost)

        # Report progress at regular intervals
        if print_every and epoch % print_every == 0:
            print(f"Epoch {epoch}, Cost: {avg_cost}")

    return cost_history


This function repeatedly performs forward and backward propagation, updating the weights after every training example (online gradient descent) and reporting the average cost every 1000 epochs.

In [41]:
def evaluate(X_train, Y_train=None):
    """Print the network's prediction for every example.

    For each row the function prints the input, the raw output activations,
    the outputs thresholded at 0.5, the target, and the hidden-layer
    activations, followed by a separator line.

    Parameters
    ----------
    X_train : numpy.ndarray
        Inputs, one example per row.
    Y_train : numpy.ndarray, optional
        Targets, one per row.  When omitted, each input is used as its own
        target.

    Returns
    -------
    None
    """
    targets = X_train if Y_train is None else Y_train

    print("\nEvaluating the network...")
    for X, Y in zip(X_train, targets):
        X = X.reshape(-1, 1)
        Y = Y.reshape(-1, 1)
        _, A1, _, A2 = forward_propagation(X)

        # Threshold each output activation at 0.5 to get a 0/1 prediction
        normalized_output = (A2 >= 0.5).astype(int)

        print(f"Input: {X.T}")
        print(f"Predicted Output (Raw): {A2.T}")
        print(f"Predicted Output (Normalized): {normalized_output.T}")
        print(f"Target Output: {Y.T}")
        print(f"Hidden Layer Activations: {A1.T}\n")
        print("-" * 50)



After training, evaluate the network on each of the 8 training examples to see how well it has learned to reproduce its input (the identity function).
The evaluation also prints the hidden-layer activations for interpretation; the learned weights and biases are printed in the next cell.

In [44]:
# Train the network, report the learned parameters, then evaluate it

# Run gradient descent over the training examples
train(X_train, Y_train, epochs=10_000)

# Show each learned parameter array under its own heading
print("\nFinal Weights and Biases After Training:")
for heading, parameter in (
    ("Weights from Input to Hidden Layer (W1):\n", W1),
    ("Biases for Hidden Layer (b1):\n", b1),
    ("Weights from Hidden to Output Layer (W2):\n", W2),
    ("Biases for Output Layer (b2):\n", b2),
):
    print(heading, parameter)

# Print the network's prediction for every training example
evaluate(X_train)




Epoch 0, Cost: 0.01510437872077198
Epoch 1000, Cost: 0.01152731526745082
Epoch 2000, Cost: 0.00907415806764463
Epoch 3000, Cost: 0.007321861384216741
Epoch 4000, Cost: 0.006028337630612722
Epoch 5000, Cost: 0.005047199610655038
Epoch 6000, Cost: 0.00428587051750181
Epoch 7000, Cost: 0.0036835642060757084
Epoch 8000, Cost: 0.003199071657250611
Epoch 9000, Cost: 0.002803675603005892

Final Weights and Biases After Training:
Weights from Input to Hidden Layer (W1):
 [[ 0.7647384  -2.22105116 -1.51803946  4.48331713  3.66642252 -5.21270141
  -4.95597926  5.00951692]
 [-4.21187189 -4.4779626   4.72621822 -4.51647495  4.15408825 -1.68849364
   3.63925971  2.32394686]
 [-4.61789475  3.71197417 -4.30049594  2.13127103  5.54787365 -2.45835848
   2.66464629 -3.03556861]]
Biases for Hidden Layer (b1):
 [[ 0.0618744 ]
 [-0.06125757]
 [-0.35977608]]
Weights from Hidden to Output Layer (W2):
 [[  6.02106996  -9.5955226  -10.46963219]
 [ -7.49684207  -8.84409841   7.47732676]
 [ -6.98272192   9.10001