In [1]:
import numpy as np

# Sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)


epochs = 10000
lr = 0.1
inputLayerNeurons, hiddenLayerNeurons, outputLayerNeurons = 2, 2, 1

In [2]:
# Input datasets
inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
expected_output = np.array([[0],[1],[1],[0]])
inputs, expected_output

(array([[0, 0],
        [0, 1],
        [1, 0],
        [1, 1]]),
 array([[0],
        [1],
        [1],
        [0]]))

In [103]:
# Initializing weights and biases (w/ a random seed)
np.random.seed(314)
hidden_weights = np.random.uniform(size=(inputLayerNeurons,hiddenLayerNeurons))
hidden_bias =np.random.uniform(size=(1,hiddenLayerNeurons))
output_weights = np.random.uniform(size=(hiddenLayerNeurons,outputLayerNeurons))
output_bias = np.random.uniform(size=(1,outputLayerNeurons))
hidden_weights, hidden_bias, output_weights, output_bias


(array([[0.91687358, 0.58854191],
        [0.26504775, 0.78320538]]),
 array([[0.91800106, 0.82735501]]),
 array([[0.72795148],
        [0.26048042]]),
 array([[0.9117634]]))

In [528]:
# Training algorithm
# for _ in range(epochs):
# Forward Propagation
print(inputs, '\n', hidden_weights)
hidden_layer_activation = inputs @ hidden_weights # size = (4,2) * (2,2) = (4,2)
print(hidden_layer_activation)
hidden_layer_activation += hidden_bias
hidden_layer_output = sigmoid(hidden_layer_activation)

output_layer_activation = hidden_layer_output @ output_weights # size = (4,2) * (2,1) = (4,1)
output_layer_activation += output_bias
predicted_output = sigmoid(output_layer_activation)
# if _ % 1000 == 0:
    # print(f'{predicted_output:.2f}')

# Backpropagation -> d_L/d_weights = d_Loss/d_activation_output * d_activation_output/d_activation_input * d_activation_input/d_weights
error = expected_output - predicted_output
print(f'err: {error.sum():.4f}')
d_predicted_output = error * sigmoid_derivative(predicted_output)

# error_hidden_layer = d_predicted_output.dot(output_weights.T)
error_hidden_layer = d_predicted_output @ output_weights.T
d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_layer_output)

# Updating Weights and Biases
output_weights += lr * (hidden_layer_output.T @ d_predicted_output)

output_bias += np.sum(d_predicted_output,axis=0, keepdims=True) * lr

hidden_weights += lr * (inputs.T @ d_hidden_layer)

hidden_bias += np.sum(d_hidden_layer,axis=0, keepdims=True) * lr

[[0 0]
 [0 1]
 [1 0]
 [1 1]] 
 [[0.91919086 0.50688659]
 [0.31820489 0.72428374]]
[[0.         0.        ]
 [0.31820489 0.72428374]
 [0.91919086 0.50688659]
 [1.23739575 1.23117033]]
err: -0.0109


In [5]:
# Print final weights for curiosity
print("Final hidden weights: ",hidden_weights)
print("Final hidden bias: ",hidden_bias)
print("Final output weights: ",output_weights)
print("Final output bias: ",output_bias)


Final hidden weights:  [[0.85230067 0.51454222]
 [0.6075769  0.71734763]]
Final hidden bias:  [[0.04617299 0.25712778]]
Final output weights:  [[0.86275212]
 [0.60983244]]
Final output bias:  [[0.0628867]]
