A basic neural network implementation for illustration. Sophisticated libraries such as PyTorch and TensorFlow are available, where you can simply import a module and design a model with layers, nodes, activation functions, and loss and accuracy metrics. Here, we are just trying to understand how it works under the hood.

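This code implements the smallest possible network: an input layer with two features feeding a single sigmoid neuron, whose output is the prediction (there is no separate hidden layer between them). It performs binary classification using the sigmoid activation function.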

In [None]:
# start with importing the required library, NumPy
import numpy as np

In [None]:
# training inputs: four samples (one per row), each with two binary features
input_value = np.array([
    [0, 0],
    [0, 1],
    [1, 1],
    [1, 0],
])
print(input_value.shape)
print(input_value)

(4, 2)
[[0 0]
 [0 1]
 [1 1]
 [1 0]]


In [None]:
# target labels, one per input sample, arranged as a (4, 1) column vector
output = np.array([0, 1, 1, 0]).reshape(4, 1)
output.shape

(4, 1)

In [None]:
# assign weights
# the starting weights are hard-coded here (0.1 and 0.2) rather than drawn at random
# the shape is (2, 1): one row per input feature, and a single column because
# the two features feed a single neuron
weights = np.array([0.1, 0.2]).reshape(2, 1)
print(weights)
print(weights.shape)

[[0.1]
 [0.2]]
(2, 1)


In [None]:
# add bias
# the bias is a single scalar, initialized to 0.3; it is added to the weighted
# sum of the inputs before the activation and is adjusted during training
bias = 0.3

In [None]:
# activation function
# it squashes any real input (scalar or array) to a value between 0 and 1
def sigmoid_func(x):
  """Return the logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

  The textbook formula evaluates exp(-x), which overflows (NumPy emits a
  RuntimeWarning) once a float64 x drops below roughly -709. Here exp is
  only ever applied to -|x|, so its result is at most 1, and the
  algebraically equivalent form e / (1 + e) is used for negative inputs.

  x may be a scalar, a list or an array; the result has the same shape.
  """
  x = np.asarray(x)
  e = np.exp(-np.abs(x))
  # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same value
  stable = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
  # indexing with () turns a 0-d result back into a NumPy scalar and
  # leaves real arrays unchanged
  return stable[()]

In [None]:
# derivative of the sigmoid function
# it is used by the gradient descent algorithm during backpropagation
def der(x):
  """Return the slope of the sigmoid at x, i.e. s * (1 - s) with s = sigmoid_func(x)."""
  activated = sigmoid_func(x)
  return activated * (1 - activated)

In [None]:
# import numpy as np

# # assign input values
# input_value = np.array([[0,0], [0,1], [1,1], [1,0]])

# # assign output values
# output = np.array([0,1,1,0])
# output = output.reshape(4,1)

# # assign weights
# weights = np.array([[0.1], [0.2]])

# # add bias
# bias = 0.3

# # activation function
# def sigmoid_func(x):
#   return 1/(1 + np.exp(-x))

# # derivative of sigmoid function
# def der(x):
#   return sigmoid_func(x) * (1 - sigmoid_func(x))

# updating weights
# full-batch gradient descent: every epoch runs all four samples forward,
# measures the error, and moves the weights and the bias against the gradient
learning_rate = 0.05  # step size shared by the weight update and the bias update

# the training inputs never change, so alias and transpose them once up front;
# the transpose turns (4,2) into (2,4) for the dot product with the gradient
input_arr = input_value
t_input = input_value.T

for epochs in range(10000): # repeat the training process 10000 times
  # feed forward: dot product of features and weights plus the bias term ...
  weighted_sum = np.dot(input_arr, weights) + bias
  # ... squashed by the sigmoid to give the prediction for each sample
  first_output = sigmoid_func(weighted_sum)

  # compare the prediction with the target output
  error = first_output - output
  # mean-squared error over the batch; it is not used by the update below and
  # is kept so the loss can be inspected
  total_error = np.square(error).mean()

  # backpropagation is the chain rule:
  #   d(loss)/d(weights) = d(loss)/d(output)
  #                        * d(output)/d(weighted_sum)
  #                        * d(weighted_sum)/d(weights)
  # first factor: derivative of the squared error with respect to the output.
  # 'error' is used for it; the constant factor of the exact derivative only
  # rescales the step and is absorbed by the learning rate
  first_der = error
  # second factor: slope of the sigmoid at its input. der() applies the
  # sigmoid itself, so it must receive the pre-activation weighted_sum;
  # handing it first_output would push the value through the sigmoid twice
  second_der = der(weighted_sum)
  # per-sample gradient with respect to the weighted sum, shape (4,1)
  derivative = first_der * second_der

  # third factor: d(weighted_sum)/d(weights) is the input itself, so the
  # weight gradient is the inputs (transposed) dotted with 'derivative'
  final_derivative = np.dot(t_input, derivative)

  # update weights: step against the gradient, scaled by the learning rate
  weights = weights - learning_rate * final_derivative

  # update bias: d(weighted_sum)/d(bias) is 1 for every sample, so the bias
  # gradient is the sum of 'derivative' over the batch. summing (instead of
  # subtracting row by row) keeps the bias a scalar
  bias = bias - learning_rate * derivative.sum()

print(weights)
print(bias)

# # prediction
# # check the prediction for one of the input values [0,1]
# pred = np.array([0, 1])
# # keep checking for other values as well
# # in two cases, it will result values very close to 1 and in the other two,
# # values will be very close to zero.
# result = np.dot(pred, weights) + bias
# res = sigmoid_func(result)
# print(res)

[[-0.41953547]
 [ 8.98887811]]
[-4.19706344]


In [None]:
# prediction
# run one sample, here [0, 1], through the trained neuron
pred = np.array([0, 1])
# try the other three inputs as well: the two samples labelled 1 should come
# out close to 1 and the two labelled 0 close to 0
result = pred.dot(weights) + bias
res = sigmoid_func(result)
print(res)

[0.99177089]
