In [1]:
import numpy as np

# Neuron

In [2]:
def sigmoid(x):
    '''
    A function that will apply the sigmoid activation function.
    
    Parameters:
        - x (int, float): The sum of the dot product with the bias.
        
    Returns:
        - float: The output of the neuron.
    '''
    return 1 / (1 + np.exp(-x))

In [3]:
class Neuron:
    def __init__(self, weights, bias):
        self.weights = weights
        self.bias = bias
        
    def feed_forward(self, inputs):
        '''
        A function that will perform the process of passing inputs forward to get an output.
        
        Parameters:
            - inputs (int, float, list of ints / floats): Neuron's input.
            
        Returns:
            - float: The output of the neuron.
        '''
        result = np.dot(a = self.weights, b = inputs) + self.bias
        
        return sigmoid(x = result)

In [4]:
# w_1 = 0, w_2 = 1
weights = [0, 1]

# b = 4
bias = 4
neuron = Neuron(weights = weights, bias = bias)

In [5]:
# x_1 = 2, x_2 = 3
inputs = [2, 3]
neuron.feed_forward(inputs = inputs)

0.9990889488055994

# Neural Network

In [6]:
class Neural_Network():
    '''
    A neural network in the form of:
        - 2 input values
        - 1 hidden layer with 2 neurons (h1, h2)
        - An output layer with 1 neuron (o1)
        
    For simplicity, each neuron will have the same weights and bias as follows:
        - weights = [0, 1]
        - bias = 0
    '''
    def __init__(self):
        weights = [0, 1]
        bias = 0
        
        self.h1 = Neuron(weights = weights, bias = bias)
        self.h2 = Neuron(weights = weights, bias = bias)
        self.o1 = Neuron(weights = weights, bias = bias)
        
    def feed_forward(self, inputs):
        '''
        A function that will perform the process of passing inputs forward to get an output.
        
        Parameters:
            - inputs (list of ints / floats): Neural network's input.
            
        Returns:
            - float: The output of the neural network.
        '''
        output_h1 = self.h1.feed_forward(inputs = inputs)
        output_h2 = self.h2.feed_forward(inputs = inputs)
        
        # The inputs to the output layer are the outputs of the hidden layer.
        output_o1 = self.o1.feed_forward(inputs = [output_h1, output_h2])
        
        return output_o1

In [7]:
ann = Neural_Network()

In [8]:
inputs = [2, 3]
ann.feed_forward(inputs = inputs)

0.7216325609518421

In [9]:
def mse_loss(y_true, y_pred):
    '''
    A function that will calculate the loss in the form of mean squared error (MSE).
    
    Parameters:
        - y_true (numpy array): The actual values of the target variable.
        - y_pred (numpy array): The predicted values of the target variable.
        
    Returns:
        - int / float: The loss.
    '''
    return ((y_pred - y_true) ** 2).mean()

In [10]:
y_true = np.array([1, 0, 0, 1])
y_pred = np.array([0, 0, 0, 0])
mse_loss(y_true = y_true, y_pred = y_pred)

0.5

# Complete Artificial Neural Network

In [11]:
def sigmoid_derivative(x):
    '''
    A function that will apply the derivative of the sigmoid activation function.

    Parameters:
        - x (int, float): The sum of the dot product with the bias.
        
    Returns:
        - float.
    '''
    fx = sigmoid(x = x)
    
    return fx * (1 - fx)

In [25]:
class C_Neural_Network():
    '''
    *** DISCLAIMER *** (As written by Victor Zhou):
    The code below is intended to be simple and educational, NOT optimal.
    Real neural network code looks nothing like this. DO NOT use this code.
    Instead, read/run it to understand how this specific neural network works.

    A neural network in the form of:
        - 2 input values
        - 1 hidden layer with 2 neurons (h1, h2)
        - An output layer with 1 neuron (o1)
    '''
    def __init__(self):
        # Weights - The neural network's weights are first set randomly.
        self.w1 = np.random.normal()
        self.w2 = np.random.normal()
        self.w3 = np.random.normal()
        self.w4 = np.random.normal()
        self.w5 = np.random.normal()
        self.w6 = np.random.normal()

        # Biases - The neural network's biases are first set randomly.
        self.b1 = np.random.normal()
        self.b2 = np.random.normal()
        self.b3 = np.random.normal()

    def feed_forward(self, inputs):
        '''
        A function that will perform the process of passing inputs forward to get an output.
        
        Parameters:
            - inputs (numpy array of ints / floats): Neural network's input, which is the weight and height in our case, and thus
            the input will be a numpy array with 2 elements.

        Returns:
            - float: The output of the neural network.
        '''
        h1 = sigmoid(x = self.w1 * inputs[0] + self.w2 * inputs[1] + self.b1)
        h2 = sigmoid(x = self.w3 * inputs[0] + self.w4 * inputs[1] + self.b2)
        
        # The inputs to the output layer are the outputs of the hidden layer.
        o1 = sigmoid(x = self.w5 * h1 + self.w6 * h2 + self.b3)

        return o1

    def training(self, data, y_trues):
        '''
        A function that will perform the training process of the neural network.
        
        Parameters:
            - data (numpy array of ints / floats): Neural network's input, which is the weight and height in our case, and thus
            the input will be a numpy array with 2 elements.
            - y_trues (numpy array): The actual values of the target variable, which is the gender in our case, and thus the y_trues 
            will be a numpy array consists of 0 or 1.
        '''
        # Common values for a learning rate are usually in the range of 0.01 to 0.1.
        learning_rate = 0.01
        
        # Iterations on the entire dataset, when one epoch means an entire dataset is passed forward and backward through the neural network 
        # only once. We are using a limited dataset and to optimize the weights we are using gradient descent which is an iterative process. 
        # So, updating the weights with a single epoch will not be enough.
        epochs = 1000

        for epoch in range(1, epochs + 1):
            for y_true in y_trues:
                # For convenience, the sum of the dot product with the bias of h1, h2, and o1 will be calculated now, as we will have to use 
                # these values during the partial derivatives calculation.
                sum_h1 = self.w1 * inputs[0] + self.w2 * inputs[1] + self.b1
                h1 = sigmoid(x = sum_h1)

                sum_h2 = self.w3 * inputs[0] + self.w4 * inputs[1] + self.b2
                h2 = sigmoid(x = sum_h2)
        
                # The inputs to the output layer are the outputs of the hidden layer.
                sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
                o1 = sigmoid(x = sum_o1)
                # o1 is assigned to y_pred, as he is the last neuron, and hence it will be the neural network’s prediction.
                y_pred = o1

                # Partial derivatives calculation, when dX_dZ corresponds to the derivative of X with respect to Z.
                dL_dypred = -2 * (y_true - y_pred)

                # h1, h2 -> o1.
                dypred_dw5 = h1 * sigmoid_derivative(x = sum_o1)
                dypred_dw6 = h2 * sigmoid_derivative(x = sum_o1)
                dypred_db3 = 1 * sigmoid_derivative(x = sum_o1)

                dypred_dh1 = self.w5 * sigmoid_derivative(x = sum_o1)
                dypred_dh2 = self.w6 * sigmoid_derivative(x = sum_o1)

                # x1, x2 -> h1.
                dh1_dw1 = inputs[0] * sigmoid_derivative(x = sum_h1)
                dh1_dw2 = inputs[1] * sigmoid_derivative(x = sum_h1)
                dh1_db1 = 1 * sigmoid_derivative(x = sum_h1)

                # x1, x2 -> h2.
                dh2_dw3 = inputs[0] * sigmoid_derivative(x = sum_h2)
                dh2_dw4 = inputs[1] * sigmoid_derivative(x = sum_h2)
                dh2_db2 = 1 * sigmoid_derivative(x = sum_h2)

                # Weights and biases updation.
                # h1 related - The parameters which affect only h1.
                self.w1 -= learning_rate * dL_dypred * dypred_dh1 * dh1_dw1
                self.w2 -= learning_rate * dL_dypred * dypred_dh1 * dh1_dw2
                self.b1 -= learning_rate * dL_dypred * dypred_dh1 * dh1_db1

                # h2 related - The parameters which affect only h2.
                self.w3 -= learning_rate * dL_dypred * dypred_dh2 * dh2_dw3
                self.w4 -= learning_rate * dL_dypred * dypred_dh2 * dh2_dw4
                self.b2 -= learning_rate * dL_dypred * dypred_dh2 * dh2_db2

                # o1 related - The parameters which affect only o1.
                self.w5 -= learning_rate * dL_dypred * dypred_dw5
                self.w6 -= learning_rate * dL_dypred * dypred_dw6
                self.b3 -= learning_rate * dL_dypred * dypred_db3

            # A feedforward will be applied upon each of the input values at the end of each epoch, in order to calculate the loss of the 
            # neural network.
            y_preds = np.apply_along_axis(func1d = self.feed_forward, axis = 1, arr = data)
            loss = mse_loss(y_true = y_trues, y_pred = y_preds)
            print("Epoch %d/%d - loss: %.3f" %(epoch, epochs, loss))

In [13]:
data = np.array(object = [   
[-2, -1], # Alice
[25, 6],  # Bob
[17, 4],  # Charlie
[-15, -6] # Diana
])

y_trues = np.array(object = [
    1, # Alice
    0, # Bob
    0, # Charlie
    1  # Diana
    ])

In [28]:
c_ann = C_Neural_Network()
c_ann.training(data = data, y_trues = y_trues)

Epoch 1/1000 - loss: 0.463
Epoch 2/1000 - loss: 0.463
Epoch 3/1000 - loss: 0.463
Epoch 4/1000 - loss: 0.463
Epoch 5/1000 - loss: 0.462
Epoch 6/1000 - loss: 0.462
Epoch 7/1000 - loss: 0.462
Epoch 8/1000 - loss: 0.462
Epoch 9/1000 - loss: 0.462
Epoch 10/1000 - loss: 0.462
Epoch 11/1000 - loss: 0.462
Epoch 12/1000 - loss: 0.462
Epoch 13/1000 - loss: 0.462
Epoch 14/1000 - loss: 0.462
Epoch 15/1000 - loss: 0.461
Epoch 16/1000 - loss: 0.461
Epoch 17/1000 - loss: 0.461
Epoch 18/1000 - loss: 0.461
Epoch 19/1000 - loss: 0.461
Epoch 20/1000 - loss: 0.461
Epoch 21/1000 - loss: 0.461
Epoch 22/1000 - loss: 0.461
Epoch 23/1000 - loss: 0.461
Epoch 24/1000 - loss: 0.461
Epoch 25/1000 - loss: 0.460
Epoch 26/1000 - loss: 0.460
Epoch 27/1000 - loss: 0.460
Epoch 28/1000 - loss: 0.460
Epoch 29/1000 - loss: 0.460
Epoch 30/1000 - loss: 0.460
Epoch 31/1000 - loss: 0.460
Epoch 32/1000 - loss: 0.460
Epoch 33/1000 - loss: 0.460
Epoch 34/1000 - loss: 0.459
Epoch 35/1000 - loss: 0.459
Epoch 36/1000 - loss: 0.459
E

In [31]:
maya = np.array([-7, -3])
roy = np.array([20, 2])

# The neural network's prediction is that Maya is a Female by 75%.
print("Maya: %.3f" % c_ann.feed_forward(inputs = maya))

# The neural network's prediction is that Roy is a Male by 48%.
print("Roy: %.3f" % c_ann.feed_forward(inputs = roy))

Maya: 0.750
Roy: 0.486
