This Feed Forward Neural Network was written just for the sake of comparison for the Perceptron Post, and it may be removed or replaced in some other repository for better file structure of this Repo.

Imports

In [11]:
import numpy as np

Defining Functions:

In [12]:
def relu(x):
    return np.maximum(0, x)

def init_wt():
    return np.random.rand()

def drelu(x):
    return 1 if x > 0 else 0

def mean_squared_error(predicted, actual):
    return 0.5 * np.power(predicted - actual, 2)

def shuffle(arr):
    np.random.shuffle(arr)

def evaluate_model(hidden_weights, hidden_layer_bias, output_weights, output_layer_bias, inputs, targets):
    nHiddenNodes = hidden_weights.shape[1]
    nOutNodes = output_weights.shape[1]
    total_loss = 0.0

    for i in range(len(inputs)):
        # Forward pass
        hidden_layer = np.zeros(nHiddenNodes)
        output_layer = np.zeros(nOutNodes)

        for j in range(nHiddenNodes):
            activation = hidden_layer_bias[j]
            for k in range(inputs.shape[1]):
                activation += inputs[i][k] * hidden_weights[k][j]
            hidden_layer[j] = relu(activation)

        for j in range(nOutNodes):
            activation = output_layer_bias[j]
            for k in range(nHiddenNodes):
                activation += hidden_layer[k] * output_weights[k][j]
            output_layer[j] = relu(activation)

        loss = mean_squared_error(output_layer[0], targets[i][0])
        total_loss += loss

    average_loss = total_loss / len(inputs)
    return average_loss

Defining Training Arguments

In [14]:
nInp = 2
nHiddenNodes = 2
nOutNodes = 1
nTrainingSet = 4
learning_rate = 0.001
beta1 = 0.9
beta2 = 0.999
epsilon = 1e-8
hidden_layer = np.zeros(nHiddenNodes)
output_layer = np.zeros(nOutNodes)

hidden_layer_bias = np.zeros(nHiddenNodes)
output_layer_bias = np.zeros(nOutNodes)

hidden_weights = np.random.rand(nInp, nHiddenNodes)
output_weights = np.random.rand(nHiddenNodes, nOutNodes)

m_output = np.zeros((nHiddenNodes, nOutNodes))
v_output = np.zeros((nHiddenNodes, nOutNodes))

m_hidden = np.zeros((nInp, nHiddenNodes))
v_hidden = np.zeros((nInp, nHiddenNodes))


Trainign Inputs (OR Gate truth table)

In [24]:
training_inputs = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
training_outputs = np.array([[0.0], [1.0], [1.0], [1.0]])

trainingSetOrder = np.array([0, 1, 2, 3])
numEpochs = 100

Defining, training and evaluating the model

In [25]:
for epoch in range(numEpochs):
    shuffle(trainingSetOrder)
    total_loss = 0.0

    for x in range(nTrainingSet):
        i = trainingSetOrder[x]

        # Forward pass
        for j in range(nHiddenNodes):
            activation = hidden_layer_bias[j]
            for k in range(nInp):
                activation += training_inputs[i][k] * hidden_weights[k][j]
            hidden_layer[j] = relu(activation)

        for j in range(nOutNodes):
            activation = output_layer_bias[j]
            for k in range(nHiddenNodes):
                activation += hidden_layer[k] * output_weights[k][j]
            output_layer[j] = relu(activation)

        loss = mean_squared_error(output_layer[0], training_outputs[i][0])
        total_loss += loss

        # Backpropagation
        deltaOutput = np.zeros(nOutNodes)
        for j in range(nOutNodes):
            error = training_outputs[i][j] - output_layer[j]
            deltaOutput[j] = error * drelu(output_layer[j])

        deltaHidden = np.zeros(nHiddenNodes)
        for j in range(nHiddenNodes):
            error = 0.0
            for k in range(nOutNodes):
                error += deltaOutput[k] * output_weights[j][k]
            deltaHidden[j] = error * drelu(hidden_layer[j])

        for j in range(nOutNodes):
            for k in range(nHiddenNodes):
                m_output[k][j] = beta1 * m_output[k][j] + (1 - beta1) * deltaOutput[j] * hidden_layer[k]
                v_output[k][j] = beta2 * v_output[k][j] + (1 - beta2) * deltaOutput[j] * deltaOutput[j]
                output_weights[k][j] += learning_rate * m_output[k][j] / (np.sqrt(v_output[k][j]) + epsilon)
            output_layer_bias[j] += learning_rate * deltaOutput[j]

        for j in range(nHiddenNodes):
            for k in range(nInp):
                m_hidden[k][j] = beta1 * m_hidden[k][j] + (1 - beta1) * deltaHidden[j] * training_inputs[i][k]
                v_hidden[k][j] = beta2 * v_hidden[k][j] + (1 - beta2) * deltaHidden[j] * deltaHidden[j]
                hidden_weights[k][j] += learning_rate * m_hidden[k][j] / (np.sqrt(v_hidden[k][j]) + epsilon)
            hidden_layer_bias[j] += learning_rate * deltaHidden[j]

    if (epoch + 1) % 1000 == 0:
        print(f"Epoch {epoch + 1}, Average Loss: {total_loss / nTrainingSet}")

# Evaluate the model
average_loss = evaluate_model(hidden_weights, hidden_layer_bias, output_weights, output_layer_bias, training_inputs, training_outputs)
print(f"Final Average Loss on Training Data: {average_loss}")

Final Average Loss on Training Data: 0.033696591561024426
