In [20]:
import numpy as np
import matplotlib.pyplot as plt

In [21]:
class NeuralNetwork:
  def __init__(self, neurons_per_layers, inputs, num_epochs):
    self.num_layers = len(neurons_per_layers)
    self.inputs = inputs # shape: (batch_size, num_features)
    self.current_input = inputs
    self.layers = []
    self.outputs = None
    self.dZ_prev = None
    self.learning_rate = 0.01
    self.num_epochs = num_epochs

    for i in range(self.num_layers):
      num_features = inputs.shape[1] if i == 0 else neurons_per_layers[i-1][0]
      newLayer = Layer(neurons_per_layers[i][0], num_features, neurons_per_layers[i][1])
      self.layers.append(newLayer)

    self.loss = None
    pass

  def forward_pass(self):
    self.current_input = self.inputs
    for i in range(self.num_layers):
      is_first_layer = i == 0
      is_last_layer = i == self.num_layers - 1
      self.current_input = self.layers[i].calculate_output(self.current_input, is_first_layer, is_last_layer)

    self.outputs = self.current_input
    return self.outputs


  def back_propagate(self, truth):
    self.dZ_prev = self.outputs
    for i in range(self.num_layers - 1, -1, -1):
      is_first_layer = i == 0
      is_last_layer = i == self.num_layers - 1

      self.dZ_prev = self.layers[i].loss_function(self.dZ_prev, truth, self.learning_rate, is_last_layer)

  def train(self, truth):
    for epoch in range(self.num_epochs):
      self.forward_pass()
      self.calculate_loss(truth)

      if self.loss < 0.0001:
        break

      self.back_propagate(truth)

      if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {self.loss}")

  def calculate_loss(self, truth):
    self.loss = np.mean((self.outputs - truth) ** 2)
    return self.loss


  def summary(self):
    for i, layer in enumerate(self.layers):
        print(f"Layer {i+1}: {layer.num_neurons} neurons, activation: {layer.activation}")


class Layer:
  def __init__(self, num_neurons, num_features, activation_function):
    self.num_neurons = num_neurons
    self.weights = np.random.rand(num_features, num_neurons)
    self.biases = np.zeros((1, num_neurons))
    if isinstance(activation_function, str):
      self.activation = activation_function
    else:
      self.activation = "sigmoid"

    self.loss = None
    self.inputs = None
    self.unactivated_outputs = None
    self.outputs = None
    self.dW = None
    self.db = None
    pass

  def calculate_output(self, input, is_first_layer = False, is_last_layer = False):
    self.inputs = input
    self.unactivated_outputs = np.dot(input, self.weights) + self.biases
    self.outputs = self.activation_function(self.unactivated_outputs, self.activation, is_first_layer, is_last_layer)
    return self.outputs

  def loss_function(self, dZ_prev, truth, lr, is_last_layer = False):
    dA = None

    if is_last_layer:
      dA = 2 * (self.outputs - truth) / self.outputs.shape[0]
    else:
      dA = dZ_prev

    dZ = dA * self.activation_function_derivative(self.activation, is_last_layer)
    self.dW = np.dot(self.inputs.T, dZ)
    self.db = np.sum(dZ, axis=0, keepdims=True)
    self.update_weights(lr)

    dZ = np.dot(dZ, self.weights.T)
    return dZ

  def update_weights(self, learning_rate):
    self.weights -= learning_rate * self.dW
    self.biases -= learning_rate * self.db

  def activation_function_derivative(self, activation_function, is_last_layer):
    if activation_function == "sigmoid":
        return self.outputs * (1 - self.outputs)
    elif activation_function == "relu":
        return np.where(self.unactivated_outputs > 0, 1, 0)
    elif activation_function == "tanh":
        return 1 - self.outputs ** 2
    else:
        return self.outputs

  def activation_function(self, result, activation_function, is_first_layer, is_last_layer):
    if activation_function == "sigmoid":
        return self.sigmoid(result)
    elif activation_function == "relu":
        return self.relu(result)
    elif activation_function == "tanh":
        return self.tanh(result)
    else:
        return result

  def sigmoid(self, result):
    return 1 / (1 + np.exp(-result))

  def relu(self, result):
    return np.maximum(0, result)

  def tanh(self, result):
    return np.tanh(result)


**For Back Propagation Formula**

dA = 2 * (A - Y) / m               

dA (apart from output layer) = (dZ(prev) @ W.T(prev))            

dZ = dA * A * (1 - A)              # ∂L/∂Z (sigmoid derivative)

dW = X.T @ dZ                      # ∂L/∂W

db = dZ.sum(axis=0, keepdims=True)


Where:

X: input matrix, shape (m × n) — m examples, n features

W: weight matrix, shape (n × k) — maps n inputs to k outputs (e.g., neurons)

b: bias vector, shape (1 × k) (broadcasted)

Z: linear output, shape (m × k)

A: activated output, shape (m × k)

Y: ground truth targets, shape (m × k)

In [22]:
inputs = np.random.rand(3, 4)  # batch size = 3, input features = 4
neurons_per_layers = [
    (5, "relu"),   # hidden layer: 5 neurons with ReLU
    (2, "sigmoid") # output layer: 2 neurons with sigmoid
]

targets = np.random.rand(3, 2) # batch size = 3, output features = 2

nn = NeuralNetwork(neurons_per_layers, inputs, 10000)

print("Before Training:")
initial_output = nn.forward_pass()
print("Initial Output:\n", initial_output)
print("Initial Loss:", nn.calculate_loss(targets))

# Train the network
nn.train(targets)

print("\nAfter Training:")
print("Final Loss:", nn.loss)
final_output = nn.forward_pass()
print("Final Output:\n", final_output)
print("Target Output:\n", targets)
print("Difference:\n", np.abs(final_output - targets))

print("\nNetwork Summary:")
nn.summary()

print("\nFinal Weights:")
for i, layer in enumerate(nn.layers):
    print(f"Layer {i+1} Weights Shape:", layer.weights.shape)
    print(f"Layer {i+1} Weights:\n", layer.weights)
    print(f"Layer {i+1} Biases:\n", layer.biases)
    print()

Before Training:
Initial Output:
 [[0.769309   0.90208931]
 [0.73379308 0.84585946]
 [0.80376747 0.90651676]]
Initial Loss: 0.15365358274675003
Epoch 0, Loss: 0.15365358274675003
Epoch 100, Loss: 0.12008242876999368
Epoch 200, Loss: 0.09723849173015238
Epoch 300, Loss: 0.0846215815520432
Epoch 400, Loss: 0.07771399185642236
Epoch 500, Loss: 0.07317147012552998
Epoch 600, Loss: 0.06945266708193804
Epoch 700, Loss: 0.06596708559891541
Epoch 800, Loss: 0.06250849359548154
Epoch 900, Loss: 0.05901581443791056
Epoch 1000, Loss: 0.05548406784143062
Epoch 1100, Loss: 0.05193183262903094
Epoch 1200, Loss: 0.04838861525720519
Epoch 1300, Loss: 0.045029149850032306
Epoch 1400, Loss: 0.04200836805420599
Epoch 1500, Loss: 0.039150212236878784
Epoch 1600, Loss: 0.03642842689688325
Epoch 1700, Loss: 0.03384637385812345
Epoch 1800, Loss: 0.03140469866268026
Epoch 1900, Loss: 0.02909399490649799
Epoch 2000, Loss: 0.026931034826421774
Epoch 2100, Loss: 0.02491007361411149
Epoch 2200, Loss: 0.0230365373