In [1]:
import numpy as np

# Activation Class

In [2]:
class Activation:
    """Element-wise activation function with a cached forward pass.

    Supported values of ``type`` are ``"relu"``, ``"sigmoid"``, ``"linear"``
    and ``"tanh"``. ``forward`` stores both its argument and its result on the
    instance and ``derivative`` reads those cached values, so ``forward`` must
    be called before ``derivative``.
    """

    def __init__(self, type):
        # `type` shadows the builtin, but the parameter name is part of the
        # public signature and is therefore kept.
        self.type = type

    def forward(self, inputs):
        """Apply the activation to ``inputs``; cache and return the result.

        Args:
            inputs: value the activation is applied to element-wise.

        Returns:
            The activated values (also stored as ``self.output``).

        Raises:
            ValueError: if ``self.type`` is not a supported activation name.
        """
        self.inputs = inputs
        if self.type == "relu":
            self.output = np.maximum(0, inputs)
        elif self.type == "sigmoid":
            # Numerically stable logistic: only ever exponentiate a
            # non-positive number, so np.exp cannot overflow.
            #   x >= 0:  1 / (1 + e^-x)
            #   x <  0:  e^x / (1 + e^x)
            # With decay = e^-|x| both cases share the denominator 1 + decay.
            decay = np.exp(-np.abs(inputs))
            self.output = np.where(inputs >= 0, 1.0, decay) / (1.0 + decay)
        elif self.type == "linear":
            self.output = inputs
        elif self.type == "tanh":
            self.output = np.tanh(inputs)
        else:
            raise ValueError(f"Invalid activation function type: {self.type}")
        return self.output

    def derivative(self):
        """Return the element-wise derivative for the most recent ``forward`` call.

        The relu and linear cases are computed from the cached input; the
        sigmoid and tanh cases are computed from the cached output.

        Raises:
            ValueError: if ``self.type`` is not a supported activation name.
        """
        if self.type == "relu":
            # Slope is 1 where the input was strictly positive, otherwise 0.
            return np.where(self.inputs > 0, 1, 0)
        elif self.type == "sigmoid":
            # s'(x) = s(x) * (1 - s(x))
            return self.output * (1 - self.output)
        elif self.type == "linear":
            return np.ones_like(self.inputs)
        elif self.type == "tanh":
            # tanh'(x) = 1 - tanh(x)^2
            return 1 - np.power(self.output, 2)
        else:
            raise ValueError(f"No derivative implemented for activation function type: {self.type}")


# Parameters Class

In [3]:
class Parameters:
    """Randomly initialised weight matrix and bias row.

    ``weights`` has shape ``(input_size, num_neurons)`` and ``bias`` has shape
    ``(1, num_neurons)``. Both are drawn from NumPy's global random state via
    ``np.random.randn`` and scaled by 0.1.
    """

    def __init__(self, input_size, num_neurons):
        init_scale = 0.1
        weight_shape = (input_size, num_neurons)
        bias_shape = (1, num_neurons)
        # Weights are drawn before the bias; the draw order determines which
        # values each array receives from the global random stream.
        self.weights = np.random.randn(*weight_shape) * init_scale
        self.bias = np.random.randn(*bias_shape) * init_scale

    def get_weights(self):
        """Return the weight array itself (not a copy)."""
        return self.weights

    def get_bias(self):
        """Return the bias array itself (not a copy)."""
        return self.bias

# Neuron Class

In [4]:
class Neuron:
    """A single linear unit: one weight column plus one bias, no activation."""

    def __init__(self, input_size):
        # A neuron produces exactly one output, hence num_neurons == 1.
        self.params = Parameters(input_size, 1)

    def forward(self, inputs):
        """Cache ``inputs`` and return ``inputs . weights + bias``."""
        self.inputs = inputs
        weights = self.params.get_weights()
        bias = self.params.get_bias()
        self.output = np.dot(inputs, weights) + bias
        return self.output

    def backward(self, d_output):
        """Compute and store gradients for the most recent ``forward`` call.

        Sets ``d_weights``, ``d_bias`` and ``d_inputs`` on the instance and
        returns ``d_inputs``.

        Args:
            d_output: gradient of the loss with respect to this neuron's
                output; a 1-D array is promoted to a column.
        """
        # Promote a flat gradient to a column so the matrix products line up.
        upstream = d_output[:, np.newaxis] if d_output.ndim == 1 else d_output
        weights = self.params.get_weights()

        self.d_weights = np.dot(self.inputs.T, upstream)
        self.d_bias = np.sum(upstream, axis=0, keepdims=True)
        self.d_inputs = np.dot(upstream, weights.T)

        # When the last axis has length one, coerce the gradient to the exact
        # shape of the cached inputs.
        trailing_is_singleton = self.d_inputs.shape[-1] == 1
        if trailing_is_singleton:
            self.d_inputs = self.d_inputs.reshape(self.inputs.shape)

        return self.d_inputs


# Layer Class

In [5]:
class Layer:
    """A group of independent ``Neuron`` objects followed by one shared activation."""

    def __init__(self, input_size, num_neurons, activation_type):
        units = []
        for _ in range(num_neurons):
            units.append(Neuron(input_size))
        self.neurons = units
        self.activation_fn = Activation(activation_type)

    def forward(self, inputs):
        """Run every neuron on ``inputs``, stack the results, apply the activation."""
        self.inputs = inputs
        columns = []
        for unit in self.neurons:
            columns.append(unit.forward(inputs))
        pre_activation = np.hstack(columns)
        return self.activation_fn.forward(pre_activation)

    def backward(self, d_output):
        """Back-propagate ``d_output`` and return the gradient w.r.t. the layer input.

        Each neuron's ``backward`` is called with its own column of the
        activation-scaled gradient; the per-neuron input gradients are summed.
        """
        delta = self.activation_fn.derivative() * d_output
        # Accumulate in float64 regardless of the dtype of the cached inputs.
        grad_inputs = np.zeros_like(self.inputs, dtype=np.float64)
        for column, unit in enumerate(self.neurons):
            # Slice with a range so the neuron receives a 2-D column.
            grad_inputs += unit.backward(delta[:, column:column + 1])
        return grad_inputs

# Backpropagation

In [6]:
class BackPropagation:
    """Applies one gradient-descent step to every neuron's parameters."""

    @staticmethod
    def update_parameters(layers, learning_rate):
        """Subtract ``learning_rate * gradient`` from each neuron's weights and bias.

        Reads ``d_weights`` and ``d_bias`` from every neuron in ``layers`` and
        updates ``params.weights`` and ``params.bias`` in place.
        """
        every_neuron = (unit for layer in layers for unit in layer.neurons)
        for unit in every_neuron:
            params = unit.params
            params.weights -= learning_rate * unit.d_weights
            params.bias -= learning_rate * unit.d_bias

# NeuralNetwork Class

In [7]:
class NeuralNetwork:
    """Sequential stack of layers trained by repeated forward/backward passes."""

    def __init__(self):
        self.layers = list()

    def add_layer(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def forward(self, inputs):
        """Feed ``inputs`` through every layer in insertion order; return the last output."""
        activations = inputs
        for layer in self.layers:
            activations = layer.forward(activations)
        return activations

    def backward(self, loss_gradient):
        """Propagate ``loss_gradient`` through the layers from last to first.

        Each layer receives the value returned by the layer after it. Nothing
        is returned.
        """
        gradient = loss_gradient
        for layer in self.layers[::-1]:
            gradient = layer.backward(gradient)

    def train(self, inputs, targets, epochs, learning_rate):
        """Run ``epochs`` iterations over the whole of ``inputs``.

        Every iteration does a forward pass, prints the MSE loss, runs a
        backward pass seeded with the MSE gradient, and then updates the
        parameters of all layers with ``learning_rate``.
        """
        for epoch in range(epochs):
            predictions = self.forward(inputs)
            loss = LossFunction.mse(predictions, targets)
            print(f"Epoch {epoch+1}, Loss: {loss}")
            self.backward(LossFunction.mse_derivative(predictions, targets))
            BackPropagation.update_parameters(self.layers, learning_rate)

# Loss Function

In [8]:
class LossFunction:
    """Mean-squared-error loss and its gradient with respect to the predictions."""

    @staticmethod
    def mse(predicted, actual):
        """Return the mean of the squared element-wise differences.

        Args:
            predicted: array-like of predictions.
            actual: array-like of targets; anything ``np.asarray`` accepts.

        Returns:
            The mean over all elements of ``(predicted - actual) ** 2``.
        """
        diff = np.asarray(predicted) - np.asarray(actual)
        return np.mean(diff ** 2)

    @staticmethod
    def mse_derivative(predicted, actual):
        """Return the gradient of ``mse`` with respect to ``predicted``.

        Both arguments go through ``np.asarray`` so list or scalar targets,
        which ``mse`` accepts, work here as well. The normaliser is the
        element count of the difference array, i.e. the same count
        ``np.mean`` divides by in ``mse``; for equal-shaped inputs this
        equals ``actual.size``.

        NOTE(review): if the two shapes differ and broadcast (for example
        ``(n, 1)`` against ``(n,)``), the result has the broadcast shape;
        callers presumably want matching shapes.
        """
        diff = np.asarray(predicted) - np.asarray(actual)
        return 2 * diff / diff.size

In [17]:
# Seed NumPy's global random state before any layer is built: the initial
# weights and biases come from np.random.randn, so without a seed every run
# prints a different loss curve.
SEED = 42
np.random.seed(SEED)

# 2 inputs -> 3 relu units -> 1 tanh output.
nn = NeuralNetwork()
nn.add_layer(Layer(2, 3, "relu"))
nn.add_layer(Layer(3, 1, "tanh"))

# XOR truth table: one row per input pair, one target column.
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
targets = np.array([[0], [1], [1], [0]])

learning_rate = 0.01
epochs = 100
nn.train(inputs, targets, epochs, learning_rate)

Epoch 1, Loss: 0.5261628845479799
Epoch 2, Loss: 0.515145555931176
Epoch 3, Loss: 0.5045588129608033
Epoch 4, Loss: 0.49439068723076507
Epoch 5, Loss: 0.4846288642084503
Epoch 6, Loss: 0.4752607819185806
Epoch 7, Loss: 0.4662737207029771
Epoch 8, Loss: 0.4576548841302702
Epoch 9, Loss: 0.44939147126342605
Epoch 10, Loss: 0.44147074059992303
Epoch 11, Loss: 0.43388006608170404
Epoch 12, Loss: 0.4266069856321897
Epoch 13, Loss: 0.4196392427183613
Epoch 14, Loss: 0.4129648214599608
Epoch 15, Loss: 0.40657197581783194
Epoch 16, Loss: 0.4004492533918674
Epoch 17, Loss: 0.3945855143482019
Epoch 18, Loss: 0.38896994597727946
Epoch 19, Loss: 0.38359207336101914
Epoch 20, Loss: 0.378441766600089
Epoch 21, Loss: 0.37350924502257665
Epoch 22, Loss: 0.3687850787642409
Epoch 23, Loss: 0.3642601880789152
Epoch 24, Loss: 0.3599258407062458
Epoch 25, Loss: 0.3557736475933194
Epoch 26, Loss: 0.3517955572373006
Epoch 27, Loss: 0.3479838488882407
Epoch 28, Loss: 0.3443311248249553
Epoch 29, Loss: 0.34083