In [1]:
import numpy as np

# Activation Class
class Activation:
    """Named activation functions and their element-wise derivatives.

    Supported names for ``apply``: ``'relu'``, ``'sigmoid'``, ``'tanh'`` and
    ``'softmax'``. ``derivative`` supports the first three only.
    """

    @staticmethod
    def apply(aggregate_signal, activation_function='relu'):
        """Apply the named activation to ``aggregate_signal``.

        Args:
            aggregate_signal: Array of pre-activation values. For
                ``'softmax'`` it must have an axis 1, because the
                normalisation is done along that axis.
            activation_function: Name of the activation to apply.

        Returns:
            The activated array.

        Raises:
            ValueError: If ``activation_function`` is not a supported name.
        """
        if activation_function == 'relu':
            return np.maximum(0, aggregate_signal)
        elif activation_function == 'sigmoid':
            return 1 / (1 + np.exp(-aggregate_signal))
        elif activation_function == 'tanh':
            return np.tanh(aggregate_signal)
        elif activation_function == 'softmax':
            # Subtract the per-row maximum before exponentiating so the
            # largest exponent is exp(0); the ratio below is unchanged.
            e_x = np.exp(aggregate_signal - np.max(aggregate_signal, axis=1, keepdims=True))
            return e_x / np.sum(e_x, axis=1, keepdims=True)
        else:
            raise ValueError("Unsupported activation function.")

    @staticmethod
    def derivative(aggregate_signal, activation_function='relu'):
        """Return the element-wise derivative of the named activation.

        Args:
            aggregate_signal: Array of pre-activation values at which the
                derivative is evaluated.
            activation_function: ``'relu'``, ``'sigmoid'`` or ``'tanh'``.

        Returns:
            An array with the same shape as ``aggregate_signal``.

        Raises:
            ValueError: For ``'softmax'`` (its derivative is not
                element-wise) and for any unknown name. Raising here keeps
                a ``None`` from leaking into later arithmetic.
        """
        if activation_function == 'relu':
            return np.where(aggregate_signal > 0, 1, 0)
        elif activation_function == 'sigmoid':
            s = Activation.apply(aggregate_signal, 'sigmoid')
            return s * (1 - s)
        elif activation_function == 'tanh':
            return 1 - np.tanh(aggregate_signal) ** 2
        elif activation_function == 'softmax':
            # Softmax couples every output in a row, so there is no
            # element-wise derivative to return from this method.
            raise ValueError(
                "Softmax has no element-wise derivative; "
                "it needs special handling together with the loss."
            )
        else:
            raise ValueError("Unsupported activation function.")

# Neuron Class
class Neuron:
    """A single unit holding a weight vector and a bias term."""

    def __init__(self, weights, bias):
        """Store the given ``weights`` and ``bias`` on the instance."""
        self.weights, self.bias = weights, bias

    def process(self, input_data):
        """Return ``input_data · weights + bias``.

        The product is computed with ``np.dot``, so ``input_data`` may be
        anything ``np.dot`` accepts against ``self.weights``.
        """
        weighted_sum = np.dot(input_data, self.weights)
        return weighted_sum + self.bias

# Parameters Class
class Parameters:
    """Trainable weight matrix, bias row and learning rate for one layer."""

    def __init__(self, layer_size, next_layer_size, learning_rate=0.001):
        """Create randomly initialised weights and a zero bias.

        Args:
            layer_size: Number of rows of the weight matrix.
            next_layer_size: Number of columns of the weight matrix and of
                the bias row.
            learning_rate: Step size stored alongside the parameters.
        """
        weight_shape = (layer_size, next_layer_size)
        # Standard-normal draws scaled down by 0.1.
        self.weights = np.random.randn(*weight_shape) * 0.1
        # Bias is kept 2-D (one row) so it broadcasts across a batch.
        self.bias = np.zeros((1, next_layer_size))
        self.learning_rate = learning_rate

# Layer Class
class Layer:
    """One layer: its parameters, activation name and cached forward values."""

    def __init__(self, size, next_layer_size, activation_function):
        """Build the layer.

        Args:
            size: Number of inputs to the layer.
            next_layer_size: Number of outputs of the layer.
            activation_function: Name of the activation applied to the output.
        """
        # Per-neuron objects are created first, so they consume random draws
        # before the vectorized parameters do.
        neuron_list = []
        for _ in range(size):
            neuron_weights = np.random.randn(next_layer_size) * 0.1
            neuron_list.append(Neuron(neuron_weights, 0))
        self.neurons = neuron_list

        # Matrix-form parameters for vectorized operations.
        self.params = Parameters(size, next_layer_size)
        self.activation_function = activation_function

        # Caches that start out empty.
        self.input_data = None
        self.aggregate_signal = None

# ForwardPropagation Class
class ForwardPropagation:
    """Forward pass through a single layer."""

    @staticmethod
    def apply(layer, input_data):
        """Compute the layer's activated output and cache intermediates.

        Stores ``input_data`` and the pre-activation signal on ``layer``
        (as ``layer.input_data`` and ``layer.aggregate_signal``) and returns
        the result of applying the layer's activation to that signal.
        """
        layer.input_data = input_data
        params = layer.params
        # Affine transform: inputs · weights + bias.
        layer.aggregate_signal = np.dot(input_data, params.weights) + params.bias
        return Activation.apply(layer.aggregate_signal, layer.activation_function)


#LossFunction Class
class LossFunction:
    """Loss values and loss gradients, selected by name (``'mse'`` or ``'cross_entropy'``)."""

    @staticmethod
    def compute(output, target, function='cross_entropy'):
        """Return the scalar loss between ``output`` and ``target``.

        ``'mse'`` is the mean of squared differences over all elements.
        ``'cross_entropy'`` is ``-sum(target * log(output))`` divided by the
        length of ``target``'s first axis, with ``output`` clipped to
        ``[1e-10, 1 - 1e-10]`` before the logarithm.

        Raises:
            ValueError: If ``function`` is not a supported name.
        """
        n_rows = target.shape[0]
        if function == 'cross_entropy':
            # Clip so log() never sees exactly 0.
            safe_output = np.clip(output, 1e-10, 1 - 1e-10)
            return -np.sum(target * np.log(safe_output)) / n_rows
        if function == 'mse':
            return np.mean(np.power(target - output, 2))
        raise ValueError("Unsupported loss function.")

    @staticmethod
    def derivative(output, target, function='cross_entropy'):
        """Return the element-wise gradient of the loss with respect to ``output``.

        ``'mse'`` returns ``output - target``. ``'cross_entropy'`` returns
        ``-(target / output) + (1 - target) / (1 - output)`` on the clipped
        output, i.e. the per-element binary cross-entropy form.

        Raises:
            ValueError: If ``function`` is not a supported name.
        """
        if function == 'cross_entropy':
            # Clip so neither denominator can be exactly 0.
            safe_output = np.clip(output, 1e-10, 1 - 1e-10)
            positive_term = target / safe_output
            negative_term = (1 - target) / (1 - safe_output)
            return -positive_term + negative_term
        if function == 'mse':
            return output - target
        raise ValueError("Unsupported loss function derivative.")

#GradientDescent Class            
class GradientDescent:
    """Plain gradient-descent parameter update."""

    @staticmethod
    def update_params(layer, dW, dB):
        """Step ``layer.params`` against the gradients ``dW`` and ``dB``.

        Weights and bias are each reduced by the layer's own learning rate
        times the corresponding gradient, using augmented assignment on
        ``layer.params``.
        """
        params = layer.params
        step = params.learning_rate
        params.weights -= step * dW
        params.bias -= step * dB

#BackwardPropagation
class BackwardPropagation:
    """Backward pass: compute every layer's gradients, then update the layers."""

    @staticmethod
    def apply(network, output, target):
        """Backpropagate from ``output`` toward the first layer and update parameters.

        Args:
            network: Object whose ``layers`` sequence holds the layers in
                forward order. Each layer must already carry the
                ``input_data`` and ``aggregate_signal`` cached by a forward
                pass.
            output: Network output from that forward pass.
            target: Target values with the same shape as ``output``.

        All gradients are computed from the weights that produced
        ``output``; parameters are only modified after the whole backward
        sweep is finished. Updating a layer in place during the sweep would
        make the next (earlier) layer's gradient depend on already-changed
        weights.
        """
        pending_updates = []
        dZ = None
        downstream_layer = None  # the layer processed just before, i.e. closer to the output

        for layer in reversed(network.layers):
            if downstream_layer is None:  # Output layer
                if layer.activation_function == 'softmax':
                    # Combined softmax + cross-entropy gradient w.r.t. the
                    # pre-activation signal.
                    dZ = output - target
                else:
                    # Chain rule: dL/dZ = dL/dA * dA/dZ. The loss derivative
                    # alone is only dL/dA.
                    dA = LossFunction.derivative(output, target, 'cross_entropy')
                    dZ = dA * Activation.derivative(layer.aggregate_signal, layer.activation_function)
            else:
                # Pull the gradient back through the downstream layer's
                # (still un-updated) weights, then through this activation.
                dA = np.dot(dZ, downstream_layer.params.weights.T)
                dZ = dA * Activation.derivative(layer.aggregate_signal, layer.activation_function)

            batch_size = layer.input_data.shape[0]
            dW = np.dot(layer.input_data.T, dZ) / batch_size
            dB = np.sum(dZ, axis=0, keepdims=True) / batch_size
            pending_updates.append((layer, dW, dB))
            downstream_layer = layer

        # Apply all updates only now, so no gradient above saw modified weights.
        for layer, dW, dB in pending_updates:
            GradientDescent.update_params(layer, dW, dB)

#Model Class
class Model:
    """A feed-forward stack of layers with prediction and a training loop."""

    def __init__(self):
        """Start with no layers."""
        self.layers = []

    def add_layer(self, size, next_layer_size, activation_function='relu'):
        """Append a new ``Layer`` built from the given sizes and activation name."""
        new_layer = Layer(size, next_layer_size, activation_function)
        self.layers.append(new_layer)

    def predict(self, inputs):
        """Feed ``inputs`` through every layer in order and return the final output.

        With no layers the inputs are returned unchanged.
        """
        signal = inputs
        for layer in self.layers:
            signal = ForwardPropagation.apply(layer, signal)
        return signal

    def train(self, inputs, targets, epochs=1000):
        """Run ``epochs`` rounds of forward pass, loss evaluation and backward pass.

        The cross-entropy loss is computed every epoch and printed on every
        100th epoch, starting with epoch 0.
        """
        for epoch in range(epochs):
            # Forward pass and loss for this epoch.
            output = self.predict(inputs)
            loss = LossFunction.compute(output, targets, 'cross_entropy')

            # Backward pass updates the layers.
            BackwardPropagation.apply(self, output, targets)

            if epoch % 100 != 0:
                continue
            print(f"Epoch {epoch}, Loss: {loss}")

# Network dimensions for the demonstration run.
input_size = 100  # Number of input features
hidden_layer_size = 50
hidden_layer_size2 = 70
output_size = 3  # Number of output classes for a classification problem
# NOTE: this value is not referenced again in this script.
learning_rate = 0.001  # Learning rate for gradient descent

# Build the network: relu -> tanh -> tanh -> softmax.
model = Model()
for n_in, n_out, activation in (
    (input_size, hidden_layer_size, 'relu'),
    (hidden_layer_size, hidden_layer_size2, 'tanh'),
    (hidden_layer_size2, hidden_layer_size2, 'tanh'),
    (hidden_layer_size2, output_size, 'softmax'),
):
    model.add_layer(n_in, n_out, activation)

# The seed is set after the layers are built, so it fixes the data drawn
# below but not the random draws made while constructing the layers.
np.random.seed(0)
inputs = np.random.rand(100, input_size)  # 100 samples
targets = np.zeros((100, output_size))
# One-hot targets with a randomly chosen class per sample (demonstration only).
for i in range(100):
    chosen_class = np.random.randint(0, output_size)
    targets[i, chosen_class] = 1

# Train the model
model.train(inputs, targets, epochs=1000)




Epoch 0, Loss: 1.1083590775700618
Epoch 100, Loss: 1.099348049457084
Epoch 200, Loss: 1.0935887165232463
Epoch 300, Loss: 1.089647785169491
Epoch 400, Loss: 1.0868079748497563
Epoch 500, Loss: 1.0845982628097817
Epoch 600, Loss: 1.0827435156664944
Epoch 700, Loss: 1.0810951585037896
Epoch 800, Loss: 1.0795647912896933
Epoch 900, Loss: 1.0781048753364784
