In [1]:
import numpy as np

def relu(x):
    """Return the rectified linear unit of ``x``, element-wise.

    Every entry below zero is replaced by zero; other entries pass
    through unchanged. Works on scalars and arrays alike.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def softmax(x):
    """Return the softmax of ``x`` taken along axis 0.

    Args:
        x: Array-like of scores. A 1-D input is treated as one score
            vector; for higher-rank input each slice along axis 0 (e.g.
            each column of a 2-D array) is normalised independently.

    Returns:
        Array of the same shape as ``x`` whose entries along axis 0 are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shift by the maximum along the SAME axis that is normalised. Using the
    # global maximum would let a column far below it underflow to all zeros,
    # and the division below would then produce NaN (0 / 0).
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)

class Network(object):
    """Fully connected feed-forward network backed by NumPy arrays.

    Layer ``i`` owns a weight matrix of shape
    ``(layer_dims[i + 1], layer_dims[i])`` and a bias vector of length
    ``layer_dims[i + 1]``. Both are drawn from a standard normal
    distribution when the network is constructed.
    """

    def __init__(self, layer_dims, activations):
        """Store the configuration and draw the initial parameters.

        Args:
            layer_dims: Sequence of layer widths, input layer first.
            activations: One callable per non-input layer; each is applied
                to that layer's pre-activation in ``forward``.

        Raises:
            ValueError: If ``len(activations)`` differs from
                ``len(layer_dims) - 1``.
        """
        if len(layer_dims) - 1 != len(activations):
            raise ValueError("Number of layers and activation functions do not match.")

        self.layer_dims = layer_dims
        self.activations = activations
        self.initialize_parameters()

    def initialize_parameters(self):
        """(Re)draw all weights and biases from a standard normal distribution."""
        self.weights = []
        self.biases = []
        # Pair each layer width with the width of the layer feeding into it.
        for n_in, n_out in zip(self.layer_dims[:-1], self.layer_dims[1:]):
            self.weights.append(np.random.normal(size=(n_out, n_in)))
            self.biases.append(np.random.normal(size=n_out))

    def forward(self, x):
        """Propagate ``x`` through every layer and return the last activation.

        Args:
            x: A single sample of shape ``(layer_dims[0],)`` or a batch
                stored one sample per column, shape
                ``(layer_dims[0], batch)``.

        Returns:
            The output of the final activation: shape ``(layer_dims[-1],)``
            for a single sample, ``(layer_dims[-1], batch)`` for a batch
            (assuming the activations preserve shape).
        """
        for w, b, activation in zip(self.weights, self.biases, self.activations):
            z = np.dot(w, x)
            if np.ndim(z) == 2:
                # For a column-per-sample batch the bias must vary along
                # axis 0 (units). Plain broadcasting would align the 1-D bias
                # with the batch axis instead: a shape error in general, and
                # a silent mis-addition when batch size equals layer width.
                b = np.reshape(b, (-1, 1))
            x = activation(z + b)
        return x

    def loss(self, y_pred, y_true):
        """Return the cross-entropy between ``y_pred`` and ``y_true``.

        Predictions are clipped to ``[1e-9, 1 - 1e-9]`` before the
        logarithm so that a probability of exactly 0 cannot yield ``-inf``.

        NOTE(review): the summed loss is divided by ``y_true.shape[0]``.
        With the column-per-sample layout used by ``forward`` that is the
        number of classes, not the number of samples -- confirm that this
        normalisation is intended.
        """
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
        loss = -np.sum(y_true * np.log(y_pred))
        return loss / y_true.shape[0]

    def backpropagate(self):
        """Placeholder for the backward pass; currently does nothing."""
        pass

# Example usage: a 784-wide input, six ReLU hidden layers, and a 10-way
# softmax output layer.
layer_dimensions = [784, 512, 256, 128, 64, 32, 16, 10]
# One activation per non-input layer: ReLU everywhere except the final layer.
activations = [relu] * (len(layer_dimensions) - 2) + [softmax]
net = Network(layer_dimensions, activations)

In [5]:
# Inspect the first layer's weight matrix; it is stored as
# (layer_dims[1], layer_dims[0]), i.e. (outputs, inputs).
net.weights[0].shape

(512, 784)