In [48]:
import numpy as np
from helpers.activfunc import ReLu, sigmoid
from helpers.lossfunc import sum_squares, cross_entropy
# from optimizer import SGD, ..., optimizer_dict

In [49]:
# Registries that translate the lower-case names accepted by the network API
# into the implementations imported from the helpers package.

# Activation name -> activation implementation.
activfunc_dict = {
    'relu': ReLu,
    'sigmoid': sigmoid,
}

# Loss name -> loss implementation.
lossfunc_dict = {
    'sumsquares': sum_squares,
    'crossentropy': cross_entropy,
}

# Optimizer registry: still commented out, just like its import at the top of
# the notebook. neuralnetwork.train_network looks up `optimizer_dict`, so it
# has to be defined before a network can be trained.
# optimizer_dict = {
#     'sgd': SGD, 'minibatchgd': MiniBatchGD, ... 
# }

In [41]:
class neuralnetwork:
    """Small fully connected feed-forward network built layer by layer.

    Every layer k owns a weight matrix ``W[k]`` of shape
    ``(neurons_k, inputs_k + 1)``; column 0 multiplies a constant bias input
    of 1 that is prepended to the layer input.

    Attributes:
        W: list of weight matrices, one per layer. ``W[0]`` stays ``None``
            until the input width is known (first ``forward`` or
            ``train_network`` call).
        Z: pre-activations cached by the latest ``forward`` call, one per layer.
        Y: activations cached by the latest ``forward`` call; ``Y[0]`` is the
            raw input and ``Y[k + 1]`` is the output of layer k.
        Wgrad: list of weight gradients with the same shapes as ``W``
            (``None`` until ``backward`` or ``train_network`` has run).
        nlayers: number of layers added so far.
        activations: lower-cased activation name of every layer.
        prev_layer_neurons: neuron count of the most recently added layer.
        layer_sizes: neuron count of every layer, in order.
        eta: learning rate passed to the latest ``train_network`` call.
    """

    def __init__(self):
        self.W = []
        self.Z = []
        self.Y = []
        self.Wgrad = None
        self.nlayers = 0
        self.activations = []
        self.prev_layer_neurons = None
        self.layer_sizes = []
        self.eta = None

    @staticmethod
    def _with_bias(values):
        """Return ``values`` with a constant 1 prepended along the last axis.

        Works for a single sample (1-D) and for a batch with one sample per
        row (2-D). np.insert returns a new array, so the result must be used.
        """
        return np.insert(values, 0, 1, axis=-1)

    def _init_first_layer(self, input_size):
        """Create the first weight matrix once the input width is known.

        Existing first-layer weights are left untouched.
        """
        if self.W[0] is None:
            self.W[0] = np.random.rand(self.layer_sizes[0], input_size + 1)

    def add_layer(self, num_neurons, activation):
        """Append a layer with ``num_neurons`` units.

        Args:
            num_neurons: number of units in the new layer (must be >= 1).
            activation: activation name, matched case-insensitively against
                the keys of ``activfunc_dict`` when the network is evaluated.

        Raises:
            ValueError: if ``num_neurons`` is smaller than 1.
        """
        if num_neurons < 1:
            raise ValueError("num_neurons must be at least 1")
        if self.nlayers == 0:
            # The input width is unknown here; the matrix is created lazily.
            self.W.append(None)
        else:
            weights = np.random.rand(num_neurons, self.prev_layer_neurons + 1)
            self.W.append(weights)
        self.activations.append(activation.lower())
        self.layer_sizes.append(num_neurons)
        self.prev_layer_neurons = num_neurons
        self.nlayers += 1

    def forward(self, inputs):
        """Evaluate the network and cache the intermediate values.

        Args:
            inputs: one sample of shape ``(features,)`` or a batch of shape
                ``(samples, features)``.

        Returns:
            The activations of the last layer, shaped like the input with the
            feature axis replaced by the size of the last layer.

        Raises:
            ValueError: if the network has no layers or ``inputs`` is a scalar.
        """
        if self.nlayers == 0:
            raise ValueError("the network has no layers; call add_layer() first")
        current_x = np.asarray(inputs)
        if current_x.ndim == 0:
            raise ValueError("inputs must have at least one dimension")
        self._init_first_layer(current_x.shape[-1])

        # Drop the caches of any earlier pass so that backward() indexes the
        # values belonging to this call.
        self.clear_outputs()
        self.Y.append(current_x)
        for k in range(self.nlayers):
            z = np.matmul(self.W[k], self._with_bias(current_x).T).T
            self.Z.append(z)

            sigma = activfunc_dict[self.activations[k]]
            current_x = sigma.forward(z)
            self.Y.append(current_x)

        return current_x

    def backward(self, op_gradient):
        """Back-propagate a loss gradient and fill ``self.Wgrad``.

        Operates on the single sample processed by the preceding ``forward``
        call and uses the row-vector convention for gradients.

        Args:
            op_gradient: gradient of the loss with respect to the network
                output, with as many entries as the last layer has neurons.

        Raises:
            ValueError: if ``forward`` has not been run, if it was run on more
                than one sample, or if a gradient has an unexpected size.
        """
        if (self.nlayers == 0 or len(self.Z) != self.nlayers
                or len(self.Y) != self.nlayers + 1):
            raise ValueError("backward() requires a preceding forward() call")
        if not isinstance(self.Wgrad, list) or len(self.Wgrad) != self.nlayers:
            self.Wgrad = [np.zeros_like(w, dtype=float) for w in self.W]

        current_grad = np.asarray(op_gradient, dtype=float).reshape(-1)
        # Walk from the last layer (nlayers - 1) down to the first (0).
        for k in range(self.nlayers - 1, -1, -1):
            n_out = self.W[k].shape[0]
            if current_grad.size != n_out:
                raise ValueError(
                    "gradient for layer %d has %d entries, expected %d"
                    % (k, current_grad.size, n_out))
            if np.size(self.Z[k]) != n_out:
                raise ValueError(
                    "backward() supports a single sample per forward() call")

            sigma = activfunc_dict[self.activations[k]]
            local = np.asarray(sigma.backward(self.Z[k]), dtype=float)
            # NOTE(review): the activation helpers are not visible here.
            # Accept either an element-wise derivative (one value per neuron)
            # or a square Jacobian; confirm which one helpers.activfunc returns.
            if local.size == n_out:
                gradDz = current_grad * local.reshape(-1)
            elif local.size == n_out * n_out:
                gradDz = np.matmul(current_grad, local.reshape(n_out, n_out))
            else:
                raise ValueError(
                    "activation derivative for layer %d has unexpected shape %s"
                    % (k, np.shape(local)))

            # dL/dW[k] is the outer product of the pre-activation gradient
            # and the bias-augmented input of the layer.
            layer_input = self._with_bias(self.Y[k]).reshape(-1)
            self.Wgrad[k] = np.outer(gradDz, layer_input)
            # Column 0 of W[k] belongs to the constant bias input, which has
            # no upstream layer, so it is dropped from the propagated gradient.
            current_grad = np.matmul(gradDz, self.W[k])[1:]

    def clear_outputs(self):
        """Forget the values cached by ``forward``."""
        self.Z = []
        self.Y = []

    def train_network(self, X_train, Y_train, method, loss_function, eta = 0.001):
        """Prepare the network for the data and run an optimizer on it.

        Args:
            X_train: 2-D training inputs, one sample per row.
            Y_train: training targets, passed through to the optimizer.
            method: optimizer name looked up case-insensitively in the
                module-level ``optimizer_dict``, or the update rule itself.
                The rule is called as
                ``rule(network, X_train, Y_train, lossfunction)``.
            loss_function: loss name looked up case-insensitively in
                ``lossfunc_dict``, or the loss object itself.
            eta: learning rate. It is stored as ``self.eta`` and not passed
                to the update rule, whose call signature has no slot for it.

        Raises:
            ValueError: if the network has no layers.
            NameError: if ``method`` is a name and ``optimizer_dict`` is not
                defined.
            KeyError: if a name is not present in its registry.
        """
        if self.nlayers == 0:
            raise ValueError("the network has no layers; call add_layer() first")

        input_size = np.shape(X_train)[1]
        self._init_first_layer(input_size)
        # One gradient array per weight matrix; the matrices have different
        # shapes, so they cannot be stacked into a single ndarray.
        self.Wgrad = [np.zeros_like(w, dtype=float) for w in self.W]
        self.eta = eta

        if isinstance(loss_function, str):
            lossfunction = lossfunc_dict[loss_function.lower()]
        else:
            lossfunction = loss_function

        if isinstance(method, str):
            try:
                registry = optimizer_dict
            except NameError:
                raise NameError(
                    "optimizer_dict is not defined; define it or pass the "
                    "update rule itself as `method`") from None
            update_rule = registry[method.lower()]
        else:
            update_rule = method
        update_rule(self, X_train, Y_train, lossfunction)
        