In [121]:
class NN():
    """
    class for a neural network. Requires number of hidden layers, and number of neurons
    at each layer (we assume all hidden layers are of the same size)
    """
    def __init__(self, input_size, num_HL, hidden_size, output_size):
        # Initialize by setting random 
        self.input_size = input_size
        self.hidden_layers = num_HL
        # NOTE we are assuming all hidden layers are the same size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # Activations for each neuron
        self.activations_in = np.ones(self.input_size)
        # Hidden can comprise multiple layers, so we have a matrix
        self.activations_hidden = np.ones((self.hidden_layers, self.hidden_size))
        self.activations_out = np.ones(self.output_size)
        # Weights of all the edges, randomized for good results
        self.weights_in = np.random.randn(self.input_size, self.hidden_size)
        # We will only have hidden weights if there are multiple hidden layers
        if self.hidden_layers > 1:
            self.weights_hidden = np.random.randn(self.hidden_layers - 1, self.hidden_size, self.hidden_size)
        else:
            self.weights_hidden = []
        self.weights_out = np.random.randn(self.hidden_size, self.output_size)
        
    import numpy as np
    
    def _sigmoid(self, x):
        """
        Sigmoid function for calculating a distribution over 2 classes
        """
        return 1 / (1 + np.exp(-x))

    def _activate(self, x):
        """
        RELU for non-linear activation function
        """
        return max(x, 0.0)
    
    def _activate_vector(self, X):
        """
        Run on a numpy vector
        """
        activations = np.vectorize(self._activate)
        return activations(X)

    def _transformation(self, x, W, b):
        """
        Calculate a layer of the network with a
        linear transformation. W here is a matrix of size
        dimensions_IN x dimensions_OUT
        Note that this is basically an instance of calculating the input vector to the sigmoid
        function that we saw in logistic regression. In this case, we will calle the function this is
        passed to an 'activation' function
        """
        return np.dot(x, W) + b


    def _loss(self, y_hat, y):
        """
        Compute loss form prediction y_hat, and gold value y

        'Unlike linear models, the loss function of multi-layer neural networks 
        with respect to their parameters is not convex'

        We can still use logistic_loss/binary cross-entropy:
        This basically finds the negative of the log probability of class1 - its inverse
        """
        print(y_hat, y)
        return (-y * np.log(y_hat)) - ((1 - y) * np.log(1 - y_hat))
    
    def _vector_loss(self, Y_hat, Y):
        """
        Get a vector of losses from numpy vectors
        """
        loss = np.vectorize(self._loss)
        return loss(Y_hat, Y)
        
    def forward(self, inputs):
        """
        Forward pass: Calculate the activations of each neuron
        """
        if len(inputs) != self.input_size:
          raise Exception("That is not the size of the input layer... try %i" % self.input_size)
        
        # Set input activations
        self.activations_in = inputs[:]
        
        # calculate the activations for each hidden layer
        for h_layer_i in range(self.hidden_layers):
            # Need to take previous layer activation value * weights for a given layer
            # Starting with input layer X first hidden layer
            if h_layer_i == 0:
                # multiply the previous layer's activations by its weight vector for this layer's activations
                self.activations_hidden[h_layer_i] = np.dot(np.transpose(self.activations_in), self.weights_in)
                self.activations_hidden[h_layer_i] = self._activate_vector(self.activations_hidden[h_layer_i])
            else:
                # multiply the previous layer's activations by its weight vector for this layer's activations
                self.activations_hidden[h_layer_i] = np.dot(np.transpose(self.activations_hidden[h_layer_i - 1]), self.weights_hidden[h_layer_i - 1])
                self.activations_hidden[h_layer_i] = self._activate_vector(self.activations_hidden[h_layer_i])
                    
        self.activations_out = np.dot(np.transpose(self.activations_hidden[-1]), self.weights_out)
        # Take the simoid of the activations of the output layer, because we are doing 2 class classification
        # If we have >2 classes, we would use softmax
        self.activations_out = self._sigmoid(self._activate_vector(self.activations_out))

    def backward(self, targets):
        """
        Backpropogation for finding the partial derivative of the each node w.r.t the loss function,
        and updating weights based on those gradients
        """
        if len(targets) != len(self.activations_out):
            raise Exception("Your labels are not the same size as your output layer!")
        
        # Calculate loss - there will be a value for each node in the output layer
        loss = self._vector_loss(self.activations_out, targets)
        print(loss)
        
        """
        Now we need to calculate the partial derivative of the los w.r.t each weight.
        Each node has a value "delta", which represents the loss * activate(W * previous_activations), which
        we have already calculated in the forward pass.
        
        
        """
        
        return True

test = NN(2, 2, 3, 1)
test.forward([2, 2])
print(test.activations_in)
print(test.activations_hidden)
print(test.activations_out)
test.backward([3])
"""
class Node(Object):
    def __init__(self):
        
    def 
"""

[2, 2]
[[ 0.          1.89121379  0.        ]
 [ 0.          0.          1.13386882]]
[ 0.84416315]
[ 0.84416315] [3]
0.844163153759 3
0.844163153759048 3
[-3.20966287]


'\nclass Node(Object):\n    def __init__(self):\n        \n    def \n'