# Deep Learning Framework
By Luke Doughty  
Derived from Grokking Deep Learning by Andrew W. Trask
## Why tensors?
Tensors are generalizations of vectors and matrices. Vectors are one-dimensional tensors and matrices are two-dimensional tensors.  
The inputs, outputs, and operations are all represented with tensors (vector in, matrix transformation/operation, vector out). We set up our code so we can stack tensors on top of one another, exactly like the layers in our neural network. We have specific instructions on how to backpropagate through each type of layer, so we can backpropagate automatically and focus our engineering efforts on forward propagation.
## Why don't we have to worry about dimensions?



In [2]:
import numpy as np

class Tensor (object):
    """A NumPy-backed array that remembers how it was created so gradients can flow back.

    Every operation on autograd tensors returns a new ``Tensor`` whose
    ``creators`` are the operands and whose ``creation_operation`` names the
    operation.  ``backpropagate`` walks that graph in reverse, accumulating a
    gradient on every tensor that has ``autoGrad`` enabled.

    Attributes:
        data: the values, stored as a ``numpy.ndarray``.
        autoGrad: when False, ``backpropagate`` is a no-op for this tensor.
        creators: list of the tensors this one was computed from, or None.
        creation_operation: name of the operation that produced this tensor
            ("add", "neg", "sub", "mul", "dot", "transpose", "sum_<dim>",
            "expand_<dim>"), or None.
        gradient: accumulated gradient (a ``Tensor``), or None before backprop.
        children: dict mapping a child's id to the number of gradients this
            tensor still expects to receive from that child.
        id: identifier used as the key in the creators' ``children`` dicts.
    """

    def __init__(self, data,
                 autoGrad = False,
                 creators = None,
                 creation_operation = None,
                 id = None):
        """Wrap ``data`` and register this tensor as a child of each creator.

        Args:
            data: anything ``numpy.array`` accepts; it is copied into an ndarray.
            autoGrad: whether this tensor takes part in backpropagation.
            creators: list of the tensors this one was computed from, if any.
            creation_operation: name of the operation that combined ``creators``.
            id: explicit identifier; a random integer in [0, 100000) is drawn
                when omitted.
        """
        self.data = np.array(data)
        self.creation_operation = creation_operation
        self.creators = creators
        self.gradient = None
        self.autoGrad = autoGrad
        self.children = {}

        if id is None:
            id = np.random.randint(0, 100000)
        self.id = id

        if creators is not None:
            for creator in creators:
                # Each appearance as an operand means one more gradient the
                # creator must wait for.  The count exceeds 1 when the same
                # tensor is used twice in one operation (e.g. ``x + x``).
                creator.children[self.id] = creator.children.get(self.id, 0) + 1

    def all_children_gradients_accounted_for(self):
        """Return True when no child still owes this tensor a gradient."""
        return all(count == 0 for count in self.children.values())

    def backpropagate(self, gradient, gradient_origin = None):
        """Accumulate ``gradient`` and, once complete, pass it on to the creators.

        Args:
            gradient: ``Tensor`` holding the gradient with respect to this tensor.
            gradient_origin: the child tensor the gradient comes from, or None
                when backpropagation starts at this tensor.

        Raises:
            Exception: if ``gradient_origin`` has already delivered every
                gradient it owed this tensor.
            KeyError: if ``gradient_origin`` is not a registered child.
        """
        if not self.autoGrad:
            return

        if gradient_origin is not None:
            # Tick off one expected gradient from this child.
            if self.children[gradient_origin.id] == 0:
                raise Exception("cannot backprop more than once")
            self.children[gradient_origin.id] -= 1

        if self.gradient is None:
            self.gradient = gradient
        else:
            # Gradients arriving from several children are summed.
            self.gradient = self.gradient + gradient

        if self.creators is None:
            return
        # A tensor that starts backpropagation (no origin) always propagates;
        # otherwise wait until every child has reported.
        if gradient_origin is not None and not self.all_children_gradients_accounted_for():
            return
        self._backpropagate_to_creators()

    def _backpropagate_to_creators(self):
        """Send each creator its share of the accumulated gradient.

        Gradients are computed on the raw ndarrays and wrapped in fresh,
        non-autograd tensors, so that computing them never registers extra
        children on the tensors of the forward graph.  Every call names
        ``self`` as the origin so the creator's bookkeeping stays accurate.
        """
        operation = self.creation_operation
        if operation is None:
            return
        creators = self.creators
        grad = self.gradient.data

        if operation == "add":
            creators[0].backpropagate(Tensor(grad), self)
            creators[1].backpropagate(Tensor(grad), self)
        elif operation == "neg":
            creators[0].backpropagate(Tensor(grad * -1), self)
        elif operation == "sub":
            creators[0].backpropagate(Tensor(grad), self)
            creators[1].backpropagate(Tensor(grad * -1), self)
        elif operation == "mul":
            # d(a*b)/da = b and d(a*b)/db = a (element-wise).
            creators[0].backpropagate(Tensor(grad * creators[1].data), self)
            creators[1].backpropagate(Tensor(grad * creators[0].data), self)
        elif operation == "dot":
            activation = creators[0]
            weights = creators[1]
            activation_gradient = grad.dot(weights.data.transpose())
            activation.backpropagate(Tensor(activation_gradient), self)
            weights_gradient = grad.transpose().dot(activation.data).transpose()
            weights.backpropagate(Tensor(weights_gradient), self)
        elif operation == "transpose":
            creators[0].backpropagate(Tensor(grad.transpose()), self)
        elif operation.startswith("sum"):
            # The operation is named "sum_" + str(dimension); undo the sum by
            # copying the gradient back along the collapsed dimension.
            dimension = int(operation.split("_")[1])
            copies = creators[0].data.shape[dimension]
            creators[0].backpropagate(Tensor(grad).expand(dimension, copies), self)
        elif operation.startswith("expand"):
            # The operation is named "expand_" + str(dimension); undo the
            # expansion by summing over the added dimension.
            dimension = int(operation.split("_")[1])
            creators[0].backpropagate(Tensor(grad).sum(dimension), self)

    def __add__(self, other):
        """Element-wise sum; tracked for autograd only if both operands are."""
        if (self.autoGrad and other.autoGrad):
            return Tensor(self.data + other.data, autoGrad = True, creators = [self, other], creation_operation = "add")
        return Tensor(self.data + other.data)

    def __neg__(self):
        """Return the tensor with every sign flipped."""
        if (self.autoGrad):
            return Tensor(self.data * -1, autoGrad = True, creators = [self], creation_operation = "neg")
        return Tensor(self.data * -1)

    def __sub__(self, other):
        """Element-wise difference; tracked for autograd only if both operands are."""
        if (self.autoGrad and other.autoGrad):
            return Tensor(self.data - other.data, autoGrad = True, creators = [self, other], creation_operation = "sub")
        return Tensor(self.data - other.data)

    def __mul__(self, other):
        """Element-wise product; tracked for autograd only if both operands are."""
        if (self.autoGrad and other.autoGrad):
            return Tensor(self.data * other.data, autoGrad = True, creators = [self, other], creation_operation = "mul")
        return Tensor(self.data * other.data)

    def sum(self, dimension):
        """Collapse the tensor along ``dimension`` by adding the values along it."""
        if (self.autoGrad):
            return Tensor(self.data.sum(dimension), autoGrad = True, creators = [self], creation_operation = "sum_" + str(dimension))
        return Tensor(self.data.sum(dimension))

    def expand(self, dimension, copies):
        """Insert a new axis at ``dimension`` holding ``copies`` copies of the tensor.

        Args:
            dimension: position of the new axis in the result; a negative
                value counts from the end of the result's axes.
            copies: length of the new axis.

        Returns:
            A tensor with one more dimension than this one.
        """
        rank = len(self.data.shape)
        if dimension < 0:
            # Negative positions refer to the (rank + 1)-dimensional result.
            dimension += rank + 1

        # transposition_command gives the order of dimensions for the expanded
        # tensor: the copies are first created on a trailing axis, which is
        # then moved to the requested position.
        transposition_command = list(range(0, rank))
        transposition_command.insert(dimension, rank)
        new_shape = list(self.data.shape) + [copies]
        new_data = self.data.repeat(copies).reshape(new_shape)
        new_data = new_data.transpose(transposition_command)

        if (self.autoGrad):
            return Tensor(new_data, autoGrad = True, creators = [self], creation_operation = "expand_" + str(dimension))
        return Tensor(new_data)

    def transpose(self):
        """Return the tensor with its axes reversed (rows and columns swapped in 2-D)."""
        if (self.autoGrad):
            return Tensor(self.data.transpose(), autoGrad = True, creators= [self], creation_operation= "transpose")
        return Tensor(self.data.transpose())

    def dot(self, other): # book calls this matrix_multiplication
        """Return ``self.data.dot(other.data)``; tracked for autograd when ``self`` is."""
        if(self.autoGrad):
            return Tensor(self.data.dot(other.data), autoGrad = True, creators = [self, other], creation_operation = "dot")
        return Tensor(self.data.dot(other.data))

    def __repr__(self):
        """Return the ``repr`` of the underlying ndarray."""
        return str(self.data.__repr__())

    def __str__(self):
        """Return the human-friendly string form of the underlying ndarray."""
        return str(self.data.__str__())
