In [None]:
import math

class ComputationNode:
    '''
    Base class for one node of a computation graph.

    A node stores its input nodes, the value computed by the forward pass
    (self.result, None until computed) and the gradient accumulated by the
    backward pass (self.gradient, starting at 0.0).  Concrete operations
    subclass this and override forward() and backward().
    '''
    def __init__(self, *input_nodes, name=None):
        '''
        A computation graph is an acyclic graph of nodes, each representing
        a value that is computed from its child (predecessor) input_nodes.
        In our implementation, you must give the input nodes at construction
        time, and unlike pytorch we do not compute the result right away.
        We also do not compute the gradient yet. Those are your jobs.
        '''
        self.name = name
        self.input_nodes = input_nodes
        self.result = None
        self.gradient = 0.0
    def input(self, i):
        '''
        After the results are already computed in an input node, this
        method gives you the result of the ith input.
        '''
        return self.input_nodes[i].result
    def forward(self):
        '''
        The job of forward is to do the computation, and fill in self.result.
        Subclasses must override this, for example:
            self.result = function_of(self.input(0))
        '''
        # An `assert 'some string'` always passes, so raise explicitly to make
        # a missing override fail with a clear message.
        raise NotImplementedError(
            f'{type(self).__name__} must override forward()')
    def backward(self, upstream_gradient):
        '''
        The job of backward is to do one step of backprop, which has two steps:
        1. The current node's own gradient must be updated based on the upstream gradient.
        2. A downstream gradient is computed for each of the current node's inputs.
        This function should return a list of downstream gradients, one for
        each input.
        Subclasses must override this, for example:
            self.gradient += upstream_gradient
            return [local_gradient_for(inp) * upstream_gradient
                    for inp in self.input_nodes]
        '''
        raise NotImplementedError(
            f'{type(self).__name__} must override backward()')
    def leaves(self, result=None):
        '''
        Fills in and returns a dictionary mapping name -> node for every
        named node in the graph under (and including) the current node.
        Unnamed nodes are skipped.  Two different nodes sharing one name
        trigger an assertion error; the same node reached twice is fine.
        '''
        if result is None:
            result = {}
        if self.name is not None:
            assert self.name not in result or result[self.name] is self, f'Different nodes named {self.name}'
            result[self.name] = self
        for node in self.input_nodes:
            node.leaves(result)
        return result
    def __repr__(self):
        '''Print out the tree so you can see the form of the graph.'''
        name = type(self).__name__ + (' ' + self.name if self.name else '')
        our_repr = f'{name} result={self.result} gradient={self.gradient}'
        tree_repr = '\n'.join([our_repr] + [repr(node) for node in self.input_nodes])
        # Re-joining with an extra two spaces indents every line after the
        # first, so each nesting level is indented one step further.
        return '\n  '.join(tree_repr.split('\n'))

    # For convenience, we automatically construct computation nodes based
    # on some methods and infix operators.  You can add more.  Pytorch does
    # this too.
    def exp(self):
        return Exp(self)
    def __neg__(self):
        return Negate(self)
    def __pow__(self, int_power):
        return IntPow(self, int_power) # Optional: you could make an IntPow class
    def __add__(self, other):
        return Sum(self, other) # You should make a Sum class
    def __mul__(self, other):
        return Product(self, other) # You should make a Product class

class Leaf(ComputationNode):
    '''
    A named input value of the graph, written as Leaf(name=number).
    The single keyword supplies both the node name and its value.
    '''
    def __init__(self, **kwargs):
        assert len(kwargs) == 1, 'Leaf must specify one unique name=number'
        (only_item,) = kwargs.items()
        leaf_name, leaf_value = only_item
        super().__init__(name=leaf_name)
        self.value = leaf_value
    def forward(self):
        '''The result of a leaf is simply its stored value.'''
        self.result = self.value
    def backward(self, upstream_gradient):
        '''Accumulate the gradient; a leaf has no inputs to pass it on to.'''
        self.gradient += upstream_gradient
        no_inputs = []
        return no_inputs

class Exp(ComputationNode):
    '''Node computing e ** input.'''
    def forward(self):
        '''
        Use the operand's own exp() method when it has a callable one,
        otherwise fall back to math.exp.
        '''
        operand = self.input(0)
        own_exp = getattr(operand, 'exp', None)
        if callable(own_exp):
            self.result = operand.exp()
        else:
            self.result = math.exp(operand)
    def backward(self, upstream_gradient):
        '''
        Accumulate the upstream gradient and return the single downstream
        gradient.  The derivative of exp is exp itself, so the local
        gradient is the already-computed result.
        '''
        self.gradient += upstream_gradient
        return [self.result * upstream_gradient]

class Negate(ComputationNode):
    '''Node computing the negation of its single input.'''
    def forward(self):
        operand = self.input(0)
        self.result = -operand
    def backward(self, upstream_gradient):
        '''
        Accumulate the upstream gradient, then pass it on with the sign
        flipped (the local gradient of negation is -1).
        '''
        self.gradient += upstream_gradient
        flipped = -upstream_gradient
        return [flipped]
    
class Mean(ComputationNode):
    '''Node computing the average of its first two inputs.'''
    def forward(self):
        first, second = self.input(0), self.input(1)
        self.result = (first + second) / 2
    def backward(self, upstream_gradient):
        '''
        Accumulate the upstream gradient and hand half of it to each of
        the two inputs.
        '''
        self.gradient += upstream_gradient
        return [upstream_gradient / 2 for _ in range(2)]

def forward_algorithm(tree):
    '''
    Exercise placeholder for the forward pass.
    As written it performs no computation and returns the tree unchanged.
    '''
    # TODO: you fill in the code here
    return tree

def backward_algorithm(tree, upstream_gradient=1.0):
    '''
    Exercise placeholder for the backward pass.
    As written it ignores upstream_gradient and returns the tree unchanged.
    '''
    # TODO: you fill in the code here
    return tree

def zero_gradient(tree):
    '''
    Exercise placeholder for resetting gradients.
    As written it changes nothing and returns the tree unchanged.
    '''
    # TODO: you fill in the code here
    return tree


# Demo: build two graphs over one shared leaf x and print them.
# With the placeholder forward/backward functions defined above in this cell,
# the calls below hand the tree back untouched, so the printed results stay
# None and the gradients stay 0.0.
x = Leaf(x=0.5)
# Mean(e^x, -e^(-x)) = (e^x - e^(-x)) / 2, i.e. sinh(x)
sinh_x = Mean(Exp(x), -Exp(-x))
# Mean(e^x, e^(-x)) = (e^x + e^(-x)) / 2, i.e. cosh(x)
cosh_x = Mean(Exp(x), Exp(-x))
print('sinh_x tree:', sinh_x)
print('sinh_x forward result:', forward_algorithm(sinh_x).result)
print('sinh_x backward leaves:', backward_algorithm(sinh_x).leaves())
print('cosh_x tree:', cosh_x)
print('cosh_x forward result:', forward_algorithm(cosh_x).result)





In [None]:
def forward_algorithm(tree):
    '''
    Run the forward pass over the graph rooted at tree and return tree.

    Every node's forward() is called only after the forward() of all of
    its inputs, visiting inputs left to right.  A node that is reachable
    along several paths is recomputed once per path.
    '''
    # Explicit-stack post-order walk: a node is pushed once to expand its
    # inputs and a second time (flag True) to be computed after them.
    pending = [(tree, False)]
    while pending:
        node, inputs_done = pending.pop()
        if inputs_done:
            node.forward()
            continue
        pending.append((node, True))
        # Reversed so the first input ends up on top of the stack.
        pending.extend((child, False) for child in reversed(node.input_nodes))
    return tree

def backward_algorithm(tree, upstream_gradient=1.0):
    '''
    Run backprop over the graph rooted at tree and return tree.

    Each node's backward() receives the gradient flowing into it and
    returns one downstream gradient per input; those are then pushed
    into the matching inputs, left to right, depth first.  Gradients are
    accumulated by the nodes themselves and are not reset here.
    '''
    def propagate(node, incoming):
        outgoing = node.backward(incoming)
        for position, child in enumerate(node.input_nodes):
            propagate(child, outgoing[position])

    propagate(tree, upstream_gradient)
    return tree

def zero_gradient(tree):
    '''
    Reset the gradient of tree and of every node reachable through
    input_nodes to 0.0, then return tree.
    '''
    to_reset = [tree]
    while to_reset:
        node = to_reset.pop()
        to_reset.extend(node.input_nodes)
        node.gradient = 0.0
    return tree

# Demo: sinh(x) and cosh(x) graphs built over one shared leaf x.
x = Leaf(x=0.5)
# Mean(e^x, -e^(-x)) = (e^x - e^(-x)) / 2, i.e. sinh(x)
sinh_x = Mean(Exp(x), -Exp(-x))
# Mean(e^x, e^(-x)) = (e^x + e^(-x)) / 2, i.e. cosh(x)
cosh_x = Mean(Exp(x), Exp(-x))
# Printed before any forward pass, so every result still shows as None.
print('sinh_x tree:', sinh_x)
print('sinh_x forward result:', forward_algorithm(sinh_x).result)
# leaves() returns the named nodes; here that is just x, whose gradient now
# holds d sinh(x) / dx accumulated over both paths that reach it.
print('sinh_x backward leaves:', backward_algorithm(sinh_x).leaves())
# x is shared by both graphs, so this also clears the gradient that the
# sinh_x backward pass just accumulated on x.
zero_gradient(cosh_x)
print('cosh_x tree:', cosh_x)
print('cosh_x forward result:', forward_algorithm(cosh_x).result)
print('cosh_x backward leaves:', backward_algorithm(cosh_x).leaves())


In [None]:
class IntPow(ComputationNode):
    '''
    Node computing input ** int_power, where int_power is a fixed constant
    (not a node, so no gradient flows into it).
    '''
    def __init__(self, input_node, int_power, name=None):
        '''
        input_node: the node whose result is raised to the power.
        int_power:  the constant exponent.
        name:       optional node name, forwarded to ComputationNode.
        '''
        # The input node must be registered with the base class; otherwise
        # self.input(0) has nothing to read, and the IntPow(self, int_power)
        # call made by ComputationNode.__pow__ would not even construct.
        super().__init__(input_node, name=name)
        self.int_power = int_power
    def forward(self):
        self.result = self.input(0) ** self.int_power
    def backward(self, upstream_gradient):
        '''
        Accumulate the upstream gradient and return the single downstream
        gradient, using d/dx x**n = n * x**(n-1).
        '''
        self.gradient += upstream_gradient
        if self.int_power == 0:
            # x**0 is constant, so its gradient is zero.  Short-circuit so we
            # never evaluate x ** -1, which fails when x is 0.
            return [upstream_gradient * 0]
        # return downstream gradient
        return [upstream_gradient * self.int_power * (self.input(0) ** (self.int_power - 1))]
    
class Sum(ComputationNode):
    '''Node computing the sum of its two inputs.'''
    def forward(self):
        # ComputationNode exposes input(i), not inputs(); read each operand
        # through it.
        self.result = self.input(0) + self.input(1)
    def backward(self, upstream_gradient):
        '''
        Accumulate the upstream gradient and pass it unchanged to both
        inputs (the local gradient of a sum is 1 for each operand).
        '''
        self.gradient += upstream_gradient
        # return downstream gradient
        return [upstream_gradient, upstream_gradient]

class Product(ComputationNode):
    '''Node computing the product of its two inputs.'''
    def forward(self):
        # ComputationNode exposes input(i), not inputs(); read each operand
        # through it.
        self.result = self.input(0) * self.input(1)
    def backward(self, upstream_gradient):
        '''
        Accumulate the upstream gradient and return one downstream gradient
        per input.  For a * b the local gradient with respect to a is b and
        with respect to b is a.
        '''
        self.gradient += upstream_gradient
        left, right = self.input(0), self.input(1)
        # return downstream gradient
        return [right * upstream_gradient, left * upstream_gradient]

In [None]:
# Runs another backward pass without calling zero_gradient first, so the
# gradients add on top of whatever earlier passes already accumulated.
backward_algorithm(sinh_x)
# Bare expression: the notebook displays the tree via __repr__.
sinh_x

In [None]:
# NOTE(review): `out` is not defined in any visible cell; presumably a graph
# built elsewhere in the notebook. Confirm before running.
forward_algorithm(out)
out

In [None]:
# NOTE(review): `out` is not defined in any visible cell; confirm it exists
# and has had a forward pass before running backprop on it.
backward_algorithm(out)
out

In [None]:
# Display the current state of `out` (NOTE(review): not defined in any
# visible cell).
out

In [None]:
def backprop_algorithm(out, upstream_gradient=1.0):
    '''
    Run backprop over the graph rooted at out and return out.

    out:               the node to start from.
    upstream_gradient: gradient flowing into out (1.0 for the final output).

    Each node's backward() is given its upstream gradient and returns one
    downstream gradient per input; each of those is then propagated into
    the matching input node.
    '''
    # backward() requires the upstream gradient and returns the per-input
    # downstream gradients.
    downstream_gradients = out.backward(upstream_gradient)
    # input_nodes is an attribute (a sequence of nodes), not a method.
    for position, inp in enumerate(out.input_nodes):
        backprop_algorithm(inp, downstream_gradients[position])
    return out