In [10]:
#
# Based on https://alexander-schiendorfer.github.io/2020/02/16/automatic-differentiation.html
#

class CompNode:
    """Base class for one node of the computation graph recorded on a tape.

    Subclasses override ``forward`` to store their result in ``self.output``
    and ``backward`` to push their local gradient contribution to the nodes
    they take as inputs.
    """

    def __init__(self, tape):
        """Register this node on ``tape``.

        Args:
            tape: object with an ``add(node)`` method that records this node.
        """
        # Start from a zero gradient so that add_gradient() (and reading
        # .gradient) works even before set_gradient() has been called.
        self.gradient = 0
        # make sure that the gradient tape knows us
        tape.add(self)

    def forward(self):
        """Perform the intended operation and store it in ``self.output``.

        The base implementation does nothing.
        """
        pass

    def backward(self):
        """Perform the local gradient step with respect to the inputs.

        Assumes ``self.gradient`` already holds all the contributions from
        outgoing nodes. The base implementation does nothing.
        """
        pass

    def set_gradient(self, gradient):
        """Overwrite the accumulated gradient with ``gradient``."""
        self.gradient = gradient

    def add_gradient(self, gradient):
        """Accumulate a gradient contribution received from a downstream node."""
        self.gradient += gradient

class Tape:
    """Records computation nodes in creation order and replays them.

    ``forward`` runs the nodes in the order they were added; ``backward``
    runs them in the opposite order to propagate gradients.
    """

    def __init__(self):
        # Nodes in the order they were added to the tape.
        self.computations = []

    def add(self, compNode: "CompNode"):
        """Append ``compNode`` to the end of the tape."""
        self.computations.append(compNode)

    def forward(self):
        """Call ``forward()`` on every recorded node, in insertion order."""
        for compNode in self.computations:
            compNode.forward()

    def backward(self):
        """Run the backward pass over all recorded nodes.

        Every gradient is first reset to zero, the most recently added node
        is seeded with a gradient of one, and ``backward()`` is then called
        on each node from last to first. Does nothing on an empty tape.
        """
        if not self.computations:
            # nothing recorded, so there is no node to seed
            return

        # first initialize all gradients to zero
        for compNode in self.computations:
            compNode.set_gradient(0)

        # last node gets a default value of one for the gradient
        self.computations[-1].set_gradient(1)

        # Walk the tape in inverted order WITHOUT reversing the list in
        # place, so that later forward()/backward() calls still see the
        # nodes in their original order.
        for compNode in reversed(self.computations):
            compNode.backward()

class ConstantNode(CompNode):
    """Leaf node that outputs a fixed value."""

    def __init__(self, value, tape):
        """Remember ``value`` and register the node on ``tape``."""
        self.value = value
        super().__init__(tape)

    def forward(self):
        """Expose the stored value as this node's output."""
        self.output = self.value

    def backward(self):
        """Do nothing: a constant has no inputs to pass gradients to."""
        return None

class Multiply(CompNode):
    """Node computing the product of the outputs of two other nodes."""

    def __init__(self, left : CompNode, right : CompNode, tape : Tape):
        """Store both operands and register the node on ``tape``.

        Args:
            left: node providing the first factor.
            right: node providing the second factor.
            tape: tape this node is recorded on.
        """
        self.left = left
        self.right = right
        # Register on the tape that was passed in, not on a module-level
        # variable that merely happens to be called ``t``.
        super().__init__(tape)

    def forward(self):
        """Store ``left.output * right.output`` in ``self.output``."""
        self.output = self.left.output * self.right.output

    # has to know how to locally differentiate multiplication
    def backward(self):
        """Pass this node's gradient on to both operands.

        Each operand receives the other operand's output multiplied by
        ``self.gradient`` (product rule).
        """
        self.left.add_gradient(self.right.output * self.gradient)
        self.right.add_gradient(self.left.output * self.gradient)

# We are now ready to automatically differentiate our previous example:

# Build the graph f = 5 * (a * b) with a = 2 and b = 3 on a fresh tape.
t = Tape()
a, b = ConstantNode(2, t), ConstantNode(3, t)

o = Multiply(a, b, t)
f = Multiply(ConstantNode(5, t), o, t)

# Forward pass: compute every node's output in creation order.
t.forward()

# Calling backward on the tape triggers the reverse-mode automatic
# differentiation. Some people already call this step backpropagation; I would
# reserve that term for applying autodiff to neural networks and performing a
# gradient update on the weights.
t.backward()

# Gradients of f with respect to o, a and b, one per line.
print(o.gradient, a.gradient, b.gradient, sep="\n")



<__main__.Tape object at 0x110cc1ca0>
5
15
10
