In [1]:
import numpy as np
# just performs simple automatic differentiation

In [53]:
class CompNode:
    """Base class for a node of the computation graph.

    A node registers itself on a gradient tape when it is constructed.
    Subclasses override ``forward`` to store their result in ``self.output``
    and ``backward`` to push their local gradient to their input nodes.
    """

    def __init__(self, tape):
        """Create the node and register it on ``tape``.

        Args:
            tape: object with an ``add(node)`` method that records this node.
        """
        # Start the accumulator at zero so add_gradient() is safe even if
        # set_gradient() has not been called on this node yet.
        self.gradient = 0
        # make sure that the gradient tape knows us
        tape.add(self)

    def forward(self):
        """Perform the intended operation and store the result in ``self.output``.

        The base implementation does nothing.
        """

    def backward(self):
        """Perform the local gradient step with respect to the inputs.

        Assumes ``self.gradient`` already holds all contributions from
        outgoing nodes. The base implementation does nothing.
        """

    def set_gradient(self, gradient):
        """Overwrite the accumulated gradient (used to reset it or to seed the output node)."""
        self.gradient = gradient

    def add_gradient(self, gradient):
        """Accumulate a gradient contribution received from a downstream node."""
        self.gradient += gradient
    
class ConstantNode(CompNode):
    """Leaf node that holds a fixed value and has no inputs."""

    def __init__(self, value, tape):
        """Remember ``value`` and register the node on ``tape``."""
        self.value = value
        super().__init__(tape)

    def forward(self):
        """Expose the stored value as this node's output."""
        self.output = self.value

    def backward(self):
        """Do nothing: a leaf has no input nodes to pass a gradient to."""
        return None
    
class Multiply(CompNode):
    """Node that computes the product of the outputs of two input nodes."""

    # The annotations are quoted because Tape is defined further down in this
    # cell; unquoted they would be evaluated here and raise NameError on a
    # fresh kernel.
    def __init__(self, left: "CompNode", right: "CompNode", tape: "Tape"):
        """Store both operands and register the node on ``tape``.

        Args:
            left: node providing the first factor.
            right: node providing the second factor.
            tape: gradient tape this node is recorded on.
        """
        self.left = left
        self.right = right
        # Register on the tape that was passed in, not on a notebook global.
        super().__init__(tape)

    def forward(self):
        """Store ``left.output * right.output`` in ``self.output``."""
        self.output = self.left.output * self.right.output

    # has to know how to locally differentiate multiplication
    def backward(self):
        """Send the local gradients to both operands.

        d(l*r)/dl = r and d(l*r)/dr = l, each scaled by the gradient that
        arrived at this node. If both operands are the same node, it receives
        both contributions through ``add_gradient``.
        """
        self.left.add_gradient(self.right.output * self.gradient)
        self.right.add_gradient(self.left.output * self.gradient)
        
class Tape:
    """Records computation nodes in creation order and replays them.

    ``forward`` runs the nodes in the order they were added; ``backward``
    runs them in the opposite order after resetting all gradients.
    """

    def __init__(self):
        """Create an empty tape."""
        self.computations = []

    def add(self, compNode: "CompNode"):
        """Append ``compNode`` to the end of the tape."""
        self.computations.append(compNode)

    def forward(self):
        """Call ``forward()`` on every recorded node, in insertion order."""
        for compNode in self.computations:
            compNode.forward()

    def backward(self):
        """Run reverse-mode differentiation over the recorded nodes.

        All gradients are reset to zero, the last recorded node is seeded
        with a gradient of one, and ``backward()`` is called on every node
        from last to first. An empty tape is a no-op.
        """
        if not self.computations:
            return

        # first initialize all gradients to zero
        for compNode in self.computations:
            compNode.set_gradient(0)

        # last node gets a default value of one for the gradient
        self.computations[-1].set_gradient(1)

        # Walk the tape backwards without mutating it, so that later calls to
        # forward() or backward() still see the nodes in creation order.
        for compNode in reversed(self.computations):
            compNode.backward()

In [54]:
# build the graph f = 5 * (a * b) with a = 2 and b = 3;
# every node registers itself on the tape t as it is created
t = Tape()
a = ConstantNode(2,t)
b = ConstantNode(3,t)

o = Multiply(a, b, t)
f = Multiply(ConstantNode(5, t), o, t)
# evaluate all recorded nodes in the order they were added to the tape
t.forward()

In [55]:
# value of the forward pass: f = 5 * (2 * 3)
print(f.output)

30


In [57]:
# start reverse-mode automatic differentiation: the tape resets all gradients,
# seeds the last recorded node (f) with a gradient of 1 and calls backward()
# on the nodes from last to first
t.backward()

In [60]:
# gradients of f with respect to f, o, a and b, one per line
print(f.gradient, o.gradient, a.gradient, b.gradient, sep="\n")

1
5
15
10


### A diamond-shaped graph that makes use of the multivariate chain rule

In [71]:
# fresh tape for the diamond-shaped graph
t = Tape()
x = ConstantNode(3,t)
y = ConstantNode(2,t)
z = ConstantNode(1,t)

# y feeds both branches, so its gradient accumulates two contributions
h1 = Multiply(x, y, t)
h2 = Multiply(y, z, t)

h = Multiply(h1, h2, t)
# o = h * h: the same node is used as both operands
o = Multiply(h, h, t)
t.forward()

In [72]:
# backward pass, then the gradients grouped by depth in the graph:
# h, then h1 and h2, then the inputs x, y and z
t.backward()
print(
    h.gradient,
    "--",
    h1.gradient,
    h2.gradient,
    "--",
    x.gradient,
    y.gradient,
    z.gradient,
    sep="\n",
)

24
--
48
144
--
96
288
288


The same diamond-shaped graph again, now with an explicit `Square` operation instead of multiplying `h` by itself.

In [79]:
class Square(CompNode):
    """Node that computes the square of the output of a single input node."""

    def __init__(self, x : CompNode, tape : Tape):
        """Store the input node and register this node on ``tape``.

        Args:
            x: node whose output is squared.
            tape: gradient tape this node is recorded on.
        """
        self.x = x
        # Register on the tape that was passed in, not on a notebook global.
        super().__init__(tape)

    def forward(self):
        """Store ``x.output ** 2`` in ``self.output``."""
        self.output = self.x.output**2

    # has to know how to locally differentiate x^2
    def backward(self):
        """Send the local gradient d(x^2)/dx = 2x, scaled by ``self.gradient``, to the input."""
        self.x.add_gradient( (2*self.x.output) * self.gradient)


In [80]:
# same diamond-shaped graph as in the previous example, built on a fresh tape
t = Tape()
x = ConstantNode(3,t)
y = ConstantNode(2,t)
z = ConstantNode(1,t)

h1 = Multiply(x, y, t)
h2 = Multiply(y, z, t)

h = Multiply(h1, h2, t)
# square h with the dedicated Square node instead of Multiply(h, h, t)
o = Square(h, t)
t.forward()

In [81]:
# backward pass through the Square-based graph, then print the gradients
# of h, of h1 and h2, and of the inputs x, y and z
t.backward()
print(
    h.gradient,
    "--",
    h1.gradient,
    h2.gradient,
    "--",
    x.gradient,
    y.gradient,
    z.gradient,
    sep="\n",
)

24
--
48
144
--
96
288
288
