In [8]:
class Value:
    def __init__(self, value, _children=()):
        self.value = value
        self.grad = 0
        self._backward = lambda: None
        self._prev = set(_children)
        
    def __repr__(self):
        return "value: {} ; grad: {}".format(self.value, self.grad)
    
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.value + other.value, (self, other))
        
        def _backward():
            self.grad += out.grad 
            other.grad += out.grad 
        out._backward = _backward
        return out
    
    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.value * other.value, (self, other))
        
        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward
        return out
    
    def __sub__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.value-other.value, (self, other))
        
        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward
        return out
    
    def __pow__(self, p):
        assert isinstance(p, (float, int))
        out = Value(self.value**p, (self,))
        
        def _backward():
            self.grad += (p * out.grad ** (p-1)) * out.grad
        out._backward = _backward
        return out
    
    def __relu__(self):
        out = 0 if self.data < 0 else self.data
        
        def _backward():
            self.grad += out.data > 0 * out.grad
        out._backward = _backward
        return out
    
    def backward(self):
        topo = []
        visited = set()
        def topological_sort(node):
            if node not in visited:
                visited.add(node)
                for c in node._prev:
                    topological_sort(c)
                topo.append(node)
        topological_sort(self)
        print(topo)
        # Apply the chain rule:
        self.grad = 1
        for n in reversed(topo):
            n._backward()
            
    
    

In [24]:
a = Value(5)
b = Value(-1)
c = a - b
d = c**2 * c**3
e = d - 2

def viz():
    print(a)
    print(b)
    print(c)
    print(d)

e.backward()
viz()

[value: -1 ; grad: 0, value: 5 ; grad: 0, value: 6 ; grad: 0, value: 216 ; grad: 0, value: 36 ; grad: 0, value: 7776 ; grad: 0, value: 2 ; grad: 0, value: 7774 ; grad: 0]
value: 5 ; grad: 5
value: -1 ; grad: 5
value: 6 ; grad: 5
value: 7776 ; grad: 1
