In [29]:
#from NeuralNet.engine import Value
import numpy as np

In [84]:
class Value:

    def __init__(self, data, _children=()):
        self.data = data
        # To build our graph we need to keep track of which Values (obj) create other Values (obj)
        self.children = _children
        # Initalise grad at zero and the backwards function as a lambda: None in the case of just a leaf node
        self.grad = 0.0
        self._backward = lambda: None
         
    
    def __repr__(self) -> str:
        return f"Value(data={self.data})"
    

    ################################
    ### emulating numerical type ###
    ################################
    
    ##  Want Value obj to safify numerical operations like +, -, /, * ##
    ##  In Python a*b+c is a.__mul__(b).__add__(c)

    def __add__(self, other):
        """
        Creates a Value object of the sum with children given by the Values (obj) in the sum. 
        `other` represents the other value in the sum.
        """

        out = Value(self.data + other.data, _children=(self, other))
        # In the expression graph, the chain rule is very simple
        # it is the previous nodes gradient (which represents the chaining of gradients up to that point) w.r.t final output node
        # multiplied by the current "local" (local in the expression graph) gradient
        # e.g. a*b=c, c+d=e, e+f=g then dg/da = dg/de x de/dc x dc/da 
        # dg/de x de/dc is the previous nodes gradient (i.e. its dg/dc)
        # dc/da is the "local" gradient.

        # If we are on an addition node then the local gradient is 1
        # If we are on a multiplication node then the local gradient is the other parameter (dc/da = b)

        def _backward():
            self.grad =  out.grad * 1.0
            other.grad = out.grad * 1.0
        #this is out's backward pass (out represents the addition) e.g c+d=e (if c is self, then d is other and e is out)
        #self and other are initalised with lambda : None because they could be leaf nodes, thus no backwards pass
        out._backward = _backward
        
        return out
    
    def __mul__(self, other):
        """
        Creates a Value object of the product, with children given by the Values (obj) in the product.
        a, b are the children that produce c. (c = a*b)
        """
        out = Value(self.data * other.data, _children=(self, other))

        def _backward():
            self.grad = out.grad * other.data
            other.grad = out.grad * self.data
        out._backward = _backward

        return out 
    
    ############################
    ### Activation Functions ###
    ############################

    ## Can be arbitrarily complicated as long as we know how to differenciate it ##

    def tanh(self):
        """
        Creates a Value obj of tanh evaluted on self
        """
        t = (np.exp(2*self.data) - 1)/(np.exp(2*self.data) + 1)
        out = Value(t, _children=(self,))

        def _backward():
            self.grad = out.grad * (1 - t**2) #NOTE: must manunually initalise out.grad to one if out is output node (its currently set to 0)
        out._backward = _backward

        return out


In [85]:
### numerical diff ###


#inital expression (f(x))
a = Value(1.0)
b = Value(2.0)
c = a*b
d = Value(-1.0)
e = c + d
e1 = e.data

h = 0.01

#shift a parameter by h (f(x+h)) - in this case a
a = Value(1.0 + h)
b = Value(2.0)
c = a*b
d = Value(-1.0)
e = c + d
e2 = e.data

#Derivative of e w.r.t the parameter that is shifted by h
#de/da = (f(x+h) - f(x))/h
print((e2-e1)/h)

2.0000000000000018


In [91]:
#backwards pass step-by-step

a = Value(1.2)
b = Value(2.3) 
c = a*b
d = Value(-0.7)
e = c + d
o = e.tanh()

In [92]:
#working backwards...
o.grad = 1.0 # o is output (root node) so do/do is 1 (currently its initalised with 0) so must manually overwrite
o._backward()
print(e.grad)

0.06291733186271753


In [93]:
e._backward()
print(c.grad)
print(d.grad)
#the same as expected (previous grad * 1.0)

0.06291733186271753
0.06291733186271753


In [94]:
c._backward()
print(a.grad)
print(b.grad)

0.1447098632842503
0.07550079823526103


In [96]:
topo = []
visted = set()

def build_topo(value):
    if value not in visted:
        visted.add(value)
        for child in value.children:
            build_topo(child) #recursively add if to visted (if we havent visted)
            topo.append(value)
build_topo(o)
topo

[Value(data=2.76),
 Value(data=2.76),
 Value(data=2.0599999999999996),
 Value(data=2.0599999999999996),
 Value(data=0.9680303033155948)]