In [7]:
import math

In [51]:
class Value:

    def __init__(self, data, _childern=(), _op=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None #is a function that doesnt do anything
        self._prev = set(_childern)
        self._op = _op
    
    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, other):
        out = Value(self.data + other.data, (self, other), '+')
        
        def _backward():
            self.grad = 1.0 * out.grad   #basically what you're seeing here is out.grad will be simplying be copied to self's grad and other's grad
            other.grad = 1.0 * out.grad 
        out._backward = _backward
        return out

    
    def __mul__(self, other):
        out = Value(self.data * other.data, (self, other), '*')
        
        def _backward():
            self.grad = other.data * out.grad
            other.grad = self.data * out.grad 
        out._backward = _backward
        return out

    def tanh(self):
        x = self.data
        t = (math.exp(2*x)-1)/(math.exp(2*x)+1)
        out = Value(t, (self,), 'tanh')

        def _backward():
            self.grad =  (1 - t**2) * out.grad
        out._backward = _backward

        return out 

    def backward(self):

        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

In [11]:
from graphviz import Digraph

def trace(root):
  # builds a set of all nodes and edges in a graph
  nodes, edges = set(), set()
  def build(v):
    if v not in nodes:
      nodes.add(v)
      for child in v._prev:
        edges.add((child, v))
        build(child)
  build(root)
  return nodes, edges

def draw_dot(root):
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right
  
  nodes, edges = trace(root)
  for n in nodes:
    uid = str(id(n))
    # for any value in the graph, create a rectangular ('record') node for it
    dot.node(name = uid, label = "{ data %.4f | grad %.4f }" % ( n.data, n.grad), shape='record')
    if n._op:
      # if this value is a result of some operation, create an op node for it
      dot.node(name = uid + n._op, label = n._op)
      # and connect this node to it
      dot.edge(uid + n._op, uid)

  for n1, n2 in edges:
    # connect n1 to the op node of n2
    dot.edge(str(id(n1)), str(id(n2)) + n2._op)

  return dot

In [12]:
a =  Value(2.0)
b =  Value(-3.0)
c = Value(10.0)
e = a*b
d = e+c
f = Value(-2.0)
L = d*f
L

Value(data=-8.0)

In [14]:
#draw_dot(L)

In [27]:
L.grad = 1.0

In [28]:
f.grad = 4.0
d.grad = -2.0

Want - dL/dc? 

dd/dc = d(c+e)/dc = 1
dd/de = 1
dL/dd = 

dL/dc = dL/dd * dd/dc = -2 * 1



In [30]:
c.grad = -2.0
d.grad = -2.0

In [31]:
a.grad = -2.0 * -3.0
b.grad = -2.0 * 2.0

In [32]:
# nudging all the value by 0.01
a.data += 0.01*a.grad
b.data += 0.01*b.grad
c.data += 0.01*c.grad
f.data += 0.01*f.grad

e = a * b
d = e + c
L = d * f

print(L.data)


-7.286496


In [45]:
#inputs x1, x2
x1 = Value(2.0)
x2 = Value(0.0)
#weights w1, w2
w1 = Value(-3.0)
w2 = Value(1.0)
#bias of the neuron
b = Value(6.787875467865) 
#x1w1 + x2w2 + b
x1w1 = x1*w1
x2w2 = x2*w2

x1w1x2w2 = x1w1 + x2w2
n = x1w1x2w2 + b
o = n.tanh()

In [46]:
n,o 

(Value(data=0.7878754678649997), Value(data=0.657203809653529))

In [41]:
o.grad = 1.0
o._backward()

n._backward()
b._backward()
x1w1x2w2._backward()
x2w2._backward()
x1w1._backward()

### Topological graph

In [47]:
o.grad = 1.0

topo = []
visited = set()
def build_topo(v):
    if v not in visited:
        visited.add(v)
        for child in v._prev:
            build_topo(child)
        topo.append(v)
build_topo(o)

for node in reversed(topo):
    node._backward()

In [49]:
x1w1.grad

0.5680831525768881

### Manual calculation of gradient

In [26]:
x1.grad = w1.data * x1w1.grad
w1.grad = x1.data * x1w1.grad

x2.grad = w2.data * x2w2.grad
w2.grad = x2.data * x2w2.grad

x1w1.grad = 0.5
x2w2.grad = 0.5

x1w1x2w2.grad = 0.5
b.grad = 0.5

n.grad = 0.5

o.grad = 1.0

o = tanh(n)<br>
do/dn = 1- o**2 = 0.499999

In [None]:
x