In [70]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [71]:
class Value:
  """A scalar that records the operations applied to it so gradients can be
  back-propagated through the resulting expression graph.

  Attributes:
    data: the wrapped numeric value.
    grad: derivative of the node `backward()` was last called on with respect
      to this node; 0.0 until a backward pass reaches it.
    label: optional human-readable name, used by `printgrads`.
    _prev: set of the operand `Value`s this node was computed from.
    _op: short string naming the operation that produced this node.
    _backward: closure that pushes this node's `grad` into its operands.
  """

  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None  # leaves have nothing to propagate
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"

  def __add__(self, other):
    """Return `self + other`; a plain number is wrapped in a `Value` first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # d(a+b)/da = d(a+b)/db = 1; accumulate so nodes used more than once
      # sum the contributions from every use.
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward

    return out

  def __mul__(self, other):
    """Return `self * other`; a plain number is wrapped in a `Value` first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # Product rule: each operand's local derivative is the other operand.
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward

    return out

  def __pow__(self, other):
    """Return `self ** other` for a constant int/float exponent.

    Raises:
      AssertionError: if `other` is not an int or float.
    """
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
      # Power rule: d(x**k)/dx = k * x**(k-1).
      self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out

  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __rtruediv__(self, other): # other / self
    # Mirrors __truediv__ so a plain number can be the numerator.
    return self**-1 * other

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __rsub__(self, other): # other - self
    return -self + other

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    """Return a new `Value` holding tanh of this value."""
    # math.tanh saturates to +/-1.0 for large |x|, whereas evaluating
    # (e^(2x) - 1) / (e^(2x) + 1) directly overflows in math.exp.
    t = math.tanh(self.data)
    out = Value(t, (self, ), 'tanh')

    def _backward():
      # d tanh(x)/dx = 1 - tanh(x)**2
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward

    return out

  def exp(self):
    """Return a new `Value` holding e raised to this value."""
    out = Value(math.exp(self.data), (self, ), 'exp')

    def _backward():
      # d e^x/dx = e^x, which is out.data; += (not =) so repeated uses add up.
      self.grad += out.data * out.grad
    out._backward = _backward

    return out

  def printgrads(self):
    """Print label and grad of this node's operands, then of their operands,
    and so on, one list per level.

    A node reachable through several paths can be printed more than once.
    """
    level = set(self._prev)
    while level:
      print([f"{v.label} : {v.grad} " for v in level])
      next_level = set()
      for v in level:
        next_level.update(v._prev)
      level = next_level

  def _topo_order(self):
    """Return every node reachable from `self`, operands before results.

    Depth-first post-order with an explicit stack, so the depth of the
    expression graph is not limited by the interpreter's recursion limit.
    """
    order = []
    visited = {self}
    stack = [(self, iter(self._prev))]
    while stack:
      node, pending = stack[-1]
      for child in pending:
        if child not in visited:
          visited.add(child)
          stack.append((child, iter(child._prev)))
          break  # descend into this child; resume `pending` afterwards
      else:
        # All operands of `node` have been emitted already.
        order.append(node)
        stack.pop()
    return order

  def backward(self):
    """Compute d(self)/d(node) for every node in the graph rooted at `self`.

    Gradients in that graph are reset first, so repeated calls do not
    accumulate across calls.
    """
    self.clean()
    topo = self._topo_order()

    self.grad = 1.0
    # Visit results before operands so each node's grad is complete before
    # it is propagated further.
    for node in reversed(topo):
      node._backward()

  def clean(self):
    """Reset `grad` to 0.0 on `self` and every node it was computed from."""
    for node in self._topo_order():
      node.grad = 0.0

In [72]:
# Two-input neuron, o = tanh(x1*w1 + x2*w2 + b), built one node at a time so
# that every intermediate result gets its own label.

# Inputs
x1 = Value(2.0, label='x1')
x2 = Value(-3.0, label='x2')

# Weights
w1 = Value(-4.0, label='w1')
w2 = Value(5.0, label='w2')

# Bias
b = Value(4.5, label='b')

# Weighted inputs
x1w1 = x1 * w1
x1w1.label = 'x1w1'
x2w2 = x2 * w2
x2w2.label = 'x2w2'

# Sum of the weighted inputs
x1w1x2w2 = x1w1 + x2w2
x1w1x2w2.label = 'x1w1x2w2'

# Pre-activation
n = x1w1x2w2 + b
n.label = 'n'

# Activation
o = n.tanh()
o.label = 'o'

In [73]:
# Compare two ways of differentiating tanh at the neuron's pre-activation
# value n.data (n comes from the previous cell).

# Route 1: the tanh() method, whose backward step applies its own
# derivative formula in one go.
q = Value(n.data, label='q')
r = q.tanh()

# Route 2: the same function spelled out with primitive operations,
# (e^(2p) - 1) / (e^(2p) + 1), so the gradient is assembled by the chain rule.
p = Value(n.data, label='p')
x = p * 2
x.label = 'x'
d = (x.exp() - 1) * (x.exp() + 1)**-1
d.label = 'd'

r.backward()
# r.printgrads()
print(f"using formula q.grad={q.grad}")

print(f"d = {d}")
d.backward()
print(f"using chainrule p.grad={p.grad}")

2.220446049250313e-16
d = Value(data=-0.9999999999999999)
using chainrule p.grad=3.413219050297626e-16
