In [251]:
import math
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline

In [252]:
class Value:
  """A scalar that remembers how it was computed so gradients can be back-propagated.

  Every arithmetic operation returns a new ``Value`` whose ``_prev`` holds the
  operands and whose ``_backward`` closure adds this node's contribution to
  the operands' ``grad``. Calling ``backward()`` on a result walks the graph
  in reverse topological order and runs those closures.

  Attributes are documented inline in ``__init__``.
  """

  def __init__(self, data, _children=(), _op='', label=''):
    """Create a node.

    Args:
      data: the numeric payload of this node.
      _children: the operand nodes this value was computed from.
      _op: short name of the producing operation ('' for leaves).
      label: optional display name (defaults to '').
    """
    self.data = data
    # Gradient accumulator; starts at zero and is added to by _backward closures.
    self.grad = 0
    # Leaves keep this no-op; operations overwrite it with their own closure.
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    # draw_dot reads `label` from every node, so it must always exist.
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"

  def __add__(self, other):
    """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')
    def _backward():
      # d(a+b)/da = d(a+b)/db = 1
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    return out

  def __mul__(self, other):
    """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')
    def _backward():
      # d(a*b)/da = b, d(a*b)/db = a
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
    return out

  def __rmul__(self, other):
    """Handle ``number * Value`` by delegating to ``__mul__``."""
    return self*other

  def exp(self):
    """Return ``e ** self`` as a new node."""
    x = self.data
    out = Value(math.exp(x), (self, ), "exp")
    def _backward():
      # d(e^x)/dx = e^x, which is out.data. This must accumulate (+=) like
      # every other op: plain assignment would discard gradient that reaches
      # `self` through any other path in the graph.
      self.grad += out.data * out.grad

    out._backward = _backward
    return out

  def __pow__(self, other):
    """Return ``self ** other`` for a constant int/float exponent.

    Raises:
      AssertionError: if ``other`` is not an int or float.
    """
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    x = self.data
    out = Value(x**other, (self,), f'**{other}')

    def _backward():
      # power rule: d(x^n)/dx = n * x^(n-1)
      self.grad += other*x**(other-1) * out.grad
    out._backward = _backward

    return out

  def relu(self):
    """Return a node holding 0 when the data is negative, else the data."""
    x = self.data
    out = Value(0 if x<0 else x, (self, ), 'ReLU')
    def _backward():
      # gradient flows through only where the output is strictly positive
      self.grad += (out.data>0) * out.grad
    out._backward = _backward
    return out

  def backward(self):
    """Back-propagate from this node through every node it depends on.

    Sets this node's ``grad`` to 1 and then runs each node's ``_backward``
    in reverse topological order. Gradients are accumulated, not reset.
    """
    topo = []
    visited = set()
    def build_topo(v):
      # post-order DFS: a node is appended only after all of its operands
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)
    build_topo(self)

    self.grad = 1
    for node in reversed(topo):
      node._backward()

  def __neg__(self):
    """Return ``-self`` (implemented as multiplication by -1)."""
    return self*-1

  def __sub__(self, other):
    """Return ``self - other`` (implemented as ``self + (-other)``)."""
    return self + (-other)

  def __rsub__(self, other):
    """Handle ``number - Value``."""
    return other + (-self)

  def __radd__(self, other):
    """Handle ``number + Value``; this also lets ``sum()`` start from 0."""
    return self + other

  def __truediv__(self, other):
    """Return ``self / other`` (implemented as ``self * other**-1``)."""
    return self * other**-1

  def __rtruediv__(self, other):
    """Handle ``number / Value``."""
    return other * self**-1

In [253]:
from graphviz import Digraph

def trace(root):
  """Collect every node reachable from `root` via `_prev`, plus the edges between them.

  Returns a `(nodes, edges)` pair of sets; each edge is an `(operand, result)` tuple.
  """
  seen, links = set(), set()
  pending = [root]
  # explicit-stack walk; a node's operands are expanded only on first visit
  while pending:
    current = pending.pop()
    if current in seen:
      continue
    seen.add(current)
    for operand in current._prev:
      links.add((operand, current))
      pending.append(operand)
  return seen, links

def draw_dot(root):
  """Render the expression graph ending at `root` as a left-to-right graphviz Digraph.

  Each value becomes a record-shaped node showing its label, data and grad.
  Each value produced by an operation also gets a small op node feeding it.

  Args:
    root: the final node of the graph; it and everything reachable through
      `_prev` is drawn.

  Returns:
    The populated `Digraph` (SVG format).
  """
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'})
  nodes, edges = trace(root)
  for n in nodes:
    uid = str(id(n))
    # Nodes are not guaranteed to carry a `label` attribute; fall back to an
    # empty name instead of raising AttributeError.
    label = getattr(n, 'label', '')
    # for any value in the graph, create a rectangular ('record') node for it
    dot.node(name = uid, label = "{ %s | data %.4f | grad %.4f}" % (label, n.data, n.grad), shape = 'record')
    if n._op:
      # if this value is a result of some operation, create an op node for it
      dot.node(name = uid+n._op, label = n._op)
      # and connect this node to it
      dot.edge(uid + n._op, uid)
  for n1, n2 in edges:
    # connect n1 to the op node of n2
    dot.edge(str(id(n1)), str(id(n2))+n2._op)

  return dot

In [340]:
class Module:
  """Base class for objects that own trainable parameters."""

  def parameters(self):
    """Return the list of parameters; the base class owns none."""
    return list()

  def zero_grad(self):
    """Set `grad` to 0.0 on every parameter returned by `parameters()`."""
    for param in self.parameters():
      param.grad = 0.0

class Neuron(Module):
  """A single unit: weighted sum of the inputs plus a bias, optionally passed through ReLU."""

  def __init__(self, nin, nonlin = True):
    """Create a neuron.

    Args:
      nin: number of inputs (one weight is created per input).
      nonlin: when True the output goes through ReLU; when False the raw
        weighted sum is returned.
    """
    # weights are drawn uniformly from [-1, 1]; the bias starts at 0
    self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
    self.b = Value(0)
    self.nonlin = nonlin

  def __call__(self, x):
    """Return the neuron's output for the input sequence `x`.

    Inputs are paired with weights via `zip`, so pairing stops at the shorter
    of the two sequences.
    """
    # start the sum from the bias so the result is w.x + b
    act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
    # only build the ReLU node when it is actually the output; a linear
    # neuron previously created it and threw it away on every call
    return act.relu() if self.nonlin else act

  def parameters(self):
    """Return the weights followed by the bias."""
    return self.w + [self.b]

class Layer(Module):
  """A group of `nout` neurons that all receive the same `nin` inputs."""

  def __init__(self, nin, nout, **kwargs):
    """Build `nout` neurons; extra keyword arguments are forwarded to each `Neuron`."""
    self.neurons = []
    for _ in range(nout):
      self.neurons.append(Neuron(nin, **kwargs))

  def __call__(self, x):
    """Evaluate every neuron on `x`; a one-neuron layer returns the bare output, not a list."""
    outputs = list(map(lambda neuron: neuron(x), self.neurons))
    if len(outputs) == 1:
      return outputs[0]
    return outputs

  def parameters(self):
    """Return the parameters of all neurons, in neuron order."""
    collected = []
    for neuron in self.neurons:
      collected.extend(neuron.parameters())
    return collected

class MLP(Module):
  """A stack of layers applied one after another; the last layer is linear."""

  def __init__(self, nin, nouts):
    """Create the network.

    Args:
      nin: number of inputs to the first layer.
      nouts: sequence of layer widths, one entry per layer.
    """
    # list(...) lets callers pass a tuple as well as a list
    sz = [nin] + list(nouts)
    # every layer is non-linear except the final one
    self.layers = [Layer(sz[i], sz[i+1], nonlin = i!=len(sz)-2) for i in range(len(sz)-1)]

  def __call__(self, x):
    """Feed `x` through every layer in order and return the last layer's output."""
    for i, layer in enumerate(self.layers):
      # A one-neuron layer returns a bare value rather than a list. Re-wrap
      # it so the next layer can iterate over its inputs; without this a
      # width-1 hidden layer makes the following layer raise TypeError.
      if i > 0 and not isinstance(x, (list, tuple)):
        x = [x]
      x = layer(x)
    return x

  def parameters(self):
    """Return the parameters of all layers, in layer order."""
    return [p for layer in self.layers for p in layer.parameters()]

In [345]:
# example training run: fit a 3-input MLP to four hand-written samples
n = MLP(3, [4,4,1])
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0]

for k in range(30):
  # forward pass: one prediction per sample, then the summed squared error
  ypred = [n(sample) for sample in xs]
  squared_errors = [(pred - target)**2 for target, pred in zip(ys, ypred)]
  loss = sum(squared_errors)

  # backward pass: clear old gradients before accumulating new ones
  n.zero_grad()
  loss.backward()

  # plain gradient-descent step with a fixed step size of 0.05
  for p in n.parameters():
    p.data = p.data - .05 * p.grad

  print(k, loss.data)

0 4.121383228041115
1 3.9345867921035316
2 3.7006667531307054
3 3.411101353553709
4 3.0638941713657903
5 2.6957672597363707
6 2.590484969311572
7 2.6730489436811236
8 2.043915194686715
9 1.7052255270251522
10 1.4697956325917172
11 1.2692024721412325
12 1.091033535959315
13 0.9300925482856157
14 0.7842536840832706
15 0.6530085579587167
16 0.536482814843884
17 0.4348168261997631
18 0.34782238464563353
19 0.2748496602836407
20 0.2148067865533707
21 0.16627464858243945
22 0.12766175723424397
23 0.09735415014652012
24 0.0738320037991575
25 0.05574256064737223
26 0.04193262956462088
27 0.0314509679143757
28 0.02353217990035935
29 0.017571766681377735
