In [2]:
import torch 
import numpy as np 

In [7]:
class Value:
  
  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"
  
  def __add__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')
    
    def _backward():
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    
    return out

  def __mul__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')
    
    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
      
    return out
  
  def __pow__(self, other):
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
        self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out
  
  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    x = self.data
    t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
    out = Value(t, (self, ), 'tanh')
    
    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    
    return out
  
  def exp(self):
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')
    
    def _backward():
      self.grad += out.data * out.grad # NOTE: in the video I incorrectly used = instead of +=. Fixed here.
    out._backward = _backward
    
    return out
  
  def backward(self):
    
    topo = []
    visited = set()
    def build_topo(v):
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)
    build_topo(self)
    
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [3]:
import random
import math

In [4]:
len([4,4,1])

3

In [237]:
from graphviz import Digraph

def trace(root):
  # builds a set of all nodes and edges in a graph
  nodes, edges = set(), set()
  def build(v):
    if v not in nodes:
      nodes.add(v)
      for child in v._prev:
        edges.add((child, v))
        build(child)
  build(root)
  return nodes, edges

def draw_dot(root):
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right
  
  nodes, edges = trace(root)
  for n in nodes:
    uid = str(id(n))
    # for any value in the graph, create a rectangular ('record') node for it
    dot.node(name = uid, label = "{ %s | data %.4f | grad %.4f }" % (n.label, n.data, n.grad), shape='record')
    if n._op:
      # if this value is a result of some operation, create an op node for it
      dot.node(name = uid + n._op, label = n._op)
      # and connect this node to it
      dot.edge(uid + n._op, uid)

  for n1, n2 in edges:
    # connect n1 to the op node of n2
    dot.edge(str(id(n1)), str(id(n2)) + n2._op)

  return dot

In [60]:
from typing import Any


class Neuron:

    def __init__(self,nin):
        self.w= [Value(random.uniform(-1,1)) for _ in range(nin)]
        self.b=Value(random.uniform(-1,1))
        
    def __call__(self, x):
        # w*x + b 
        act=sum((wi*xi for wi, xi in zip(self.w,x)),self.b) 
        out=act.tanh()
        return out

    def parameters(self):
        return self.w + [self.b]


class Layer:
    def __init__(self,nin,nout):
        self.neurons=[Neuron(nin) for i in range(nout)]
    
    def __call__(self,x):
        outs=[n(x) for n in self.neurons]
        return outs[0] if len(outs) == 1 else outs
    
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
    
class MLP: 
    def __init__(self,nin,nouts):
        sz=[nin]+nouts
        self.layers=[Layer(sz[i],sz[i+1])for i in range(len(nouts))]
        print(self.layers)
    
    def __call__(self,x):
        for layer in self.layers:
            x=layer(x)
        return x
    
    def parameters(self):
        return[p for layer in self.layers for p in layer.parameters()]

In [61]:
x=[2.0,3.0,-1.0]
n=MLP(3,[4,4,2])
n(x)

[<__main__.Layer object at 0x000002425A3D7B50>, <__main__.Layer object at 0x000002425A3D7DC0>, <__main__.Layer object at 0x000002425A3D7340>]


[Value(data=-0.17607025610798982), Value(data=0.33781677368697677)]

In [57]:
nin=3
nouts=[4,4,2]

In [59]:
sz=[nin]+nouts
sz

[3, 4, 4, 2]

In [241]:
xs=[
    [2.0,3.0,-1.0],
    [3.0,-1.0,-0.5],
    [0.5,1.0,1.0],
    [1.0,1.0,-1.0],
]

ys= [1.0,-1.0,-1.0,1.0] #desired targets

In [242]:
ypred=[n(x) for x in xs]
ypred

[Value(data=-0.6291213693049871),
 Value(data=-0.687817240907032),
 Value(data=-0.5328086479467509),
 Value(data=-0.5728024678584366)]

In [243]:
loss=sum([(yout - ygt )**2 for ygt, yout in zip(ys,ypred)])
loss

Value(data=5.443469873335985)

In [244]:
loss.backward()

In [245]:
n.layers[0].neurons[0].w[0].grad

0.012044728327797585

In [246]:
n.layers[0].neurons[0].w[0].data

0.7594731338499234

In [247]:
lr=0.05
for p in n.parameters(): 
    p.data += p.grad * -0.01

In [270]:
for k in range(10000):

    #forward pass 
    ypred=[n(x) for x in xs]
    loss=sum([(yout - ygt )**2 for ygt, yout in zip(ys,ypred)])

    #backward pass 
    for p in n.parameters(): 
        p.grad=00
    loss.backward()

    #update
    for p in n.parameters(): 

        p.data -= p.grad * lr

    print(k,loss.data)

0 0.00025042116847180803
1 0.0002502973580054023
2 0.00025017366571666226
3 0.00025005009143872197
4 0.00024992663500503996
5 0.0002498032962493721
6 0.00024968007500579775
7 0.0002495569711087098
8 0.0002494339843927931
9 0.0002493111146930488
10 0.0002491883618447993
11 0.0002490657256836431
12 0.0002489432060455174
13 0.0002488208027666383
14 0.00024869851568354854
15 0.0002485763446330748
16 0.00024845428945236453
17 0.0002483323499788511
18 0.0002482105260502845
19 0.0002480888175047083
20 0.000247967224180466
21 0.00024784574591620246
22 0.00024772438255085894
23 0.0002476031339236827
24 0.0002474819998742081
25 0.00024736098024227826
26 0.00024724007486801687
27 0.0002471192835918574
28 0.0002469986062545253
29 0.00024687804269703077
30 0.000246757592760702
31 0.00024663725628712343
32 0.00024651703311819793
33 0.00024639692309611513
34 0.00024627692606335596
35 0.0002461570418626792
36 0.0002460372703371512
37 0.00024591761133012385
38 0.0002457980646852172
39 0.000245678630246