In [1]:
import torch
import random
import math

In [2]:

# Reference computation with PyTorch autograd: one tanh neuron with two inputs.
# torch.Tensor(...) builds a float32 tensor, so every literal is rounded to
# single precision before .double() widens it to float64.

# Inputs
x1 = torch.Tensor([2.0]).double().requires_grad_(True)
x2 = torch.Tensor([0.0]).double().requires_grad_(True)
# Weights
w1 = torch.Tensor([-3.0]).double().requires_grad_(True)
w2 = torch.Tensor([1.0]).double().requires_grad_(True)
# Bias
b = torch.Tensor([6.8813735870195432]).double().requires_grad_(True)

# Forward pass: weighted sum of the inputs plus bias, squashed by tanh.
n = x1 * w1 + x2 * w2 + b
o = torch.tanh(n)
print(o.item())

# Backward pass: populates .grad on every leaf created above.
o.backward()

print("-" * 10)
# One "<name> <gradient>" line per leaf, in the order x2, w2, x1, w1.
print(
    *(
        f"{leaf_name} {leaf.grad.item()}"
        for leaf_name, leaf in (("x2", x2), ("w2", w2), ("x1", x1), ("w1", w1))
    ),
    sep="\n",
)

0.7071066904050358
----------
x2 0.5000001283844369
w2 0.0
x1 -1.5000003851533106
w1 1.0000002567688737


In [3]:
from graphviz import Digraph

def trace(root):
  """Collect every node and edge reachable from `root` through `.prev`.

  Args:
    root: graph node exposing an iterable `prev` attribute of child nodes.

  Returns:
    A `(nodes, edges)` tuple of sets. `nodes` holds every reachable node
    (including `root`); `edges` holds `(child, parent)` pairs.
  """
  nodes, edges = set(), set()
  # Explicit stack instead of recursion so that very deep graphs (for example
  # a long chain of additions) do not hit Python's recursion limit.
  pending = [root]
  while pending:
    v = pending.pop()
    if v in nodes:
      continue
    nodes.add(v)
    for child in v.prev:
      edges.add((child, v))
      pending.append(child)
  return nodes, edges

def draw_dot(root):
  """Build a Graphviz `Digraph` of the expression graph that ends at `root`.

  Every value becomes a record-shaped node showing its label, data and grad.
  A value that carries an `op` additionally gets a small op node that feeds
  into it, and each child of that value is wired to the op node.

  Returns:
    The populated `Digraph` (SVG format, laid out left to right).
  """
  graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right

  values, links = trace(root)
  for value in values:
    value_id = str(id(value))
    # Record node describing the value itself.
    record = "{ %s | data %.4f | grad %.4f }" % (value.label, value.data, value.grad)
    graph.node(name=value_id, label=record, shape='record')
    if not value.op:
      continue
    # The value was produced by an operation: add an op node pointing at it.
    op_id = value_id + value.op
    graph.node(name=op_id, label=value.op)
    graph.edge(op_id, value_id)

  for src, dst in links:
    # Each operand feeds the op node of the value it helped produce.
    graph.edge(str(id(src)), str(id(dst)) + dst.op)

  return graph

In [4]:
class Value:
    """A scalar that remembers how it was computed so gradients can be
    propagated back through the expression graph.

    Arithmetic between two `Value` objects produces a new `Value` whose
    `prev` holds the operands and whose `backward` closure adds the local
    gradient contribution to each operand's `grad`.
    """

    def __init__(self, data, __children=(), op="", label=""):
        # `__children` is only ever passed positionally by the operator
        # methods below; the name is kept for interface compatibility.
        self.data = data                  # the wrapped number
        self.grad = 0.0                   # accumulated d(root)/d(self)
        self.backward = lambda: None      # local backprop step; no-op for leaves
        self.prev = set(__children)       # operands this value was computed from
        self.op = op                      # symbol of the producing operation ("" for leaves)
        self.label = label                # display name used when drawing the graph

    def __repr__(self):
        return f"Value=(data={self.data})"

    @staticmethod
    def _require_value(other):
        """Raise `ValueError` unless `other` is a `Value` (subclasses allowed)."""
        if not isinstance(other, Value):
            raise ValueError(f"Other is not of type 'Value', other is of type '{type(other)}'")

    def __add__(self, other):
        """Return `self + other`; raises `ValueError` if `other` is not a `Value`."""
        Value._require_value(other)

        out = Value(self.data + other.data, (self, other), '+', label="v")

        def backward():
            # Addition passes the upstream gradient through unchanged.
            self.grad += out.grad
            other.grad += out.grad

        out.backward = backward
        return out

    def __sub__(self, other):
        """Return `self - other`; raises `ValueError` if `other` is not a `Value`."""
        Value._require_value(other)

        out = Value(self.data - other.data, (self, other), '-', label="v")

        def backward():
            self.grad += out.grad
            other.grad += -out.grad

        out.backward = backward
        return out

    def __mul__(self, other):
        """Return `self * other`; raises `ValueError` if `other` is not a `Value`."""
        Value._require_value(other)

        out = Value(self.data * other.data, (self, other), '*', label="v")

        def backward():
            # Product rule: each operand is scaled by the other operand's data.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out.backward = backward
        return out

    def __pow__(self, other):
        """Return `self ** other`; raises `ValueError` if `other` is not a `Value`."""
        Value._require_value(other)

        out = Value(self.data ** other.data, (self, other), "**", label="v")

        def backward():
            # d(a**b)/da = b * a**(b-1)
            self.grad += (other.data * self.data ** (other.data-1)) * out.grad
            # d(a**b)/db = a**b * ln(a). Only real-valued for a positive base,
            # so the exponent receives no gradient otherwise.
            if self.data > 0:
                other.grad += (out.data * math.log(self.data)) * out.grad

        out.backward = backward
        return out

    def tanh(self):
        """Return `tanh(self)` as a new `Value` labelled "o"."""
        # math.tanh saturates cleanly to +/-1; the explicit
        # (e^{2x} - 1) / (e^{2x} + 1) form overflows for large x.
        t = math.tanh(self.data)
        out = Value(t, (self,), "tanh", label="o")

        def backward():
            # d tanh(x)/dx = 1 - tanh(x)^2
            self.grad += (1 - t**2) * out.grad

        out.backward = backward
        return out

    def backwards(self):
        """Back-propagate from this value through every value it depends on.

        Sets `self.grad` to 1.0, then calls each node's `backward` closure in
        reverse topological order so a node's gradient is complete before it
        is pushed on to its operands. Gradients are accumulated with `+=`, so
        stale `grad` values on reused nodes must be reset by the caller.
        """
        self.grad = 1.0
        topo = []
        visited = {self}
        # Iterative post-order DFS (operands are appended before the node that
        # uses them). An explicit stack keeps long chains from overflowing
        # Python's recursion limit.
        stack = [(self, iter(self.prev))]
        while stack:
            node, operands = stack[-1]
            for child in operands:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child.prev)))
                    break
            else:
                # Every operand of `node` has been emitted already.
                topo.append(node)
                stack.pop()

        for node in reversed(topo):
            node.backward()

class Neuron:
    """A single tanh unit: `tanh(b + sum_i w_i * x_i)`."""

    def __init__(self, nin):
        """Create `nin` weights followed by one bias, each drawn uniformly from [-1, 1]."""
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1), label="w"))
        self.w = weights
        self.b = Value(random.uniform(-1, 1), label="b")

    def __call__(self, x):
        """Return the neuron's activation for the input sequence `x`."""
        # Start from the bias and fold in one weighted input at a time.
        activation = self.b
        for weight, feature in zip(self.w, x):
            activation = activation + weight * feature
        return activation.tanh()

    def parameters(self):
        """Return a new list holding the weights followed by the bias."""
        return [*self.w, self.b]

class Layer:
    """A group of `nout` neurons that all read the same `nin` inputs."""

    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        """Evaluate every neuron on `x`.

        Returns the list of activations, or the bare activation when the
        layer contains exactly one neuron.
        """
        activations = [neuron(x) for neuron in self.neurons]
        return activations[0] if len(activations) == 1 else activations

    def parameters(self):
        """Return the parameters of all neurons, flattened in neuron order."""
        return [param for neuron in self.neurons for param in neuron.parameters()]

class MLP:
    """A multi-layer perceptron: a stack of `Layer`s applied in sequence."""

    def __init__(self, nin, nouts):
        """Build the layer stack.

        Args:
            nin: number of inputs to the first layer.
            nouts: sequence with the number of neurons in each layer; any
                sequence (list, tuple, ...) is accepted.
        """
        # list(...) so that tuples and other sequences can be concatenated.
        sz = [nin] + list(nouts)
        self.layers = [Layer(sz[i], sz[i + 1]) for i in range(len(sz) - 1)]

    def __call__(self, x):
        """Feed `x` through every layer and return the last layer's output."""
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # A single-neuron layer returns a bare value rather than a list.
            # Re-wrap it so the following layer can still iterate over its
            # inputs; the final output is returned exactly as produced.
            if i != last and not isinstance(x, list):
                x = [x]

        return x

    def parameters(self):
        """Return the parameters of all layers, flattened in layer order."""
        return [param for layer in self.layers for param in layer.parameters()]


In [5]:
# Data Init
# Four training examples with three features each; every feature is wrapped
# in a Value so it can take part in the expression graph.
xs = [
    [Value(2.0, label="i1"), Value(3.0, label="i2"), Value(-1.0, label="i3")],
    [Value(3.0, label="i1"), Value(-1.0, label="i2"), Value(0.5, label="i3")],
    [Value(0.5, label="i1"), Value(1.0, label="i2"), Value(1.0, label="i3")],
    [Value(1.0, label="i1"), Value(1.0, label="i2"), Value(-1.0, label="i3")]
]
# One target per example, in the same order as xs.
ys = [Value(1.0, label="o1"), Value(-1.0, label="o2"), Value(-1.0, label="o3"), Value(1.0, label="o4")]

# Network Init
# Seed the RNG so the randomly initialised weights (and therefore the whole
# training run) are the same on every Restart & Run All.
SEED = 1337
random.seed(SEED)
# 3 inputs -> two hidden layers of 4 neurons -> 1 output.
nn = MLP(3, [4, 4, 1])

In [6]:
LEARNING_RATE = 0.01
N_STEPS = 100

for k in range(N_STEPS):
    # Forward Pass: sum of squared errors over all examples.
    ypreds = [nn(x) for x in xs]
    loss = sum(((y - ypred)**Value(2.0) for y, ypred in zip(ys, ypreds)), Value(0.0))

    # Gradients accumulate with +=, so reset every Value that outlives a
    # single step: the network parameters, and also the input/target Values,
    # which are reused by every forward pass.
    params = nn.parameters()
    for p in params:
        p.grad = 0.0
    for row in xs:
        for feature in row:
            feature.grad = 0.0
    for target in ys:
        target.grad = 0.0

    # Backward Pass
    loss.backwards()

    # Update: plain gradient descent step.
    for p in params:
        p.data -= LEARNING_RATE * p.grad

    # Report the loss measured before this step's update.
    print(k, loss.data)

0 9.065074237785792
1 8.16639801236619
2 6.900970488365216
3 5.392139518206354
4 4.181512143008223
5 3.6308872283459204
6 3.3409548364965853
7 3.104669739096446
8 2.8758890517072637
9 2.645003256085407
10 2.4132804696285763
11 2.1866727951502254
12 1.9726542185166076
13 1.777504494657832
14 1.6045235773090425
15 1.4538087458964426
16 1.3232535033468613
17 1.2098415239180142
18 1.1105748184498725
19 1.0229049809011905
20 0.9448232958560516
21 0.8747936013112828
22 0.8116432088690656
23 0.7544627370290532
24 0.7025293073565473
25 0.6552523241366797
26 0.6121366822791764
27 0.5727581857674094
28 0.5367471231154791
29 0.5037771656870509
30 0.47355770362381866
31 0.4458283949117831
32 0.4203551411087467
33 0.396926987654709
34 0.37535363013019135
35 0.35546332594860897
36 0.3371010867978327
37 0.32012707542006746
38 0.3044151605620831
39 0.2898516024018045
40 0.2763338516396962
41 0.2637694515359888
42 0.25207503534582437
43 0.2411754130785545
44 0.23100274206602706
45 0.22149577594709494
4