In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [154]:
from graphviz import Digraph

def trace(root):
    """Collect every node reachable from ``root`` and every (child, parent) edge.

    Walks the ``_prev`` links depth-first starting at ``root``.

    Returns:
        A ``(nodes, edges)`` tuple of sets; each edge is a ``(child, parent)`` pair.
    """
    seen_nodes = set()
    seen_edges = set()

    def visit(node):
        # Each node is expanded only once, no matter how many parents reach it
        if node in seen_nodes:
            return
        seen_nodes.add(node)
        for parent_input in node._prev:
            seen_edges.add((parent_input, node))
            visit(parent_input)

    visit(root)
    return seen_nodes, seen_edges


def draw_dot(root):
    """Render the computation graph ending at ``root`` as a graphviz ``Digraph``.

    Every value becomes a record-shaped node showing its label, data and grad.
    Every value with a non-empty ``_op`` additionally gets a small operation node
    that sits between its inputs and the value itself.
    """
    graph = Digraph(format="svg", graph_attr={"rankdir": "LR"})  # LR = left to right

    nodes, edges = trace(root)
    for node in nodes:
        node_id = str(id(node))
        record_label = "{ %s | data %.4f | grad %.4f}" % (node.label, node.data, node.grad)
        graph.node(name=node_id, label=record_label, shape="record")

        # Leaf values (empty _op) have no operation node
        if not node._op:
            continue

        # Operation node feeding the value it produced
        op_id = node_id + node._op
        graph.node(name=op_id, label=node._op)
        graph.edge(op_id, node_id)

    for src, dst in edges:
        # Inputs point at the operation node of the value they produce
        graph.edge(str(id(src)), str(id(dst)) + dst._op)

    return graph

In [155]:
class Value:
    """A scalar that remembers how it was computed so gradients can be backpropagated.

    Instance fields:
        data:      the numeric value.
        grad:      accumulated derivative of the final output w.r.t. this value
                   (starts at 0.0, filled in by ``backward``).
        _backward: closure that pushes ``out.grad`` into the inputs of the
                   operation that produced this value (no-op for leaves).
        _prev:     set of input ``Value`` objects.
        _op:       short string naming the producing operation ('' for leaves).
        label:     optional display name.
    """

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a+b)/da = d(a+b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward

        return out

    def __radd__(self, other):
        return self + other

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        # other - self, reached when ``other`` is a plain number
        return other + (-self)

    def __neg__(self):
        return self * -1

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a*b)/da = b, d(a*b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        return self * other**-1

    def __rtruediv__(self, other):
        # other / self, reached when ``other`` is a plain number
        return other * self**-1

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float))
        out = Value(self.data**other, (self,), 'pow')

        def _backward():
            # d(x^n)/dx = n * x^(n-1)
            self.grad += other * (self.data ** (other - 1)) * out.grad
        out._backward = _backward

        return out

    def exp(self):
        """Return ``e ** self``."""
        x = self.data
        out = Value(math.exp(x), (self,), 'exp')

        def _backward():
            # d(e^x)/dx = e^x, which is out.data
            self.grad += out.data * out.grad
        out._backward = _backward

        return out

    def tanh(self):
        """Return the hyperbolic tangent of this value."""
        # math.tanh saturates to +/-1 for large |x|, whereas computing it from
        # exp(2x) raises OverflowError once 2x exceeds the float range.
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh')

        def _backward():
            # Accumulate (+=) rather than assign: this value may feed several
            # operations, and each must add its own contribution to the gradient.
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward

        return out

    def backward(self):
        """Backpropagate from this value, filling ``grad`` on every ancestor.

        Gradients are accumulated, so ``grad`` fields must be reset by the
        caller before calling this again on a graph that shares nodes.
        """
        topo = []
        visited = set()

        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                # A node is appended only after all of its inputs
                topo.append(v)
        build_topo(self)

        self.grad = 1.0
        # Reverse topological order: every node's grad is complete before it
        # is propagated further back.
        for node in reversed(topo):
            node._backward()



In [156]:
class Neuron:
    """A single tanh unit with ``nin`` weights and one bias."""

    def __init__(self, nin):
        # Weights are drawn first, then the bias, each uniformly from [-1, 1)
        weights = []
        for _ in range(nin):
            weights.append(Value(np.random.uniform(-1,1)))
        self.w = weights
        self.b = Value(np.random.uniform(-1,1))

    def __call__(self, x):
        """Return tanh(w . x + b) for the input sequence ``x``."""
        # Start from the bias and fold in one weighted input at a time
        total = self.b
        for weight, feature in zip(self.w, x):
            total = total + weight*feature
        return total.tanh()

    def parameters(self):
        """Return a new list holding the weights followed by the bias."""
        return [*self.w, self.b]

class Layer:
    """A group of ``nout`` neurons that all read the same ``nin`` inputs."""

    def __init__(self, nin, nout):
        self.neurons = []
        for _ in range(nout):
            self.neurons.append(Neuron(nin))

    def __call__(self, x):
        """Apply every neuron to ``x``; a one-neuron layer returns the bare output."""
        activations = [neuron(x) for neuron in self.neurons]
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of every neuron as one flat list."""
        flat = []
        for neuron in self.neurons:
            flat.extend(neuron.parameters())
        return flat

class MLP:
    """A multi-layer perceptron: a chain of ``Layer`` objects applied in order.

    Args:
        nin:   number of inputs to the first layer.
        nouts: iterable with the number of neurons in each successive layer
               (a list, tuple or any other iterable of ints).
    """

    def __init__(self, nin, nouts):
        # Unpacking accepts any iterable, not just a list as `[nin] + nouts` required
        sz = [nin, *nouts]
        # Layer i maps sz[i] inputs to sz[i + 1] outputs
        self.layers = [Layer(sz[i], sz[i + 1]) for i in range(len(sz) - 1)]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of every layer as one flat list."""
        return [p for layer in self.layers for p in layer.parameters()]

In [157]:
# Neuron weights and biases are drawn with np.random.uniform, so seed NumPy's
# global RNG before building the network to make a fresh run reproducible.
SEED = 42
np.random.seed(SEED)

x = [2.0, 3.0, -1.0]
# 3 inputs -> two hidden layers of 4 neurons -> 1 output
mlp = MLP(3, [4, 4, 1])
print(mlp(x))

Value(data=-0.5992375648994782)


In [158]:
# Show every weight and bias of the network, then how many there are
params = mlp.parameters()
print(params)
print(len(params))

[Value(data=0.2856164769992924), Value(data=0.829491087038833), Value(data=-0.8241300433776473), Value(data=-0.021863898960120975), Value(data=0.8456491230272654), Value(data=0.8905489381366243), Value(data=-0.634936340321693), Value(data=0.1717848037249019), Value(data=-0.11893291180242715), Value(data=0.1946579279152687), Value(data=0.4806020770799728), Value(data=-0.8842687307698123), Value(data=0.0476534519720595), Value(data=-0.7964900188865289), Value(data=0.2762122376078642), Value(data=0.11684188562997488), Value(data=0.03348605033825924), Value(data=0.6015311566384114), Value(data=-0.7689541451647546), Value(data=0.7677798719612567), Value(data=-0.02459519355915263), Value(data=-0.24141696017834247), Value(data=0.4990749466692386), Value(data=-0.850944892426384), Value(data=-0.03435385503020982), Value(data=0.6717540203662693), Value(data=0.3273581225158382), Value(data=0.3673780245391556), Value(data=0.8634358501246815), Value(data=-0.38470631122072896), Value(data=-0.1096973

In [159]:
# Four input rows of three features each, with one target per row
xs = [[2.0, 3.0, -1.0], [3.0, -1.0, 0.5], [0.5, 2.0, 1.0], [1.0, 1.0, -1.0]]
ys = [1.0, -1.0, -1.0, 1.0]

# Forward pass of the untrained network on every row
ypred = list(map(mlp, xs))
ypred

[Value(data=-0.5992375648994782),
 Value(data=0.058731446879178514),
 Value(data=-0.3294919561196667),
 Value(data=-0.5887996559988128)]

In [160]:
# Sum of squared differences between each prediction and its target
loss = sum(((yout - ygt)**2 for ygt, yout in zip(ys, ypred)), 0)
loss
    

Value(data=6.6523384494066695)

In [161]:
# Backpropagate from the loss to fill in .grad on every Value in its graph.
# Gradients are accumulated with +=, so running this cell twice on the same
# `loss` adds the gradients a second time instead of recomputing them.
loss.backward()

In [162]:
# One manual gradient-descent step: move each parameter against its gradient
for param in mlp.parameters():
    param.data -= 0.01 * param.grad

In [163]:
# Repeat the forward pass with the updated parameters and recompute the loss
ypred = list(map(mlp, xs))
loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
loss

Value(data=6.317441609234911)

In [164]:
def train(val, lr=0.05):
    """Run one gradient-descent step on the global ``mlp`` and print the loss.

    Reads the module-level ``mlp``, ``xs`` and ``ys``. The printed loss is the
    one computed before the parameter update.

    Args:
        val: iteration number, used only in the printed progress line.
        lr:  step size applied to each gradient (defaults to 0.05).
    """
    # forward pass: sum of squared errors over all rows
    ypred = [mlp(x) for x in xs]
    loss = sum([(yout - ygt)**2 for ygt, yout in zip(ys, ypred)])

    # Fetch the parameter list once; both loops below act on the same objects
    params = mlp.parameters()

    # zero gradients before the backward pass, because backward() accumulates
    for p in params:
        p.grad = 0.0

    # backward pass
    loss.backward()

    # update weights using gradients
    for p in params:
        p.data += -lr * p.grad

    print("iter ", val, "loss ", loss.data)


In [165]:
# Run 100 training steps, printing the loss at each one
for step in range(100):
    train(step)

iter  0 loss  6.317441609234911
iter  1 loss  4.696570665979875
iter  2 loss  3.6556744194484994
iter  3 loss  3.177821173125466
iter  4 loss  2.881715967650859
iter  5 loss  2.6347939261569557
iter  6 loss  2.343212629794987
iter  7 loss  1.9000368304972688
iter  8 loss  1.2762820391090306
iter  9 loss  0.7485536297320639
iter  10 loss  0.4706883554430532
iter  11 loss  0.32915375022954496
iter  12 loss  0.2483464233471812
iter  13 loss  0.19734644260675954
iter  14 loss  0.16267830566647562
iter  15 loss  0.13777587629697405
iter  16 loss  0.119120177581792
iter  17 loss  0.10467649284900188
iter  18 loss  0.09319478523423372
iter  19 loss  0.08386844160433204
iter  20 loss  0.0761555157620263
iter  21 loss  0.06967938302124846
iter  22 loss  0.06417071748610534
iter  23 loss  0.05943214557304995
iter  24 loss  0.05531592110088682
iter  25 loss  0.05170937633939627
iter  26 loss  0.04852517746190569
iter  27 loss  0.04569463851113774
iter  28 loss  0.04316303460507102
iter  29 loss  

In [166]:
# Predictions of the trained network on the training rows
ypred = list(map(mlp, xs))
ypred

[Value(data=0.9640100399465161),
 Value(data=-0.958690761496502),
 Value(data=-0.9493516264179659),
 Value(data=0.9495628350373376)]