In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
from graphviz import Digraph


def trace(root):
    """Collect every node and edge reachable from `root` through `_prev`.

    Args:
        root: the node to start from; every reachable object must expose a
            `_prev` iterable of the nodes it was computed from.

    Returns:
        A `(nodes, edges)` pair of sets. `nodes` holds every reachable node
        (including `root`); `edges` holds `(child, parent)` tuples, one for
        each operand `child` found in `parent._prev`.
    """
    nodes, edges = set(), set()

    # An explicit stack replaces recursion so that a long chain of operations
    # cannot exceed the interpreter's recursion limit.
    pending = [root]
    while pending:
        v = pending.pop()
        if v in nodes:
            # Already expanded via another path through the graph.
            continue
        nodes.add(v)
        for child in v._prev:
            edges.add((child, v))
            pending.append(child)
    return nodes, edges


def draw_dot(root):
    """Render the computation graph ending at `root` as a left-to-right Graphviz digraph.

    Each value becomes a record-shaped node showing its label, data and grad.
    A value produced by an operation additionally gets a small op node that
    feeds into it, and the operands are wired to that op node.

    Returns:
        The populated `Digraph` (SVG output format).
    """
    graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right

    values, links = trace(root)

    for value in values:
        value_id = str(id(value))
        caption = "{ %s | data %.4f | grad %.4f }" % (
            value.label, value.data, value.grad)
        # one rectangular ('record') node per value in the graph
        graph.node(name=value_id, label=caption, shape='record')
        if value._op:
            # the value came out of an operation: add the op node and
            # point it at the value it produced
            op_id = value_id + value._op
            graph.node(name=op_id, label=value._op)
            graph.edge(op_id, value_id)

    for operand, result in links:
        # operands feed the op node of the value they produced
        graph.edge(str(id(operand)), str(id(result)) + result._op)

    return graph

In [3]:
class Value:
    """A scalar that remembers how it was computed so gradients can flow back through it.

    Attributes:
        data: the wrapped number.
        grad: derivative of the node `backward()` was called on with respect
            to this node; starts at 0.0 and is accumulated with `+=`.
        _prev: set of the Value operands this node was computed from.
        _op: short string naming the producing operation ('' for leaves).
        _backward: zero-argument callable that pushes this node's grad into
            its operands; a no-op for leaves.
        label: optional display name.
    """

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0.0
        # Replaced by the operation that creates this node; leaves keep the no-op.
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f'Value(data={self.data})'

    def __add__(self, other_instance):
        """Return `self + other_instance`; a non-Value operand is wrapped in a Value."""
        other_instance = other_instance if isinstance(
            other_instance, Value) else Value(other_instance)
        out = Value(self.data + other_instance.data,
                    (self, other_instance), '+')

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += out.grad * 1.0
            other_instance.grad += out.grad * 1.0
        out._backward = _backward
        return out

    def __mul__(self, other_instance):
        """Return `self * other_instance`; a non-Value operand is wrapped in a Value."""
        other_instance = other_instance if isinstance(
            other_instance, Value) else Value(other_instance)
        out = Value(self.data * other_instance.data,
                    (self, other_instance), '*')

        def _backward():
            # d(a * b)/da = b and d(a * b)/db = a
            self.grad += out.grad * other_instance.data
            other_instance.grad += out.grad * self.data
        out._backward = _backward
        return out

    def __pow__(self, exponent):
        """Return `self ** exponent` for a constant int/float exponent.

        Raises:
            AssertionError: if `exponent` is not an int or float.
        """
        assert isinstance(exponent, (int, float)
                          ), "Only supporting int/float powers for now."
        out = Value(self.data ** exponent, (self, ), f'**{exponent}')

        def _backward():
            # power rule: d(x**n)/dx = n * x**(n - 1)
            self.grad += out.grad * (exponent * self.data ** (exponent - 1))
        out._backward = _backward
        return out

    def __radd__(self, other_instance):
        # number + Value
        return self + other_instance

    def __rmul__(self, other_instance):
        # number * Value
        return self * other_instance

    def __truediv__(self, other_instance):
        # a / b is expressed as a * b**-1 so it reuses __mul__ and __pow__
        return self * other_instance**-1

    def __rtruediv__(self, other_instance):
        # number / Value; mirrors the other reflected operators
        return self**-1 * other_instance

    def __neg__(self):
        return self * (-1)

    def __sub__(self, other_instance):
        return self + (-other_instance)

    def __rsub__(self, other_instance):
        # number - Value
        return (-self) + other_instance

    def tanh(self):
        """Return tanh of this value as a new Value."""
        # math.tanh saturates to +/-1.0 for large inputs, whereas the explicit
        # (e^2x - 1) / (e^2x + 1) form raises OverflowError once e^2x no
        # longer fits in a float.
        tanh = math.tanh(self.data)
        out = Value(tanh, (self, ), 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)**2
            self.grad += out.grad * (1 - tanh**2)
        out._backward = _backward
        return out

    def exp(self):
        """Return e raised to this value as a new Value."""
        value = self.data
        out = Value(math.exp(value), (self, ), 'exp')

        def _backward():
            # d exp(x)/dx = exp(x), which is out.data
            self.grad += out.grad * out.data
        out._backward = _backward
        return out

    def backward(self):
        """Back-propagate from this node through everything it depends on.

        Sets this node's grad to 1.0 and then runs each node's `_backward`
        in reverse topological order. Gradients are accumulated with `+=`
        and are not reset here.
        """
        topo = []
        visited = {self}

        # Iterative post-order DFS: a node is appended to `topo` only after
        # all of its operands have been appended. Using an explicit stack
        # (instead of recursion) keeps long operation chains from hitting the
        # interpreter's recursion limit.
        stack = [(self, iter(self._prev))]
        while stack:
            node, children = stack[-1]
            for child in children:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # every operand of `node` has been handled
                topo.append(node)
                stack.pop()

        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

In [4]:
class Neuron:
    """A single tanh unit with `nin` weights and one bias, all drawn uniformly from [-1, 1]."""

    def __init__(self, nin):
        # weights are drawn first, then the bias
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1)))
        self.weights = weights
        self.bias = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return tanh(bias + sum of weight * input) for the input sequence `x`."""
        # start from the bias and add one weighted input at a time
        activation = self.bias
        for weight, feature in zip(self.weights, x):
            activation = activation + weight * feature
        return activation.tanh()


# Smoke test: build a 3-input neuron and run one forward pass on a sample input.
a = Neuron(3)
x = [1.0, 2.0, 3.0]
b = a(x)
# The weights come from random.uniform and no seed is set in this notebook,
# so the printed value differs from run to run.
print(b)

Value(data=0.9221436839720673)


In [5]:
class Layer:
    """A group of `nout` neurons that all read the same `nin` inputs."""

    def __init__(self, nin, nout):
        units = []
        for _ in range(nout):
            units.append(Neuron(nin))
        self.neurons = units

    def __call__(self, x):
        """Feed `x` to every neuron and return the list of their outputs."""
        activations = []
        for unit in self.neurons:
            activations.append(unit(x))
        return activations


# Smoke test: a layer of 3 neurons, each taking 2 inputs, evaluated on one sample.
c = Layer(2, 3)
x = [2.0, 3.0]
d = c(x)
# Prints one output per neuron; values vary between runs because the weights are random.
print(d)

[Value(data=0.45191879723585127), Value(data=-0.9985516242974392), Value(data=-0.9916946576048227)]


In [6]:
class MLP:
    """A stack of layers applied one after another.

    Args:
        nin: number of inputs to the first layer.
        nouts: iterable with the number of neurons in each successive layer
            (a list, tuple, or any other iterable of ints).
    """

    def __init__(self, nin, nouts):
        # Unpacking accepts any iterable; `[nin] + nouts` only worked when
        # `nouts` was a list.
        sz = [nin, *nouts]
        # layer i maps sz[i] inputs to sz[i + 1] outputs
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]

    def __call__(self, x):
        """Pass `x` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x


# Smoke test: 3 inputs, then layers of 4, 4 and 1 neurons, evaluated on one sample.
e = MLP(3, [4, 4, 1])
x = [2.0, 3.0, -1.0]
f = e(x)
# Prints the output of the final layer; the value varies between runs because the weights are random.
print(f)

[Value(data=-0.7783268057732532)]


In [7]:
# assemble together

class Neuron:
    """A single tanh unit with `nin` weights and one bias, all drawn uniformly from [-1, 1]."""

    def __init__(self, nin):
        # weights are drawn first, then the bias
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1)))
        self.weights = weights
        self.bias = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return tanh(bias + sum of weight * input) for the input sequence `x`."""
        # start from the bias and add one weighted input at a time
        activation = self.bias
        for weight, feature in zip(self.weights, x):
            activation = activation + weight * feature
        return activation.tanh()

    def parameters(self):
        """Return a new list holding the weights followed by the bias."""
        return [*self.weights, self.bias]


class Layer:
    """A group of `nout` neurons that all read the same `nin` inputs."""

    def __init__(self, nin, nout):
        units = []
        for _ in range(nout):
            units.append(Neuron(nin))
        self.neurons = units

    def __call__(self, x):
        """Feed `x` to every neuron.

        Returns the single output itself when the layer has exactly one
        neuron, otherwise the list of outputs.
        """
        activations = []
        for unit in self.neurons:
            activations.append(unit(x))
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of every neuron, concatenated in neuron order."""
        collected = []
        for unit in self.neurons:
            collected.extend(unit.parameters())
        return collected


class MLP:
    """A stack of layers applied one after another.

    Args:
        nin: number of inputs to the first layer.
        nouts: iterable with the number of neurons in each successive layer
            (a list, tuple, or any other iterable of ints).
    """

    def __init__(self, nin, nouts):
        # Unpacking accepts any iterable; `[nin] + nouts` only worked when
        # `nouts` was a list.
        sz = [nin, *nouts]
        # layer i maps sz[i] inputs to sz[i + 1] outputs
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]

    def __call__(self, x):
        """Pass `x` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of every layer, concatenated in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

In [8]:
# The network trained below: 3 inputs, then layers of 4, 4 and 1 neurons.
# Weights are randomly initialised, so re-running this cell starts from a new model.
n = MLP(3, [4, 4, 1])

In [9]:
# Toy dataset: four samples with three features each ...
xs = [[2.0, 3.0, -1.0],
      [3.0, -1.0, 0.5],
      [0.5, 1.0, 1.0],
      [1.0, 1.0, -1.0]]
# ... and one target per sample, in the same order as `xs`.
ys = [1.0, -1.0, -1.0, 1.0]

In [10]:
# Gradient descent on `n` for 50 steps with a fixed step size of 0.01.
for k in range(50):
    # Forward pass: one prediction per sample, then the summed squared error.
    ypred = [n(x) for x in xs]
    loss = sum((ygt - yout)**2 for ygt, yout in zip(ys, ypred))

    # Value.backward() accumulates into .grad with +=, so the parameter
    # gradients left over from the previous step must be cleared first.
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Step each parameter against its gradient.
    for p in n.parameters():
        p.data += -0.01 * p.grad

    # Every 10th step, show the predictions computed before this step's update.
    if k % 10 == 0:
        for i in ypred:
            print(i)

    # `loss` was also computed before the update above.
    print(k, loss.data)

Value(data=0.45204733652207435)
Value(data=-0.71594076785818)
Value(data=0.4637364080775739)
Value(data=0.48721183132176427)
0 2.78641774704577
1 2.7283892064353763
2 2.675178826005084
3 2.62605295626099
4 2.5802019487470713
5 2.5368144838484428
6 2.4951287984763173
7 2.4544571654781784
8 2.4141894005389513
9 2.373783695966012
Value(data=0.3244831440601312)
Value(data=-0.7863099717777668)
Value(data=0.22467455885916185)
Value(data=0.42472826747430775)
10 2.3327517921804986
11 2.2906430694968574
12 2.2470300111078934
13 2.201496099389689
14 2.153626507932747
15 2.1030017383715607
16 2.0491944362744583
17 1.9917698746002568
18 1.9302909350894926
19 1.8643287878732042
Value(data=0.3156914596066604)
Value(data=-0.829493478552485)
Value(data=0.028090640715615154)
Value(data=0.5109603420293509)
20 1.7934808049064883
21 1.7173974626914164
22 1.6358199970877518
23 1.548630275129282
24 1.4559136650366844
25 1.3580344288197184
26 1.2557205880291158
27 1.1501492068263972
28 1.0430099710468013
29 

In [11]:
# Same update rule as the previous training cell, run for 450 steps on the
# current state of `n` (this cell does not re-create the network).
for k in range(450):
    # Forward pass: one prediction per sample, then the summed squared error.
    ypred = [n(x) for x in xs]
    loss = sum((ygt - yout)**2 for ygt, yout in zip(ys, ypred))

    # Value.backward() accumulates into .grad with +=, so the parameter
    # gradients left over from the previous step must be cleared first.
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Step each parameter against its gradient.
    for p in n.parameters():
        p.data += -0.01 * p.grad

    # Every 10th step, show the predictions computed before this step's update.
    if k % 10 == 0:
        for i in ypred:
            print(i)

    # `loss` was also computed before the update above.
    print(k, loss.data)

Value(data=0.8155079794618378)
Value(data=-0.8935618971977708)
Value(data=-0.752017455248716)
Value(data=0.8444410090021381)
0 0.131060317551987
1 0.12483367034666197
2 0.11917010678585443
3 0.1139981675191809
4 0.10925765139914603
5 0.10489755128750076
6 0.10087441280902697
7 0.09715102201226915
8 0.09369535003501961
9 0.09047969962722417
Value(data=0.852594141676935)
Value(data=-0.9017011800996594)
Value(data=-0.8040390315811201)
Value(data=0.8670031396589483)
10 0.08748001106600156
11 0.0846752946145661
12 0.0820471639917863
13 0.07957945090417692
14 0.07725788497484537
15 0.07506982670323105
16 0.07300404364354596
17 0.07105052197673814
18 0.0692003072042192
19 0.0674453689121137
Value(data=0.8740341009873138)
Value(data=-0.908178480936524)
Value(data=-0.8341849699331424)
Value(data=0.8817406991176522)
20 0.06577848551845311
21 0.06419314568028779
22 0.06268346364713108
23 0.061244106335198584
24 0.059870230289511515
25 0.058557427018138854
26 0.05730167544023339
27 0.0560993003992

In [12]:
# Predictions from the last training step that ran; they were computed before
# that step's parameter update. Compare against the targets in `ys`.
ypred

[Value(data=0.9657048797405967),
 Value(data=-0.965158207410294),
 Value(data=-0.957978615376185),
 Value(data=0.9625905046626972)]