In [407]:
import math

class Value():
    """A scalar that remembers how it was computed so gradients can flow back.

    Every arithmetic operation or activation returns a new ``Value`` whose
    ``_prev`` holds the operands and whose ``_backward`` closure applies the
    chain rule for that single operation. Calling ``backward()`` on a result
    fills in ``grad`` on every node that contributed to it.

    Attributes:
        data: the wrapped number.
        grad: accumulated derivative of the node ``backward()`` was called on
            with respect to this node; starts at 0.
    """

    def __init__(self, data, _children=()):
        self.data = data
        self.grad = 0
        # Leaves have nothing to propagate; operations overwrite this closure.
        self._backward = lambda: None
        # Operands this node was computed from (empty for leaves).
        self._prev = set(_children)

    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, target):
        """Return ``self + target``; plain numbers are wrapped in a ``Value``."""
        target = target if isinstance(target, Value) else Value(target)
        out = Value(self.data + target.data, (self, target))

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += out.grad
            target.grad += out.grad
        out._backward = _backward
        return out

    def __radd__(self, target):
        """Reflected addition, e.g. ``1 + value`` (also what ``sum()`` starts with)."""
        return self + target

    def __neg__(self):
        return self * -1

    def __sub__(self, target):
        return self + (-target)

    def __rsub__(self, target):
        return target + (-self)

    def __mul__(self, target):
        """Return ``self * target``; plain numbers are wrapped in a ``Value``."""
        target = target if isinstance(target, Value) else Value(target)
        out = Value(self.data * target.data, (self, target))

        def _backward():
            # d(a * b)/da = b and d(a * b)/db = a
            self.grad += target.data * out.grad
            target.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __rmul__(self, target):
        """Reflected multiplication, e.g. ``2 * value``."""
        return self * target

    def __pow__(self, other):
        """Return ``self ** other`` for a constant (non-``Value``) exponent.

        Raises:
            TypeError: if ``other`` is a ``Value``; no gradient is defined here
                with respect to the exponent.
        """
        if isinstance(other, Value):
            raise TypeError("Value.__pow__ only supports constant (non-Value) exponents")
        out = Value(self.data**other, (self, ))

        def _backward():
            # d(x**0)/dx is 0 everywhere; skipping it also avoids evaluating
            # 0 ** -1 (ZeroDivisionError) when the base itself is 0.
            if other != 0:
                self.grad += other*(self.data**(other-1)) * out.grad
        out._backward = _backward
        return out

    def __truediv__(self, other):
        return self * other**-1

    def __rtruediv__(self, other):
        return other * self**-1

    def exp(self):
        """Return ``e ** self``."""
        out = Value(math.exp(self.data), (self, ))

        def _backward():
            # d(e**x)/dx = e**x, which is exactly out.data
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def sigmoid(self):
        """Return the logistic sigmoid ``1 / (1 + e**-self)``."""
        x = self.data
        # Only ever exponentiate a non-positive number so math.exp cannot
        # overflow for large |x|; both branches are algebraically identical.
        if x >= 0:
            s = 1.0 / (1.0 + math.exp(-x))
        else:
            e = math.exp(x)
            s = e / (1.0 + e)
        out = Value(s, (self, ))

        def _backward():
            # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
            self.grad += out.data * (1-out.data) * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        """Return the hyperbolic tangent of ``self``."""
        # math.tanh saturates at +/-1 instead of overflowing the way
        # (e**2x - 1) / (e**2x + 1) does once 2x exceeds ~709.
        out = Value(math.tanh(self.data), (self, ))

        def _backward():
            # tanh'(x) = 1 - tanh(x)**2
            self.grad += (1 - out.data**2) * out.grad
        out._backward = _backward
        return out

    def relu(self):
        """Return ``max(0, self)``."""
        out = Value(max(0, self.data), (self, ))

        def _backward():
            # Gradient passes through only where the input was positive.
            self.grad += (self.data > 0) * out.grad
        out._backward = _backward
        return out

    def backward(self):
        """Back-propagate from this node, accumulating ``grad`` on every ancestor.

        Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` in
        reverse topological order, so a node's grad is complete before it is
        pushed on to its operands. Gradients are accumulated (``+=``), so
        callers reset ``grad`` themselves between passes when they need to.
        """
        topo = []
        visited = {self}
        # Explicit-stack post-order DFS: same ordering as the recursive form,
        # but graph depth is no longer bounded by Python's recursion limit.
        stack = [(self, iter(self._prev))]
        while stack:
            node, operands = stack[-1]
            for operand in operands:
                if operand not in visited:
                    visited.add(operand)
                    stack.append((operand, iter(operand._prev)))
                    break
            else:
                # Every operand has been emitted already, so emit this node.
                topo.append(node)
                stack.pop()

        self.grad = 1.0
        for v in reversed(topo):
            v._backward()

In [468]:
import random
class Neuron():
    """One tanh unit: a weighted sum of its inputs plus a bias.

    Attributes:
        w: list of ``inputs`` weights, each drawn uniformly from [-1, 1].
        b: bias, drawn uniformly from [-1, 1].
    """

    def __init__(self, inputs):
        weights = []
        for _ in range(inputs):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(random.uniform(-1,1))

    def __call__(self, x):
        """Return ``tanh(b + sum(x_i * w_i))`` for the input sequence ``x``."""
        # Start from the bias and fold in one weighted input at a time.
        activation = self.b
        for weight, feature in zip(self.w, x):
            activation = activation + feature*weight
        return activation.tanh()

    def parameters(self):
        """Return a new list with all weights followed by the bias."""
        return [*self.w, self.b]

class Layer():
    """A group of neurons that all receive the same input.

    Attributes:
        neurons: list of ``neurons`` Neuron objects, each taking ``inputs`` values.
    """

    def __init__(self, inputs, neurons):
        units = []
        for _ in range(neurons):
            units.append(Neuron(inputs))
        self.neurons = units

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the single output itself when the layer has exactly one
        neuron, otherwise the list of outputs in neuron order.
        """
        activations = list(map(lambda unit: unit(x), self.neurons))
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of all neurons, concatenated in neuron order."""
        collected = []
        for unit in self.neurons:
            collected.extend(unit.parameters())
        return collected

class MLP():
    """A multi-layer perceptron: a chain of fully connected layers.

    Args:
        inputs: number of input features.
        layer_sizes: iterable with the number of neurons in each successive
            layer (a list, tuple or any other iterable).

    Attributes:
        layers: the Layer objects, in the order they are applied.
    """

    def __init__(self, inputs, layer_sizes):
        # Unpacking (rather than `[inputs] + layer_sizes`) accepts any iterable.
        sizes = [inputs, *layer_sizes]
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

    def __call__(self, x):
        """Run ``x`` through every layer and return the last layer's output.

        The result is whatever the final layer returns: a single value when
        that layer has one neuron, otherwise a list.
        """
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # A one-neuron layer hands back a bare scalar; re-wrap it so the
            # next layer still receives a sequence it can zip against weights.
            if i != last and not isinstance(x, (list, tuple)):
                x = [x]
        return x

    def parameters(self):
        """Return every trainable parameter, layer by layer."""
        return [p for layer in self.layers for p in layer.parameters()]

In [493]:
# Seed the stdlib RNG that Neuron draws its initial weights from, so the
# predictions and losses below are reproducible on Restart & Run All.
random.seed(42)

# 3 input features -> two layers of 4 neurons -> 1 output neuron.
model = MLP(3, [4, 4, 1])

In [494]:
# Toy dataset: four samples with three features each.
xs = [[2.0, 3.0, -1.0], [3.0, -1.0, 0.5], [0.5, 1.0, 1.0], [1.0, 1.0, -1.0]]

# Target for each row of xs, in the same order.
ys = [1.0, -1.0, -1.0, 1.0]

# Forward pass of the model over every sample.
ypred = list(map(model, xs))
ypred

[Value(data=-0.8715807949720276),
 Value(data=-0.8867550647457126),
 Value(data=-0.8575604278322922),
 Value(data=-0.8529394047762934)]

In [495]:
# Sum of squared errors between each prediction and its target.
loss = sum(map(lambda pred, target: (pred - target)**2, ypred, ys))
loss

Value(data=6.969312556960919)

In [501]:
from tqdm import tqdm
epochs = 10000
lr = 0.01

# The parameter objects never change identity, so collect them once instead of
# rebuilding the list twice per epoch.
params = model.parameters()

for epoch in tqdm(range(epochs)):
    # Forward pass: predictions and sum-of-squared-errors loss.
    ypred = [model(x) for x in xs]

    loss = sum((you - ygt)**2 for you, ygt in zip(ypred, ys))

    # backward() accumulates into .grad, so clear last epoch's values first.
    for p in params:
        p.grad = 0.0

    loss.backward()

    # Plain gradient-descent step.
    for p in params:
        p.data -= lr * p.grad

    if (epoch % 1000 == 0):
        # tqdm.write prints the message without breaking up the progress bar
        # (a bare print() interleaves with it).
        tqdm.write(f"Epoch: {epoch} | Loss: {loss.data:.4f}")

  2%|█▋                                                                   | 239/10000 [00:00<00:07, 1238.96it/s]

(Epoch: 0 | Loss:  4.6310


 13%|████████▋                                                           | 1272/10000 [00:00<00:06, 1429.07it/s]

(Epoch: 1000 | Loss:  0.0031


 22%|██████████████▊                                                     | 2173/10000 [00:01<00:05, 1414.84it/s]

(Epoch: 2000 | Loss:  0.0015


 32%|█████████████████████▉                                              | 3225/10000 [00:02<00:04, 1457.05it/s]

(Epoch: 3000 | Loss:  0.0010


 43%|████████████████████████████▉                                       | 4261/10000 [00:02<00:03, 1443.09it/s]

(Epoch: 4000 | Loss:  0.0007


 53%|███████████████████████████████████▉                                | 5288/10000 [00:03<00:03, 1435.77it/s]

(Epoch: 5000 | Loss:  0.0006


 62%|█████████████████████████████████████████▉                          | 6164/10000 [00:04<00:02, 1424.17it/s]

(Epoch: 6000 | Loss:  0.0005


 72%|████████████████████████████████████████████████▉                   | 7197/10000 [00:05<00:02, 1389.38it/s]

(Epoch: 7000 | Loss:  0.0004


 82%|███████████████████████████████████████████████████████▉            | 8225/10000 [00:05<00:01, 1424.18it/s]

(Epoch: 8000 | Loss:  0.0004


 92%|██████████████████████████████████████████████████████████████▊     | 9244/10000 [00:06<00:00, 1410.83it/s]

(Epoch: 9000 | Loss:  0.0003


100%|███████████████████████████████████████████████████████████████████| 10000/10000 [00:07<00:00, 1425.84it/s]


In [502]:
# Predictions of the trained model, for comparison with ys.
ypred = list(map(model, xs))
ypred

[Value(data=0.9920577218425359),
 Value(data=-0.9926791456006598),
 Value(data=-0.9904678691716146),
 Value(data=0.991101097916135)]

In [403]:
# Sanity check of the autograd engine: build a small expression that uses
# +, -, *, /, ** and relu, then print the forward value and the gradients of
# the two leaves so they can be compared with the numbers quoted below.
a = Value(-4.0)
b = Value(2.0)
c = a + b
d = a * b + b**3
# Value defines no in-place operators, so each `+=` below rebinds the name to
# a new node (c = c + ...); statement order therefore shapes the graph.
c += c + 1
c += 1 + c + (-a)
d += d * 2 + (b + a).relu()
d += 3 * d + (b - a).relu()
e = c - d
f = e**2
g = f / 2.0
g += 10.0 / f
print(f'{g.data:.4f}') # prints 24.7041, the outcome of this forward pass
# Back-propagate from g; afterwards a.grad and b.grad hold dg/da and dg/db.
g.backward()
print(f'{a.grad:.4f}') # prints 138.8338, i.e. the numerical value of dg/da
print(f'{b.grad:.4f}') # prints 645.5773, i.e. the numerical value of dg/db

24.7041
138.8338
645.5773
