In [26]:
import math
class Value:

    def __init__(self, data, _children=(), _op=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op


    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __pow__(self, power):
        assert isinstance(power, (int, float))
        out = Value(self.data ** power, (self,), f'**{power}')

        def _backward():
            self.grad += power * (self.data ** (power - 1)) * out.grad
        out._backward = _backward

        return out

    def tanh(self):
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh')

        def _backward():
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward

        return out

    def backward(self):
        topo = []
        visited = set()

        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)

        build_topo(self)

        self.grad = 1.0
        for v in reversed(topo):
            v._backward()

    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return other + (-self)

    def __radd__(self, other):
        return self + other

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        return self * other**-1

    def __rtruediv__(self, other):
        return other * self**-1

    def __repr__(self):
        return f"Value(data={self.data}, grad={self.grad})"


In [27]:
x = Value(2.0)
y = x.tanh()
y.backward()

print("tanh(x) =", y.data)
print("dy/dx =", x.grad)


tanh(x) = 0.9640275800758169
dy/dx = 0.07065082485316443


In [28]:
import random

In [29]:
class Module:

    def zero_grad(self):
        for p in self.parameters():
            p.grad = 0

    def parameters(self):
        return []

class Neuron(Module):

    def __init__(self, nin, nonlin=True):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
        self.b = Value(0)
        self.nonlin = nonlin

    def __call__(self, x):
        act = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
        return act.tanh() if self.nonlin else act

    def parameters(self):
        return self.w + [self.b]

    def __repr__(self):
        return f"{'tanh' if self.nonlin else 'Linear'}Neuron({len(self.w)})"

class Layer(Module):

    def __init__(self, nin, nout, **kwargs):
        self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

    def __call__(self, x):
        out = [n(x) for n in self.neurons]
        return out[0] if len(out) == 1 else out

    def parameters(self):
        return [p for n in self.neurons for p in n.parameters()]

    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

class MLP(Module):

    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i+1], nonlin=i!=len(nouts)-1) for i in range(len(nouts))]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [30]:
def print_graph(v, indent=0, visited=None):
    if visited is None:
        visited = set()
    if v in visited:
        return
    visited.add(v)
    print("  " * indent + f"{v._op} | data={v.data:.4f} | grad={v.grad:.4f}")                         #Graphviz wasnt working 
    for child in v._prev:
        print_graph(child, indent + 1, visited)



In [33]:

loss.backward()
print_graph(loss)

* | data=0.0058 | grad=1.0000
  + | data=0.0230 | grad=0.5000
    + | data=0.0230 | grad=0.7500
      **2 | data=0.0009 | grad=1.0000
        + | data=0.0305 | grad=0.0764
          * | data=-1.0000 | grad=0.0916
             | data=1.0000 | grad=0.6774
             | data=-1.0000 | grad=0.1069
          + | data=1.0305 | grad=0.0916
            + | data=0.7646 | grad=0.1069
              + | data=0.7696 | grad=0.1222
                * | data=0.8059 | grad=0.1375
                  tanh | data=-0.9695 | grad=-0.1275
                    + | data=-2.0832 | grad=-0.0084
                      * | data=-1.7554 | grad=-0.0092
                         | data=2.0000 | grad=-0.0031
                         | data=-0.8788 | grad=0.3131
                      + | data=-0.3278 | grad=-0.0092
                        * | data=-0.3445 | grad=-0.0100
                           | data=0.3448 | grad=-0.1049
                           | data=-1.0000 | grad=0.1527
                         | data=0.0173 | gr

In [23]:
def accuracy(y_pred, y_true):
    correct = 0
    for yp, yt in zip(y_pred, y_true):
        pred = 1 if yp.data > 0 else -1
        if pred == yt.data:
            correct += 1
    return correct / len(y_true)
def mse_loss(y_pred, y_true):
    return sum((yp - yt)**2 for yp, yt in zip(y_pred, y_true)) / len(y_true)


In [36]:
learning_rate = 0.05
model = MLP(2, [4, 1])
epochs = 20

xs = [[Value(2.0), Value(3.0)],[Value(1.0), Value(-1.0)],[Value(-1.0), Value(2.0)],[Value(-2.0), Value(-3.0)]]
ys = [Value(1.0),Value(-1.0),Value(1.0),Value(-1.0)]

for epoch in range(epochs):
    y_pred = [model(x) for x in xs]
    loss = mse_loss(y_pred, ys)
    model.zero_grad()
    loss.backward()
    for p in model.parameters():
        p.data -= learning_rate * p.grad
    acc = accuracy(y_pred, ys)
    print(f"Epoch {epoch:02d} | Loss = {loss.data:.4f} | Accuracy = {acc*100:.2f}%")


Epoch 00 | Loss = 4.1485 | Accuracy = 25.00%
Epoch 01 | Loss = 1.9259 | Accuracy = 50.00%
Epoch 02 | Loss = 0.5665 | Accuracy = 100.00%
Epoch 03 | Loss = 0.1753 | Accuracy = 100.00%
Epoch 04 | Loss = 0.0734 | Accuracy = 100.00%
Epoch 05 | Loss = 0.0354 | Accuracy = 100.00%
Epoch 06 | Loss = 0.0186 | Accuracy = 100.00%
Epoch 07 | Loss = 0.0105 | Accuracy = 100.00%
Epoch 08 | Loss = 0.0064 | Accuracy = 100.00%
Epoch 09 | Loss = 0.0042 | Accuracy = 100.00%
Epoch 10 | Loss = 0.0030 | Accuracy = 100.00%
Epoch 11 | Loss = 0.0023 | Accuracy = 100.00%
Epoch 12 | Loss = 0.0018 | Accuracy = 100.00%
Epoch 13 | Loss = 0.0016 | Accuracy = 100.00%
Epoch 14 | Loss = 0.0014 | Accuracy = 100.00%
Epoch 15 | Loss = 0.0012 | Accuracy = 100.00%
Epoch 16 | Loss = 0.0011 | Accuracy = 100.00%
Epoch 17 | Loss = 0.0011 | Accuracy = 100.00%
Epoch 18 | Loss = 0.0010 | Accuracy = 100.00%
Epoch 19 | Loss = 0.0010 | Accuracy = 100.00%
