In [19]:
import math


class Value:
    """A scalar that records the operations applied to it for reverse-mode autodiff.

    Every arithmetic result remembers its operand nodes (``_prev``) and a
    closure (``_backward``) that pushes the result's gradient onto those
    operands. Calling :meth:`backward` on a result walks that graph and
    accumulates ``grad`` on every node that contributed to it.
    """

    def __init__(self, data, _children=(), _op='', label=''):
        """Wrap ``data``; ``_children``/``_op`` describe how the value was produced."""
        self.data = data
        self.grad = 0.0
        # Leaf nodes have nothing to propagate, hence the no-op default.
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value(data={self.data},Grad = {self.grad})"

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a+b)/da = d(a+b)/db = 1; use += because a node may feed several results.
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # Product rule: each operand's gradient is scaled by the other operand.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __pow__(self, other):
        """Return ``self ** other`` for a constant ``int``/``float`` exponent."""
        assert isinstance(other, (int, float))
        out = Value(self.data ** other, (self,), f'**{other}')

        def _backward():
            # Power rule: d(x**n)/dx = n * x**(n-1).
            self.grad += other * (self.data ** (other - 1)) * out.grad
        out._backward = _backward

        return out

    def __rmul__(self, other):
        """Handle ``number * Value``."""
        return self * other

    def __truediv__(self, other):
        """Return ``self / other`` expressed as ``self * other**-1``."""
        return self * other**-1

    def __rtruediv__(self, other):
        """Handle ``number / Value``."""
        return other * self**-1

    def __radd__(self, other):
        """Handle ``number + Value`` (this is what lets ``sum()`` start from 0)."""
        return self + other

    def __neg__(self):
        """Return ``-self``.

        Unary minus receives no second operand; the previous signature
        required one, so ``-v`` (and therefore ``Value - Value``) raised
        ``TypeError``.
        """
        return self * -1

    def __sub__(self, other):
        """Return ``self - other`` expressed as ``self + (-other)``."""
        return self + (-other)

    def __rsub__(self, other):
        """Handle ``number - Value``."""
        return other + (-self)

    def tanh(self):
        """Return ``tanh(self)`` as a new node."""
        x = self.data
        # math.tanh saturates to +/-1 for large |x|; the explicit
        # (e^{2x}-1)/(e^{2x}+1) form raises OverflowError once e^{2x} overflows.
        t = math.tanh(x)
        out = Value(t, (self, ), 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)^2.
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward

        return out

    def exp(self):
        """Return ``e ** self`` as a new node."""
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp')

        def _backward():
            # d e^x/dx = e^x. Accumulate (+=) rather than assign so gradient
            # arriving from other uses of this node is not overwritten.
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def backward(self):
        """Back-propagate from this node, accumulating ``grad`` on every ancestor.

        Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` in
        reverse topological order. Gradients are accumulated, so callers that
        reuse leaf nodes across passes must reset their ``grad`` first.
        """
        # Iterative post-order DFS: a recursive walk exceeds Python's recursion
        # limit on long chains (e.g. a sum over thousands of terms).
        topo = []
        visited = set()
        stack = [(self, False)]
        while stack:
            node, children_done = stack.pop()
            if children_done:
                # All of this node's operands are already in ``topo``.
                topo.append(node)
                continue
            if node in visited:
                continue
            visited.add(node)
            # Revisit the node once everything pushed after it has been handled.
            stack.append((node, True))
            for child in node._prev:
                if child not in visited:
                    stack.append((child, False))

        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

import random

class Neuron:
    """A single unit computing ``tanh(w . x + b)`` over its inputs."""

    def __init__(self,nin):
        """Create ``nin`` weights and one bias, each drawn uniformly from [-1, 1]."""
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1,1)))
        self.w = weights
        self.b = Value(random.uniform(-1,1))

    def __call__(self,x):
        """Return the activation for the input sequence ``x``.

        Inputs are paired with weights via ``zip``, so pairing stops at the
        shorter of the two sequences.
        """
        # Start from the bias and fold in one weighted input at a time.
        act = self.b
        for wi, xi in zip(self.w, x):
            act = act + wi * xi
        return act.tanh()

    def parameters(self):
        """Return the weights followed by the bias as one list."""
        return [*self.w, self.b]

class Layer:
    """A group of neurons that all receive the same input."""

    def __init__(self,nin,nout):
        """Build ``nout`` neurons, each taking ``nin`` inputs."""
        units = []
        for _ in range(nout):
            units.append(Neuron(nin))
        self.neurons = units

    def __call__(self,x):
        """Evaluate every neuron on ``x``.

        Returns the bare output when the layer has exactly one neuron,
        otherwise the list of outputs.
        """
        results = []
        for neuron in self.neurons:
            results.append(neuron(x))
        if len(results) == 1:
            return results[0]
        return results

    def parameters(self):
        """Return the parameters of all neurons, flattened into one list."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected

class MLP:
    """A stack of layers applied one after another."""

    def __init__(self,nin,nouts):
        """Build one layer per entry of ``nouts``; ``nin`` is the input width.

        Each layer's input width is the previous layer's output width.
        """
        widths = [nin] + nouts
        # Consecutive width pairs give (inputs, outputs) for each layer.
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])]

    def __call__(self,x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def parameters(self):
        """Return the parameters of all layers, flattened into one list."""
        collected = []
        for layer in self.layers:
            collected.extend(layer.parameters())
        return collected

# Seed the RNG before building the network so the randomly initialised
# weights, and every output derived from them, are identical after
# Restart Kernel -> Run All.
random.seed(42)

x = [2.0, 3.0, -1.0]    # a single 3-feature input
n = MLP(3, [5, 5, 1])   # 3 inputs -> two layers of 5 neurons -> 1 output neuron
n(x)



Value(data=-0.7160858784900538,Grad = 0.0)

In [20]:
# Training inputs: one row per example, three features each to match MLP(3, ...).
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
    # Exactly three features: the network has three inputs, and zip() in
    # Neuron.__call__ would silently ignore any extra value in the row.
    [2.0, 2.1, 1.0],
]
# Desired output for each row of xs.
# NOTE(review): every Neuron ends in tanh, whose output lies in (-1, 1), so the
# 2.0 target cannot be reached and that example's squared error stays near 1.
# Confirm whether this target is intentional.
ys = [1.0, -1.0, -1.0, 1.0, 2.0]



In [29]:
# Gradient descent: 30 full-batch steps with a fixed step size of 0.05.
for k in range(30):
    # Forward pass: one prediction per training example, then the summed
    # squared error against the targets.
    ypred = list(map(n, xs))
    loss = sum((yout - ygt) ** 2 for yout, ygt in zip(ypred, ys))

    # Gradients accumulate across backward passes, so clear them first.
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Step each parameter against its gradient.
    for p in n.parameters():
        p.data -= 0.05 * p.grad

    print(k+1, loss.data)

1 1.0141342043848824
2 1.0139641914685136
3 1.01379823268269
4 1.0136361851093882
5 1.013477912440704
6 1.013323284602886
7 1.0131721774056115
8 1.0130244722145811
9 1.012880055645626
10 1.012738819278725
11 1.0126006593904262
12 1.0124654767033234
13 1.0123331761513281
14 1.0122036666596002
15 1.0120768609380772
16 1.0119526752876433
17 1.0118310294180373
18 1.0117118462766945
19 1.0115950518877554
20 1.0114805752005531
21 1.0113683479469344
22 1.011258304506821
23 1.011150381781465
24 1.0110445190738895
25 1.0109406579760465
26 1.010838742262257
27 1.0107387177885274
28 1.0106405323973762
29 1.0105441358278133
30 1.0104494796301602


In [30]:
# Show the predictions computed in the last iteration of the training loop.
ypred

[Value(data=0.998615534355601,Grad = -0.00276893128879796),
 Value(data=-0.952889235478447,Grad = 0.0942215290431061),
 Value(data=-0.9568569237480328,Grad = 0.08628615250393445),
 Value(data=0.9951277092583197,Grad = -0.009744581483360548),
 Value(data=0.9968334761836998,Grad = -2.0063330476326007)]

In [23]:
# List every weight and bias in the network with its current data and grad.
n.parameters()

[Value(data=0.6712138326907527,Grad = 0.0005762188828185056),
 Value(data=0.5457807931043493,Grad = 6.494571448301963e-05),
 Value(data=0.9959010236728167,Grad = -0.0009073112074947226),
 Value(data=0.6221857793121803,Grad = 0.00016261560789872839),
 Value(data=-0.7766811050916552,Grad = -0.01207483360149936),
 Value(data=0.42210557987398006,Grad = -0.01658781981845842),
 Value(data=0.30209321765772323,Grad = -0.006205947340774232),
 Value(data=0.6133087256582984,Grad = -0.005365352060037961),
 Value(data=0.12678273375888374,Grad = 0.019837008638848023),
 Value(data=-0.8572376457898446,Grad = 0.031590350905787),
 Value(data=0.8406778418071976,Grad = -0.022927028689767513),
 Value(data=-0.33218561356144494,Grad = -0.026845244789331136),
 Value(data=-0.8976295953200736,Grad = 0.020097570424692814),
 Value(data=-0.3842475559790518,Grad = 0.006694636756271424),
 Value(data=0.2874583107880269,Grad = -0.032158207438397236),
 Value(data=1.0153823047369095,Grad = -0.027338470568068634),
 Value

In [24]:
# Count the network's parameters (weights plus biases over all layers).
len(n.parameters())

56