In [2]:
import math


class Value:
    """A scalar that remembers how it was computed so gradients can be back-propagated.

    Every arithmetic operation returns a new ``Value`` whose ``_prev`` holds its
    operands and whose ``_backward`` closure adds this node's gradient
    contribution onto those operands. Calling ``backward()`` on a result walks
    that graph in reverse and fills in ``grad`` on every node it depends on.
    """

    def __init__(self,data,_children=(),_op='',label=''):
        """Wrap ``data``.

        Args:
            data: the scalar being wrapped.
            _children: operand nodes this value was computed from (internal).
            _op: short tag naming the producing operation (internal).
            label: optional free-form name; stored but not used by this class.
        """
        self.data = data
        self.grad = 0.0                 # accumulated gradient, filled in by backward()
        self._backward = lambda:None    # leaves have nothing to propagate
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value(data={self.data},Grad = {self.grad})"

    def __add__(self,other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other,Value) else Value(other)
        out = Value(self.data + other.data, (self,other),'+')

        def _backward():
            # d(a+b)/da = d(a+b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward

        return out

    def __mul__(self,other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other,Value) else Value(other)
        out = Value(self.data * other.data, (self,other),'*')

        def _backward():
            # d(a*b)/da = b, d(a*b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float))
        out = Value(self.data ** other, (self,), f'**{other}')

        def _backward():
            # power rule: d(x**k)/dx = k * x**(k-1)
            self.grad += other * (self.data ** (other - 1)) * out.grad
        out._backward = _backward

        return out

    def __rmul__(self,other):
        """Handle ``number * Value``."""
        return self * other

    def __truediv__(self,other):
        """Return ``self / other``, expressed as ``self * other**-1``."""
        return self * other**-1

    def __rtruediv__(self, other):
        """Handle ``number / Value``."""
        return other * self**-1

    def __radd__(self, other):
        """Handle ``number + Value`` (this is what lets ``sum()`` start from 0)."""
        return self + other

    def __neg__(self):
        """Return ``-self``.

        Unary minus is invoked with no extra argument; the previous two-parameter
        signature made ``-v``, ``Value - Value`` and ``number - Value`` raise TypeError.
        """
        return self * -1

    def __sub__(self,other):
        """Return ``self - other``, expressed as ``self + (-other)``."""
        return self + (-other)

    def __rsub__(self, other):
        """Handle ``number - Value``."""
        return other + (-self)

    def tanh(self):
        """Return ``tanh(self)`` as a new ``Value``."""
        # math.tanh saturates at +/-1 for large |x|; building the result from
        # math.exp(2*x) raises OverflowError once 2*x exceeds the float range.
        t = math.tanh(self.data)
        out = Value(t, (self, ), 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)**2
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward

        return out

    def exp(self):
        """Return ``e ** self`` as a new ``Value``."""
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp')

        def _backward():
            # Accumulate (+=) like every other op: plain assignment would discard
            # gradient already received through other uses of this node.
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def backward(self):
        """Back-propagate from this node, adding to ``grad`` on every node it depends on.

        Gradients are accumulated on top of whatever ``grad`` values are already
        stored, so reset them to 0.0 before calling this again on a graph that
        shares nodes with an earlier call.
        """
        topo = []
        visited = set()
        # Iterative post-order DFS. An explicit stack keeps long operation chains
        # (e.g. a sum over many terms) from hitting Python's recursion limit.
        stack = [(self, False)]
        while stack:
            node, children_done = stack.pop()
            if children_done:
                topo.append(node)
            elif node not in visited:
                visited.add(node)
                stack.append((node, True))
                stack.extend((child, False) for child in node._prev if child not in visited)

        self.grad = 1.0
        # Reverse topological order: each node's grad is complete before its
        # _backward pushes it on to the operands.
        for node in reversed(topo):
            node._backward()

import random

class Neuron:
    """A single unit: tanh applied to the bias plus the weighted sum of the inputs."""

    def __init__(self,nin):
        # nin weights are drawn first, then the bias, each via random.uniform(-1, 1).
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1,1)))
        self.w = weights
        self.b = Value(random.uniform(-1,1))

    def __call__(self,x):
        # Start from the bias and fold in one weight*input product at a time.
        # zip stops at the shorter of self.w and x, so surplus entries are ignored.
        pre_activation = self.b
        for weight, value in zip(self.w, x):
            pre_activation = pre_activation + weight*value
        return pre_activation.tanh()

    def parameters(self):
        """Return a new list holding the weights followed by the bias."""
        return [*self.w, self.b]

class Layer:
    """A group of ``nout`` neurons that all read the same ``nin``-sized input."""

    def __init__(self,nin,nout):
        units = []
        for _ in range(nout):
            units.append(Neuron(nin))
        self.neurons = units

    def __call__(self,x):
        """Evaluate every neuron on ``x``; a one-neuron layer returns the bare output."""
        outs = [neuron(x) for neuron in self.neurons]
        if len(outs) == 1:
            return outs[0]
        return outs

    def parameters(self):
        """Return the parameters of all neurons, in neuron order, as one flat list."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected

class MLP:
    """A stack of ``Layer`` objects applied one after another."""

    def __init__(self,nin,nouts):
        """Build the layers.

        Args:
            nin: number of inputs to the first layer.
            nouts: iterable with the number of neurons in each successive layer.
        """
        # Unpacking accepts any iterable of widths (list, tuple, range, ...);
        # `[nin] + nouts` raised TypeError for anything but a list.
        sz = [nin, *nouts]
        # Each layer takes the previous width as its input size.
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]

    def __call__(self,x):
        """Feed ``x`` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers, in layer order, as one flat list."""
        return [p for layer in self.layers for p in layer.parameters()]

# Seed the RNG before any weights are drawn so that the initial network, and
# every number printed further down, is the same after Restart & Run All.
random.seed(1337)

x = [2.0, 3.0, -1.0]
n = MLP(3, [5, 5, 1])  # 3 inputs -> layers of 5, 5 and 1 neurons
n(x)



Value(data=-0.779189178876498,Grad = 0.0)

In [3]:
# Training inputs, one row per example.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
    # NOTE(review): this row has four values but the network is built as MLP(3, ...).
    # Neuron.__call__ zips weights with inputs, so the trailing 1.5 is silently
    # ignored -- confirm which three features were intended.
    [2.0,2.1,1.0,1.5]
]
# Target output for each row of xs.
# NOTE(review): every neuron ends in tanh, so a prediction can never reach the 2.0
# target. That is consistent with the loss sitting just above 1.0 in the training
# output -- confirm the intended target.
ys = [1.0, -1.0, -1.0, 1.0, 2.0]



In [51]:
# Thirty steps of full-batch gradient descent on the summed squared error.
for step in range(1, 31):
    # Forward pass: one prediction per training row, then the total loss.
    ypred = [n(sample) for sample in xs]
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))

    # Value.backward() accumulates into .grad, so clear the previous step's
    # gradients before back-propagating this step's loss.
    for param in n.parameters():
        param.grad = 0.0
    loss.backward()

    # Move every parameter a small step against its gradient.
    for param in n.parameters():
        param.data -= 0.05 * param.grad

    print(step, loss.data)

1 1.0016095033099373
2 1.0016069267186598
3 1.001604358168975
4 1.0016017976236653
5 1.0015992450457403
6 1.0015967003984363
7 1.0015941636452148
8 1.0015916347497593
9 1.0015891136759751
10 1.0015866003879872
11 1.0015840948501376
12 1.0015815970269846
13 1.0015791068833024
14 1.0015766243840765
15 1.001574149494505
16 1.001571682179995
17 1.0015692224061628
18 1.0015667701388302
19 1.0015643253440245
20 1.0015618879879766
21 1.0015594580371199
22 1.001557035458088
23 1.0015546202177135
24 1.0015522122830263
25 1.0015498116212531
26 1.0015474181998143
27 1.0015450319863253
28 1.0015426529485918
29 1.00154028105461
30 1.001537916272565


In [52]:
# Show the predictions (and their gradients) left over from the last training step.
ypred

[Value(data=0.9996719235539867,Grad = -0.0006561528920265847),
 Value(data=-0.9935569663778211,Grad = 0.012886067244357813),
 Value(data=-0.9855775873719455,Grad = 0.028844825256109097),
 Value(data=0.999437901878043,Grad = -0.0011241962439139108),
 Value(data=0.9993562202182829,Grad = -2.0012875595634343)]

In [53]:
# Display every trainable Value of the network with its current data and grad.
n.parameters()

[Value(data=0.38841889379116595,Grad = -0.00020038012713870858),
 Value(data=0.6002204667487516,Grad = -0.0008172477800786889),
 Value(data=-0.5122815775357037,Grad = 0.0006307000478415622),
 Value(data=-0.6411361173284622,Grad = 0.0007819112792396479),
 Value(data=1.1569482048954214,Grad = -0.0005821253194531913),
 Value(data=0.050865683552161076,Grad = -0.0005085194654331856),
 Value(data=-0.9471686342755691,Grad = -9.973318575319612e-05),
 Value(data=-0.39215974660012204,Grad = -9.720314172638578e-05),
 Value(data=-0.06833842901804933,Grad = -0.0004627432171627092),
 Value(data=0.8206574849712375,Grad = -0.0001469831491712596),
 Value(data=-1.175262270788462,Grad = 0.0008812983067277111),
 Value(data=0.6146995770178395,Grad = 0.0009332654548660384),
 Value(data=-0.08227681747560865,Grad = -0.00010171353662276985),
 Value(data=-0.6201620950764254,Grad = 2.8477107224725554e-05),
 Value(data=-0.08549139310054664,Grad = -0.0003002278570482084),
 Value(data=0.17530388121144772,Grad = -0.

In [54]:
# Total number of trainable parameters (weights and biases) in the network.
len(n.parameters())

56