In [None]:
from main import Value

In [None]:
# Plain-float inputs for the toy expression.
a = 2.0
b = -3.0
c = 10.0


def fn(a, b, c):
    """Return a*b + c; uses only * and +, so any operands supporting them work."""
    return a * b + c


print(fn(a, b, c))

In [None]:
# https://en.wikipedia.org/wiki/Derivative
# Forward difference: df/da ~= (f(a+h) - f(a)) / h for an infinitesimally small h.
h = 1e-4
print('slope:', (fn(a + h, b, c) - fn(a, b, c)) / h)

In [None]:
# Rebuild the same expression with Value operands; fn is reused as-is because
# it only relies on * and +.
a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = Value(10.0, label='c')
d = fn(a, b, c)
d.label = 'd'
print(d)

In [None]:
# Peek at the bookkeeping stored on d; presumably the operation that produced
# it and the operand Values it was built from -- confirm in main.Value.
d._op, d._prev

In [None]:
from helpers import trace, draw_dot

In [None]:
# trace (from helpers) returns two collections for the graph rooted at d;
# print how many entries each holds.
nodes, edges = trace(d)
print(f'{len(nodes)=} {len(edges)=}')

In [None]:
# Display the result of helpers.draw_dot for d (presumably a rendering of its
# expression graph -- confirm in helpers).
draw_dot(d)

In [None]:
# Fresh leaves and d = a*b + c, then one more multiplication so that L = d*f.
a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = Value(10.0, label='c')
d = fn(a, b, c)
d.label = 'd'
f = Value(-2.0, label='f')
L = d * f
L.label = 'L'
print(L)

In [None]:
# Display the result of helpers.draw_dot for L (presumably a rendering of its
# expression graph -- confirm in helpers).
draw_dot(L)

In [None]:
def forward_pass(a, b, c, d, f, d_offset=0.0):
    """Recompute L = (a*b + c) * f from the leaf inputs.

    Args:
        a, b, c: operands handed to ``fn`` to rebuild the intermediate ``d``.
        d: kept only for backward compatibility and never read. ``d`` is
            always rebuilt from ``a``, ``b`` and ``c`` so that nudging a leaf
            propagates into the result.
        f: factor multiplied with the rebuilt ``d``.
        d_offset: amount added to the rebuilt ``d`` before the multiplication.
            This is the only way to nudge ``d`` itself; passing ``d + h`` as
            ``d`` has no effect because that argument is discarded.

    Returns:
        The product ``d * f`` with its ``label`` set to ``'L'``.
    """
    d = fn(a, b, c)
    if d_offset:
        # Skipped for the default offset so the unperturbed graph gets no extra node.
        d = d + d_offset
    d.label = 'd'
    L = d * f
    L.label = 'L'
    return L


# Baseline, followed by one evaluation per input nudged by h.
L = forward_pass(a, b, c, d, f)
Ld = forward_pass(a, b, c, d, f, d_offset=h)
La = forward_pass(a + h, b, c, d, f)
Lb = forward_pass(a, b + h, c, d, f)
Lc = forward_pass(a, b, c + h, d, f)
Lf = forward_pass(a, b, c, d, f + h)

In [None]:
def dL_wrt_dx(L2, L1):
    """Forward-difference slope (L2 - L1) / h, using the notebook-level step h."""
    return (L2 - L1) / h


# One estimate per nudged quantity; the first line is the trivial dL/dL case.
print(f'{dL_wrt_dx(L+h, L)=}')
print(f'{dL_wrt_dx(Ld, L)=}')
print(f'{dL_wrt_dx(Lf, L)=}')
print(f'{dL_wrt_dx(La, L)=}')
print(f'{dL_wrt_dx(Lb, L)=}')
print(f'{dL_wrt_dx(Lc, L)=}')

```
dL/dL = ((L+h)-L) / h = 1 
dL/dd = ((d+h)*f - d*f) / h = f 
dL/df = d 

dL/dc = dL/dd * dd/dc = f * d/dc (a*b+c) = f*1 = f = -2

let e = a*b
dL/de = dL/dd * dd/de = f * d/de (e+c) = f*1 = f = -2 

dL/da = dL/dd * dd/de * de/da = f * 1 * d/da (a*b) = f*b = -2*-3 = 6
dL/db = dL/dd * dd/de * de/db = f * 1 * d/db (a*b) = f*a = -2*2 = -4
```

In [None]:
# Finite-difference estimate of dL/dd done by hand: nudge d by h, redo the
# multiplication by f, and compare against the unperturbed L.
((d+h)*f - L)/h 

# MLP

In [None]:
import numpy as np 
import matplotlib.pyplot as plt

# Sample tanh on [-5, 5) in steps of 0.2 and plot the curve on a grid.
xs = np.arange(-5, 5, 0.2)
ys = np.tanh(xs)
plt.plot(xs, ys)
plt.grid();

In [None]:
# A single two-input neuron written out by hand: o = tanh(x1*w1 + x2*w2 + b).
# Inputs
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')

# Weights and bias
w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')
# With the inputs and weights above, this bias makes n ~= 0.8814, so
# tanh(n) ~= 0.7071.
b = Value(6.8813735878195432, label='b')

# Pre-activation: weighted sum of the inputs plus the bias
n  = x1*w1 + x2*w2  + b 
n.label = 'n'
# Output: tanh applied to the pre-activation
o = n.tanh()

In [None]:
# Display the result of helpers.draw_dot for o (presumably a rendering of its
# expression graph -- confirm in helpers).
draw_dot(o)

In [None]:
# Run Value.backward() starting at o; presumably this fills .grad on every
# node that feeds into o -- confirm in main.Value.
o.backward()

In [None]:
from main import Value 
from helpers import draw_dot

# Smoke-test arithmetic between plain numbers and Value objects. With the
# number on the left, Python falls back to Value's reflected operator
# (__rmul__, __radd__). Only the last expression of a cell is displayed.
2*Value(2)
2+Value(2)
Value(2)-2

In [None]:
# Number minus Value goes through the reflected subtraction (__rsub__);
# Value / Value exercises true division. Only the last result is displayed.
2-Value(2)
Value(4) / Value(2)

In [None]:
# Value divided by a plain number.
Value(4) / 2

In [None]:
# Plain number divided by a Value goes through the reflected division
# (__rtruediv__); the result is kept in x for the next cell.
x = 4 / Value(2)


In [None]:
# Display the result of helpers.draw_dot for x (presumably a rendering of its
# expression graph -- confirm in helpers).
draw_dot(x)

In [99]:
import random 
from main import Value
from helpers import draw_dot
class Module:
    """Base class for objects that expose trainable parameters.

    Subclasses override ``parameters()``; the helpers here operate on
    whatever that method returns.
    """

    def parameters(self):
        """Return the parameters owned by this module; the base class has none."""
        return []

    def zero_grad(self):
        """Set ``grad`` to 0 on every object returned by ``parameters()``."""
        for param in self.parameters():
            param.grad = 0

    def parameters_grad(self):
        """Return the ``grad`` of each parameter, in ``parameters()`` order."""
        grads = []
        for param in self.parameters():
            grads.append(param.grad)
        return grads
    
class Neuron(Module):
    """A single tanh unit with ``nin`` weights and one bias.

    Weights and bias are ``Value`` objects initialised uniformly in [-1, 1].
    """

    def __init__(self, nin):
        # Weights are drawn before the bias; keep this order so a seeded RNG
        # yields the same initial parameters.
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))

    def __repr__(self):
        return f'Neuron({len(self.w)})'

    def __call__(self, x):
        """Return tanh(b + sum_i x[i] * w[i]).

        ``zip`` stops at the shorter of ``self.w`` and ``x``, so surplus
        inputs or weights are ignored without an error.
        """
        activation = self.b
        for weight, xi in zip(self.w, x):
            activation = activation + xi * weight
        return activation.tanh()

    @property
    def grad(self):
        """Tuple of (list of weight grads, bias grad)."""
        weight_grads = [weight.grad for weight in self.w]
        return (weight_grads, self.b.grad)

    def parameters(self):
        """Return the weights followed by the bias."""
        return [*self.w, self.b]

class Layer(Module):
    """A group of ``nout`` independent neurons that all read the same input."""

    def __init__(self, nin: int, nout: int):
        """Create ``nout`` neurons, each taking ``nin`` inputs.

        ``nout`` is a count (it is passed to ``range``), not a list of sizes.
        """
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the bare output when the layer has exactly one neuron,
        otherwise a list with one output per neuron.
        """
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs) == 1 else outs

    def parameters(self):
        """Return the parameters of all neurons, flattened in neuron order."""
        return [p for neuron in self.neurons for p in neuron.parameters()]

class MLP(Module):
    """A stack of layers applied one after another."""

    def __init__(self, nin, nouts):
        """Build one layer per entry of ``nouts``.

        Args:
            nin: number of inputs to the first layer.
            nouts: output size of each successive layer; any iterable of ints
                (list, tuple, ...) is accepted.
        """
        # Unpacking instead of list concatenation lets a tuple work too.
        sizes = [nin, *nouts]
        # Consecutive pairs (sizes[i], sizes[i+1]) give each layer's in/out
        # sizes; layers are built front to back.
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the last output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers, flattened in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]
    
def mse(ytrue, ypred):
    """Return the summed squared error between predictions and targets.

    Despite the name, the total is not divided by the number of samples.
    Pairs come from ``zip``, so surplus entries in the longer argument are
    ignored. The result is labelled ``'mse_loss'``.
    """
    total = 0
    for target, prediction in zip(ytrue, ypred):
        total = total + (prediction - target) ** 2
    total.label = 'mse_loss'
    return total

In [100]:
x = [2.0, 3.0, -1.0]
# Neuron draws its initial weights from random.uniform, so seed the RNG first;
# otherwise every fresh run starts from different parameters.
random.seed(42)
# 3 inputs -> 4 -> 4 -> 1 output: 4*(3+1) + 4*(4+1) + 1*(4+1) = 41 parameters.
model = MLP(3, [4,4,1])
model.zero_grad()
print(len(model.parameters()))

41


In [101]:
# Training inputs: three samples of three features each, matching MLP(3, ...).
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [1.0, 1.0, -1.0],
]
# One target per sample. A fourth target used to be listed here, but mse()
# pairs targets and predictions with zip, so it was silently dropped.
# NOTE(review): if a fourth sample was intended, add it to xs together with
# its target instead.
ys = [1.0, -1.0, -1.0]
assert len(xs) == len(ys), "each sample in xs needs exactly one target in ys"

In [185]:
# Step size for plain gradient descent; constant, so set once.
lr = 0.01

for k in range(10):
    # Forward pass over every training sample.
    ypred = [model(sample) for sample in xs]

    # Clear old gradients before backprop (presumably backward() accumulates
    # into .grad -- confirm in main.Value).
    model.zero_grad()
    loss = mse(ys, ypred)
    topos = loss.backward()

    # Move each parameter against its gradient.
    for p in model.parameters():
        p.data -= lr * p.grad

    print(k, loss.data)


Value(8.0989E-02)
0 0.0809891227550811
Value(7.9475E-02)
1 0.07947466914833257
Value(7.8010E-02)
2 0.07800990879997755
Value(7.6593E-02)
3 0.07659257234409468
Value(7.5221E-02)
4 0.07522052130326581
Value(7.3892E-02)
5 0.07389173906041406
Value(7.2604E-02)
6 0.07260432254937296
Value(7.1356E-02)
7 0.0713564745998019
Value(7.0146E-02)
8 0.07014649687843598
Value(6.8973E-02)
9 0.06897278337433158


In [183]:
# Predictions computed in the most recent training iteration, one per row of xs.
ypred

[Value(7.7487E-01), Value(-9.4917E-01), Value(-8.2886E-01)]

In [184]:
# Raw loss value from the most recent training iteration.
loss.data

0.08255567969305228