In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def f(x):
    return 3*x**2 - 4*x + 5

In [None]:
f(3.0)

In [None]:
xs = np.arange(-5, 5, 0.25)
xs
ys = f(xs)
ys
plt.plot(xs, ys)

In [None]:
h = 0.000001
x = 2/3
(f(x + h) - f(x)) / h

In [None]:
a = 2.0
b = -3.0
c = 10.0
d = a*b + c
print(d)

In [None]:
h = 0.00001
a = 2.0
b = -3.0
c = 10.0

d1 = a*b + c

c += h
d2 = a*b + c

print(f'{d1 = }')
print(f'{d2 = }')
print(f'slope {(d2 - d1)/h}')


In [127]:
class Value:
    def __init__(self, data, _children=(), _op="", label=""):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self.label = label
        
    
    def __repr__(self):
        return f"Value(data = {self.data})"
    
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out =  Value(self.data + other.data, (self, other), "+")
        def _backward():
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out
    
    # sum() does not work without it even for two Values
    def __radd__(self, other):
        return self + other
    
    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), "*")
        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out
    
    def __rmul__(self, other):
        return self * other

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only sup for int/float powers"
        out = Value(self.data**other, (self, ), f'**{other}')
        def _backward():
            self.grad += other * (self.data**(other-1)) * out.grad
        
        out._backward = _backward
        return out
    
    def __truediv__(self, other):
        return self * other**-1
    
    def __neg__(self):
        return self * -1
    
    def __sub__(self, other):
        return self + (-other)
    
    def __rsub__(self, other):
        return (-self) + other
    
    def tanh(self):
        x = self.data
        t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
        out =  Value(t, (self,), 'tanh')
        def _backward():
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward
        
        return out
    
    def exp(self):
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp')
        def _backward():
            self.grad += out.data * out.grad
        out._backward = _backward
        return out
    
    def backward(self):
        topo = []
        visited = set()
        def built_topo(v):
            if v not in visited:
                visited.add(v)
                for kid in v._prev:
                    built_topo(kid)
                topo.append(v)
        built_topo(self)
        
        self.grad = 1.0
        for node in reversed(topo):
            node._backward()
        

In [None]:
a = Value(2.0)
b = Value(4.0)
a - b

In [None]:
a = Value(2.0, label = "a")
b = Value(-3.0, label = "b")
c = Value(10.0, label = "c")
e = a*b; e.label = 'e'
d = e + c; d.label = 'd'
f = Value(-2.0, label = 'f')
L = d * f; L.label = 'L'
L

In [None]:
d._prev
d._op

In [None]:
from graphviz import Digraph

In [None]:
def trace(root):
    nodes, edges = set(), set()
    def built(v):
        if v not in nodes:
            nodes.add(v)
            for child in v._prev:
                edges.add((child, v))
                built(child)
    built(root)
    return nodes, edges

In [None]:
def draw_dot(root):
    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right
    
    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        node_label = '{%s | data %.4f | gard %.4f}' % (n.label, n.data, n.grad)
        
        node_attrs = {
            'label': node_label,
            'shape': 'record',
            'color': 'blue',      # Change the node border color to blue
            'fontcolor': 'green', # Change the font color of the label to green
            'bgcolor': 'lightyellow'  # Change the background color to light yellow
        }
        
        # for any value in the graph, create a rectangular ('record') node for it
        dot.node(name=uid, **node_attrs)
        
        if n._op:
            # if this value is a result of some sort of operation, crate an op node for it
            # Customize operation node attributes here
            op_node_attrs = {
                'label': n._op,
                'shape': 'ellipse',   # Change the shape to an ellipse (oval)
                'color': 'red',       # Change the node border color to red
                'fontcolor': 'black', # Change the font color of the label to black
                'style': 'filled',    # Fill the oval node with color
                'fillcolor': 'lightblue'  # Set the fill color of the oval node
            }
            
            dot.node(name=uid + n._op, **op_node_attrs)
            dot.edge(uid + n._op, uid, color='purple')  # Change the color of the edge leaving the operation node
            
    for n1, n2 in edges:
        # Customize edge attributes here
        edge_attrs = {
            'color': 'red',  # Change the edge color to red
            'fontcolor': 'blue'  # Change the font color of the edge label to blue
        }
        # connect n1 to the top node of n2
        dot.edge(str(id(n1)), str(id(n2)) + n2._op, **edge_attrs)
        
    return dot


In [None]:
draw_dot(L)

In [None]:
# 5
a.data += 0.01 * a.grad
b.data += 0.01 * b.grad
c.data += 0.01 * c.grad
f.data += 0.01 * f.grad


e = a*b
d = e + c
L = d * f
print(L.data)

In [None]:
a.data

In [None]:
# 4
c.grad = -2.0 * 1
e.grad = -2.0 * 1
# b grad -> -4.0 (2 * -2)
# a grad -> 6.0 (-3 * -2)
b.grad = -4.0
a.grad = 6.0

In [None]:
#3
dL / de = -2.0

dL / de 

In [None]:
#2
dl / dc = ?

dd / dc = ? 1.0  dd / de = ? 1.0
d = c + e

dl / dc = ? 
KNOW:
dl / dd (-2.0)
dd / dc (1.0)
dl / dc = dl / de * dd / dc

In [None]:
#1
L = d * f

dL/dd = ? f

(f(x+h) - f(x)) / h
((d+h)*f - d*f) / h
(d*f + h*f - d*f) / h
(h*f) / h
f

In [None]:
L.grad = 1.0

In [None]:
def lol():
    h = 0.001
    a = Value(2.0, label = "a")
    b = Value(-3.0, label = "b")
    c = Value(10.0, label = "c")
    e = a*b; e.label = 'e'
    d = e + c; d.label = 'd'
    f = Value(-2.0, label = 'f')
    L = d * f; L.label = 'L'
    L1 = L.data
    
    a = Value(2.0, label = "a")
    b = Value(-3.0, label = "b")
    b.data += h
    c = Value(10.0, label = "c")
    e = a*b; e.label = 'e'
    d = e + c; d.label = 'd'
    f = Value(-2.0, label = 'f')
    L = d * f; L.label = 'L'
    L2 = L.data
    
    print(L1, L2)
    print((L2 - L1)/ h)
    
#lol() # derivative of L in respect to whatever was changed

In [None]:
lol() # derivative of L in respect to whatever was changed

In [None]:
#--

In [None]:
plt.plot(np.arange(-5, 5, 0.2), np.tanh(np.arange(-5, 5, 0.2))); plt.grid()

In [None]:
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')

w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')

b = Value(6.8813735870195432, label='b')

x1w1 = x1 * w1; x1w1.label = 'x1*w1'
x2w2 = x2 * w2; x2w2.label = 'x2*w2'
x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'
n = x1w1x2w2 + b; n.label = 'n'
o = n.tanh(); o.label = 'o'

In [None]:
o.backward()

In [None]:
draw_dot(o)

In [None]:
#1
o.grad = 1.0

In [None]:
#2
# o = tanh(n)
# do / dn = 1 - tanh(n)**2
# do / dn = 1 - o**2
print(1 - o.data**2)
n.grad = 0.5


In [None]:
#3
x1w1x2w2.grad = 0.5
b.grad = 0.5

In [None]:
#4
x1w1.grad = 0.5
x2w2.grad = 0.5

In [None]:
#5
w2.grad = x2.data * x2w2.grad
x2.grad = w2.data * x2w2.grad
x1.grad = w1.data * x1w1.grad
w1.grad = x1.data * x1w1.grad

In [None]:
#6
o.grad = 1.0
o._backward()

In [None]:
n._backward()

In [None]:
b._backward() # nothing will happen. lambda: None

In [None]:
x1w1x2w2._backward()

In [None]:
x2w2._backward()
x1w1._backward()

In [None]:
# Next implement topological sort
topo = []
visited = set()
def built_topo(v):
    if v not in visited:
        visited.add(v)
        for kid in v._prev:
            built_topo(kid)
        topo.append(v)
built_topo(o)
topo

In [None]:
o.grad = 1.0

In [None]:
len(topo)

In [None]:
for node in reversed(topo):
    node._backward()

In [None]:
# automated
o.backward()

In [None]:
# multivariable case chain rule

In [None]:
a = Value(3.0, label = 'a')
b = a + a; b.label = 'b'
b.backward()
draw_dot(b)

In [None]:
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')

w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')

b = Value(6.8813735870195432, label='b')

x1w1 = x1 * w1; x1w1.label = 'x1*w1'
x2w2 = x2 * w2; x2w2.label = 'x2*w2'
x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'
n = x1w1x2w2 + b; n.label = 'n'
# -
e = (2*n).exp(); e.label = 'e'
o = (e - 1) / (e + 1)
# -
o.label = 'o'
o.backward()
draw_dot(o)

In [None]:
import random

In [None]:
#

In [115]:
class Neuron:
    def __init__(self, n_inputs):
        self.w = [Value(random.uniform(-1, 1), label='w') for _ in range(n_inputs)]
        self.b = Value(random.uniform(-1, 1), label='b')
        
    def __call__(self, x):
        act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        return act.tanh()

    def parameters(self):
        return self.w + [self.b]

class Layer:
    def __init__(self, n_input, n_output):
        self.neurons = [Neuron(n_input) for _ in range(n_output)]
        
    def __call__(self, x):
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs) == 1 else outs
    
    def parameters(self):
        return [p for n in self.neurons for p in n.parameters()]
    
class MLP:
    def __init__(self, n_inputs: float, n_outputs: list[int]):
        arch = [n_inputs] + n_outputs
        self.layers = [Layer(arch[i], arch[i+1]) for i in range(len(n_outputs))]
        
    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
    
    def parameters(self):
        return [p for l in self.layers for n in l.neurons for p in n.parameters()]

In [None]:
x = [2.0, 3.0]
n = Neuron(2)
n(x)

In [None]:
x = [2.0, 3.0]
l = Layer(2, 3)
l(x)

In [145]:
x = [2.0, 3.0, -1.0]
network = MLP(3, [4, 4, 1])
network(x)

Value(data = -0.9874581642608983)

In [118]:
network.parameters()
len(network.parameters())

41

In [None]:
print(network.layers[0].neurons[0].w[0])
network.layers[0].neurons[0].w[0].grad

In [None]:
draw_dot(network(x))

In [148]:
network = MLP(3, [4, 4, 1])
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]
ys = [1.0, -1.0, -1.0, 1.0]



In [149]:
for k in range(20):
    ypred = [network(x) for x in xs]
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
    
    loss.backward()
    
    for p in network.parameters():
        p.data -= p.grad * 0.05
        
    print(k, loss.data)


0 1.670902273742505
1 1.0417524034784524
2 0.3670009041860463
3 0.07287908735901233
4 0.011495365596774516
5 0.0019455755871336466
6 0.0004214300417017214
7 0.00012343856795255407
8 4.554633940624859e-05
9 1.9356518663191815e-05
10 8.962879672776089e-06
11 4.388911568731498e-06
12 2.2337676487033635e-06
13 1.1685341158560794e-06
14 6.23515709088498e-07
15 3.37557277702208e-07
16 1.847378812848098e-07
17 1.0195901629173476e-07
18 5.6666139503974056e-08
19 3.169195512466654e-08


In [None]:
draw_dot(loss)

In [150]:
ypred

[Value(data = 0.9998698339772941),
 Value(data = -0.999999902942694),
 Value(data = -0.9999999140296185),
 Value(data = 0.9998785555894799)]