In [1]:
import numpy as np

In [93]:
# from turtle import backward
import math

from matplotlib.pyplot import isinteractive

class VectorValue:
    def __init__(self, data, _children = (), _op: str = "", label: str = ""):
        if isinstance(data, (int, float)):
            self.data = [float(data)]
        else:
            self.data = list(data)
            
        self.grad = [0.0 for _ in self.data]
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"VectorValue(data = {self.data})"
    
    # Addition(s)
    def __add__(self, other):
        other = other if isinstance(other, VectorValue) else VectorValue(other)
        assert len(self.data) == len(other.data)

        out_data = [a + b for a, b in zip(self.data, other.data)]
        out = VectorValue(out_data, (self, other), "+")

        def _backward():
            for i in range(len(self.grad)):
                self.grad[i] += out.grad[i]
                other.grad[i] += out.grad[i]
        out._backward = _backward
        return out
    
    def __radd__(self, other):
        return self + other
    

    # Multiplications
    def __mul__(self, other):
        other = other if isinstance(other, VectorValue) else VectorValue(other)
        assert len(other.data) == len(self.data)
        out_data = [a * b for a, b in zip(self.data, other.data)]
        out = VectorValue(out_data, (self, other), "*")

        def _backward():
            for i in range(len(self.grad)):
                self.grad[i] += other.data[i] * out.grad[i]
                other.grad[i] += self.data[i] * out.grad[i]
        
        out._backward = _backward
        return out
    
    def __rmul__(self, other):
        return self * other
    
    # Dot product
    def dot(self, other):
        assert len(self.data) == len(other.data)
        out_data = [sum(a * b for a, b in zip(self.data, other.data))]
        out = VectorValue(out_data, (self, other), "dot-product")

        def _backward():
            for i in range(len(self.grad)):
                self.grad[i] += other.data[i] * out.grad[0]
                other.grad[i] += self.data[i] * out.grad[0]
        
        out._backward = _backward
        return out  
    
    # Sum
    def sum(self):
        out_data = [sum(self.data)]
        out = VectorValue(out_data, (self, ), "sum")

        def _backward():
            for i in range(len(self.data)):
                self.grad[i] += out.grad[0]
        
        out._backward = _backward
        return out
    
    # Activations

    def relu(self):
        out_data = [max(0.0, x) for x in self.data]
        out = VectorValue(out_data, (self, ), "ReLU")

        def _backward():
            for i in range(len(self.data)):
                self.grad[i] += (self.data[i] > 0) * out.grad[i]
        
        out._backward = _backward
        return out
    
    def tanh(self):
        out_data = [math.tanh(x) for x in self.data]
        out = VectorValue(out_data, (self,), "tanh")

        def _backward():
            for i in range(len(self.data)):
                self.grad[i] += (1 - out.data[i]**2) * out.grad[i]
        
        out._backward = _backward
        return out
             
    
    # Negation
    def __neg__(self):
        return self * -1
    
    # Subtractions
    
    def __sub__(self, other):
        return self + (-other)
    
    def __rsub__(self, other):
        return other + (-self)
    
    def __pow__(self, power):
        assert isinstance(power, (int, float)), "only supports scalar powers"

        out_data = [x ** power for x in self.data]
        out = VectorValue(out_data, (self,), f"**{power}")

        def _backward():
            for i in range(len(self.data)):
                self.grad[i] += power * (self.data[i] ** (power - 1)) * out.grad[i]

        out._backward = _backward
        return out

        
    def backward(self):
        topo = []
        visited = set()

        def build_topo(v):
            if not v in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)

        self.grad = [1.0 for _ in self.grad]
        for v in reversed(topo):
            v._backward()

    

In [26]:
x = VectorValue([1.0, 2.0, 3.0])
w = VectorValue([0.5, -1.0, 2.0])
b = VectorValue([0.1])

# single neuron forward
y = (w.dot(x) + b).tanh()
y.backward()

print("x:", x)
print("w:", w)
print("b:", b)
print("y:", y)


x: VectorValue(data = [1.0, 2.0, 3.0])
w: VectorValue(data = [0.5, -1.0, 2.0])
b: VectorValue(data = [0.1])
y: VectorValue(data = [0.9997979416121845])


In [52]:
y = (w.dot(x) + b).tanh()
y.backward()

print("x:", x.grad)
print("w:", w.grad)
print("b:", b.grad)
print("y:", y.grad)

x: [0.00545502529852554, -0.01091005059705108, 0.02182010119410216]
w: [0.01091005059705108, 0.02182010119410216, 0.03273015179115324]
b: [0.01091005059705108]
y: [1.0]


In [13]:
a = VectorValue([1, 2, 3])
b = [1, 2, 3]
a *b

VectorValue(data = [1, 4, 9])

### Neural-Net

In [53]:
import random

In [71]:
class Module:
    def zer_grad(self):
        for p in self.parameters():
            p.grad = [0.0 for _ in p.grad]
    
    def parameters(self):
        return []

In [72]:
class Neuron(Module):
    def __init__(self, nin, nonlin = True):
        self.w = VectorValue(
            [random.uniform(-1, 1) for _ in range(nin)], 
             label='w'
        )
    
        self.b = VectorValue([0.0], label='w')
        self.nonlin = nonlin

    def __call__(self, x:VectorValue):
        act = (self.w.dot(x)) + self.b
        return act.tanh() if self.nonlin else act
    
    def parameters(self):
        return [self.w, self.b]
    
    def __repr__(self):
        return f"{'Tanh' if self.nonlin else 'Linear'} Neuron ({len(self.w.data)})"
     

In [79]:
class Layer(Module):
    def __init__(self, nin, nout, **kwargs):
        self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

    def __call__(self, x: VectorValue):
        values = [n(x).data[0] for n in self.neurons]
        return VectorValue(values)


    def parameters(self):
        return [p for n in self.neurons for p in n.parameters()]
    
    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"
    

In [68]:
a = Layer(3, 4)
a.neurons[0].w

VectorValue(data = [-0.2861476807732817, -0.08775453120393562, 0.06057624828001851])

In [80]:
class MLP(Module):
    def __init__(self, nin, nouts):
        size = [nin] + nouts
        self.layers = [
            Layer(
                size[i],
                size[i+1],
                nonlin = i != len(nouts) -1
                )
            for i in range(len(nouts))
        ]
    
    def __call__(self, x:VectorValue):
        for layer in self.layers:
            x = layer(x)
        return x
    
    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [108]:
xs = [
    VectorValue([2, 3, -1]),
    VectorValue([3, -1, 0.5]),
    VectorValue([0.5, 1, 1]),
    VectorValue([1, 1, -1])
]

ys = [VectorValue([1]), VectorValue([-1]), VectorValue([-1]), VectorValue([1])]

m = MLP(3, [4, 4, 1])

for k in range(20):

    y_pred = [m(x) for x in xs]

    losses = []
    for ygt, yout in zip(ys, y_pred):
        diff = yout -ygt
        losses.append(diff * diff)

    loss = losses[0]
    for l in losses[1:]:
        loss = loss + l

    for p in m.parameters():
        p.grad = [0.0 for _ in p.grad]

    loss.backward()

    for p in m.parameters():
        for i in range(len(p.data)):
            p.data[i] += -0.01 * p.grad[i]

    print(k, loss.data)


0 [4.832787144060846]
1 [4.832787144060846]
2 [4.832787144060846]
3 [4.832787144060846]
4 [4.832787144060846]
5 [4.832787144060846]
6 [4.832787144060846]
7 [4.832787144060846]
8 [4.832787144060846]
9 [4.832787144060846]
10 [4.832787144060846]
11 [4.832787144060846]
12 [4.832787144060846]
13 [4.832787144060846]
14 [4.832787144060846]
15 [4.832787144060846]
16 [4.832787144060846]
17 [4.832787144060846]
18 [4.832787144060846]
19 [4.832787144060846]
