In [374]:
class Value:
    """Scalar that records the operations applied to it for reverse-mode autodiff.

    Attributes:
        value: the wrapped number.
        parents: ``()`` for a leaf, otherwise ``(left, right, op_symbol)``
            describing the operation that produced this node.
        grad: derivative of the root accumulated for this node by
            ``backward()``; starts at 0.0.
    """

    def __init__(self, value, parents=()):
        self.value = value
        self.parents = parents
        self.grad = 0.0

    def __add__(self, other):
        """Return ``self + other``; a non-Value operand is wrapped as a leaf."""
        other = other if isinstance(other, Value) else Value(other)
        child = Value(self.value + other.value, parents=(self, other, '+'))

        def _backward():
            # Local rule only: d(a + b)/da = d(a + b)/db = 1.
            # Propagation further up the graph is driven by backward().
            self.grad += 1.0 * child.grad
            other.grad += 1.0 * child.grad

        child._backward = _backward

        return child

    def __radd__(self, other):
        """Return ``other + self`` for a non-Value left operand."""
        return self + other

    def __mul__(self, other):
        """Return ``self * other``; a non-Value operand is wrapped as a leaf."""
        other = other if isinstance(other, Value) else Value(other)
        child = Value(self.value * other.value, parents=(self, other, '*'))

        def _backward():
            # Local rule only: d(a * b)/da = b and d(a * b)/db = a.
            self.grad += other.value * child.grad
            other.grad += self.value * child.grad

        child._backward = _backward

        return child

    def __neg__(self):
        """Return ``-self``, expressed as a multiplication by -1."""
        return self * -1

    def __sub__(self, other):
        """Return ``self - other``, expressed as ``self + (-other)``."""
        return self + (-other)

    def __rsub__(self, other):
        """Return ``other - self`` for a non-Value left operand."""
        # Wrap first: a bare number on the left of ``+`` would not dispatch
        # to Value.__add__.
        other = other if isinstance(other, Value) else Value(other)
        return other + (-self)

    def __rmul__(self, other):
        """Return ``other * self`` for a non-Value left operand."""
        return self * other

    def __pow__(self, other: int):
        """Return ``self ** other`` for a plain numeric exponent.

        The exponent is stored in ``parents`` as a leaf Value so that
        ``__repr__`` can display it; it receives no gradient.
        """
        child = Value(self.value ** other, parents=(self, Value(other), '**'))

        def _backward():
            # Local rule only: d(a ** n)/da = n * a ** (n - 1).
            self.grad += other * (self.value ** (other - 1) * child.grad)

        child._backward = _backward

        return child

    def _backward(self):
        """Push this node's grad to its direct parents (no-op for a leaf).

        Nodes created by an operator get a closure assigned over this method.
        """
        pass

    def backward(self):
        """Back-propagate from this node through every node that produced it.

        The root's ``grad`` is used as the seed. If it is still 0.0 it is set
        to 1.0 first, because a zero seed would propagate nothing. A seed set
        by the caller beforehand is left untouched.

        Every reachable node has its local ``_backward`` run exactly once, in
        reverse topological order, so a node that feeds several consumers has
        received all of their contributions before it passes its own gradient
        on. The traversal uses an explicit stack, so graph depth is not bounded
        by Python's recursion limit.
        """
        if self.grad == 0.0:
            self.grad = 1.0

        order = []
        seen = set()
        # Each stack entry is (node, children_already_pushed).
        stack = [(self, False)]
        while stack:
            node, expanded = stack.pop()
            if expanded:
                # All of this node's parents are already in ``order``.
                order.append(node)
                continue
            if id(node) in seen:
                continue
            seen.add(id(node))
            stack.append((node, True))
            for parent in node.parents:
                # ``parents`` also carries the operator symbol; skip it.
                if isinstance(parent, Value) and id(parent) not in seen:
                    stack.append((parent, False))

        for node in reversed(order):
            node._backward()

    def __repr__(self) -> str:
        if self.parents:
            return f"Val({self.value}, grad: {self.grad}, parents: ({self.parents[0].value} {self.parents[2]} {self.parents[1].value}))"
        else:
            return f'Val({self.value}, grad: {self.grad}, parents: None)'

In [375]:

import random

class Neuron:
    """Linear unit: one weight per input feature plus a bias, all stored as Values."""

    def __init__(self, input_features):
        """
        Creates `input_features` weights drawn from random.random() and a bias of 0.0
        """
        self.w = [Value(random.random()) for _ in range(input_features)]
        self.b = Value(0.0)

    def __call__(self, x):
        """
        Computes the forward pass for one sample: sum of w_i * x_i, plus the bias.

        Weights and features are paired with zip, so surplus entries on either
        side are ignored.
        """
        weighted_inputs = (weight * feature for weight, feature in zip(self.w, x))
        # Fold onto an explicit Value(0.0) so the result is always a Value,
        # even when there are no weights.
        pre_bias = sum(weighted_inputs, Value(0.0))
        return pre_bias + self.b

    def parameters(self):
        """Returns a new list holding every weight followed by the bias."""
        return self.w + [self.b]

In [376]:
class Layer:
    """A group of neurons that all receive the same input."""

    def __init__(self, input_features, output_features):
        """Build `output_features` neurons, each taking `input_features` inputs."""
        self.neurons = [Neuron(input_features) for _ in range(output_features)]

    def __call__(self, x):
        """Apply every neuron to `x`.

        Returns the bare output when the layer has exactly one neuron,
        otherwise the list of outputs (empty for a layer with no neurons).
        """
        outputs = [neuron(x) for neuron in self.neurons]
        if len(outputs) == 1:
            return outputs[0]
        return outputs

In [377]:
class Model:
    """Sequential container: each layer's output is the next layer's input."""

    def __init__(self, layers) -> None:
        self.layers = layers

    def __call__(self, x):
        """Feed `x` through the layers in order and return the last output."""
        activation = x
        for stage in self.layers:
            activation = stage(activation)
        return activation

    def parameters(self):
        """Return a new flat list of every neuron's parameters, layer by layer."""
        return [
            param
            for layer in self.layers
            for neuron in layer.neurons
            for param in neuron.parameters()
        ]

In [378]:
class Optimizer:
    """Plain gradient-descent updater over a collection of parameters.

    Each parameter is expected to expose mutable `value` and `grad` attributes.
    """

    def __init__(self, parameters, learn_rate) -> None:
        self.parameters = parameters
        self.learn_rate = learn_rate

    def step(self):
        """Move every parameter against its gradient, scaled by the learn rate."""
        rate = self.learn_rate
        for param in self.parameters:
            param.value = param.value - rate * param.grad

    def zero_grad(self):
        """Reset every parameter's gradient to 0.0."""
        for param in self.parameters:
            param.grad = 0.0

In [379]:
class MSE:
    """Halved mean-squared-error criterion assembled from Value operations."""

    def __init__(self):
        """Nothing to configure."""

    def __call__(self, y_pred, y):
        """Return 0.5 * (1/n) * sum((y_pred[i] - y[i]) ** 2) as a Value.

        n is len(y_pred), and targets are looked up by index in `y`.
        The returned loss has its grad set to 1.0 before it is handed back.
        """
        count = len(y_pred)

        squared_errors = ((pred - y[i]) ** 2 for i, pred in enumerate(y_pred))
        total = sum(squared_errors, Value(0.0))

        scale = Value(0.5) * Value(1 / count)
        loss = total * scale
        loss.grad = 1.0

        return loss

In [380]:
## Set-up
# Toy data: inputs and targets are the same numbers.
x = [Value(v) for v in (2.0, 3.0, 12.0)]
y = [Value(v) for v in (2.0, 3.0, 12.0)]

layer = Layer(1, 1)
model = Model([layer])

learn_rate = 0.02
epochs = 1000

optimizer = Optimizer(model.parameters(), learn_rate)

criterion = MSE()

# parameters() builds a new list on every call but returns the same Value
# objects, so the two handles below stay valid for the whole run.
weight, bias = model.parameters()

#DEBUG
# Overwrite the random initial weight (and the bias) with fixed values.
weight.value = -2.0
bias.value = 0.0

for epoch in range(epochs):
    # Forward pass: the model is called on one single-feature sample at a time.
    y_pred = [model([sample]) for sample in x]
    loss = criterion(y_pred, y)

    print(f"Epoch: {epoch:04d} | Loss: {loss} | w: {weight.value:.6f} | b: {bias.value:.6f}")

    # Backward pass and optimization.
    print(f"start: w: {weight}   b:{bias}")

    optimizer.zero_grad()
    loss.backward()
    print(f"before step: w: {weight}   b:{bias}")

    optimizer.step()
    print(f"after step: w: {weight}   b:{bias}")

    print()

# Inference: `x` and `y_pred` are rebound here to a single new sample.
x = [Value(5)]
y_pred = model(x)

print(y_pred)
    

Epoch: 0000 | Loss: Val(235.5, grad: 1.0, parents: (1413.0 * 0.16666666666666666)) | w: -2.000000 | b: 0.000000
start: w: Val(-2.0, grad: 0.0, parents: None)   b:Val(0.0, grad: 0.0, parents: None)
before step: w: Val(-2.0, grad: -157.0, parents: None)   b:Val(0.0, grad: -17.0, parents: None)
after step: w: Val(1.1400000000000001, grad: -157.0, parents: None)   b:Val(0.34, grad: -17.0, parents: None)

Epoch: 0001 | Loss: Val(0.8404000000000009, grad: 1.0, parents: (5.042400000000006 * 0.16666666666666666)) | w: 1.140000 | b: 0.340000
start: w: Val(1.1400000000000001, grad: -157.0, parents: None)   b:Val(0.34, grad: -17.0, parents: None)
before step: w: Val(1.1400000000000001, grad: 9.25333333333334, parents: None)   b:Val(0.34, grad: 1.1333333333333337, parents: None)
after step: w: Val(0.9549333333333333, grad: 9.25333333333334, parents: None)   b:Val(0.31733333333333336, grad: 1.1333333333333337, parents: None)

Epoch: 0002 | Loss: Val(0.02245495703703704, grad: 1.0, parents: (0.13472