In [121]:
import math
import random

In [231]:
class Value:
    def __init__(self, data, prev = (), op='', label=''):
        self.data = data
        self.prev = set(prev)
        self.op = op
        self.label = label
        self._backward = lambda: None
        self.grad = 0.0
    def __repr__(self):
        return f"Value {self.label} = {self.data}"
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')
        def _backward():
            self.grad += 1 * out.grad
            other.grad += 1 * out.grad
        out._backward = _backward
        return out
    def __radd__(self, other): # other + self
        return self + other
    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')
        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out
    def __rmul__(self, other): # other * self
        return self * other
    def __sub__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data - other.data, (self, other), '-')
        def _backward():
            self.grad += 1 * out.grad
            other.grad += - 1 * out.grad
        out._backward = _backward
        return out
    def __pow__(self, x):
        out = Value(self.data ** x, (self, ), '**')
        def _backward():
            self.grad += x * (self.data ** (x - 1)) * out.grad
        out._backward = _backward
        return out
    def relu(self):
        out = Value(max(0, self.data), (self, ), 'relu')
        def _backward():
            self.grad += 1 if self.data > 0 else 0
        out._backward = _backward
        return out
    def sigmoid(self):
        out = Value(1/(1 + math.exp(-self.data)), (self, ), "sigmoid")
        def _backward():
            self.grad += out * (1 - out)
        out._backward = _backward
        return out
    def tanh(self):
        x = self.data
        t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
        out = Value(t, (self, ), 'tanh')
        
        def _backward():
          self.grad += (1 - t**2) * out.grad
        out._backward = _backward
        
        return out
    def topo(self, visited, all_vars):
        visited.append(self)
        for child in self.prev:
            if child not in visited:
                child.topo(visited, all_vars)
        all_vars.append(self)
    def backward(self):
        all_vars = []
        visited = []
        self.topo(visited, all_vars)
        self.grad = 1.0
        for var in reversed(all_vars):
            var._backward()
                

In [232]:
a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = Value(10.0, label='c')
e = a*b; e.label = 'e'
d = e + c; d.label = 'd'
f = Value(-2.0, label='f')
L = d * f; L.label = 'L'
print(L.data)
L.backward()

-8.0


In [233]:
print(a.grad, b.grad, c.grad, d.grad, e.grad, f.grad, L.grad)

6.0 -4.0 -2.0 -2.0 -2.0 4.0 1.0


In [234]:
print(L.data)
L.backward()

-8.0


In [235]:
# inputs x1,x2
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')
# weights w1,w2
w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')
# bias of the neuron
b = Value(6.8813735870195432, label='b')
# x1*w1 + x2*w2 + b
x1w1 = x1*w1; x1w1.label = 'x1*w1'
x2w2 = x2*w2; x2w2.label = 'x2*w2'
x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'
n = x1w1x2w2 + b; n.label = 'n'
o = n.tanh(); o.label = 'o'
o.backward()

In [236]:
a.data -= 0.01 * a.grad
b.data -= 0.01 * b.grad
c.data -= 0.01 * c.grad
f.data -= 0.01 * f.grad

e = a * b
d = e + c
L = d * f

print(L.data)

-46.097828427361144


In [237]:
class Neuron:
    def __init__(self, input_sz):
        self.w = [Value(random.uniform(-1,1)) for _ in range(input_sz)]
        self.b = Value(random.uniform(-1,1))
    def __call__(self, X):
        act = sum([xi * wi for xi,wi in zip(X, self.w)]) + self.b
        out = act.tanh()
        return out
    def parameters(self):
        return self.w + [self.b]
class Layer:
    def __init__(self, non, input_sz):
        self.layer = [Neuron(input_sz) for _ in range(non)]
    def __call__(self, X):
        outs = [neu(X) for neu in self.layer]
        return outs[0] if len(outs) == 1 else outs
    def parameters(self):
        return [p for neuron in self.layer for p in neuron.parameters()]
class MLP:
    def __init__(self, neurons_per_layer, input_sz):
        neurons_per_layer = [input_sz] + neurons_per_layer
        self.hidden_layers = [Layer(neurons_per_layer[i+1], neurons_per_layer[i]) for i in range(len(neurons_per_layer)-1)]
    def __call__(self, X):
        x_input = X
        out = []
        for layer in self.hidden_layers:
            out = layer(x_input)
            x_input = out
        return out
    def parameters(self):
        return [p for layer in self.hidden_layers for p in layer.parameters()]

In [238]:
# X = [2, 3, 1]
# mlp = MLP([2,3,1], 3)
# mlp(X)

x = [2.0, 3.0, -1.0]
n = MLP([4, 4, 1], 3)
n(x)


Value  = -0.3202100817428525

In [239]:
xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0] # desired targets

In [249]:

for k in range(2000):
  
  # forward pass
  ypred = [n(x) for x in xs]
  loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
  
  # backward pass
  for p in n.parameters():
    p.grad = 0.0
  loss.backward()
  
  # update
  for p in n.parameters():
    p.data += -0.1 * p.grad
  
  print(k, loss.data)

0 1.2900238161828414e-05
1 1.2899395818703956e-05
2 1.2898553585189536e-05
3 1.2897711461263364e-05
4 1.289686944690492e-05
5 1.2896027542091773e-05
6 1.2895185746803309e-05
7 1.2894344061018444e-05
8 1.2893502484715793e-05
9 1.28926610178726e-05
10 1.2891819660469746e-05
11 1.2890978412484228e-05
12 1.2890137273895945e-05
13 1.2889296244682771e-05
14 1.2888455324823366e-05
15 1.2887614514297562e-05
16 1.2886773813084395e-05
17 1.2885933221158996e-05
18 1.2885092738504375e-05
19 1.2884252365097048e-05
20 1.2883412100916597e-05
21 1.2882571945940109e-05
22 1.2881731900148924e-05
23 1.2880891963520141e-05
24 1.2880052136032767e-05
25 1.2879212417665458e-05
26 1.2878372808397046e-05
27 1.2877533308207045e-05
28 1.2876693917074731e-05
29 1.2875854634976884e-05
30 1.28750154618931e-05
31 1.2874176397802538e-05
32 1.287333744268322e-05
33 1.287249859651483e-05
34 1.2871659859276622e-05
35 1.287082123094659e-05
36 1.2869982711503023e-05
37 1.2869144300925714e-05
38 1.2868305999192347e-05
39 1

In [250]:
ypred

[Value  = 0.9984327217819634,
 Value  = -0.9988122344364361,
 Value  = -0.997642861524014,
 Value  = 0.9985904232387929]