In [1]:
import math
import numpy as np
import random
from collections import deque

In [2]:
class Node:
    def __init__(
        self, 
        val: int | float, 
        label: str,
        _left_component: 'Node' = None,
        _right_component: 'Node' = None,
        _operation: str = '',
        _repr: str = None
        ):
        self.val = val
        self.label = label
        self.grad = 0
        self._backward = lambda: None
        self._left_component = _left_component
        self._right_component = _right_component
        self._operation = _operation
        self._repr = f"'{self.label}' | Value: {self.val}"
        
    def __repr__(self) -> str:
        return self._repr
    
    def __add__(self, other):
        other = other if isinstance(other, Node) else Node(other, label=f'{self.label}_')
        out_val = self.val + other.val
        out_label = f"({self.label} + {other.label})"
        out = Node(val=out_val, label=out_label, _left_component=self, _right_component=other, _operation='+')
        def _backward():
            self.grad += out.grad * 1.0
            other.grad += out.grad * 1.0
        out._backward = _backward
        return out
    
    def __mul__(self, other):
        other = other if isinstance(other, Node) else Node(other, label=f'{self.label}_')
        out_val = self.val * other.val
        out_label = f"({self.label} * {other.label})"
        out = Node(val=out_val, label=out_label, _left_component=self, _right_component=other, _operation='*')
        def _backward():
            self.grad += out.grad * other.val
            other.grad += out.grad * self.val
        out._backward = _backward
        return out
    
    def __sub__(self, other):
        other = other  if  isinstance(other, Node) else Node(other,  label=f'{self.label}_')
        out_val = self.val - other.val
        out_label = f"({self.label} - {other.label})"
        out = Node(val=out_val, label=out_label, _left_component=self, _right_component=other, _operation='-')
        def _backward():
            self.grad += out.grad
            other.grad += -out.grad
        out._backward = _backward
        return out
    
    def __pow__(self, n):
        return self.pow(n)
    
    def __truediv__(self, other):
        return self  * (other ** (-1))
            
    def __radd__(self, other):
        return self + other
    
    def _topological_ordering(self):
        topologicaly_sored_nodes =  []
        traversed_nodes = set()
        
        def traverse(node):
            if not node in traversed_nodes:
                traversed_nodes.add(node)
                for component_node  in [node._left_component, node._right_component]:
                    if component_node:
                        traverse(component_node)
                topologicaly_sored_nodes.append(node)
        traverse(self)
        return topologicaly_sored_nodes
                
    
    def exp(self):
        out_val = np.e ** self.val
        out_label = f"(e ^ {self.label})"
        out = Node(val=out_val, label=out_label, _left_component=self, _right_component=0, _operation='exp')
        def _backward():
            self.grad += out.grad * out_val
        out._backward = _backward
        return out
    
    def pow(self, n: int):
        out_val = self.val ** n
        out_label = f"({self.label} ^ {n})"
        out = Node(val=out_val, label=out_label, _left_component=self, _right_component=None, _operation='pow')
        def _backward():
            self.grad += out.grad * (n * (self.val ** (n - 1)))
        out._backward = _backward
        return out
    
    def tanh(self):
        t = (math.exp(2*self.val) - 1) / (math.exp(2*self.val) + 1)
        out_val = t
        out_label = f"tanh({self.label})"
        out = Node(val=out_val, label=out_label, _left_component=self, _right_component=None, _operation='tanh')
        def _backward():
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward
        return out
    
    def sin(self):
        out_val = np.sin(self.val)
        out_label = f"(sin({self.label}))"
        out = Node(val=out_val, label=out_label, _left_component=self, _right_component=None, _operation='sin')
        def _backward():
            self.grad += out.grad * np.cos(self.val)
        out._backward = _backward
        return out
    
    def traverse(self):
        topologicaly_ordered_nodes  =  self._topological_ordering()
        for node in topologicaly_ordered_nodes[::-1]:
            print(f'[{node}] | Left component: {node._left_component} | Right component: {node._right_component} | Op: {node._operation} | Grad: {node.grad}')
            
    def print_grads(self):
        topologicaly_ordered_nodes  =  self._topological_ordering()
        for node in  topologicaly_ordered_nodes[::-1]:
            print(f'{node.label}  | grad: {node.grad}')
        
    def backward(self):
        topologicaly_sorted_nodes = self._topological_ordering()
        self.grad = 1.0
        for node in topologicaly_sorted_nodes[::-1]:
            node._backward()
    

In [3]:
x1 = Node(2.0, label='x1')
x2 = Node(0.0, label='x2')
w1 = Node(-3.0, label='w1')
w2 = Node(1.0, label='w1')
b = Node(6.88137358, label='b')
x1w1 = x1*w1
x2w2 = x2*w2
x1w1x2w2 = x1w1 + x2w2
n = x1w1x2w2 + b
o = n.tanh()

o.backward()
o.traverse()

['tanh((((x1 * w1) + (x2 * w1)) + b))' | Value: 0.707106777676776] | Left component: '(((x1 * w1) + (x2 * w1)) + b)' | Value: 0.88137358 | Right component: None | Op: tanh | Grad: 1.0
['(((x1 * w1) + (x2 * w1)) + b)' | Value: 0.88137358] | Left component: '((x1 * w1) + (x2 * w1))' | Value: -6.0 | Right component: 'b' | Value: 6.88137358 | Op: + | Grad: 0.5000000049635664
['b' | Value: 6.88137358] | Left component: None | Right component: None | Op:  | Grad: 0.5000000049635664
['((x1 * w1) + (x2 * w1))' | Value: -6.0] | Left component: '(x1 * w1)' | Value: -6.0 | Right component: '(x2 * w1)' | Value: 0.0 | Op: + | Grad: 0.5000000049635664
['(x2 * w1)' | Value: 0.0] | Left component: 'x2' | Value: 0.0 | Right component: 'w1' | Value: 1.0 | Op: * | Grad: 0.5000000049635664
['w1' | Value: 1.0] | Left component: None | Right component: None | Op:  | Grad: 0.0
['x2' | Value: 0.0] | Left component: None | Right component: None | Op:  | Grad: 0.5000000049635664
['(x1 * w1)' | Value: -6.0] | Le

In [4]:
x1 = Node(2.0, label='x1')
x2 = Node(0.0, label='x2')
w1 = Node(-3.0, label='w1')
w2 = Node(1.0, label='w1')
b = Node(6.88137358, label='b')
x1w1 = x1*w1
x2w2 = x2*w2
x1w1x2w2 = x1w1 + x2w2
n = x1w1x2w2 + b

e =  (n * 2).exp()
o = (e - 1)  / (e + 1)
o.backward()
o.traverse()

['(((e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_)) - (e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_))_) * (((e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_)) + (e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_))_) ^ -1))' | Value: 0.7071067776767759] | Left component: '((e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_)) - (e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_))_)' | Value: 4.8284270429204 | Right component: '(((e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_)) + (e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_))_) ^ -1)' | Value: 0.146446611161612 | Op: * | Grad: 1.0
['(((e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_)) + (e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 * w1) + (x2 * w1)) + b)_))_) ^ -1)' | Value: 0.146446611161612] | Left component: '((e ^ ((((x1 * w1) + (x2 * w1)) + b) * (((x1 

In [5]:
import torch

x1 =  torch.Tensor([2.0]).double();   x1.requires_grad = True
x2 =  torch.Tensor([0.0]).double();   x2.requires_grad = True
w1 =  torch.Tensor([-3.0]).double();  w1.requires_grad = True
w2 =  torch.Tensor([1.0]).double();   w2.requires_grad = True
b = torch.Tensor([6.88]).double();    b.requires_grad = True
n  =  x1*w1 + x2*w2  + b
o  =  torch.tanh(n)

print(o.data.item())
o.backward()

print('---')
print(f'x2:', x2.grad.item())
print(f'w2:', w2.grad.item())
print(f'x1:', x1.grad.item())
print(f'w1:', w1.grad.item())

0.7064193777288968
---
x2: 0.5009716627691181
w2: 0.0
x1: -1.5029149883073543
w1: 1.0019433255382362


In [6]:
class Neuron:
    def __init__(self, nin):
        self.w = [Node(random.uniform(-1, 1), label=f'w{neuron_idx}') for neuron_idx in range(nin)]
        self.b = Node(random.uniform(-1, 1), label='b')
        
    def __repr__(self):
        return f'Neuron(nin={len(self.w)})'
    
    def __call__(self, x, ):
        act =  sum((wi*xi for wi, xi in zip(self.w, x))) + self.b
        out = act.tanh()
        return out
    
    def parameters(self):
        return self.w + [self.b]
    
class  Layer:
    def __init__(self, nin, nout):
        self._nin = nin
        self._nout = nout
        self.neurons = [Neuron(nin)  for _ in range(nout)]
        
    def  __repr__(self):
        return  f'Layer(nin={self._nin}, nout={self._nout})'
        
    def  __call__(self,  x):
        outs  = [n(x) for n in self.neurons]
        return outs[0] if len(outs)  ==  1  else outs
    
    def parameters(self):
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params
    
class MLP:
    def  __init__(self, nin, layers: list = [2], nout: int =  1):
        self.nin =  nin
        self.layers_dims =  layers
        self.nout = nout
        self.mlp_layers  = [nin] +  layers + [nout]
        for right_pointer in  range(1, len(self.mlp_layers)):
            self.mlp_layers[right_pointer - 1] = Layer(self.mlp_layers[right_pointer - 1], 
                                                       self.mlp_layers[right_pointer])
        self.mlp_layers.pop()
    def __repr__(self):
        return  f'MLP(nin={self.nin}, layers_dims={self.layers_dims}, nout={self.nout})'
    
    def __call__(self, x):
        for layer in  self.mlp_layers:
            x = layer(x)
        return x
    
    def parameters(self):
        params = []
        for layer in self.mlp_layers:
            params.extend(layer.parameters())
        return params

In [7]:
model  = MLP(3, [4,4],  1)
model

MLP(nin=3, layers_dims=[4, 4], nout=1)

In [8]:
xs  =  [
    [2.0,  3.0, -1.0],
    [3.0, -1.0,  0.5],
    [0.5, 1.0,  1.0],
    [1.0, 1.0,  -1.0]
    ]

ys  = [1.0,  -1.0, -1.0,  1.0]

y_pred = [model(x) for  x  in  xs]
y_pred_vals = [prediction.val for prediction in y_pred]
y_pred_vals
loss = sum([(yout  - ygt) ** 2 for  ygt, yout in zip(ys, y_pred)]) / len(y_pred_vals)
loss.val

1.2226277976863635

In [9]:
loss.backward()
for p in  model.parameters():
    p.val -= 0.001  * p.grad
    
y_pred = [model(x) for  x  in  xs]
y_pred_vals = [prediction.val for prediction in y_pred]
y_pred_vals
loss = sum([(yout  - ygt) ** 2 for  ygt, yout in zip(ys, y_pred)]) / len(y_pred_vals)
loss.val

1.2054957801787323

In [30]:
# F = a * b + c
# K = F * m + n
# K  = (a*b + c) * m + n
# P = K * s + h
# P = ((a*b + c) * m + n) * s + h
# P | a
# P | b
# P | c
# ...
# P | h

a = Node(1.0, label='a')
b = Node(3.0, label='b')
c = Node(5.0, label='c')
m = Node(1.0, label='m')
n = Node(10.0, label='n')
F = a*b + c
K = F * m + n


In [54]:
x1 = Node(1.0, label='x1')
w1 = Node(2.0, label='w1')

x2 = Node(1.0, label='x2')
w2 = Node(2.0, label='w2')


x1w1 = x1*w1
x2w2 = x2*w2

x1w1x2w2 = x1w1 + x2w2

result = x1w1x2w2.tanh()
result.backward()
result.print_grads()


tanh(((x1 * w1) + (x2 * w2)))  | grad: 1.0
((x1 * w1) + (x2 * w2))  | grad: 0.0013409506830258655
(x2 * w2)  | grad: 0.0013409506830258655
w2  | grad: 0.0013409506830258655
x2  | grad: 0.002681901366051731
(x1 * w1)  | grad: 0.0013409506830258655
w1  | grad: 0.0013409506830258655
x1  | grad: 0.002681901366051731


In [58]:
model = MLP(2, [4, 5], 1)
x = [20, 1]
res = model(x)
res.val

0.5393125111453734

In [65]:
res.backward()

res.print_grads()

tanh((((((((w0 * tanh(((((((w0 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b))) + (w0 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))_) + (w1 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))) + (w2 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))) + (w3 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))) + b))) + (w0 * tanh(((((((w0 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b))) + (w0 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))_) + (w1 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))) + (w2 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))) + (w3 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))) + b)))_) + (w1 * tanh(((((((w0 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b))) + (w0 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))_) + (w1 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))) + (w2 * tanh(((((w0 * w0_) + (w0 * w0_)_) + (w1 * w1_)) + b)))) + (w3 * tanh(((((w0 * w0_) + (w

In [70]:
a = Node(5, label='a')
b = Node(10, label='b')
c  = a * b

In [75]:

c = a * b + a
c._topological_ordering()

['a' | Value: 5,
 'b' | Value: 10,
 '(a * b)' | Value: 50,
 '((a * b) + a)' | Value: 55]

In [94]:
class Node:
    def __init__(self, val):
        self.val = val
        
    def __repr__(self):
        return f'Oбгортка(число={self.val})'
    
    def __add__(self, other):
        out = Node(self.val + other.val)
        out.left = self
        out.right = other
        return out
    
a = Node(7)
b = Node(6)
c = a + b
m = c + a
m, m.left, m.right

(Oбгортка(число=20), Oбгортка(число=13), Oбгортка(число=7))