Based on https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py

In [1]:
import numpy as np

In [2]:
class Tensor:
    def __init__(self, data, prev=(), op=None, *args, **kwargs):
        self.data = data
        self.prev = prev
        self.grad = 0
        self.op = op
        self.grad_fn = lambda x: None
    
    def backward(self, gradient=None):
        if gradient is None:
            gradient = np.ones_like(self.data)
        self.grad = gradient
        self.grad_fn(self.grad)
        for p in self.prev:
            p.backward(p.grad)

    def __repr__(self):
        return repr(self.data)
    
    def __add__(self, other):
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data + other.data, (self, other), op='+')
        def grad_fn(gradient):
            self.grad += gradient
            other.grad += gradient
        out.grad_fn = grad_fn
        return out
    
    def __mul__(self, other):
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data * other.data, (self, other), op='*')
        def grad_fn(gradient):
            self.grad += gradient * other.data
            other.grad += gradient * self.data
        out.grad_fn = grad_fn
        return out
    
    def __pow__(self, other):
        assert isinstance(other, (int, float))
        out = Tensor(self.data ** other, (self,), op='*')
        def grad_fn(gradient):
            self.grad += gradient * (other * (self.data ** (other-1)))
        out.grad_fn = grad_fn
        return out
    
    def __matmul__(self, other):
        out = Tensor(self.data @ other.data, (self, other), op='@')
        return out
    
    def __sub__(self, other):
        out = self + (-other)
        return out
    
    def __neg__(self):
        return self * -1
    
    def __truediv__(self, other):
        out = self * (other**-1)
        return out

In [43]:
inp = Tensor(np.ones((4,5),dtype=np.float32)*2)
w = Tensor(np.ones((4,5),dtype=np.float32)*3, requires_grad=True)
bi = Tensor(np.ones((1,5),dtype=np.float32)*4, requires_grad=True)

In [44]:
p = inp + bi

In [45]:
p

array([[6., 6., 6., 6., 6.],
       [6., 6., 6., 6., 6.],
       [6., 6., 6., 6., 6.],
       [6., 6., 6., 6., 6.]], dtype=float32)

In [46]:
p.backward()

In [47]:
p.grad

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]], dtype=float32)

In [48]:
o.grad

0

In [49]:
bi.grad

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]], dtype=float32)

In [50]:
w.grad

0

In [51]:
inp.grad

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]], dtype=float32)

In [10]:
import torch

In [53]:
a = torch.tensor(np.ones((4,5))*2., requires_grad=True)
b = torch.tensor(np.ones((4,5))*3., requires_grad=True)
d = torch.tensor(np.ones((1,5))*4., requires_grad=True)

In [54]:
e = a + d

In [55]:
e

tensor([[6., 6., 6., 6., 6.],
        [6., 6., 6., 6., 6.],
        [6., 6., 6., 6., 6.],
        [6., 6., 6., 6., 6.]], dtype=torch.float64, grad_fn=<AddBackward0>)

In [56]:
e.retain_grad()
c.retain_grad()

In [57]:
e.backward(torch.ones_like(e))

In [58]:
e.grad

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], dtype=torch.float64)

In [59]:
d.grad

tensor([[4., 4., 4., 4., 4.]], dtype=torch.float64)

In [60]:
c.grad

tensor([[300., 300., 300., 300., 300.],
        [300., 300., 300., 300., 300.],
        [300., 300., 300., 300., 300.],
        [300., 300., 300., 300., 300.]], dtype=torch.float64)

In [61]:
b.grad

In [62]:
a.grad

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], dtype=torch.float64)