In [1]:
from itertools import accumulate
import numpy as np
import matplotlib.pyplot as plt

class Tensor():
    def __init__(self, value, requires_grad=True):
        self.value = np.array(value, dtype=float)
        self.requires_grad = requires_grad

        if requires_grad:
            self.grad = np.zeros(self.value.shape)
        
    def backwards(self, accumulated_gradient=None):
        if self.requires_grad:
            self.grad += accumulated_gradient

    def __add__(self, b):
        return Add(self, b)

    def __sub__(self, b):
        return Add(self, Neg(b))

    def __mul__(self, b):
        return Mul(self, b)

    def __truediv__(self, b):
        return Mul(self,  b) #** Tensor(-1)

    def __pow__(self, b):
        return Pow(self, b)

    def __matmul__(self, b):
        return Dot(self, b)

    def __gt__(self, b):
        return Tensor(self.value > b.value, requires_grad=False)

class Add(Tensor):
    def __init__(self, a, b):
        self.a, self.b = a, b
        self.value = a.value + b.value
        self.grad = np.zeros(self.value.shape)

    def backwards(self, accumulated_gradient=None):
        self.a.backwards(accumulated_gradient)
        self.b.backwards(accumulated_gradient)

class Neg(Tensor):
    def __init__(self, a):
        self.a = a
        self.value = -a.value
        self.grad = np.zeros(self.value.shape)
    
    def backwards(self, accumulated_gradient=None):
        self.a.backwards(-accumulated_gradient)

class Mul(Tensor):
    def __init__(self, a, b):
        self.a, self.b = a, b
        self.value = a.value * b.value
        self.grad = np.zeros(self.value.shape)

    def backwards(self, accumulated_gradient=None):
        self.a.backwards(self.b.value * accumulated_gradient)
        self.b.backwards(self.a.value * accumulated_gradient)

class Pow(Tensor):
    def __init__(self, a, b):
        self.a, self.b = a, b
        self.value = a.value ** b.value
        self.grad = np.zeros(self.value.shape)

    def backwards(self, accumulated_gradient=None):
        self.a.backwards(self.b.value * self.a.value ** (self.b.value-1) * accumulated_gradient)
        self.b.backwards(np.log(self.a.value) * self.a.value ** self.b.value * accumulated_gradient)

class Dot(Tensor):
    def __init__(self, a, b):
        self.a, self.b = a, b
        self.value = a.value @ b.value
        self.grad = np.zeros(self.value.shape)

    def backwards(self, accumulated_gradient=None):
        self.a.backwards(accumulated_gradient @ self.b.value.T)
        self.b.backwards(self.a.value.T @ accumulated_gradient)        

class Sum(Tensor):
    def __init__(self, a):
        self.a = a
        self.value = sum(a.value)
        self.grad = np.zeros(self.value.shape)

    def backwards(self, accumulated_gradient=None):
        self.a.backwards(np.ones(self.a.shape) * accumulated_gradient)

In [7]:
x = Tensor([[1],[2]])

m1 = Tensor(np.random.rand(3,2))

m2 = Tensor(np.random.rand(1,3))

target = Tensor(60,requires_grad=False)

In [8]:
lr = 0.0001

losses = []
for ix in range(100):
    h = m1 @ x
    h = h * (h > Tensor(0))
    y = m2 @ h

    loss = ((y - target) ** Tensor(2,requires_grad=False))

    loss = loss / Tensor(5, requires_grad=False)

    losses.append(loss.value)

    loss.backwards(np.ones(loss.value.shape))

    for m in [m1,m2]:
        m.value = m.value - lr * m.grad
        m.grad = np.zeros(m.value.shape)

  self.b.backwards(np.log(self.a.value) * self.a.value ** self.b.value * accumulated_gradient)


In [9]:
losses

[array([[17425.28875932]]),
 array([[17196.77262158]]),
 array([[16924.75707715]]),
 array([[16594.97252537]]),
 array([[16191.75107758]]),
 array([[15698.16163379]]),
 array([[15096.62564263]]),
 array([[14370.2112265]]),
 array([[13504.80490521]]),
 array([[12492.26831382]]),
 array([[11334.43438565]]),
 array([[10047.33446672]]),
 array([[8664.42262022]]),
 array([[7237.02639238]]),
 array([[5830.2941866]]),
 array([[4514.04734908]]),
 array([[3350.1893752]]),
 array([[2380.65353056]]),
 array([[1620.52777029]]),
 array([[1058.94400539]]),
 array([[666.61134717]]),
 array([[406.06622139]]),
 array([[240.53568565]]),
 array([[139.23383242]]),
 array([[79.11458107]]),
 array([[44.302765]]),
 array([[24.53032516]]),
 array([[13.46590378]]),
 array([[7.34418466]]),
 array([[3.98599125]]),
 array([[2.15553083]]),
 array([[1.16253217]]),
 array([[0.62573959]]),
 array([[0.33631569]]),
 array([[0.18056498]]),
 array([[0.09686723]]),
 array([[0.05193601]]),
 array([[0.02783401]]),
 array([[

In [146]:
m1.grad

array([[1., 2.],
       [2., 4.],
       [3., 6.]])

In [147]:
m2.grad

array([[ 5., 11., 17.]])

In [150]:
m1.value

array([[1, 2],
       [3, 4],
       [5, 6]])

In [97]:
h.value

array([[ 5],
       [11],
       [17]])

In [87]:
m1.value

array([[1, 2],
       [3, 4],
       [5, 6]])