AutoDiff 

In [0]:
import numpy as np

In [0]:
class lokisor(object):
  def __init__(self,data,parents=None,cop=None):
    self.data = np.array(data)
    self.cop = cop
    self.parents = parents
    self.grad = None


  def backward(self,grad):
    self.grad = grad
    if (self.cop == 'add'):
      self.parents[0].backward(grad)
      self.parents[1].backward(grad)


  def __add__(self,other):
    return lokisor(self.data + other.data,parents=[self,other],cop="add")
  def __repr__(self):
    return str(self.data.__repr__())
  def __str__(self):
    return str(self.data.__str__())


In [0]:
x = lokisor([1,2,3,4,5])
y = lokisor([2,2,2,2,2])

In [14]:
print(x.grad)

None


new

In [0]:
 class lokisor:

    def __init__(self, data, requires_grad=False):
        self.data = data

        if not isinstance(data, np.ndarray):
            self.data = np.array(data)
        
        # whether to run backpropagation or not
        self.requires_grad = requires_grad
        
        # tensor gradient
        self._grad = None
        
        # operation if this tensor was used in it
        self._grad_fn = None

    @property
    def shape(self):
        return self.data.shape

    @property
    def grad_fn(self):
        if not self.requires_grad:
            raise Exception('This tensor is not backpropagated')
        return self._grad_fn

    @property
    def grad(self):
        return self._grad

    def backward(self, grad=None):
        if not self.grad_fn:
            return False

        if grad is None and self._grad is None:

            # in case if this is last loss tensor
            grad = self.__class__(1., requires_grad=False)

        elif self.grad is not None:
            grad = self._grad

        if not self.requires_grad:
            raise Exception('This tensor is not backpropagated')

        self.grad_fn.backward(grad)
        return True


    def __str__(self):
        return f'lokisor({str(self.data)})'


    def add_grad(self, grad):
        if self._grad is None:
            self._grad = grad
        else:
            self._grad += grad


    def __add__(self, o): 
        if self.data is not None:
            self.data += o.data  
            return self
        self.data = o.data 
        return self


class Op:

    def forward(self):
        raise NotImplemented

    def backward(self, grad):
        raise NotImplemented

    def __call__(self, *args):

        self.out = self.forward(*args)
        self.out._grad_fn = self
        return self.out



class AddOp(Op):

    def forward(self, x: lokisor, y: lokisor):
        self.x = x
        self.y = y
        requires_grad = x.requires_grad or y.requires_grad
        return lokisor(x.data + y.data, requires_grad=requires_grad)


    def backward(self, grad):
        if self.x.requires_grad:
            if self.x.shape != grad.shape:
                axis = np.argmax(np.abs(np.array(self.x.shape) - 
                                 np.array(grad.shape)))
                self.x.add_grad(Tensor(grad.data.sum(axis=axis, 
                                                keepdims=True)))
            else:
                self.x.add_grad(grad)
            if self.x.grad_fn:
                self.x.backward()
        if self.y.requires_grad:
            if self.y.shape != grad.shape:
                axis = np.argmax(np.abs(np.array(self.y.shape) - 
                                        np.array(grad.shape)))
                self.y.add_grad(Tensor(grad.data.sum(axis=axis, 
                                                keepdims=True)))
            else:
                self.y.add_grad(grad)
            if self.y.grad_fn:
                self.y.backward()


class MulOp(Op):

    def forward(self, x: lokisor, y: lokisor):
        self.x = x
        self.y = y
        requires_grad = x.requires_grad or y.requires_grad
        return Tensor(x.data * y.data, requires_grad=requires_grad)


    def backward(self, grad):
        if self.x.requires_grad:
            print(self.x, self.x._grad, grad)
            self.x.add_grad(lokisor(grad.data * self.y.data, False))
            if self.x.grad_fn:
                self.x.backward()

        if self.y.requires_grad:
            self.y.add_grad(lokisor(grad.data * self.x.data, False))
            if self.y.grad_fn:
                self.y.backward()

In [0]:
class Layer:

    def forward(self):
        raise NotImplemented

    def backward(self, grad):
        raise NotImplemented

    def __call__(self, *args):
        return self.forward(*args)

class Sigmoid:

    def forward(self,x):
        self.x = x   
        return 1/(1+np.exp(-x))

    def backward(self, grad):
        grad_input = self.x*(1-self.x) * grad
        return grad_input

class Relu(Layer):

    def forward(self,x):
        self.x = x
        return np.maximum(np.zeros_like(x), x)

    def backward(self, grad):
        grad_input = (self.x > 0) * grad
        return grad_input

class SoftmaxCrossentropyWithLogits(Layer):

    def forward(self, x, y):
        self.x = x
        self.y = y

        exps = np.exp(x) 
        self.softmax = exps / np.sum(exps, axis=-1, keepdims=True)

        logits = self.softmax[np.arange(x.shape[0]),y]
        log_likelihood = -np.log(logits)
        loss = np.sum(log_likelihood) / x.shape[0]
        return loss

    def backward(self, grad=None):
        batch = self.x.shape[0]
        grad = self.softmax
        grad[np.arange(batch),self.y] -= 1
        grad = grad/batch
        return grad

class MSE(Layer):

    def forward(self, x, y):
        self.x = x
        self.y = y
        return ((x - y)**2) / (self.x.shape[0]*2)

    def backward(self, grad=None):
        # 1/2n * Sum(xi-yi)**2 
        # dx = 1/2n * Sum( x**2 -2*x*y + y**2) 
        # dx  = (2x - 2y) / 2*n = (x - y) / n
        return (self.x - self.y) / self.x.shape[0]

class Linear(Layer):

    def __init__(self, input, output, lr=0.0001):
        self.A = 2*np.random.random((input, output)) - 1
        self.b = 2*np.random.random((output)) - 1
        self.lr = lr

    def forward(self, x):
        self.x = x
        return np.dot(x,self.A) + self.b

    def backward(self, grad):
        b_grad = grad.mean(axis=0)*self.x.shape[0]
        A_grad = np.dot(self.x.T, grad)
        grad_input = np.dot(grad, self.A.T)

        self.A -= A_grad * self.lr
        self.b -= b_grad * self.lr

        return grad_input

In [17]:
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

class Model(Layer):

    def __init__(self, lr=0.00001):
        self.lr = lr
        self.layers = [
            Linear(784,100, lr=self.lr),
            Relu(),
            Linear(100,200, lr=self.lr),
            Relu(),
            Linear(200,10, lr=self.lr)        
        ]

    def forward(self,x):
        for l in self.layers:
            x = l(x)
        return x

    def backward(self, grad):
        for l in self.layers[::-1]:
            grad = l.backward(grad)

        return grad


simple = transforms.Compose([
    transforms.ToTensor(), # converts to [0,1] interval
])

ds = MNIST('./mnist', download=True, transform=simple)

ld = DataLoader(ds, batch_size=2, pin_memory=True, drop_last=True) 

mm = Model()

loss = SoftmaxCrossentropyWithLogits()

_loss_avg = 0 
for e in range(5):
    for i, (img, label) in enumerate(ld):
        x = img.view(2,-1).numpy()

        res = mm(x)

        _loss = loss(res, label.numpy())
        
        _loss_avg += _loss.mean()

        grad = loss.backward(1)

        mm.backward(grad)

        if i % 100 == 0:
            print(_loss_avg/100)
            _loss_avg = 0
            print('---------')
            
for i in range(10):

    img, target = ds[i]
    plt.imshow(img[0])

    plt.show()
    x = img.view(1,-1).numpy()

    res = mm(x)[0]
    pred = np.argmax(res)

    print(f'target: {target} predicted: {pred}' )

2.241831414161849
---------
149.66781099833986
---------
138.19709332459192
---------
130.19148339469066
---------
123.23586569367781
---------
117.82987674880738
---------
97.35586322584828
---------
116.90218699753723
---------
100.33794616210352
---------
101.22223727448495
---------
106.619960539918
---------
98.03499189609421
---------
99.13332963158018
---------
91.0437628763171
---------
101.49626194061375
---------
83.9015343571083
---------
97.56024870978277
---------
74.47854672190226
---------
86.03128405314733
---------
94.66531721470223
---------
83.36964534939166
---------
84.0211422564638
---------
81.68508505261016
---------
84.86909358183104
---------
86.3617672588303
---------
70.50233392564702
---------
88.96857629514145
---------
67.83448909568676
---------
72.69358263649218
---------
79.09924825445525
---------
73.57393911792644
---------
74.84857368217435
---------
70.8995036260008
---------
72.37660860512881
---------
68.32987695850483
---------
68.18927231423113

KeyboardInterrupt: ignored