# Operation code

In [1]:
from typing import Tuple, Union, List

import torch
from torch import Tensor

import numpy as np
from numpy import array

In [2]:
class Operation(object):

    def __init__(self):
        pass

    def forward(self, input_) -> Tuple[Tensor]:
        
        self.input_ = input_

        self.outputs = self._outputs()

        return self.outputs

    def backward(self, output_grad) -> Tuple[Tensor]:

        assert_same_shape(self.output, output_grad)

        self._input_grads(output_grad)

        assert_same_shape(self.input_, self.input_grad)

        return self.input_grads

    def _outputs(self) -> Tuple[Tensor]:
        raise NotImplementedError()

    def _input_grads(self, output_grad) -> Tuple[Tensor]:
        raise NotImplementedError()


class ParamOperation(Operation):

    def __init__(self, param: Tensor) -> Tensor:
        super().__init__()
        self.param = param

    def backward(self, output_grad) -> Tuple[Tensor]:

        assert_same_shapes(self.output, output_grad)

        self.input_grads = self._input_grads(output_grad)
        self.param_grad = self._param_grad(output_grad)

        assert_same_shapes(self.inputs, self.input_grads)

        return self.input_grads

    def _param_grad(self, output_grad) -> Tensor:
        raise NotImplementedError()

In [3]:
class Operation(object):

    def __init__(self,
                 n_in: int = 2,
                 n_out: int = 1):
        self.n_in = n_in
        self.n_out = n_out

    def forward(self, 
                inputs: Tuple[Tensor]) -> Tuple[Tensor]:
        
        assert len(inputs) == n_in
        
        self.inputs = inputs

        self.outputs = self._outputs()

        assert len(self.outputs) == n_out
        
        return self.outputs

    def backward(self, 
                 output_grads: Tuple[Tensor]) -> Tuple[Tensor]:

        assert_same_shapes(self.outputs, output_grads)

        self._input_grads(output_grads)

        assert_same_shapes(self.inputs, self.input_grads)

        return self.input_grads

    def _outputs(self) -> Tuple[Tensor]:
        raise NotImplementedError()

    def _input_grads(self,
                     output_grads: Tuple[Tensor]) -> Tuple[Tensor]:
        raise NotImplementedError()

In [4]:
class Mul(Operation):

    def __init__(self,
                 n_in: int = 1,
                 n_out: int = 1):
        self.n_in = n_in
        self.n_out = n_out

    def forward(self, 
                inputs: Tuple[Tensor]) -> Tuple[Tensor]:
        
        assert len(inputs) == n_in
        
        self.inputs = inputs

        self.outputs = self._outputs()

        assert len(self.outputs) == n_out
        
        return self.outputs

    def backward(self, 
                 output_grads: Tuple[Tensor]) -> Tuple[Tensor]:

        assert_same_shapes(self.outputs, output_grads)

        self._input_grads(output_grads)

        assert_same_shapes(self.inputs, self.input_grads)

        return self.input_grads

    def _outputs(self) -> Tuple[Tensor]:
        raise NotImplementedError()

    def _input_grads(self,
                     output_grads: Tuple[Tensor]) -> Tuple[Tensor]:
        raise NotImplementedError()

In [5]:
a = Tensor([1,2,3,4])
b = Tensor([2,3,4,5])

In [6]:
op1 = Add(2, 1)

NameError: name 'Add' is not defined

In [7]:
c = (a,b)

In [8]:
others = tuple(x for x in c if x != a)

RuntimeError: bool value of Tensor with more than one value is ambiguous

In [9]:
# without operations
torch.manual_seed(122418)
a1 = torch.randn(3,3)
w1 = torch.randn(3,3)

a2 = torch.randn(3,3)
w2 = torch.randn(3,3)

b1 = torch.mm(a1, w1)
b2 = torch.mm(a2, w2)

w3 = torch.randn(3,3)

c1 = b1 + b2
L = b2 * c1 * w3

In [10]:
L

tensor([[ 5.7283e+00, -3.1867e+01, -1.7938e-03],
        [-4.6408e-01, -7.3601e-01, -7.7959e-01],
        [ 1.2042e+00,  3.0170e+00, -2.3690e-01]])

#### with operations

```python
a1 = torch.randn(3,3)
w1 = torch.randn(3,3)

a2 = torch.randn(3,3)
w2 = torch.randn(3,3)

w3 = torch.randn(3,3)

# operations
wm1 = WeightMultiply(w1)
wm2 = WeightMultiply(w2)
add2 = Add(2, 1)
mult3 = Mult(3, 1)

b1 = wm1.forward(a1)
b2 = wm2.forward(a2)
c1 = add2.forward((b1, b2))
L = mult3.forward((c1, b2, w3))
```

#### backward

```python
c1_grad, b2_grad_1, w3_grad = mult3.backward((L_grad))
 
b1_grad, b2_grad_2 = add2.backward(c1_grad)

# combine these gradients to reflect the fact that b2 is used twice on the
# forward pass
b2_grad = b2_grad_1 + b2_grad_2

a2_grad = wm2.backward(b2_grad)

a1_grad = wm1.backward(b1_grad)
```

## Autograd

In [11]:
a = 3
a.__add__(4)

7

In [12]:
np.array(3)

array(3)

In [13]:
c = array(3)
c

array(3)

In [14]:
a = array([3,3])
print("Addition using '__add__':", a.__add__(4))
print("Addition using '+':", a + 4)

Addition using '__add__': [7 7]
Addition using '+': [7 7]


In [15]:
def tensor_sum(t: Tensor) -> Tensor:

    def _forward(t: Tensor):
        return t.data.sum()

    def t_grad(grad: np.ndarray) -> np.ndarray:
        return grad * np.ones_like(t.data)

    data = _forward(t)
    depends_on = [
        Dependency(t, t_grad),
    ]

    return Tensor(data, depends_on)

In [16]:
Addable = Union[float, int]

Numberable = Union[Addable, float, int]

def ensure_Number(num: Numberable):
    if isinstance(num, NumberWithGrad):
        return num
    else:
        return NumberWithGrad(num)        

class NumberWithGrad(object):
    
    def __init__(self, 
                 num: Addable,
                 depends_on: List[Addable] = None,
                 creation_op: str = ''):
        self.num = num
        self.grad = None
        self.depends_on = depends_on or []
        self.creation_op = creation_op

    def __add__(self, 
                other: Numberable):
        return NumberWithGrad(self.num + ensure_Number(other).num,
                              depends_on = [self, ensure_Number(other)],
                              creation_op = 'add')
    
    def __mul__(self,
                other: Numberable = None):

        return NumberWithGrad(self.num * ensure_Number(other).num,
                              depends_on = [self, ensure_Number(other)],
                              creation_op = 'mul')
    def sum(self):

        return special_sum(self)
    
    def backward(self, backward_grad: Addable = None):
        if backward_grad is None: # first time calling backward
            self.grad = 1
        else: 
            if self.grad is None:
                self.grad = backward_grad
            else:
                self.grad += backward_grad
        
        if(self.creation_op == "add"):
            self.depends_on[0].backward(self.grad)
            self.depends_on[1].backward(self.grad)    

        if(self.creation_op == "mul"):
            
            new = self.depends_on[1] * self.grad
            self.depends_on[0].backward(new.num)
            new = self.depends_on[0] * self.grad
            self.depends_on[1].backward(new.num)

In [17]:
a = NumberWithGrad(3)

In [20]:
b = a * 4
c = b + 3
d = (a + 2)
e = c * d 
e.backward()

In [21]:
a.grad

70

$$ d = (4a + 3) * (a + 2) = 4a^2 + 11a + 6 $$
$$ \frac{\partial d}{\partial a} = 8a + 11 $$

In [11]:
def forward(num: int):
    a = NumberWithGrad(num)
    b = a * 4
    c = b + 3
    return c * (a + 2)

In [12]:
(forward(3.01) - forward(2.99)) / 0.02

34.99999999999943

In [29]:
class Tensor:

    def __init__(self,
                 data: np.ndarray,
                 depends_on: List[Dependency] = None):
    
        self.data = data
        self.depends_on = depends_on or []
        self.shape = self.data.shape

    def __mul__(self, other: Tensorable) -> 'Tensor':
        return _mul(self, ensure_tensor(other))

    def __rmul__(self, other: Tensorable) -> 'Tensor':
        return _mul(ensure_tensor(other), self)
    
    def sum(self) -> 'Tensor':
        return tensor_sum(self)
    
def _mul(t1: Tensor, t2: Tensor) -> Tensor:

    # assertion specific to this function
    assert t1.shape == t2.shape

    # _forward method, defined for each function like this
    def _forward(t1: Tensor, t2: Tensor) -> np.ndarray:
        return t1.data * t2.data

    # gradient method for the first Tensor
    def t1_grad(grad: np.ndarray) -> np.ndarray:

        # assertion specific to the "_mul" method
        assert grad.shape == t2.shape
        grad = grad * t2.data
        return grad

    def t2_grad(grad: np.ndarray) -> np.ndarray:

        # assertion specific to the "_mul" method
        assert grad.shape == t1.shape
        grad = grad * t1.data
        return grad

    # compute the output and define the list of dependencies
    data = _forward(t1, t2)
    depends_on = [
        Dependency(t1, t1_grad),
        Dependency(t2, t2_grad)
    ]

    # return a Tensor based on these
    return Tensor(data, depends_on)
   
    
def _matmul(t1: Tensor, t2: Tensor) -> Tensor:

    assert t1.shape[1] == t2.shape[0]

    def _forward(t1: Tensor, t2: Tensor) -> np.ndarray:
        return t1.data @ t2.data

    def t1_grad(grad: np.ndarray) -> np.ndarray:
        grad = grad @ t2.data.T

        return grad

    def t2_grad(grad: np.ndarray) -> np.ndarray:
        grad = t1.data.T @ grad

        return grad

    data = _forward(t1, t2)
    depends_on = [
        Dependency(t1, t1_grad),
        Dependency(t2, t2_grad)
    ]
    return Tensor(data, depends_on)

In [133]:
from lincoln.autograd.tensor import Tensor

In [134]:
a = Tensor([[1,1], [1,1]])

In [135]:
b = a + 2

In [136]:
b = b * b * 3

In [137]:
c = b.sum()

In [138]:
c.backward()

In [139]:
a.grad

Tensor([[18. 18.]
 [18. 18.]])

In [140]:
from torch import Tensor

In [141]:
x = torch.ones(2, 2, requires_grad=True)

In [142]:
y = x + 2

In [143]:
y = y * y * 3
out = y.mean()

In [144]:
out

tensor(27., grad_fn=<MeanBackward1>)

In [145]:
out.backward()

In [146]:
x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])