1/14/20; 1/16/20

* everyone should know their children
* include tests

In [100]:
import autograd.numpy as np 
from autograd import grad 
from autograd import elementwise_grad

In [60]:
"""
everything in PieTorch is a Tensor object.

a Tensor object may contain an op. it contains an op when 
the Tensor is created from an operation, for example C = A + B. in 
this case, A and B are Tensors, and the addition operation creates
C, a Tensor. C contains the addition op which allows it to compute
the partial derivatives dC/dA and dC/dB.

a Tensor object knows its children. if A + B = C, then C is A's 
child. we keep track of children because backpropagation starts at 
a leaf node, and the actual gradient for an update (dLoss/dX) must
be accumulated by traversing through its children.
"""

class Tensor(object):
    """Scalar node of a computation graph.

    Attributes:
        val: the node's value, stored as a float.
        parents: list of Tensors this node was computed from (empty for a
            Tensor that was created directly).
        forward: forward function handed in by the caller; stored only.
        grad: the gradient currently assigned to this node (starts at 0).
        name: optional label.
        op: op object whose ``backward()`` returns one gradient per parent.
        terminal: True when the node was not produced by an operation.
        children: list of Tensors that list this node among their parents.
    """

    def __init__(self, val=0, parents=None, forward=None,
                 name=None, op=None, terminal=True):
        """Create a node and register it as a child of each parent.

        Args:
            val: numeric value; converted with ``float``.
            parents: iterable of parent Tensors, or None for no parents.
            forward: forward function (stored, not called here).
            name: optional label.
            op: op object providing ``backward()``.
            terminal: whether the node is a leaf created directly.
        """
        self.val = float(val)
        # A literal ``[]`` default would be one list shared by every Tensor
        # created without parents; always build a fresh list instead.
        self.parents = [] if parents is None else list(parents)
        self.forward = forward  # forward function
        self.grad = 0  # the value of its own gradient
        self.name = name
        self.op = op
        self.terminal = terminal
        self.children = []  # list of Tensor objects

        # when Tensor is instantiated as result of an operation, it will have parents
        if self.parents:
            self._update_parent()

    def _update_parent(self):
        """Append this Tensor to the ``children`` list of each parent."""
        for parent in self.parents:
            parent.children.append(self)

    def backward(self):
        """Assign each parent its gradient from the op, then recurse upward.

        A terminal Tensor has no op and nothing to propagate, so it returns
        immediately.

        NOTE(review): each parent's ``grad`` is overwritten with the value
        returned by ``self.op.backward()``; it is neither multiplied by this
        node's own ``grad`` nor accumulated across several children.
        """
        if self.terminal:
            return

        # one gradient per parent, in the same order as self.parents
        gradients = self.op.backward()

        for i, parent in enumerate(self.parents):
            parent.grad = gradients[i]

        # recursively call backward() on its parents
        for parent in self.parents:
            parent.backward()

    def _update(self):
        """Subtract ``grad`` from ``val``; called by the optimizer."""
        self.val = self.val - self.grad

In [61]:
"""
ADD is an operation. when ADD is called, an _ADDER object is 
created and used to instantiate a Tensor object, which is 
then returned to the caller.
"""
class ADD(object):
    """Addition operation, used like a function: ``ADD(x, y)``.

    ``__new__`` does not return an ADD instance. It wraps the two operand
    Tensors in an ``_ADDER`` op and returns a new non-terminal Tensor whose
    value is the op's forward result and whose parents are ``[x, y]``.
    """

    def __new__(cls, x, y):  # https://stackoverflow.com/questions/53485171/how-to-return-objects-straight-after-instantiating-classes-in-python
        """Return the Tensor representing ``x + y``.

        Args:
            x: left operand Tensor.
            y: right operand Tensor.
        """
        # the first argument of __new__ is the class itself, hence ``cls``
        op = _ADDER(x, y)
        return Tensor(val=op.forward(), parents=[x, y], op=op, terminal=False)
        
"""
the ADDER object will contain methods/attributes for the ADD
operation. certain ADDER attributes and methods will be used
to instantiate a TENSOR object. it is this TENSOR object that
is subsequently returned.

we used the adder object to organize node state such as 
gradient. it is not necessary but it makes the code more 
readable.
"""
class _ADDER(object):
    def __init__(self, x, y):
        self.x = float(x.val)
        self.y = float(y.val)
        
    def forward(self):
        return self.x + self.y

    def f(self, x, y):
        return x + y

    def backward(self):
        # return a list of gradients
        self.df_dx = elementwise_grad(self.f, 0)
        self.df_dy = elementwise_grad(self.f, 1)
        return [self.df_dx(self.x, self.y), self.df_dy(self.x, self.y)]

class MULTIPLY(object):
    """Multiplication operation, used like a function: ``MULTIPLY(x, y)``.

    ``__new__`` does not return a MULTIPLY instance. It wraps the two operand
    Tensors in a ``_MULTIPLIER`` op and returns a new non-terminal Tensor
    whose value is the op's forward result and whose parents are ``[x, y]``.
    """

    def __new__(cls, x, y):
        """Return the Tensor representing ``x * y``.

        Args:
            x: left operand Tensor.
            y: right operand Tensor.
        """
        # the first argument of __new__ is the class itself, hence ``cls``
        op = _MULTIPLIER(x, y)  # bundle everything into a multiplier object, to be passed
        return Tensor(val=op.forward(), parents=[x, y], op=op, terminal=False)
        
class _MULTIPLIER(object):
    def __init__(self, x, y):
        self.x = float(x.val)
        self.y = float(y.val)
        
    def forward(self):
        return self.x * self.y

    def f(self, a, b):
        return a * b

    def backward(self):
        # partial derivatives https://github.com/HIPS/autograd/issues/437
        self.df_dx = elementwise_grad(self.f, 0)
        self.df_dy = elementwise_grad(self.f, 1)
        return [self.df_dx(self.x, self.y), self.df_dy(self.x, self.y)]

In [62]:
# build the graph F = (X + Y) * Z from three directly-created Tensors
X = Tensor(val=-2, name="X")
Y = Tensor(val=5, name="Y")
Q = ADD(X, Y)  # Q is a new Tensor with parents [X, Y]; X and Y each gain Q as a child
Z = Tensor(val=-4, name="Z")
F = MULTIPLY(Q, Z)  # F is a new Tensor with parents [Q, Z]; Q and Z each gain F as a child

updating parents:  3.0
updating parents:  -12.0


In [69]:
X.children[0].val

3.0

In [70]:
Z.children

[<__main__.Tensor at 0x10f2ad550>]

In [65]:
F.children

[]

In [66]:
F.val

-12.0

In [848]:
F.backward()

In [849]:
X.grad, Y.grad, Q.grad, Z.grad, F.grad

(array(1.), array(1.), -4.0, 3.0, 0)

In [852]:
class OPTIMIZER():
    """Applies one update step to every terminal Tensor it knows about."""

    def __init__(self, tensors=None):
        """Collect the Tensors to update.

        Args:
            tensors: optional iterable of Tensors. When omitted, every
                Tensor bound to a name in this module's globals is used.
        """
        if tensors is None:
            # find all Tensors which will be updated
            # https://stackoverflow.com/questions/633127/viewing-all-defined-variables
            # The class is named ``Tensor``; the previous ``TENSOR`` lookup
            # raised NameError. Iterate a snapshot of the globals' values.
            tensors = [v for v in list(globals().values())
                       if isinstance(v, Tensor)]
        self.ls_tensors = list(tensors)

    def step(self):
        """Call ``_update()`` on each collected Tensor that is terminal."""
        for T in self.ls_tensors:
            # if T is a terminal node, that means it is a learnable weight
            if T.terminal:
                T._update()

In [853]:
# the optimizer gathers its tensors from globals() when it is constructed;
# step() then calls _update() on each one whose `terminal` flag is set
O = OPTIMIZER()
O.step()

In [None]:
X.val, Y.val, Q.val, Z.grad, F.grad