In [1]:
"""try to use nbdev @jeremyhoward to write scripts"""
import numpy as np

In [2]:
from lib import layer_init

class Tensor:
    def __init__(self,child=None,h=1,w=1,weight=None,trainable=True,grad=None):
        if weight is None:
            weight = layer_init(h,w)
        self.weight = weight
        self.trainable = trainable
        self.grad = grad
        self.child = child

    def __call__(self,child):
        self.child = child

a = Tensor(weight=1.)
b = Tensor()
b(a)
a, b, b.child, a.weight

(<__main__.Tensor at 0x215040a0e48>,
 <__main__.Tensor at 0x215040a0f08>,
 <__main__.Tensor at 0x215040a0e48>,
 1.0)

In [3]:
class Act(Tensor):
    def __init__(self):
        super().__init__()
    
    def forward(self,x):
        raise NotImplementedError
    
    def backward(self,bpass):
        if self.child is not None:
            # backprop without assigning variable explicitly
            self.child.backward(np.multiply(self.grad,bpass))

In [4]:
class relu(Act):
    def __init__(self):
        super().__init__()
    
    def forward(self,x):
        out = np.maximum(x,0)
        self.grad = (out > 0).astype(np.float32)
        return out

x = np.arange(-1.,1.,0.5)
print("x: ",x)
xp = relu()
xp.forward(x), xp.grad

x:  [-1.  -0.5  0.   0.5]


(array([0. , 0. , 0. , 0.5]), array([0., 0., 0., 1.], dtype=float32))

In [5]:
class sigmoid(Act):
    def __init__(self):
        super().__init__()
    
    def forward(self,x):
        S = np.array(list(map(lambda x: 1/(1+np.exp(-x)), x)))
        self.grad = np.multiply(S, (1-S))
        return S

xpp = sigmoid()
xpp.forward(x), xpp.grad

(array([0.26894142, 0.37754067, 0.5       , 0.62245933]),
 array([0.19661193, 0.23500371, 0.25      , 0.23500371]))

In [6]:
class Layer(Tensor):
    def __init__(self,child=None):
        super().__init__()
        self.fpass = None
        # can refactor it as list
        self.child = child 

    def forward(self,x):
        raise NotImplementedError
    
    def backward(self,grad):
        raise NotImplementedError

c = Layer()
d = Layer()
d(c)
assert c == d.child
print(c.weight, c.trainable, c.grad)

[[0.86561971]] True None


In [7]:
class Linear(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self,x):
        self.fpass = x
        return x @ self.weight

    def backward(self,bpass):
        self.grad = self.fpass.T @ bpass
        # for backprop, not used yet
        # "child" not a list yet
        if self.child is not None:
            self.child.backward(bpass @ (self.weight.T))

a = Linear()
x = np.array([[0.5]])
xg = np.array([[0.1]])
a.weight,a.child, a.trainable, a.grad, a.forward(x), a.backward(xg), a.grad

(array([[0.4330174]]),
 None,
 True,
 None,
 array([[0.2165087]]),
 None,
 array([[0.05]]))

In [8]:
"""
if we declare:
    fn = Loss().mse

can we visited the Loss() object of "fn"
 to call "backward()" directly
"""

class Loss:
    """Take care of dimension problem (batch, input-D)"""
    def __init__(self,last_layer=None):
        self.grad = None
        self.child = last_layer
    
    def mse(self,y,yhat):
        loss = np.square(np.subtract(y,yhat))
        grad = np.multiply(2.,np.subtract(y,yhat))
        # for backprop
        self.grad = grad

        return loss.sum()
        # grad is negligible since we 
        #  have saved it in .grad
        #return loss.sum(), grad.mean()

    def backward(self):
        if self.grad is None:
            print("loss function hasn't been called yet")
            raise NotImplementedError
        if self.child is not None:
            self.child.backward(self.grad)

xx = np.array([0,1,2,3,4])
yy = xx + 1
Loss().mse(xx,yy)

5

In [9]:
# target
xt = np.array([[0.8]])

# establish layer
a = Linear()
loss = Loss(last_layer=a)
print(a.weight, a.grad)
out = a.forward(x)
print(out)
print(loss.mse(xt,out))
loss.backward()
print(a.weight, a.grad)
loss.grad

[[0.35782659]] None
[[0.17891329]]
0.385748695986333
[[0.35782659]] [[0.62108671]]


array([[1.24217341]])

In [11]:
class Optimizer:
    def __init__(self,model=[],lr=1e-4):
        self.lr = lr
        self.model = model
    
    # now, sequential only
    def sgd(self):
        for layer in self.model:
            layer.weight -= self.lr * layer.grad


print(a.weight, a.grad)
optim = Optimizer([a]).sgd
optim()
print(a.weight)

[[0.35782659]] [[0.62108671]]
[[0.35776448]]
