In [1]:
import weakref

import numpy as np

def as_array(x):
    """Return ``x`` as an ``ndarray`` when it is a scalar, otherwise unchanged.

    ``Variable`` in this notebook only accepts ``np.ndarray`` data, so scalar
    results coming out of ``forward`` are wrapped before being stored.

    Args:
        x: Any value; only values for which ``np.isscalar`` is true are wrapped.

    Returns:
        ``np.array(x)`` for scalars, or ``x`` itself for everything else.
    """
    return np.array(x) if np.isscalar(x) else x

class Function:
    """Base class for operations applied to ``Variable`` objects.

    Subclasses override ``forward`` (raw arrays in, raw array or tuple of
    arrays out) and ``backward`` (output gradients in, input gradient or
    tuple of input gradients out).
    """

    def __call__(self, *inputs):
        """Run ``forward`` on the inputs' data and link the results to this function.

        Args:
            *inputs: ``Variable`` objects; each must expose ``.data`` and
                ``.generation``.

        Returns:
            A single ``Variable`` when ``forward`` produced one result,
            otherwise a list of ``Variable`` objects.
        """
        # Unwrap the raw data held by each input variable.
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        # A function's generation is that of its most recent input; it is
        # used as the sort key when the graph is walked backwards.
        self.generation = max([x.generation for x in inputs])

        for output in outputs:
            output.set_creator(self)

        self.inputs = inputs
        # Outputs are held through weak references, so this function does
        # not keep its own outputs alive.
        self.outputs = [weakref.ref(output) for output in outputs]

        # Return the list when there are several outputs, otherwise the
        # single output itself.
        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, *xs):
        """Compute the outputs from raw input arrays; must be overridden.

        Accepts any number of positional arguments because ``__call__``
        invokes it as ``forward(*xs)``.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError()

    def backward(self, *gys):
        """Compute input gradients from output gradients; must be overridden.

        Accepts any number of positional arguments because it is invoked as
        ``backward(*gys)`` during backpropagation.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError()

In [2]:
class Variable:
    """A node in the computation graph holding data and its gradient.

    Attributes are set in ``__init__``: ``data`` (an ``np.ndarray`` or
    ``None``), ``grad`` (``None`` until backpropagation assigns it),
    ``creator`` (the function that produced this variable, or ``None``) and
    ``generation`` (0 for a variable without a creator).
    """

    def __init__(self, data):
        """Store ``data`` and initialise the graph bookkeeping.

        Args:
            data: An ``np.ndarray`` or ``None``.

        Raises:
            TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError(f'{type(data)} is not supported')

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self):
        """Backpropagate from this variable through every function that led to it.

        If ``grad`` has not been set, it is seeded with ones shaped like
        ``data``. Gradients reaching the same variable along several paths
        are summed.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable without a creator has no graph behind it; the seeded
        # gradient is the whole result. Without this guard ``None`` would be
        # queued as a function and the walk below would fail.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once; keeping the list sorted by
            # generation lets ``pop()`` yield the latest generation first.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()

            # ``f.outputs`` holds weak references; call each to get the variable.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)

            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new object with ``x.grad + gx`` rather than
                    # using ``+=``, which would modify the existing gradient
                    # in place and could affect other holders of it.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

    def clear_grad(self):
        """Reset the gradient so the variable can be reused in a new backward pass."""
        self.grad = None

## Test

In [3]:
class Square(Function):
    """Squaring operation: ``y = x ** 2``."""

    def forward(self, x):
        """Return ``x`` squared."""
        return x ** 2

    def backward(self, gy):
        """Return ``2 * x * gy``, where ``x`` is the data of the first recorded input."""
        return 2 * self.inputs[0].data * gy
    
class Add(Function):
    """Two-input addition: ``y = x0 + x1``."""

    def forward(self, x0, x1):
        """Return the sum of the two inputs."""
        return x0 + x1

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        return (gy, gy)
    
class Exp(Function):
    """Exponential operation: ``y = exp(x)``."""

    def forward(self, x):
        """Return ``np.exp(x)``."""
        return np.exp(x)

    def backward(self, gy):
        """Return ``exp(x) * gy``, where ``x`` is the data of the first recorded input."""
        # ``Function.__call__`` stores its arguments as ``self.inputs`` (a
        # tuple); there is no ``self.input`` attribute, so index the tuple
        # the same way ``Square.backward`` does.
        x = self.inputs[0].data
        gx = np.exp(x) * gy
        return gx

    
def square(x):
    """Apply a fresh ``Square`` function to ``x`` and return its result."""
    op = Square()
    return op(x)

def add(x0, x1):
    """Apply a fresh ``Add`` function to ``x0`` and ``x1`` and return its result."""
    op = Add()
    return op(x0, x1)

def exp(x):
    """Apply a fresh ``Exp`` function to ``x`` and return its result."""
    op = Exp()
    return op(x)


def numerical_diff(f, x, eps=1e-4):
    """Approximate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Callable taking a ``Variable`` and returning an object with ``.data``.
        x: ``Variable`` at which to differentiate.
        eps: Half-width of the difference step.

    Returns:
        ``(f(x + eps).data - f(x - eps).data) / (2 * eps)``.
    """
    # Arithmetic on a 0-dim ndarray produces a NumPy scalar, which
    # ``Variable`` rejects with TypeError; ``as_array`` turns it back into
    # an ndarray and leaves higher-dimensional results untouched.
    x0 = Variable(as_array(x.data - eps))
    x1 = Variable(as_array(x.data + eps))
    y0 = f(x0)
    y1 = f(x1)
    return (y1.data - y0.data) / (2 * eps)

# Memory save mode

In [4]:
# Build a small graph in which x0 is used twice: y = x0 + (x0 + x1).
x0 = Variable(np.array(1.0))
x1 = Variable(np.array(1.0))

t = add(x0, x1)
y = add(x0, t)
y.backward()

# With this version of Variable every variable touched by backward() keeps
# its gradient, including the intermediate t and the output y.
print(y.grad, t.grad)
print(x0.grad, x1.grad)

1.0 1.0
2.0 1.0


We only need the gradients of `x0` and `x1`.<br>
We don't need the gradients of `t` and `y`.<br>

Let's make the `Variable` class keep only the gradients we actually need!

In [5]:
class Variable:
    """A node in the computation graph holding data and its gradient.

    Attributes are set in ``__init__``: ``data`` (an ``np.ndarray`` or
    ``None``), ``grad`` (``None`` until backpropagation assigns it),
    ``creator`` (the function that produced this variable, or ``None``) and
    ``generation`` (0 for a variable without a creator).
    """

    def __init__(self, data):
        """Store ``data`` and initialise the graph bookkeeping.

        Args:
            data: An ``np.ndarray`` or ``None``.

        Raises:
            TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError(f'{type(data)} is not supported')

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self, retain_grad=False):
        """Backpropagate from this variable through every function that led to it.

        If ``grad`` has not been set, it is seeded with ones shaped like
        ``data``. Gradients reaching the same variable along several paths
        are summed.

        Args:
            retain_grad: When False (the default), the gradient of every
                output of each processed function is reset to ``None`` once
                that function's input gradients have been computed, so only
                variables that are not outputs of a processed function keep
                a gradient. When True, all gradients are kept.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable without a creator has no graph behind it; the seeded
        # gradient is the whole result. Without this guard ``None`` would be
        # queued as a function and the walk below would fail.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once; keeping the list sorted by
            # generation lets ``pop()`` yield the latest generation first.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()

            # ``f.outputs`` holds weak references; call each to get the variable.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)

            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new object with ``x.grad + gx`` rather than
                    # using ``+=``, which would modify the existing gradient
                    # in place and could affect other holders of it.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            if not retain_grad:
                # The output gradients have been consumed by f.backward();
                # drop them so they are not kept in memory.
                for y in f.outputs:
                    y().grad = None

    def clear_grad(self):
        """Reset the gradient so the variable can be reused in a new backward pass."""
        self.grad = None

In [7]:
# Same graph as before: y = x0 + (x0 + x1).
x0 = Variable(np.array(1.0))
x1 = Variable(np.array(1.0))

t = add(x0, x1)
y = add(x0, t)
# retain_grad defaults to False, so gradients of function outputs are dropped.
y.backward()

# y and t are outputs of Add, so their gradients were cleared;
# x0 and x1 keep theirs.
print(y.grad, t.grad)
print(x0.grad, x1.grad)

None None
2.0 1.0


# Configuration

The `Config` class is used directly, without creating an instance.

In [66]:
class Config:
    """Settings holder read through class attributes; it is never instantiated here."""

    # When True, Function.__call__ records creators, inputs and outputs so
    # that backward() can walk the graph; when False that bookkeeping is skipped.
    enable_backprop = True

In [55]:
class Function:
    """Base class for operations applied to ``Variable`` objects.

    Subclasses override ``forward`` (raw arrays in, raw array or tuple of
    arrays out) and ``backward`` (output gradients in, input gradient or
    tuple of input gradients out). Graph bookkeeping is only performed while
    ``Config.enable_backprop`` is true.
    """

    def __call__(self, *inputs):
        """Run ``forward`` on the inputs' data and, if enabled, record the graph links.

        Args:
            *inputs: ``Variable`` objects; each must expose ``.data`` and
                ``.generation``.

        Returns:
            A single ``Variable`` when ``forward`` produced one result,
            otherwise a list of ``Variable`` objects.
        """
        # Unwrap the raw data held by each input variable.
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        if Config.enable_backprop:
            # Everything in this branch exists only to support backward();
            # skipping it means the inputs are not retained by this function.
            self.generation = max([x.generation for x in inputs])

            for output in outputs:
                output.set_creator(self)

            self.inputs = inputs
            # Outputs are held through weak references, so this function
            # does not keep its own outputs alive.
            self.outputs = [weakref.ref(output) for output in outputs]

        # Return the list when there are several outputs, otherwise the
        # single output itself.
        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, *xs):
        """Compute the outputs from raw input arrays; must be overridden.

        Accepts any number of positional arguments because ``__call__``
        invokes it as ``forward(*xs)``.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError()

    def backward(self, *gys):
        """Compute input gradients from output gradients; must be overridden.

        Accepts any number of positional arguments because it is invoked as
        ``backward(*gys)`` during backpropagation.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError()
        
        
class Square(Function):
    """Squaring operation: ``y = x ** 2``."""

    def forward(self, x):
        """Return ``x`` squared."""
        return x ** 2

    def backward(self, gy):
        """Return ``2 * x * gy``, where ``x`` is the data of the first recorded input."""
        return 2 * self.inputs[0].data * gy
    
def square(x):
    """Apply a fresh ``Square`` function to ``x`` and return its result."""
    op = Square()
    return op(x)

## Test

In [56]:
# Backprop enabled: the graph is recorded, so backward() reaches x.
Config.enable_backprop = True
x = Variable(np.ones((100, 100, 100)))
y = square(square(square(x)))
y.backward()
print(x.grad.shape)

# Backprop disabled: no graph is recorded and x never receives a gradient.
Config.enable_backprop = False
x = Variable(np.ones((100, 100, 100)))
y = square(square(square(x)))
print(x.grad)

# Restore the default. Otherwise the flag stays False for every later cell,
# and cells that call backward() outside a `with` block fail when the
# notebook is run top to bottom.
Config.enable_backprop = True

(100, 100, 100)
None


# Mode change with `with`

In [57]:
f = open('sample.txt', 'w')
f.write('hello world')
# close must be *called*; a bare `f.close` only evaluates to the bound
# method (which the notebook then echoes) and leaves the file open.
f.close()

In [58]:
# Same write as the previous cell, but the `with` statement closes the file
# automatically when the block exits.
with open('sample.txt', 'w') as f:
    f.write('hello world')

An exception may be raised inside the `with` block; when that happens, it is re-raised at the point of the **yield** in the generator. So we should wrap the **yield** in `try/finally` to make sure the clean-up code always runs.

In [59]:
import contextlib

@contextlib.contextmanager
def config_test():
    """Toy context manager that prints 'start' on entry and 'done' on exit."""
    print('start')
    try:
        # Control is handed to the body of the `with` block here.
        yield
    finally:
        # Runs whether the `with` body completed normally or raised.
        print('done')

# Output order: 'start' on entry, the body's message, then 'done' on exit.
with config_test():
    print('process...')

start
process...
done


In [60]:
import contextlib

@contextlib.contextmanager
def using_config(name, value):
    """Temporarily set ``Config.<name>`` to ``value`` for the duration of a ``with`` block.

    Args:
        name: Name of an existing attribute on ``Config``.
        value: Value the attribute takes inside the block.

    The value the attribute had on entry is put back on exit, including when
    the block raises.
    """
    previous = getattr(Config, name)
    setattr(Config, name, value)
    try:
        yield
    finally:
        # Always put the saved value back, even if the body raised.
        setattr(Config, name, previous)

In [62]:
with using_config('enable_backprop', False):
    x = Variable(np.array(2.0))
    y = square(x)
    
    # With backprop disabled, square() records no creator on y, so there is
    # no graph for y.backward() to walk. Variable.backward() itself does not
    # consult Config.

print(x.grad)
print(y.grad)

None
None


In [63]:
# With backprop enabled inside the block, the graph is recorded and
# backward() can propagate the gradient to x.
with using_config('enable_backprop', True):
    x = Variable(np.array(2.0))
    y = square(x)
    y.backward()

# y.grad is None because backward() clears output gradients by default.
print(x.grad)
print(y.grad)

4.0
None


### Make Config usage simpler

In [69]:
def no_grad():
    """Return a context manager that sets ``Config.enable_backprop`` to False inside the block."""
    manager = using_config('enable_backprop', False)
    return manager

In [72]:
# Forward computation only: no graph is recorded inside no_grad().
with no_grad():
    x = Variable(np.array(2.0))
    y = square(x)
    
# backward() was never called, so neither variable has a gradient.
print(x.grad)
print(y.grad)

None
None


In [73]:
# Outside any `with` block the current value of Config.enable_backprop
# applies; this cell needs it to be True so that y has a creator.
x = Variable(np.array(2.0))
y = square(x)
y.backward()

# d(x**2)/dx at x = 2 is 4; y.grad is None because backward() clears
# output gradients by default.
print(x.grad)
print(y.grad)

4.0
None
