## 1. Memory management
    - reference counting
    - generational garbage collection
    
### reference counting

In [1]:
class obj:
    """Empty placeholder class; instances only serve as targets for reference-count demos."""

def f(x):
    """Print ``x``; while the call runs, the parameter is an extra reference to the argument."""
    print(x)
    
# rc = reference count of the object created on the next line
a = obj() # rc : 1 (held by the name `a`)
f(a) # rc : 2 while f runs (the parameter `x` is a second reference)
# once f has returned, rc : 1 again

a = None # rebinding `a` drops the last reference, rc : 0

<__main__.obj object at 0x000002ACF9191A88>


In [2]:
a = obj()
b = obj()
c = obj()

# build a one-directional chain: a -> b -> c
a.b = b
b.c = c

# reference counts (objects are named after the variable that first held them)
# a.rc : 1  (the name `a`)
# b.rc : 2  (the name `b` and the attribute `a.b`)
# c.rc : 2  (the name `c` and the attribute `b.c`)

a = b = c = None

# a.rc : 0  (nothing refers to the first object any more)
# b.rc : 1  (still referenced through `a.b` of the first object)
# c.rc : 1  (still referenced through `b.c` of the second object)

# releasing the first object releases the second, which releases the third:
# eventually all gone

But reference counting alone cannot handle circular references.

In [3]:
a = obj()
b = obj()
c = obj()

# build a cycle: a -> b -> c -> a
a.b = b
b.c = c
c.a = a

# every object is referenced by its name and by one attribute
# a.rc : 2
# b.rc : 2
# c.rc : 2

a = b = c = None

# the names are gone, but each object is still referenced from inside the cycle,
# so no count ever reaches 0
# a.rc : 1
# b.rc : 1
# c.rc : 1

In [4]:
%whos

Variable   Type        Data/Info
--------------------------------
a          NoneType    None
autopep8   module      <module 'autopep8' from '<...>e-packages\\autopep8.py'>
b          NoneType    None
c          NoneType    None
f          function    <function f at 0x000002ACF908DF78>
json       module      <module 'json' from 'C:\\<...>\lib\\json\\__init__.py'>
obj        type        <class '__main__.obj'>


In practice, **generational garbage collection** is needed because a deep learning framework contains circular references: the `Variable` and `Function` classes reference each other!

## `weakref` module

A **weak reference** is a reference that doesn't increase the reference count of the object it points to.

In [6]:
import weakref
import numpy as np

a = np.array([1, 2, 3])  # strong reference to the array
b = weakref.ref(a)       # weak reference to the same array

# displaying the weakref object shows what it points to
b

<weakref at 0x000002ACF9198F48; to 'numpy.ndarray' at 0x000002ACF9198B20>

In [7]:
b()  # calling a weak reference returns the object it refers to

array([1, 2, 3])

In [12]:
a = None  # drop the only strong reference created by this notebook's code

# In a plain Python session the weak reference would now be reported as dead.
# Here it is still alive because the notebook keeps a hidden reference to the
# array -- presumably the output history (`Out` / `_`) filled when `b()` was
# displayed in an earlier cell; TODO confirm.
b

<weakref at 0x000002ACF9198F48; to 'numpy.ndarray' at 0x000002ACF9198B20>

**Now let's apply weakref to the `Variable` and `Function`!**

In [13]:
import weakref

class Function:
    """Base class for operations applied to ``Variable`` objects.

    Subclasses implement ``forward`` (raw input data in, raw output data out)
    and ``backward`` (output gradients in, input gradients out). Calling an
    instance runs ``forward`` and records the links needed for backpropagation.
    """

    def __call__(self, *inputs):
        """Apply the function to ``inputs`` and wire up the computation graph.

        Args:
            *inputs: objects exposing ``data`` and ``generation`` attributes
                (``Variable`` instances in this notebook).

        Returns:
            A single ``Variable`` when ``forward`` produces one value,
            otherwise a list with one ``Variable`` per produced value.
        """
        # Unwrap the raw data and run the actual computation.
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        # A function belongs to the latest generation among its inputs;
        # ``default=0`` keeps a call without inputs from crashing in ``max``.
        self.generation = max((x.generation for x in inputs), default=0)

        for output in outputs:
            output.set_creator(self)

        # Keep the inputs for the backward pass.
        self.inputs = inputs
        # Each output already points back at this function through its creator,
        # so the outputs are held weakly here to avoid a reference cycle.
        self.outputs = [weakref.ref(output) for output in outputs]

        # Return the list only when there are several outputs; a single output
        # is returned unwrapped.
        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, *xs):
        """Compute the outputs from raw input data; must be overridden.

        Accepts any number of positional arguments because ``__call__`` invokes
        it as ``self.forward(*xs)``.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError()

    def backward(self, *gys):
        """Compute input gradients from output gradients; must be overridden.

        Accepts any number of positional arguments because it is invoked as
        ``f.backward(*gys)`` during backpropagation.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError()

In [14]:
class Variable:
    """A node of the computation graph wrapping a numpy array.

    Attributes are set in ``__init__``: ``data`` (the wrapped array or None),
    ``grad`` (gradient, None until computed), ``creator`` (the function that
    produced this variable, None for a leaf) and ``generation`` (position in
    the graph, used to order the backward pass).
    """

    def __init__(self, data):
        """Wrap ``data``.

        Args:
            data: a ``np.ndarray`` or None.

        Raises:
            TypeError: if ``data`` is neither None nor a ``np.ndarray``.
        """
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError(f'{type(data)} is not supported')

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self):
        """Propagate gradients from this variable back through the graph.

        Seeds ``self.grad`` with ones when it is unset, then visits the creator
        functions from the highest generation downwards, accumulating the
        gradients they return into their inputs.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A leaf variable has no creator, so there is nothing to propagate.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once and keep the queue sorted so that
            # ``pop()`` always yields the function with the highest generation.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()

            # ``f.outputs`` holds callables (weak references) that return the
            # output variables.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)

            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):

                if x.grad is None:
                    x.grad = gx
                else:
                    # Accumulate with ``=`` and ``+`` rather than ``+=``: an
                    # in-place add would overwrite an array that may be shared
                    # with another variable's gradient.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

    def clear_grad(self):
        """Reset the gradient so the variable can be reused in a new backward pass."""
        self.grad = None

## Test

In [18]:
def as_array(x):
    """Return ``x`` wrapped in a numpy array when it is a scalar, otherwise ``x`` itself."""
    return np.array(x) if np.isscalar(x) else x

class Square(Function):
    """Squares its input: y = x ** 2."""

    def forward(self, x):
        """Return ``x`` raised to the second power."""
        squared = x ** 2
        return squared

    def backward(self, gy):
        """Return the input gradient ``2 * x * gy`` for the upstream gradient ``gy``."""
        source = self.inputs[0]
        return 2 * source.data * gy
    
def square(x):
    """Apply a fresh ``Square`` function to ``x`` and return its result."""
    op = Square()
    return op(x)

In [19]:
# Build the same three-step graph ten times on a large input. Each iteration
# rebinds `x` and `y`, dropping this cell's references to the previous graph.
# Since Function holds its outputs only weakly, the old graph should be
# reclaimable by reference counting alone -- measure memory usage to confirm.
for i in range(10):
    x = Variable(np.random.randn(10000))
    y = square(square(square(x)))