# Step 18 Memory Saving mode

## 18.1 Deleting Unnecessary Gradient

In [25]:
%run Codes/step17.ipynb

In [26]:
x0=Variable(np.array(1.0))
x1=Variable(np.array(1.0))
t=add(x0,x1)
y=add(x0,t)
y.backward()

print(y.grad, t.grad)
print(x0.grad, x1.grad)

1.0 1.0
2.0 1.0


Gradients of __t__ and __y__ are unnecessary, so we add __retain_grad__ to initialize it to False

In [27]:

import numpy as np
import weakref

class Variable:
    def __init__(self, data):
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError('{} is not supported'.format(type(data)))

        self.data = data
        self.grad = None
        self.creator = None
        self.generation=0 #number of generations

    def set_creator(self, func): #everytime the set_creator function is called, +1 to the num. of gen.
        self.creator = func
        self.generation= func.generation+1 #parent_generation+1

    def backward(self, retain_grad=False):
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        funcs = []
        seen_set=set()
        
        def add_func(f):
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)
        add_func(self.creator)
        
        while funcs:
            f = funcs.pop()
            gys = [output().grad for output in f.outputs] #prev: gys = [output.grad for output in f.outputs] 
            gxs = f.backward(*gys) 
            if not isinstance(gxs, tuple): 
                gxs=(gxs, )
            for x, gx in zip(f.inputs, gxs): 
                if x.grad is None:
                    x.grad=gx
                else:
                    x.grad=x.grad+gx 
            
                if x.creator is not None:
                    add_func(x.creator)
            
            if not retain_grad:
                for y in f.outputs:
                    y().grad=None #y is weakref
    def cleargrad(self):
        self.grad=None

def as_array(x):
    if np.isscalar(x):
        return np.array(x)
    return x

class Function:
    def __call__(self, *inputs):
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]
        
        self.generation=max([x.generation for x in inputs]) 

        
        for output in outputs:
            output.set_creator(self)
        self.inputs = inputs
        self.outputs = outputs
        self.outputs=[weakref.ref(output) for output in outputs] #added
        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, xs):
        raise NotImplementedError()

    def backward(self, gys):
        raise NotImplementedError()
        
class Add(Function):
    def forward(self, x0, x1):
        y=x0+x1
        return y
    def backward(self, gy):
        return gy, gy

def add(x0, x1):
    return Add()(x0, x1)    
    
class Square(Function):
    def forward(self, x):
        y=x**2
        return y
    def backward(self, gy):
        x=self.inputs[0].data # prev: x=self.input.data
        gx=2*x*gy
        return gx
    

def square(x):
    return Square()(x)

In [28]:
x0=Variable(np.array(1.0))
x1=Variable(np.array(1.0))
t=add(x0,x1)
y=add(x0,t)
y.backward()

print(y.grad, t.grad)
print(x0.grad, x1.grad)

None None
2.0 1.0


## 18.2 Function class

__inputs__ is used in the back propagation. However, since the inference process only includes feed-forward, the gradient is not necessary in this case. This can also save the memory. 

In [29]:
class Function:
    def __call__(self, *inputs):
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]
        self.generation=max([x.generation for x in inputs])         
        for output in outputs:
            output.set_creator(self)
        self.inputs = inputs
        self.outputs=[weakref.ref(output) for output in outputs] 
        return outputs if len(outputs) > 1 else outputs[0]

## 18.3 Mode Change using Config Class

In [30]:
class Config:
    enable_backprop=True

Backpropagation is activated only when __enable_backprop__ is true.

In [31]:
class Function:
    def __call__(self, *inputs):
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]
        
        if Config.enable_backprop:
            self.generation=max([x.generation for x in inputs])         
            for output in outputs:
                output.set_creator(self)
            self.inputs = inputs
            self.outputs=[weakref.ref(output) for output in outputs] 
        return outputs if len(outputs) > 1 else outputs[0]

## 18.4 Mode Change

In [32]:
Config.enable_backprop=True
x=Variable(np.ones((100,100,100)))
y=square(square(square(x)))
y.backward()

Config.enable_backprop=False
x=Variable(np.ones((100,100,100)))
y=square(square(square(x)))

## 18.5 Mode Change using __with__

In [39]:
import contextlib

@contextlib.contextmanager #decorator to understand the context
def config_test():
    print('start')
    try:
        yield 
    finally:
        print('done')
with config_test():
    print('process...')


start
process...
done


__name__ is string type, indicating the name of the Config attribute class.<br>
__getattr__ calls the attribute from the Config class.
__setattr__ sets the new value.

In [1]:
    
def using_config(name, value):
    old_value=getattr(Config, name)
    setattr(Config, name, value)
    try:
        yield
    finally:
        setattr(Config, name, old_value)


In [2]:
with using_config('enable_backprop',False):
    x=Variable(np.array(2.0))
    y=square(x)
    

AttributeError: __enter__

In [3]:
def no_grad():
    return using_config('enable_backprop', False)
with no_grad():
    x=Variable(np.array(2.0))
    y=square(x)

AttributeError: __enter__