# 지난 시간 복습
1. 가변 길이 인수(step11의 개선 편)
2. 가변 길이 인수(역전파)
3. 같은 변수 반복 사용

week1~4 복습을 따로 해서 생략

# 이번주에는 총 4가지를 배운다 (step15~18)
1. 복잡한 계산 그래프(이론)
2. 복잡한 계산 그래프(구현)
3. 메모리 관리와 순환 참조
4. 메모리 절약 모드

1,2 는 **_유향그래프의 위상 정렬_** 에 관한 내용  
3,4 는 파이썬 언어의 **_GC_** 에 관해서, 그리고 **_메모리 개선_** 을 구현  
특히 4에서는 `with no_grad()`와 비슷한 효과를 낼 수 있도록, `with` 구문을 이용하는 방법을 설명  

# Not Clone Coding

In [16]:
import numpy as np
import weakref
from typing import List, Tuple

def as_array(x):
    """Return ``x`` as an ndarray when it is a scalar, otherwise unchanged.

    Anything for which ``np.isscalar`` is true (Python numbers, NumPy scalar
    types such as ``np.float64``) is wrapped with ``np.array``; every other
    object, including existing ndarrays, is returned as the same object.
    """
    return np.array(x) if np.isscalar(x) else x

class Config:
    """Global switches shared by the whole framework."""
    # Backprop is enabled by default. This matches the Config defined later in
    # this file and the Function class of this cell, which always records the
    # computation graph.
    enable_backprop = True

class Variable:
    """Graph node holding an ndarray, its gradient and the function that made it.

    Attributes assigned in ``__init__``:
      data       -- the wrapped ``np.ndarray``.
      creator    -- function object that produced this variable, ``None`` for
                    a variable created directly.
      grad       -- gradient filled in by ``backward``; ``None`` before that.
      generation -- 0 for directly created variables, otherwise the creator's
                    generation + 1; ``backward`` uses it to order functions.
    """

    def __init__(self, data: np.ndarray):
        """Wrap ``data``.

        Raises:
            TypeError: if ``data`` is not an ``np.ndarray``.
        """
        # Check the type directly instead of going through truthiness:
        # `not data` raises ValueError for arrays with more than one element
        # and would let truthy non-array values (e.g. 2.0) skip the check.
        if not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)}은(는) 지원하지 않습니다.")
        self.data = data
        self.creator = None
        self.grad = None
        self.generation = 0

    def set_creator(self, func):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = self.creator.generation + 1

    def clear_grad(self):
        """Forget the stored gradient."""
        self.grad = None

    def backward(self, retain_grad=False):
        """Propagate gradients from this variable back through the graph.

        If ``self.grad`` is ``None`` it is seeded with ones shaped like
        ``self.data``. Functions are processed from the highest generation
        down, so a function runs only after every function that consumes its
        outputs has contributed its gradient.

        Args:
            retain_grad: When False (default), the gradient of every processed
                function's outputs is reset to ``None`` after it has been
                used, so only variables without a creator keep a gradient.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable without a creator has nothing upstream to propagate to.
        if self.creator is None:
            return

        funcs = []
        seen_func = set()

        def func_append(func):
            # Each function is queued once; re-sorting keeps the highest
            # generation at the end of the list, where pop() takes it.
            if func not in seen_func:
                funcs.append(func)
                seen_func.add(func)
                funcs.sort(key=lambda x: x.generation)

        func_append(self.creator)

        while funcs:
            f = funcs.pop()
            # f.outputs holds weak references, hence the call to dereference.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                # Build a new object on accumulation so a gradient shared
                # with another variable is never modified in place.
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    func_append(x.creator)

            if not retain_grad:
                for output in f.outputs:
                    output().grad = None

class Function:
    """Base class for differentiable operations on ``Variable`` objects.

    Subclasses implement ``forward`` (ndarrays in, ndarray or tuple of
    ndarrays out) and ``backward`` (output gradients in, input gradients out).
    """

    def __call__(self, *inputs: "Variable"):
        """Run ``forward`` on the inputs' data and wire the results into the graph.

        Returns:
            The single output ``Variable``, or a list of them when ``forward``
            produced more than one value.
        """
        xs = [x.data for x in inputs]
        # A function belongs to the generation of its latest input
        # (0 when called without inputs).
        self.generation = max((x.generation for x in inputs), default=0)

        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)

        outputs = [Variable(as_array(y)) for y in ys]
        for output in outputs:
            output.set_creator(self)

        # Outputs are held weakly: each output already references this
        # function through set_creator, so a strong reference back would
        # form a reference cycle.
        self.outputs = [weakref.ref(output) for output in outputs]
        self.inputs = inputs
        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, x):
        """Compute the outputs from raw ndarrays; must be overridden."""
        raise NotImplementedError()

    def backward(self, gy):
        """Compute the input gradients from output gradients; must be overridden."""
        raise NotImplementedError()

class Exp(Function):
    """Element-wise exponential, y = exp(x)."""

    def forward(self, x:np.array) -> np.array:
        y = np.exp(x)
        return y

    def backward(self, gy:np.array) -> np.array:
        # d/dx exp(x) = exp(x), evaluated at the data of the first input.
        x = self.inputs[0].data
        return gy * np.exp(x)
    
class Square(Function):
    """Element-wise square, y = x ** 2."""

    def forward(self, x:np.array) -> np.array:
        squared = x ** 2
        return squared

    def backward(self, gy:np.array) -> np.array:
        # d/dx x^2 = 2x, evaluated at the data of the first input.
        x = self.inputs[0].data
        return gy * 2 * x
    
class Add(Function):
    """Addition of two inputs, y = x0 + x1."""

    def forward(self, x0:np.array, x1:np.array) -> Tuple[np.array]:
        total = x0 + x1
        return (total,)

    def backward(self, gy:np.array) -> Tuple[np.array]:
        # Both inputs receive the output gradient unchanged.
        return (gy, gy)

def exp(x):
    """Apply a fresh ``Exp`` function object to ``x`` and return its result."""
    func = Exp()
    return func(x)

def square(x):
    """Apply a fresh ``Square`` function object to ``x`` and return its result."""
    func = Square()
    return func(x)

def add(x0, x1):
    """Apply a fresh ``Add`` function object to ``x0`` and ``x1`` and return its result."""
    func = Add()
    return func(x0, x1)

In [17]:
# y = (x^2)^2 + (x^2)^2 = 2 * x^4; the intermediate variable `a` feeds two
# separate square() calls, so its gradient has to be accumulated from both.
x = Variable(np.array(2.0))
a = square(x)
y = add(square(a), square(a))
y.backward()
print(y.data)  # 2 * 2^4 = 32
print(x.grad)  # dy/dx = 8 * x^3 = 64

32.0
64.0


In [18]:
# y = x0 + (x0 + x1): x0 is used twice, x1 once.
x0 = Variable(np.array(1.0))
x1 = Variable(np.array(1.0))
t = add(x0, x1)
y = add(x0, t)
y.backward()  # retain_grad defaults to False

# y and t are function outputs, so backward() reset their grads to None.
print(y.grad, t.grad)
# x0 and x1 have no creator and keep their gradients: dy/dx0 = 2, dy/dx1 = 1.
print(x0.grad, x1.grad)

None None
2.0 1.0


# 1. 복잡한 계산 그래프(이론&구현)

![fig1](figures/week5.PNG)

위 그림처럼 계산그래프가 복잡해지면, 역전파를 할 때 순서가 중요해진다.  
그걸 위해서, **_BFS 혹은 위상 정렬(topological sort)을 이용_**  
아래 구현에서는 각 함수에 generation(세대)을 기록해 두고, 세대가 큰 함수부터 꺼내서 역전파한다.

In [19]:
from typing import Tuple

def as_array(x):
    """Return ``x`` as an ndarray when it is a scalar, otherwise unchanged.

    Anything for which ``np.isscalar`` is true (Python numbers, NumPy scalar
    types such as ``np.float64``) is wrapped with ``np.array``; every other
    object, including existing ndarrays, is returned as the same object.
    """
    return np.array(x) if np.isscalar(x) else x

class Variable:
    """Graph node holding an ndarray, its gradient and the function that made it.

    Attributes assigned in ``__init__``:
      data       -- the wrapped ``np.ndarray``.
      grad       -- gradient filled in by ``backward``; ``None`` before that.
      creator    -- function object that produced this variable, ``None`` for
                    a variable created directly.
      generation -- 0 for directly created variables, otherwise the creator's
                    generation + 1; ``backward`` uses it to order functions.
    """

    def __init__(self, data: np.ndarray):
        """Wrap ``data``.

        Raises:
            TypeError: if ``data`` is not an ``np.ndarray``.
        """
        # Check the type directly instead of going through truthiness:
        # `not data` raises ValueError for arrays with more than one element
        # and would let truthy non-array values (e.g. 2.0) skip the check.
        if not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)}은(는) 지원하지 않습니다.")
        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    # The annotation is a string because Function is defined after this class.
    def set_creator(self, func: "Function"):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def clear_grad(self):
        """Forget the stored gradient."""
        self.grad = None

    def backward(self):
        """Propagate gradients from this variable back through the graph.

        If ``self.grad`` is ``None`` it is seeded with ones shaped like
        ``self.data``. Functions are processed from the highest generation
        down, so a function runs only after every function that consumes its
        outputs has contributed its gradient.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable without a creator has nothing upstream to propagate to.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Each function is queued once; re-sorting keeps the highest
            # generation at the end of the list, where pop() takes it.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()
            gys = [output.grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                # Build a new object on accumulation so a gradient shared
                # with another variable is never modified in place.
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

class Function:
    """Base class for differentiable operations on ``Variable`` objects.

    Subclasses implement ``forward`` (ndarrays in, ndarray or tuple of
    ndarrays out) and ``backward`` (output gradients in, input gradients out).
    """

    def __call__(self, *inputs: "Variable"):
        """Run ``forward`` on the inputs' data and wire the results into the graph.

        Returns:
            The single output ``Variable``, or a list of them when ``forward``
            produced more than one value.
        """
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        # A function belongs to the generation of its latest input.
        self.generation = max(x.generation for x in inputs)
        for output in outputs:
            output.set_creator(self)

        # Outputs are stored as ordinary (strong) references here, while each
        # output also references this function through set_creator.
        self.outputs = outputs
        self.inputs = inputs
        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, x):
        """Compute the outputs from raw ndarrays; must be overridden."""
        raise NotImplementedError()

    def backward(self, gy):
        """Compute the input gradients from output gradients; must be overridden."""
        raise NotImplementedError()

class Exp(Function):
    """Element-wise exponential, y = exp(x)."""

    def forward(self, x:np.array) -> np.array:
        y = np.exp(x)
        return y

    def backward(self, gy:np.array) -> np.array:
        # d/dx exp(x) = exp(x), evaluated at the data of the first input.
        x = self.inputs[0].data
        return gy * np.exp(x)
    
class Square(Function):
    """Element-wise square, y = x ** 2."""

    def forward(self, x:np.array) -> np.array:
        squared = x ** 2
        return squared

    def backward(self, gy:np.array) -> np.array:
        # d/dx x^2 = 2x, evaluated at the data of the first input.
        x = self.inputs[0].data
        return gy * 2 * x
    
class Add(Function):
    """Addition of two inputs, y = x0 + x1."""

    def forward(self, x0:np.array, x1:np.array) -> Tuple[np.array]:
        total = x0 + x1
        return (total,)

    def backward(self, gy:np.array) -> Tuple[np.array]:
        # Both inputs receive the output gradient unchanged.
        return (gy, gy)

def exp(x):
    """Apply a fresh ``Exp`` function object to ``x`` and return its result."""
    func = Exp()
    return func(x)

def square(x):
    """Apply a fresh ``Square`` function object to ``x`` and return its result."""
    func = Square()
    return func(x)

def add(x0, x1):
    """Apply a fresh ``Add`` function object to ``x0`` and ``x1`` and return its result."""
    func = Add()
    return func(x0, x1)

In [21]:
# y = (x^2)^2 + (x^2)^2 = 2 * x^4; the intermediate variable `a` feeds two
# separate square() calls, so its gradient has to be accumulated from both.
x = Variable(np.array(2.00))
a = square(x)
y = add(square(a), square(a))
y.backward()
print(y.data)  # 2 * 2^4 = 32
print(x.grad)  # dy/dx = 8 * x^3 = 64

32.0
64.0


# 3. 메모리 관리와 순환 참조

In [23]:
from typing import Tuple
import weakref

def as_array(x):
    """Return ``x`` as an ndarray when it is a scalar, otherwise unchanged.

    Anything for which ``np.isscalar`` is true (Python numbers, NumPy scalar
    types such as ``np.float64``) is wrapped with ``np.array``; every other
    object, including existing ndarrays, is returned as the same object.
    """
    return np.array(x) if np.isscalar(x) else x

class Variable:
    """Graph node holding an ndarray, its gradient and the function that made it.

    Attributes assigned in ``__init__``:
      data       -- the wrapped ``np.ndarray``.
      grad       -- gradient filled in by ``backward``; ``None`` before that.
      creator    -- function object that produced this variable, ``None`` for
                    a variable created directly.
      generation -- 0 for directly created variables, otherwise the creator's
                    generation + 1; ``backward`` uses it to order functions.
    """

    def __init__(self, data: np.ndarray):
        """Wrap ``data``.

        Raises:
            TypeError: if ``data`` is not an ``np.ndarray``.
        """
        # Check the type directly instead of going through truthiness:
        # `not data` raises ValueError for arrays with more than one element
        # and would let truthy non-array values (e.g. 2.0) skip the check.
        if not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)}은(는) 지원하지 않습니다.")
        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    # The annotation is a string because Function is defined after this class.
    def set_creator(self, func: "Function"):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def clear_grad(self):
        """Forget the stored gradient."""
        self.grad = None

    def backward(self):
        """Propagate gradients from this variable back through the graph.

        If ``self.grad`` is ``None`` it is seeded with ones shaped like
        ``self.data``. Functions are processed from the highest generation
        down, so a function runs only after every function that consumes its
        outputs has contributed its gradient.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable without a creator has nothing upstream to propagate to.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Each function is queued once; re-sorting keeps the highest
            # generation at the end of the list, where pop() takes it.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()
            # f.outputs holds weak references, hence the call to dereference.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                # Build a new object on accumulation so a gradient shared
                # with another variable is never modified in place.
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

class Function:
    """Base class for differentiable operations on ``Variable`` objects.

    Subclasses implement ``forward`` (ndarrays in, ndarray or tuple of
    ndarrays out) and ``backward`` (output gradients in, input gradients out).
    """

    def __call__(self, *inputs: "Variable"):
        """Run ``forward`` on the inputs' data and wire the results into the graph.

        Returns:
            The single output ``Variable``, or a list of them when ``forward``
            produced more than one value.
        """
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        # A function belongs to the generation of its latest input.
        self.generation = max(x.generation for x in inputs)
        for output in outputs:
            output.set_creator(self)

        # Outputs are held weakly: each output already references this
        # function through set_creator, so a strong reference back would
        # form a reference cycle.
        self.outputs = [weakref.ref(output) for output in outputs]
        self.inputs = inputs
        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, x):
        """Compute the outputs from raw ndarrays; must be overridden."""
        raise NotImplementedError()

    def backward(self, gy):
        """Compute the input gradients from output gradients; must be overridden."""
        raise NotImplementedError()

class Exp(Function):
    """Element-wise exponential, y = exp(x)."""

    def forward(self, x:np.array) -> np.array:
        y = np.exp(x)
        return y

    def backward(self, gy:np.array) -> np.array:
        # d/dx exp(x) = exp(x), evaluated at the data of the first input.
        x = self.inputs[0].data
        return gy * np.exp(x)
    
class Square(Function):
    """Element-wise square, y = x ** 2."""

    def forward(self, x:np.array) -> np.array:
        squared = x ** 2
        return squared

    def backward(self, gy:np.array) -> np.array:
        # d/dx x^2 = 2x, evaluated at the data of the first input.
        x = self.inputs[0].data
        return gy * 2 * x
    
class Add(Function):
    """Addition of two inputs, y = x0 + x1."""

    def forward(self, x0:np.array, x1:np.array) -> Tuple[np.array]:
        total = x0 + x1
        return (total,)

    def backward(self, gy:np.array) -> Tuple[np.array]:
        # Both inputs receive the output gradient unchanged.
        return (gy, gy)

def exp(x):
    """Apply a fresh ``Exp`` function object to ``x`` and return its result."""
    func = Exp()
    return func(x)

def square(x):
    """Apply a fresh ``Square`` function object to ``x`` and return its result."""
    func = Square()
    return func(x)

def add(x0, x1):
    """Apply a fresh ``Add`` function object to ``x0`` and ``x1`` and return its result."""
    func = Add()
    return func(x0, x1)

In [24]:
# y = (x^2)^2 + (x^2)^2 = 2 * x^4; the intermediate variable `a` feeds two
# separate square() calls, so its gradient has to be accumulated from both.
x = Variable(np.array(2.00))
a = square(x)
y = add(square(a), square(a))
y.backward()
print(y.data)  # 2 * 2^4 = 32
print(x.grad)  # dy/dx = 8 * x^3 = 64

32.0
64.0


# 4. 메모리 절약 모드

In [39]:
from typing import Tuple
import weakref
import contextlib

def as_array(x):
    """Return ``x`` as an ndarray when it is a scalar, otherwise unchanged.

    Anything for which ``np.isscalar`` is true (Python numbers, NumPy scalar
    types such as ``np.float64``) is wrapped with ``np.array``; every other
    object, including existing ndarrays, is returned as the same object.
    """
    return np.array(x) if np.isscalar(x) else x

@contextlib.contextmanager
def using_config(name, value):
    """Temporarily set ``Config.<name>`` to ``value`` for the duration of a ``with`` block.

    Args:
        name: Name of an existing attribute of ``Config``.
        value: Value the attribute holds inside the block.

    Raises:
        AttributeError: if ``Config`` has no attribute called ``name``.
    """
    # Use the attribute the caller named rather than a fixed one, so any
    # Config setting can be overridden through this helper.
    old_value = getattr(Config, name)
    setattr(Config, name, value)
    try:
        yield
    finally:
        # Restore the previous value even when the block raises.
        setattr(Config, name, old_value)

class Config:
    """Global switches shared by the whole framework."""
    # When True, Function.__call__ records creators, inputs and outputs so
    # that backward() can traverse the graph.
    enable_backprop = True

class Variable:
    """Graph node holding an ndarray, its gradient and the function that made it.

    Attributes assigned in ``__init__``:
      data       -- the wrapped ``np.ndarray``.
      grad       -- gradient filled in by ``backward``; ``None`` before that.
      creator    -- function object that produced this variable, ``None`` for
                    a variable created directly.
      generation -- 0 for directly created variables, otherwise the creator's
                    generation + 1; ``backward`` uses it to order functions.
    """

    def __init__(self, data: np.ndarray):
        """Wrap ``data``.

        Raises:
            TypeError: if ``data`` is not an ``np.ndarray``.
        """
        # Check the type directly instead of going through truthiness:
        # `not data` raises ValueError for arrays with more than one element
        # and would let truthy non-array values (e.g. 2.0) skip the check.
        if not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)}은(는) 지원하지 않습니다.")
        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    # The annotation is a string because Function is defined after this class.
    def set_creator(self, func: "Function"):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def clear_grad(self):
        """Forget the stored gradient."""
        self.grad = None

    def backward(self, retain_grad=False):
        """Propagate gradients from this variable back through the graph.

        If ``self.grad`` is ``None`` it is seeded with ones shaped like
        ``self.data``. Functions are processed from the highest generation
        down, so a function runs only after every function that consumes its
        outputs has contributed its gradient.

        Args:
            retain_grad: When False (default), the gradient of every processed
                function's outputs is reset to ``None`` after it has been
                used, so only variables without a creator keep a gradient.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable without a creator has nothing upstream to propagate to.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Each function is queued once; re-sorting keeps the highest
            # generation at the end of the list, where pop() takes it.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()
            # f.outputs holds weak references, hence the call to dereference.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                # Build a new object on accumulation so a gradient shared
                # with another variable is never modified in place.
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            # Runs once per processed function, inside the loop: the output
            # gradients of f have been consumed and can be released.
            if not retain_grad:
                for y in f.outputs:
                    y().grad = None

class Function:
    """Base class for differentiable operations on ``Variable`` objects.

    Subclasses implement ``forward`` (ndarrays in, ndarray or tuple of
    ndarrays out) and ``backward`` (output gradients in, input gradients out).
    """

    def __call__(self, *inputs: "Variable"):
        """Run ``forward`` on the inputs' data and, if enabled, record the graph.

        The graph links (generation, creators, inputs, outputs) are recorded
        only while ``Config.enable_backprop`` is true; otherwise the outputs
        are plain variables without a creator.

        Returns:
            The single output ``Variable``, or a list of them when ``forward``
            produced more than one value.
        """
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        if Config.enable_backprop:
            # A function belongs to the generation of its latest input.
            self.generation = max(x.generation for x in inputs)
            for output in outputs:
                output.set_creator(self)
            # Outputs are held weakly: each output already references this
            # function through set_creator, so a strong reference back would
            # form a reference cycle.
            self.outputs = [weakref.ref(output) for output in outputs]
            self.inputs = inputs
        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, x):
        """Compute the outputs from raw ndarrays; must be overridden."""
        raise NotImplementedError()

    def backward(self, gy):
        """Compute the input gradients from output gradients; must be overridden."""
        raise NotImplementedError()

class Exp(Function):
    """Element-wise exponential, y = exp(x)."""

    def forward(self, x:np.array) -> np.array:
        y = np.exp(x)
        return y

    def backward(self, gy:np.array) -> np.array:
        # d/dx exp(x) = exp(x), evaluated at the data of the first input.
        x = self.inputs[0].data
        return gy * np.exp(x)
    
class Square(Function):
    """Element-wise square, y = x ** 2."""

    def forward(self, x:np.array) -> np.array:
        squared = x ** 2
        return squared

    def backward(self, gy:np.array) -> np.array:
        # d/dx x^2 = 2x, evaluated at the data of the first input.
        x = self.inputs[0].data
        return gy * 2 * x
    
class Add(Function):
    """Addition of two inputs, y = x0 + x1."""

    def forward(self, x0:np.array, x1:np.array) -> Tuple[np.array]:
        total = x0 + x1
        return (total,)

    def backward(self, gy:np.array) -> Tuple[np.array]:
        # Both inputs receive the output gradient unchanged.
        return (gy, gy)

def exp(x):
    """Apply a fresh ``Exp`` function object to ``x`` and return its result."""
    func = Exp()
    return func(x)

def square(x):
    """Apply a fresh ``Square`` function object to ``x`` and return its result."""
    func = Square()
    return func(x)

def add(x0, x1):
    """Apply a fresh ``Add`` function object to ``x0`` and ``x1`` and return its result."""
    func = Add()
    return func(x0, x1)

def no_grad():
    """Return the ``using_config`` context manager that turns ``enable_backprop`` off.

    Intended for use as ``with no_grad(): ...``.
    """
    backprop_disabled = using_config('enable_backprop', False)
    return backprop_disabled

In [41]:
# Inside no_grad(), Config.enable_backprop is False, so Function.__call__ skips
# set_creator and does not store inputs/outputs: y is created without a creator.
with no_grad():
    x = Variable(np.array(2.0))
    y = square(x)