In [2]:
import numpy as np
import weakref    # 약한 참조. 참조 카운트를 증가시키지 않는다

def as_array(x):
    """Promote scalars to ndarray; return any other object unchanged.

    Args:
        x: Any object. Values for which ``np.isscalar`` is true are wrapped
            with ``np.array``.

    Returns:
        An ndarray built from ``x`` when it is a scalar, otherwise ``x`` itself
        (the same object, not a copy).
    """
    return np.array(x) if np.isscalar(x) else x

class Variable:
    __array_priority__ = 200
    
    def __init__(self, data, name = None):
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError('{}은(는) 지원하지 않습니다.'.format(type(data)))
        
        self.data = data
        self.name = name
        self.grad = None    # gradient = 기울기
        self.creator = None
        self.generation = 0    # 세대 수를 기록하는 변수 (0, 1, 2, ...)
        
    def __len__(self):
        return len(self.data)
    
    def __repr__(self):
        if self.data is None:
            return 'variable(None)'
        p = str(self.data).replace('\n', '\n'+' '*9)
        return 'variable(' + p + ')'
    
    def set_creator(self, func):
        self.creator = func
        self.generation = func.generation + 1   # 세대를 기록한다 (부모 세대 + 1)
    
    def backward(self, retain_grad=False):
        if self.grad is None:
            self.grad = np.ones_like(self.data)
            
        funcs = []
        seen_set = set()
        
        def add_func(f):
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key = lambda x: x.generation)    # 리스트의 원소들을 x라 할때, x.gen을 key로 정렬
        
        add_func(self.creator)
        
        while funcs:
            f = funcs.pop()
            gys = [output().grad for output in f.outputs]    # 약한 참조 데이터(output) 에 접근하려면 obj()로.
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)
            
            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx
                        
                
                if x.creator is not None:
                    add_func(x.creator)
                    
            if not retain_grad:
                for y in f.outputs:
                    y().grad = None    # y는 약한 참조(weakref)
                    
    def cleargrad(self):
        self.grad = None
        
    @property    # x.shape() 대신 x.shape로 호출 가능
    def shape(self):
        return self.data.shape
    
    @property
    def ndim(self):     # 차원 수
        return self.data.ndim
    
    @property
    def size(self):    # 원소 수
        return self.data.size
    
    @property
    def dtype(self):    # 데이터 타입
        return self.data.dtype
        
class Function:
    """Base class for differentiable operations.

    Subclasses implement ``forward`` on raw arrays and ``backward`` on upstream
    gradients; ``__call__`` handles wrapping and graph bookkeeping.
    """

    def __call__(self, *inputs):
        """Apply the operation to ``inputs`` and return the resulting Variable(s).

        Each input is passed through ``as_variable``. ``forward`` receives the
        ``.data`` of every input and each result is wrapped in a ``Variable``.
        When ``Config.enable_backprop`` is true the function also records its
        generation, its inputs, and weak references to its outputs.

        Returns:
            A list of Variables when there is more than one output, otherwise
            the single output Variable.
        """
        variables = list(map(as_variable, inputs))

        results = self.forward(*(v.data for v in variables))
        if not isinstance(results, tuple):
            results = (results,)
        outputs = [Variable(as_array(r)) for r in results]

        if Config.enable_backprop:
            # Generation must be set first: set_creator reads it.
            self.generation = max(v.generation for v in variables)
            for out in outputs:
                out.set_creator(self)
            self.inputs = variables
            # Weak references, so the function does not keep its outputs alive.
            self.outputs = list(map(weakref.ref, outputs))

        if len(outputs) > 1:
            return outputs
        return outputs[0]

    def forward(self, xs):
        """Forward computation; must be overridden."""
        raise NotImplementedError()

    def backward(self, gys):
        """Backward computation; must be overridden."""
        raise NotImplementedError()
        
class Config:
    """Process-wide switches read by the framework."""

    # Function.__call__ records graph links only while this is True.
    enable_backprop = True
        
class Square(Function):
    """Squares its input: y = x ** 2."""

    def forward(self, x):
        """Return ``x ** 2``."""
        return x ** 2

    def backward(self, gy):
        """Return ``2 * x * gy`` for the input recorded at call time."""
        value = self.inputs[0].data
        return 2 * value * gy
    
def square(x):
    """Apply a fresh ``Square`` function to ``x`` and return its result."""
    return Square()(x)
    
class Exp(Function):
    """Exponential: y = exp(x)."""

    def forward(self, x):
        """Return ``np.exp(x)``."""
        return np.exp(x)

    def backward(self, gy):
        """Return ``np.exp(x) * gy`` for the input recorded at call time."""
        value = self.inputs[0].data
        return np.exp(value) * gy

def exp(x):
    """Apply a fresh ``Exp`` function to ``x`` and return its result."""
    f = Exp()
    return f(x)
        
class Add(Function):
    """Addition: y = x0 + x1."""

    def forward(self, x0, x1):
        """Return the sum as a one-element tuple."""
        total = x0 + x1
        return (total,)

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        return gy, gy
    
def add(x0, x1):
    """Return ``x0 + x1`` as a Variable; a scalar ``x1`` is first wrapped by ``as_array``."""
    rhs = as_array(x1)
    op = Add()
    return op(x0, rhs)

class Mul(Function):
    """Multiplication: y = x0 * x1."""

    def forward(self, x0, x1):
        """Return ``x0 * x1``."""
        return x0 * x1

    def backward(self, gy):
        """Return ``(gy * x1, gy * x0)`` for the inputs recorded at call time."""
        lhs = self.inputs[0].data
        rhs = self.inputs[1].data
        return gy * rhs, gy * lhs
    
def mul(x0, x1):
    """Return ``x0 * x1`` as a Variable; a scalar ``x1`` is first wrapped by ``as_array``."""
    rhs = as_array(x1)
    op = Mul()
    return op(x0, rhs)

class Neg(Function):
    """Negation: y = -x."""

    def forward(self, x):
        """Return ``-x``."""
        negated = -x
        return negated

    def backward(self, gy):
        """Return the negated upstream gradient."""
        gx = -gy
        return gx
    
def neg(x):
    """Apply a fresh ``Neg`` function to ``x`` and return its result."""
    f = Neg()
    return f(x)

class Sub(Function):
    """Subtraction: y = x0 - x1."""

    def forward(self, x0, x1):
        """Return ``x0 - x1``."""
        return x0 - x1

    def backward(self, gy):
        """Return ``(gy, -gy)``: the gradient for x0 and for x1 respectively."""
        gx0, gx1 = gy, -gy
        return gx0, gx1
    
def sub(x0, x1):
    """Return ``x0 - x1`` as a Variable; a scalar ``x1`` is first wrapped by ``as_array``."""
    rhs = as_array(x1)
    op = Sub()
    return op(x0, rhs)

def rsub(x0, x1):
    """Reflected subtraction: return ``x1 - x0``.

    Bound to ``Variable.__rsub__``, so ``x0`` is the Variable on the right of
    the operator and ``x1`` the left operand (wrapped by ``as_array`` if scalar).
    """
    lhs = as_array(x1)
    op = Sub()
    return op(lhs, x0)

class Div(Function):
    """Division: y = x0 / x1."""

    def forward(self, x0, x1):
        """Return ``x0 / x1``."""
        return x0 / x1

    def backward(self, gy):
        """Return the gradients for the numerator and the denominator.

        d(x0/x1)/dx0 = 1 / x1 and d(x0/x1)/dx1 = -x0 / x1**2, each scaled by ``gy``.
        """
        numer = self.inputs[0].data
        denom = self.inputs[1].data
        grad_numer = gy / denom
        grad_denom = gy * (-numer / denom ** 2)
        return grad_numer, grad_denom
    
def div(x0, x1):
    """Return ``x0 / x1`` as a Variable; a scalar ``x1`` is first wrapped by ``as_array``."""
    rhs = as_array(x1)
    op = Div()
    return op(x0, rhs)

def rdiv(x0, x1):
    """Reflected division: return ``x1 / x0``.

    Bound to ``Variable.__rtruediv__``, so ``x0`` is the Variable on the right
    of the operator and ``x1`` the left operand (wrapped by ``as_array`` if scalar).
    """
    lhs = as_array(x1)
    op = Div()
    return op(lhs, x0)

class Pow(Function):
    """Power with a fixed exponent: y = x ** c."""

    def __init__(self, c):
        """Remember the exponent ``c``; it is treated as a constant."""
        self.c = c

    def forward(self, x):
        """Return ``x ** c``."""
        return x ** self.c

    def backward(self, gy):
        """Return ``c * x ** (c - 1) * gy`` for the input recorded at call time."""
        base = self.inputs[0].data
        exponent = self.c
        return exponent * base ** (exponent - 1) * gy
    
def pow(x, c):
    """Return ``x ** c`` as a Variable, with ``c`` a constant exponent.

    Note: at module level this name shadows the built-in ``pow``.
    """
    op = Pow(c)
    return op(x)

def as_variable(obj):
    """Return ``obj`` itself if it is already a Variable, otherwise ``Variable(obj)``."""
    return obj if isinstance(obj, Variable) else Variable(obj)

import contextlib

@contextlib.contextmanager
def using_config(name, value):
    """Context manager that sets ``Config.<name>`` to ``value`` inside the ``with`` body.

    Args:
        name: Attribute name on ``Config``; it must already exist, since its
            current value is read first.
        value: Value to use while the context is active.
    """
    previous = getattr(Config, name)
    setattr(Config, name, value)
    try:
        yield
    finally:
        # Put the earlier value back even when the body raises.
        setattr(Config, name, previous)

def no_grad():
    """Return a context manager that sets ``Config.enable_backprop`` to False."""
    return using_config(name='enable_backprop', value=False)

'''
Variable.__method__ = method는

class Variable:
    ...
    
    def __method__(self, other):
        return method(self, other)
        
와 같다.
'''

# Operator overloads. The plain form handles ``Variable <op> other``; the
# reflected (``__r*__``) form handles ``other <op> Variable``.
# Addition and multiplication commute, so one function serves both forms.
Variable.__add__ = add
Variable.__radd__ = add
Variable.__mul__ = mul
Variable.__rmul__ = mul
# Subtraction and division need operand-swapping variants for the reflected form.
Variable.__sub__ = sub
Variable.__rsub__ = rsub
Variable.__truediv__ = div
Variable.__rtruediv__ = rdiv
# Unary minus and power with a constant exponent.
Variable.__neg__ = neg
Variable.__pow__ = pow

step 11

In [22]:
# Add two Variables held in a list.
xs = [Variable(np.array(2)), Variable(np.array(3))]
f = Add()
# Function.__call__ takes its inputs as separate positional arguments and
# returns a single Variable when there is one output, so unpack the list and
# use the result directly (passing the list itself raises TypeError in Variable).
y = f(*xs)
print(y.data)


5


step 12

In [49]:
# Call an Add instance directly with two positional Variables.
x0 = Variable(np.array(2))
x1 = Variable(np.array(3))
f = Add()
y = f(x0, x1)
print(y.data)
# The function object keeps the input Variables it was called with.
print(f.inputs[0].data, f.inputs[1].data)

5
2 3


In [50]:
# Same addition through the add() convenience wrapper.
x0 = Variable(np.array(2))
x1 = Variable(np.array(3))
y = add(x0, x1)
print(y.data)

5


step 13

In [11]:
# Backward pass through functions with two inputs.
x = Variable(np.array(2.0))
y = Variable(np.array(3.0))

z = add(square(x), square(y))    # z = x^2 + y^2
z.backward()
print("z {}, x.grad {}, y.grad {}".format(z.data, x.grad, y.grad))

# Reset the gradients before reusing x and y; backward() would otherwise
# add the new gradients onto the ones already stored.
x.cleargrad()
y.cleargrad()
t = add(exp(x), exp(y))    # t = e^x + e^y
t.backward()
print("t {}, x.grad {}, y.grad {}".format(t.data, x.grad, y.grad))

z 13.0, x.grad 4.0, y.grad 6.0
t 27.47459302211832, x.grad 7.38905609893065, y.grad 20.085536923187668


step 14

In [13]:
# The same Variable used more than once: its gradients are summed.
x = Variable(np.array(3.0))
y = add(x,x)
y.backward()
print(x.grad)

# Clear the stored gradient so the second computation starts from None.
x.cleargrad()
y = add(add(x,x),x)
y.backward()
print(x.grad)

2.0
3.0


step 16

In [27]:
# A graph where the intermediate `a` feeds two functions; backward() orders
# functions by generation so both branches are handled before a's creator.
x = Variable(np.array(2.0))
a = square(x)
y = add(square(a), square(a))    # y = (x^2)^2 + (x^2)^2 = 2x^4
y.backward()

print(y.data)
print(x.grad)

32.0
64.0


step 17

In [24]:
# Build a three-function graph repeatedly; x and y are rebound on every
# iteration, dropping the references to the previous graph.
for i in range(10):
    x = Variable(np.random.randn(10000))
    y = square(square(square(x)))

step 18

In [14]:
# With the default retain_grad=False, backward() resets the gradient of every
# function output (here y and t) to None; only the leaf inputs keep theirs.
x0 = Variable(np.array(2.0))
x1 = Variable(np.array(2.0))
t = add(x0, x1)
y = add(x0, t)
y.backward()

print(y.grad, t.grad)
print(x0.grad, x1.grad)

None None
2.0 1.0


In [3]:
with no_grad():    # when gradients are not needed (forward computation only)
    x = Variable(np.array(2.0))
    y = square(x)
    print(y.data)

4.0


step 19

In [13]:
# ndarray-like conveniences on Variable: the shape property and a multi-line repr.
x = Variable(np.array([[[1,2,3], [4,5,6], [7,8,9]], [[1,2,3], [4,5,6], [7,8,9]]]))
print(x.shape)
print(x)

(2, 3, 3)
variable([[[1 2 3]
           [4 5 6]
           [7 8 9]]
         
          [[1 2 3]
           [4 5 6]
           [7 8 9]]])


step 20

In [20]:
# Operator overloading: * and + on Variables dispatch to mul() and add().
a = Variable(np.array(3.0))
b = Variable(np.array(2.0))
c = Variable(np.array(1.0))

# Operator form of add(mul(a, b), c).
y = a * b + c
y.backward()
print(y)
print(a.grad)
print(b.grad)

variable(7.0)
2.0
3.0


step 21

In [21]:
# Variable + ndarray: the ndarray operand is wrapped into a Variable inside Function.__call__.
x = Variable(np.array(2.0))
y = x + np.array(33.0)
print(y)

variable(35.0)


In [15]:
# Variable with a Python float on the right: add()/mul() convert the scalar via as_array.
x = Variable(np.array(2.0))
y = x + 3.0
z = x * 3.0
print(y, z, sep='\n')

variable(5.0)
variable(6.0)


In [22]:
# Float on the left: handled by the reflected operator __rmul__.
x = Variable(np.array(2.0))
y = 3.0 * x + 1.0
print(y)

variable(7.0)


In [2]:
# ndarray on the left: Variable.__array_priority__ is what lets
# Variable.__radd__ handle this, so the result is a Variable.
x = Variable(np.array([1.0]))
y = np.array([2.3]) + x
print(y)

variable([3.3])


step 22

In [3]:
# Remaining operators: reflected/forward subtraction and division, and power.
x = Variable(np.array(2.0))
y1 = 2.0 - x    # __rsub__
y2 = x - 1.0    # __sub__
y3 = x / 4.0    # __truediv__
y4 = 8.0 / x    # __rtruediv__
y5 = x ** 3     # __pow__
print(y1)
print(y2)
print(y3)
print(y4)
print(y5)

variable(0.0)
variable(1.0)
variable(0.5)
variable(4.0)
variable(8.0)


In [6]:
# Backward through combined operators: y = (x + 3)^2, so dy/dx = 2(x + 3).
x = Variable(np.array(1.0))
y = (x + 3) ** 2
y.backward()

print(y)
print(x.grad)

variable(16.0)
8.0
