# Variable & Function

In [1]:
import weakref

import numpy as np

def as_array(x):
    """Wrap a scalar in an ``ndarray``; return anything else unchanged.

    Whether ``x`` counts as a scalar is decided by ``np.isscalar``.
    """
    return np.array(x) if np.isscalar(x) else x

class Function:
    """Base class for operations applied to ``Variable`` objects.

    Subclasses implement ``forward`` (working on raw data) and ``backward``
    (working on upstream gradients).
    """

    def __call__(self, *inputs):
        """Apply ``forward`` to the inputs' data and wrap the results.

        Args:
            *inputs: objects exposing ``data`` and ``generation`` attributes.

        Returns:
            A single ``Variable`` when ``forward`` produces one result,
            otherwise a list of ``Variable`` objects.
        """
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        if Config.enable_backprop:
            # A function sits at the generation of its most recent input.
            self.generation = max(x.generation for x in inputs)

            for output in outputs:
                output.set_creator(self)

            self.inputs = inputs
            # Outputs are held through weak references; each output is given
            # this function via set_creator above.
            self.outputs = [weakref.ref(output) for output in outputs]

        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, *xs):
        """Compute outputs from raw input data; subclasses must override.

        Accepts any number of positional arguments because ``__call__``
        unpacks the inputs, so an un-overridden stub always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError()

    def backward(self, *gys):
        """Compute input gradients from output gradients; subclasses must override."""
        raise NotImplementedError()

In [2]:
class Variable:
    """Holds an ``ndarray`` together with the bookkeeping used for backprop."""

    def __init__(self, data, name=None):
        """Store ``data`` and initialise the gradient/graph attributes.

        Args:
            data: an ``np.ndarray`` or ``None``.
            name: optional label, stored as given.

        Raises:
            TypeError: if ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f'{type(data)} is not supported')

        self.data = data
        self.name = name
        self.grad = None        # filled in by backward()
        self.creator = None     # function that produced this variable, if any
        self.generation = 0     # creator's generation + 1 once a creator is set

    def set_creator(self, func):
        """Record ``func`` as the producer and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self, retain_grad=False):
        """Propagate gradients from this variable back through its creators.

        ``self.grad`` is seeded with ones shaped like ``self.data`` when it is
        unset. Creator functions are then processed from the highest
        ``generation`` downwards and their input gradients accumulated.

        Args:
            retain_grad: when false, the ``grad`` of each processed
                function's outputs is reset to ``None`` afterwards.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable with no creator has nothing upstream to propagate to;
        # without this guard ``None`` would be queued and dereferenced below.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once, keeping the list sorted so pop()
            # yields the highest generation.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()

            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)

            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new object with ``+`` rather than ``+=`` so an
                    # array shared with another variable is not modified.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            if not retain_grad:
                for y in f.outputs:
                    y().grad = None

    def clear_grad(self):
        """Reset the stored gradient to ``None``."""
        self.grad = None

    def __len__(self):
        """Return ``len(self.data)``."""
        return len(self.data)

    def __repr__(self):
        """Return ``Variable(...)`` with continuation lines indented to align."""
        if self.data is None:
            return 'Variable(None)'
        p = str(self.data).replace('\n', '\n' + ' ' * 9)
        return 'Variable(' + p + ')'

    @property
    def shape(self):
        """Shape of the underlying data."""
        return self.data.shape

    @property
    def ndim(self):
        """Number of dimensions of the underlying data."""
        return self.data.ndim

    @property
    def size(self):
        """Number of elements in the underlying data."""
        return self.data.size

    @property
    def dtype(self):
        """Data type of the underlying data."""
        return self.data.dtype

## Functions

In [3]:
class Square(Function):
    """Squaring operation, ``y = x ** 2``."""

    def forward(self, x):
        """Return ``x`` raised to the power 2."""
        squared = x ** 2
        return squared

    def backward(self, gy):
        """Return ``2 * x * gy``, where ``x`` is the data of the first input."""
        return 2 * self.inputs[0].data * gy
    
class Add(Function):
    """Addition of two inputs, ``y = x0 + x1``."""

    def forward(self, x0, x1):
        """Return the sum of the two operands."""
        return x0 + x1

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        grads = (gy, gy)
        return grads
    
class Exp(Function):
    """Exponential operation, ``y = exp(x)``."""

    def forward(self, x):
        """Return ``np.exp(x)``."""
        return np.exp(x)

    def backward(self, gy):
        """Return ``exp(x) * gy``, where ``x`` is the data of the first input."""
        # Function.__call__ stores its inputs as ``self.inputs``; there is no
        # ``self.input`` attribute, so the first entry must be indexed.
        x = self.inputs[0].data
        gx = np.exp(x) * gy
        return gx

    
def square(x):
    """Apply a fresh ``Square`` function to ``x`` and return its result."""
    op = Square()
    return op(x)

def add(x0, x1):
    """Apply a fresh ``Add`` function to ``x0`` and ``x1`` and return its result."""
    op = Add()
    return op(x0, x1)

def exp(x):
    """Apply a fresh ``Exp`` function to ``x`` and return its result."""
    op = Exp()
    return op(x)


def numerical_diff(f, x, eps=1e-4):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Args:
        f: callable taking a ``Variable`` and returning an object with ``data``.
        x: object whose ``data`` attribute is the evaluation point.
        eps: half-width of the difference interval.

    Returns:
        ``(f(x + eps).data - f(x - eps).data) / (2 * eps)``.
    """
    # Arithmetic on a 0-d array yields a NumPy scalar, which Variable rejects;
    # np.asarray converts it back to an ndarray (and leaves ndarrays as they are).
    x0 = Variable(np.asarray(x.data - eps))
    x1 = Variable(np.asarray(x.data + eps))
    y0 = f(x0)
    y1 = f(x1)
    return (y1.data - y0.data) / (2 * eps)

## Config

In [4]:
import contextlib


class Config:
    """Class-level settings consulted by ``Function.__call__``."""

    # When true, Function.__call__ records generation, creator, inputs and
    # outputs so that backward() can run later.
    enable_backprop = True


@contextlib.contextmanager
def using_config(name, value):
    """Set ``Config.<name>`` to ``value`` for the duration of a ``with`` block.

    The value that was in place beforehand is put back on exit, including
    when the body raises.
    """
    previous = getattr(Config, name)
    setattr(Config, name, value)
    try:
        yield
    finally:
        setattr(Config, name, previous)


def no_grad():
    """Return a context manager that sets ``Config.enable_backprop`` to ``False``."""
    ctx = using_config('enable_backprop', False)
    return ctx

# Mul operator

In [5]:
class Mul(Function):
    """Multiplication of two inputs, ``y = x0 * x1``."""

    def forward(self, x0, x1):
        """Return the product of the two operands."""
        return x0 * x1

    def backward(self, gy):
        """Return ``(gy * x1, gy * x0)`` using the stored inputs' data."""
        lhs, rhs = (v.data for v in self.inputs[:2])
        return gy * rhs, gy * lhs
    
def mul(x0, x1):
    """Apply a fresh ``Mul`` function to ``x0`` and ``x1`` and return its result."""
    op = Mul()
    return op(x0, x1)

In [6]:
# Build y = a * b + c through the function-call API and back-propagate.
a = Variable(np.array(3.0))
b = Variable(np.array(2.0))
c = Variable(np.array(1.0))

y = add(mul(a, b), c)
y.backward()

# dy/da equals b's value and dy/db equals a's value.
print(y)
print(a.grad)
print(b.grad)

Variable(7.0)
2.0
3.0


# Operator overload - `__add__`, `__mul__`

In [7]:
class Variable:
    """Holds an ``ndarray`` plus backprop bookkeeping; supports ``+`` and ``*``."""

    def __init__(self, data, name=None):
        """Store ``data`` and initialise the gradient/graph attributes.

        Args:
            data: an ``np.ndarray`` or ``None``.
            name: optional label, stored as given.

        Raises:
            TypeError: if ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f'{type(data)} is not supported')

        self.data = data
        self.name = name
        self.grad = None        # filled in by backward()
        self.creator = None     # function that produced this variable, if any
        self.generation = 0     # creator's generation + 1 once a creator is set

    def set_creator(self, func):
        """Record ``func`` as the producer and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self, retain_grad=False):
        """Propagate gradients from this variable back through its creators.

        ``self.grad`` is seeded with ones shaped like ``self.data`` when it is
        unset. Creator functions are then processed from the highest
        ``generation`` downwards and their input gradients accumulated.

        Args:
            retain_grad: when false, the ``grad`` of each processed
                function's outputs is reset to ``None`` afterwards.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable with no creator has nothing upstream to propagate to;
        # without this guard ``None`` would be queued and dereferenced below.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once, keeping the list sorted so pop()
            # yields the highest generation.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()

            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)

            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new object with ``+`` rather than ``+=`` so an
                    # array shared with another variable is not modified.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            if not retain_grad:
                for y in f.outputs:
                    y().grad = None

    def __add__(self, other):
        """Implement ``self + other`` by delegating to ``add``."""
        return add(self, other)

    def __mul__(self, other):
        """Implement ``self * other`` by delegating to ``mul``."""
        return mul(self, other)

    def clear_grad(self):
        """Reset the stored gradient to ``None``."""
        self.grad = None

    def __len__(self):
        """Return ``len(self.data)``."""
        return len(self.data)

    def __repr__(self):
        """Return ``Variable(...)`` with continuation lines indented to align."""
        if self.data is None:
            return 'Variable(None)'
        p = str(self.data).replace('\n', '\n' + ' ' * 9)
        return 'Variable(' + p + ')'

    @property
    def shape(self):
        """Shape of the underlying data."""
        return self.data.shape

    @property
    def ndim(self):
        """Number of dimensions of the underlying data."""
        return self.data.ndim

    @property
    def size(self):
        """Number of elements in the underlying data."""
        return self.data.size

    @property
    def dtype(self):
        """Data type of the underlying data."""
        return self.data.dtype

In [8]:
# Same computation as before, now written with the overloaded * and + operators.
a = Variable(np.array(3.0))
b = Variable(np.array(2.0))
c = Variable(np.array(1.0))

y = a * b + c
y.backward()

print(y)
print(a.grad)
print(b.grad)

Variable(7.0)
2.0
3.0


# Operator overload - with `ndarray`

Convert a NumPy array operand to a `Variable`.

In [9]:
def as_variable(obj):
    """Return ``obj`` itself when it is a ``Variable``, otherwise ``Variable(obj)``."""
    return obj if isinstance(obj, Variable) else Variable(obj)

In [10]:
class Function:
    """Base class for operations applied to ``Variable`` objects.

    Subclasses implement ``forward`` (working on raw data) and ``backward``
    (working on upstream gradients).
    """

    def __call__(self, *inputs):
        """Apply ``forward`` to the inputs' data and wrap the results.

        Args:
            *inputs: operands; each is passed through ``as_variable`` first,
                so non-``Variable`` values are wrapped.

        Returns:
            A single ``Variable`` when ``forward`` produces one result,
            otherwise a list of ``Variable`` objects.
        """
        inputs = [as_variable(x) for x in inputs]

        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        if Config.enable_backprop:
            # A function sits at the generation of its most recent input.
            self.generation = max(x.generation for x in inputs)

            for output in outputs:
                output.set_creator(self)

            self.inputs = inputs
            # Outputs are held through weak references; each output is given
            # this function via set_creator above.
            self.outputs = [weakref.ref(output) for output in outputs]

        return outputs if len(outputs) > 1 else outputs[0]

    def forward(self, *xs):
        """Compute outputs from raw input data; subclasses must override.

        Accepts any number of positional arguments because ``__call__``
        unpacks the inputs, so an un-overridden stub always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError()

    def backward(self, *gys):
        """Compute input gradients from output gradients; subclasses must override."""
        raise NotImplementedError()

In [29]:
class Add(Function):
    """Addition of two inputs, ``y = x0 + x1``."""

    def forward(self, x0, x1):
        """Return the sum of the two operands."""
        return x0 + x1

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        grads = (gy, gy)
        return grads

class Mul(Function):
    """Multiplication of two inputs, ``y = x0 * x1``."""

    def forward(self, x0, x1):
        """Return the product of the two operands."""
        return x0 * x1

    def backward(self, gy):
        """Return ``(gy * x1, gy * x0)`` using the stored inputs' data."""
        first = self.inputs[0].data
        second = self.inputs[1].data
        return gy * second, gy * first
    
def add(x0, x1):
    """Apply a fresh ``Add`` function to ``x0`` and ``x1`` and return its result."""
    op = Add()
    return op(x0, x1)

In [15]:
# The ndarray on the right is wrapped by as_variable inside Function.__call__.
x = Variable(np.array(2.0))
y = x + np.array(3.0)
print(y)

Variable(5.0)


# Operator overload - with `float` and `int`

Convert `int` and `float` operands to `ndarray` with `as_array`, so that they can then be wrapped in a `Variable`.

In [30]:
def add(x0, x1):
    """Return ``x0 + x1`` as a ``Variable``.

    Both operands go through ``as_array`` so that a Python or NumPy scalar on
    either side is turned into an ``ndarray`` before ``Add`` wraps it;
    ``as_array`` returns any non-scalar operand unchanged.
    """
    x0 = as_array(x0)
    x1 = as_array(x1)
    return Add()(x0, x1)

def mul(x0, x1):
    """Return ``x0 * x1`` as a ``Variable``.

    Both operands go through ``as_array`` so that a Python or NumPy scalar on
    either side is turned into an ``ndarray`` before ``Mul`` wraps it;
    ``as_array`` returns any non-scalar operand unchanged.
    """
    x0 = as_array(x0)
    x1 = as_array(x1)
    return Mul()(x0, x1)

In [17]:
# A float on the right-hand side is converted by as_array inside add().
x = Variable(np.array(2.0))
y = x + 3.0
print(y)

Variable(5.0)


# Problem 1 : when first argument is `float` or `int`


In [19]:
# Raises TypeError at this point in the notebook: Variable has no __rmul__ yet.
y = 2.0 * x

TypeError: unsupported operand type(s) for *: 'float' and 'Variable'

1. Python first tries the left operand's method, `float.__mul__(2.0, x)`.
2. `float` does not know how to multiply by a `Variable`, so that call returns `NotImplemented`.
3. Python then looks for a special method on `x`, the operand to the right of `*`.
4. Because `x` is the right-hand operand, the reflected method `__rmul__` is tried, not `__mul__`.
5. The **Variable** class does not implement `__rmul__` yet, so a `TypeError` is raised.

In [31]:
class Variable:
    """Holds an ``ndarray`` plus backprop bookkeeping; supports ``+``/``*`` on either side."""

    def __init__(self, data, name=None):
        """Store ``data`` and initialise the gradient/graph attributes.

        Args:
            data: an ``np.ndarray`` or ``None``.
            name: optional label, stored as given.

        Raises:
            TypeError: if ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f'{type(data)} is not supported')

        self.data = data
        self.name = name
        self.grad = None        # filled in by backward()
        self.creator = None     # function that produced this variable, if any
        self.generation = 0     # creator's generation + 1 once a creator is set

    def set_creator(self, func):
        """Record ``func`` as the producer and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self, retain_grad=False):
        """Propagate gradients from this variable back through its creators.

        ``self.grad`` is seeded with ones shaped like ``self.data`` when it is
        unset. Creator functions are then processed from the highest
        ``generation`` downwards and their input gradients accumulated.

        Args:
            retain_grad: when false, the ``grad`` of each processed
                function's outputs is reset to ``None`` afterwards.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable with no creator has nothing upstream to propagate to;
        # without this guard ``None`` would be queued and dereferenced below.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once, keeping the list sorted so pop()
            # yields the highest generation.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()

            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)

            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new object with ``+`` rather than ``+=`` so an
                    # array shared with another variable is not modified.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            if not retain_grad:
                for y in f.outputs:
                    y().grad = None

    def __add__(self, other):
        """Implement ``self + other`` by delegating to ``add``."""
        return add(self, other)

    def __radd__(self, other):
        """Implement ``other + self``; the operands are passed as ``add(self, other)``."""
        return add(self, other)

    def __mul__(self, other):
        """Implement ``self * other`` by delegating to ``mul``."""
        return mul(self, other)

    def __rmul__(self, other):
        """Implement ``other * self``; the operands are passed as ``mul(self, other)``."""
        return mul(self, other)

    def clear_grad(self):
        """Reset the stored gradient to ``None``."""
        self.grad = None

    def __len__(self):
        """Return ``len(self.data)``."""
        return len(self.data)

    def __repr__(self):
        """Return ``Variable(...)`` with continuation lines indented to align."""
        if self.data is None:
            return 'Variable(None)'
        p = str(self.data).replace('\n', '\n' + ' ' * 9)
        return 'Variable(' + p + ')'

    @property
    def shape(self):
        """Shape of the underlying data."""
        return self.data.shape

    @property
    def ndim(self):
        """Number of dimensions of the underlying data."""
        return self.data.ndim

    @property
    def size(self):
        """Number of elements in the underlying data."""
        return self.data.size

    @property
    def dtype(self):
        """Data type of the underlying data."""
        return self.data.dtype

In [32]:
# With __rmul__ defined, a float may now appear on the left of *.
x = Variable(np.array(2.0))
y = 3.0 * x + 1.0
print(y)

Variable(7.0)


# Problem 2 : when `ndarray` is at left

In [35]:
# With an ndarray on the left, the printed result is an array holding a
# Variable rather than a Variable.
x = Variable(np.array(2.0))
y = np.array([2.0]) + x
print(y)

[Variable(4.0)]


In [36]:
# With the Variable on the left, Variable.__add__ runs and a Variable comes back.
x = Variable(np.array(2.0))
y = x + np.array([2.0])
print(y)

Variable([4.])


We want `x`'s `__radd__` to handle this expression instead of NumPy's `__add__`.

**We need operator priority (`__array_priority__`)!**

In [37]:
# Show the priority value carried by a plain ndarray.
print(np.array([2.0]).__array_priority__)

0.0


In [38]:
# Variable does not define __array_priority__ yet, so this raises AttributeError.
print(Variable(np.array([2.0])).__array_priority__)

AttributeError: 'Variable' object has no attribute '__array_priority__'

In [39]:
class Variable:
    """Holds an ``ndarray`` plus backprop bookkeeping; supports ``+``/``*`` on either side."""

    # Larger than ndarray's value (printed above as 0.0) so that the
    # Variable's reflected operators handle ``ndarray <op> Variable``.
    __array_priority__ = 200

    def __init__(self, data, name=None):
        """Store ``data`` and initialise the gradient/graph attributes.

        Args:
            data: an ``np.ndarray`` or ``None``.
            name: optional label, stored as given.

        Raises:
            TypeError: if ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f'{type(data)} is not supported')

        self.data = data
        self.name = name
        self.grad = None        # filled in by backward()
        self.creator = None     # function that produced this variable, if any
        self.generation = 0     # creator's generation + 1 once a creator is set

    def set_creator(self, func):
        """Record ``func`` as the producer and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self, retain_grad=False):
        """Propagate gradients from this variable back through its creators.

        ``self.grad`` is seeded with ones shaped like ``self.data`` when it is
        unset. Creator functions are then processed from the highest
        ``generation`` downwards and their input gradients accumulated.

        Args:
            retain_grad: when false, the ``grad`` of each processed
                function's outputs is reset to ``None`` afterwards.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable with no creator has nothing upstream to propagate to;
        # without this guard ``None`` would be queued and dereferenced below.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once, keeping the list sorted so pop()
            # yields the highest generation.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()

            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)

            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new object with ``+`` rather than ``+=`` so an
                    # array shared with another variable is not modified.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            if not retain_grad:
                for y in f.outputs:
                    y().grad = None

    def __add__(self, other):
        """Implement ``self + other`` by delegating to ``add``."""
        return add(self, other)

    def __radd__(self, other):
        """Implement ``other + self``; the operands are passed as ``add(self, other)``."""
        return add(self, other)

    def __mul__(self, other):
        """Implement ``self * other`` by delegating to ``mul``."""
        return mul(self, other)

    def __rmul__(self, other):
        """Implement ``other * self``; the operands are passed as ``mul(self, other)``."""
        return mul(self, other)

    def clear_grad(self):
        """Reset the stored gradient to ``None``."""
        self.grad = None

    def __len__(self):
        """Return ``len(self.data)``."""
        return len(self.data)

    def __repr__(self):
        """Return ``Variable(...)`` with continuation lines indented to align."""
        if self.data is None:
            return 'Variable(None)'
        p = str(self.data).replace('\n', '\n' + ' ' * 9)
        return 'Variable(' + p + ')'

    @property
    def shape(self):
        """Shape of the underlying data."""
        return self.data.shape

    @property
    def ndim(self):
        """Number of dimensions of the underlying data."""
        return self.data.ndim

    @property
    def size(self):
        """Number of elements in the underlying data."""
        return self.data.size

    @property
    def dtype(self):
        """Data type of the underlying data."""
        return self.data.dtype

In [40]:
# With __array_priority__ set on Variable, an ndarray on the left now yields a Variable.
x = Variable(np.array(2.0))
y = np.array([2.0]) + x
print(y)

Variable([4.])


In [41]:
# The Variable-on-the-left form gives the same result as before.
x = Variable(np.array(2.0))
y = x + np.array([2.0])
print(y)

Variable([4.])


# More operators

- `__neg__`
- `__sub__`
- `__rsub__`
- `__truediv__`
- `__rtruediv__`
- `__pow__`

In [42]:
class Neg(Function):
    """Negation, ``y = -x``."""

    def forward(self, x):
        """Return the negated input."""
        negated = -x
        return negated

    def backward(self, gy):
        """Return the negated upstream gradient."""
        gx = -gy
        return gx
    
def neg(x):
    """Apply a fresh ``Neg`` function to ``x`` and return its result."""
    op = Neg()
    return op(x)

# Attach neg as the unary-minus operator of the existing Variable class.
Variable.__neg__ = neg

In [43]:
# Unary minus on a Variable.
x = Variable(np.array(2.0))
y = -x
print(y)

Variable(-2.0)


In [48]:
class Sub(Function):
    """Subtraction of two inputs, ``y = x0 - x1``."""

    def forward(self, x0, x1):
        """Return ``x0`` minus ``x1``."""
        difference = x0 - x1
        return difference

    def backward(self, gy):
        """Return ``gy`` for the first input and ``-gy`` for the second."""
        grads = (gy, -gy)
        return grads
    
def sub(x0, x1):
    """Return ``x0 - x1`` as a ``Variable``.

    Both operands go through ``as_array`` so that a Python or NumPy scalar on
    either side is turned into an ``ndarray`` before ``Sub`` wraps it;
    ``as_array`` returns any non-scalar operand unchanged.
    """
    x0 = as_array(x0)
    x1 = as_array(x1)
    return Sub()(x0, x1)

# Attach sub as the binary-minus operator of the existing Variable class.
Variable.__sub__ = sub

In [49]:
def rsub(x0, x1):
    """Return ``x1 - x0``: subtraction with the operands swapped.

    ``x1`` is passed through ``as_array`` before being handed to ``Sub``.
    """
    left = as_array(x1)
    return Sub()(left, x0)

# Attach rsub as the reflected minus operator (used for ``other - variable``).
Variable.__rsub__ = rsub

In [50]:
# Subtraction with the Variable on the right (__rsub__) and on the left (__sub__).
x = Variable(np.array(2.0))
y1 = 2.0 - x
y2 = x - 1.0
print(y1)
print(y2)

Variable(0.0)
Variable(1.0)


In [51]:
class Div(Function):
    """Division of two inputs, ``y = x0 / x1``."""

    def forward(self, x0, x1):
        """Return ``x0`` divided by ``x1``."""
        return x0 / x1

    def backward(self, gy):
        """Return ``(gy / x1, gy * (-x0 / x1**2))`` using the stored inputs' data."""
        numerator = self.inputs[0].data
        denominator = self.inputs[1].data
        grad_numerator = gy / denominator
        grad_denominator = gy * (-numerator / denominator**2)
        return grad_numerator, grad_denominator
    
def div(x0, x1):
    """Return ``x0 / x1`` as a ``Variable``.

    Both operands go through ``as_array`` so that a Python or NumPy scalar on
    either side is turned into an ``ndarray`` before ``Div`` wraps it;
    ``as_array`` returns any non-scalar operand unchanged.
    """
    x0 = as_array(x0)
    x1 = as_array(x1)
    return Div()(x0, x1)

def rdiv(x0, x1):
    """Return ``x1 / x0``: division with the operands swapped.

    ``x1`` is passed through ``as_array`` before being handed to ``Div``.
    """
    numerator = as_array(x1)
    return Div()(numerator, x0)

# Attach div and rdiv as the / operator and its reflected form on Variable.
Variable.__truediv__ = div
Variable.__rtruediv__ = rdiv

In [53]:
# Division with the Variable on the right (__rtruediv__) and on the left (__truediv__).
x = Variable(np.array(2.0))
y1 = 1.0 / x
y2 = x / 1.0
print(y1)
print(y2)

Variable(0.5)
Variable(2.0)


In [56]:
class Pow(Function):
    """Power with a fixed exponent, ``y = x ** c``."""

    def __init__(self, c):
        """Remember the exponent ``c``; it is not treated as a graph input."""
        self.c = c

    def forward(self, x):
        """Return ``x`` raised to the stored exponent."""
        return x ** self.c

    def backward(self, gy):
        """Return ``c * x ** (c - 1) * gy``, where ``x`` is the first input's data."""
        base = self.inputs[0].data
        exponent = self.c
        return exponent * base ** (exponent - 1) * gy
    
def pow(x, c):
    """Apply ``Pow(c)`` to ``x`` and return its result.

    Note: this name replaces the built-in ``pow`` in the current namespace.
    """
    op = Pow(c)
    return op(x)

# Attach pow as the ** operator of the existing Variable class.
Variable.__pow__ = pow

In [57]:
# Raise a Variable to an integer power.
x = Variable(np.array(2.0))
y = x ** 3
print(y)

Variable(8.0)


# Apply operators to `Variable`

In [58]:
class Variable:
    """Holds an ``ndarray`` plus backprop bookkeeping, with arithmetic operators."""

    # Larger than ndarray's priority so that the Variable's reflected
    # operators handle ``ndarray <op> Variable``. An earlier definition of
    # this class set it; leaving it out here would bring back the
    # ndarray-on-the-left problem.
    __array_priority__ = 200

    def __init__(self, data, name=None):
        """Store ``data`` and initialise the gradient/graph attributes.

        Args:
            data: an ``np.ndarray`` or ``None``.
            name: optional label, stored as given.

        Raises:
            TypeError: if ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f'{type(data)} is not supported')

        self.data = data
        self.name = name
        self.grad = None        # filled in by backward()
        self.creator = None     # function that produced this variable, if any
        self.generation = 0     # creator's generation + 1 once a creator is set

    def set_creator(self, func):
        """Record ``func`` as the producer and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self, retain_grad=False):
        """Propagate gradients from this variable back through its creators.

        ``self.grad`` is seeded with ones shaped like ``self.data`` when it is
        unset. Creator functions are then processed from the highest
        ``generation`` downwards and their input gradients accumulated.

        Args:
            retain_grad: when false, the ``grad`` of each processed
                function's outputs is reset to ``None`` afterwards.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable with no creator has nothing upstream to propagate to;
        # without this guard ``None`` would be queued and dereferenced below.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once, keeping the list sorted so pop()
            # yields the highest generation.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()

            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)

            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new object with ``+`` rather than ``+=`` so an
                    # array shared with another variable is not modified.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            if not retain_grad:
                for y in f.outputs:
                    y().grad = None

    def __add__(self, other):
        """Implement ``self + other`` by delegating to ``add``."""
        return add(self, other)

    def __radd__(self, other):
        """Implement ``other + self``; the operands are passed as ``add(self, other)``."""
        return add(self, other)

    def __sub__(self, other):
        """Implement ``self - other`` by delegating to ``sub``."""
        return sub(self, other)

    def __rsub__(self, other):
        """Implement ``other - self`` by delegating to ``rsub``."""
        return rsub(self, other)

    def __mul__(self, other):
        """Implement ``self * other`` by delegating to ``mul``."""
        return mul(self, other)

    def __rmul__(self, other):
        """Implement ``other * self``; the operands are passed as ``mul(self, other)``."""
        return mul(self, other)

    def __truediv__(self, other):
        """Implement ``self / other`` by delegating to ``div``."""
        return div(self, other)

    def __rtruediv__(self, other):
        """Implement ``other / self`` by delegating to ``rdiv``."""
        return rdiv(self, other)

    def __neg__(self):
        """Implement ``-self`` by delegating to ``neg``."""
        return neg(self)

    def __pow__(self, c):
        """Implement ``self ** c`` by delegating to ``pow``."""
        return pow(self, c)

    def clear_grad(self):
        """Reset the stored gradient to ``None``."""
        self.grad = None

    def __len__(self):
        """Return ``len(self.data)``."""
        return len(self.data)

    def __repr__(self):
        """Return ``Variable(...)`` with continuation lines indented to align."""
        if self.data is None:
            return 'Variable(None)'
        p = str(self.data).replace('\n', '\n' + ' ' * 9)
        return 'Variable(' + p + ')'

    @property
    def shape(self):
        """Shape of the underlying data."""
        return self.data.shape

    @property
    def ndim(self):
        """Number of dimensions of the underlying data."""
        return self.data.ndim

    @property
    def size(self):
        """Number of elements in the underlying data."""
        return self.data.size

    @property
    def dtype(self):
        """Data type of the underlying data."""
        return self.data.dtype

In [59]:
# Negation.
x = Variable(np.array(2.0))
y = -x
print(y)

# Subtraction with the Variable on either side.
x = Variable(np.array(2.0))
y1 = 2.0 - x
y2 = x - 1.0
print(y1)
print(y2)

# Division with the Variable on either side.
x = Variable(np.array(2.0))
y1 = 1.0 / x
y2 = x / 1.0
print(y1)
print(y2)

# Power with an integer exponent.
x = Variable(np.array(2.0))
y = x ** 3
print(y)

Variable(-2.0)
Variable(0.0)
Variable(1.0)
Variable(0.5)
Variable(2.0)
Variable(8.0)
