In [13]:
import numpy as np


def as_array(x):
    """Wrap a scalar in a NumPy array; return anything else untouched.

    Args:
        x: Any value. If ``np.isscalar(x)`` is true it is converted with
            ``np.array``; otherwise it is passed through as-is.

    Returns:
        ``np.array(x)`` for scalars, or the very same object ``x`` otherwise.
    """
    # Guard clause: non-scalars need no conversion.
    if not np.isscalar(x):
        return x
    return np.array(x)


class Variable:
    """A node of the computational graph holding data and its gradient.

    Attributes:
        data: The wrapped ``np.ndarray`` (``None`` is also accepted).
        grad: Gradient filled in by ``backward``; ``None`` until then.
        creator: The function object that produced this variable, or
            ``None`` when the variable was created directly.
        generation: 0 for directly created variables; otherwise the
            creator's generation plus one. Used to order backpropagation.
    """

    def __init__(self, data):
        """Store ``data`` and initialise the bookkeeping fields.

        Args:
            data: An ``np.ndarray`` or ``None``.

        Raises:
            TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError('{} is not supported'.format(type(data)))

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record ``func`` as the creator and place this variable one
        generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def cleargrad(self):
        """Reset the gradient so the variable can be reused in another
        computation without carrying over an earlier gradient."""
        self.grad = None

    def backward(self):
        """Backpropagate from this variable through the functions that
        produced it, accumulating gradients into every input variable.

        If ``grad`` is still ``None`` it is seeded with ones shaped like
        ``data``, so the caller does not have to set it by hand. A variable
        without a creator only receives this seed; there is nothing to
        propagate.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A leaf has no creator; queuing None would fail in the sort key
        # when it looks up `.generation`.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # `seen_set` prevents queuing the same function twice; sorting by
            # generation lets `pop()` always yield the function closest to
            # the output, which is required for graphs that branch and merge.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda func: func.generation)

        add_func(self.creator)
        while funcs:
            f = funcs.pop()
            gys = [output.grad for output in f.outputs]  # gradients of the outputs
            gxs = f.backward(*gys)  # gradients with respect to the inputs
            # A function with a single input may return a bare value rather
            # than a tuple; normalise so the zip below works either way.
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                # Accumulate rather than overwrite: the same variable may be
                # an input more than once, and each use contributes a term.
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx
                if x.creator is not None:
                    add_func(x.creator)


class Function:
    """Base class for operations applied to ``Variable`` objects.

    Subclasses implement ``forward`` (working on the raw data of the inputs)
    and ``backward`` (working on the gradients of the outputs).
    """

    def __call__(self, *inputs):
        """Run ``forward`` on the inputs' data and wrap the results.

        Args:
            *inputs: Variables whose ``data`` is handed to ``forward`` as
                positional arguments.

        Returns:
            A single ``Variable`` when ``forward`` produced one result,
            otherwise the list of output variables.
        """
        raw_values = [var.data for var in inputs]
        results = self.forward(*raw_values)
        # A lone return value is treated as a one-element tuple.
        if not isinstance(results, tuple):
            results = (results,)
        outputs = [Variable(as_array(result)) for result in results]

        # The function takes the generation of its latest input; this must be
        # set before set_creator, which reads it.
        self.generation = max(var.generation for var in inputs)

        for out in outputs:
            out.set_creator(self)
        self.inputs = inputs
        self.outputs = outputs

        if len(outputs) > 1:
            return outputs
        return outputs[0]

    def forward(self, xs):
        """Compute the outputs from raw input data; subclasses override."""
        raise NotImplementedError

    def backward(self, gys):
        """Compute input gradients from output gradients; subclasses override."""
        raise NotImplementedError

In [14]:
class Add(Function):
    """Addition of two inputs."""

    def forward(self, x0,x1):
        """Return the sum of the two raw inputs."""
        return x1 + x0

    def backward(self, gy):
        """Hand the upstream gradient to both inputs unchanged.

        The derivative of a sum with respect to either operand is 1, so each
        input receives ``gy`` as-is.
        """
        return (gy, gy)
    
def add(x0,x1):
    """Apply a fresh ``Add`` function to two variables and return its result.

    Note: the operands are passed to ``Add`` in reverse order (``x1`` first),
    so the function object stores its ``inputs`` as ``(x1, x0)``.
    """
    op = Add()
    return op(x1, x0)

In [15]:
class Square(Function):
    """Squares its input."""

    def forward(self, x):
        """Return ``x`` raised to the power of two."""
        return x ** 2

    def backward(self, gy):
        """Return the gradient with respect to the input.

        Uses d(x^2)/dx = 2x, with x taken from the data of the first stored
        input, multiplied by the upstream gradient ``gy``.
        """
        first_input = self.inputs[0]
        return 2 * first_input.data * gy
def square(x):
    """Apply a fresh ``Square`` function to ``x`` and return its result."""
    op = Square()
    return op(x)

In [16]:
# z = x0^2 + x1^2 at (x0, x1) = (2, 3): expect z = 13, dz/dx0 = 4, dz/dx1 = 6.
x0, x1 = Variable(np.array(2.0)), Variable(np.array(3.0))
z = add(square(x0), square(x1))
z.backward()
# One value per line: the result, then the gradient of each input.
print(z.data, x0.grad, x1.grad, sep='\n')

13.0
4.0
6.0


In [17]:
# Verify that a variable used more than once is handled correctly:
# y = (x + x) + x, so the gradients from all three uses must add up to 3.
x = Variable(np.array(3.0))
y =  add(add(x,x),x)
y.backward()
print(x.grad)

3.0


In [18]:
# Branch-and-merge graph: a = x^2 feeds two separate Square calls whose
# results are summed, so y = 2 * x^4. At x = 2: dy/dx = 8 * x^3 = 64, y = 32.
x = Variable(np.array(2.0))
a = square(x)
y = add(square(a), square(a))
y.backward()
# Gradient first, then the value, one per line.
print(x.grad, y.data, sep='\n')

64.0
32.0


### CPython 使用两种方式管理内存：引用计数和垃圾回收
#### 引用计数：对象创建时引用计数为 0；每被引用一次，计数加 1；引用解除时，计数减 1；当计数降为 0 时，对象被回收