In [6]:
import numpy as np
from typing import Optional, Set, Type, Union

class Mul(Operation):
    """Element-wise multiplication node of the computation graph.

    Calling the node stores both operands on ``self.var`` and returns the
    product of their ``.data`` payloads. ``backward`` forwards the incoming
    gradient to each operand, scaled by the other operand's data.
    """

    def __call__(self, x, y):
        # Keep the operands so backward() can reach them later.
        self.var = (x, y)
        return x.data * y.data

    def backward(self, grad):
        lhs, rhs = self.var
        # Product rule: each side receives grad times the opposite factor.
        lhs.backward(grad * rhs.data)
        rhs.backward(grad * lhs.data)

class Add(Operation):
    """Element-wise addition node of the computation graph.

    Calling the node stores both operands on ``self.var`` and returns the
    sum of their ``.data`` payloads. ``backward`` hands the incoming
    gradient, unchanged, to the first operand and then to the second.
    """

    def __call__(self, x, y):
        # Keep the operands so backward() can reach them later.
        self.var = (x, y)
        return x.data + y.data

    def backward(self, grad):
        lhs, rhs = self.var
        lhs.backward(grad)
        rhs.backward(grad)

class Tensor:
    """A numpy-backed value that records the operation which produced it.

    Attributes:
        data: private ``np.ndarray`` copy of the wrapped values.
        creator: the operation object that produced this tensor.
        requires_grad: whether ``backward`` accumulates into ``grad``.
        grad: accumulated gradient; ``None`` until the first ``backward``.

    NOTE(review): ``Operation`` and ``Assign`` are referenced here but are
    not defined in this cell; they must already exist in the kernel
    namespace before a Tensor is constructed.
    """

    def __init__(self, data=None, *, requires_grad=False, creator=None):
        """Wrap ``data`` in a tensor.

        Args:
            data: an int/float/bool scalar (stored as a one-element array),
                a list/tuple, or an ``np.ndarray`` (copied).
            requires_grad: keyword-only; must be a ``bool``.
            creator: keyword-only; an ``Operation`` instance or ``None``.
                When ``None`` a fresh ``Assign()`` is created and called
                with this tensor.

        Raises:
            ValueError: if ``data`` is a ``Tensor`` or any unsupported type.
        """
        assert isinstance(requires_grad, bool)
        assert isinstance(creator, (Operation, type(None)))
        self.data = None
        if isinstance(data, (int, float, bool)):
            data = [data]
        if isinstance(data, (list, tuple)):
            data = np.array(data)
        if isinstance(data, np.ndarray):
            # Copy so later mutation of the caller's array cannot leak in.
            self.data = data.copy()
        elif isinstance(data, Tensor):
            raise ValueError("输入的是 Tensor")
        else:
            raise ValueError("输入类型未知", type(data), data)
        if creator is None:
            creator = Assign()
            creator(self)
        self.creator = creator
        self.requires_grad = requires_grad
        self.grad = None

    def op(self, Op: "Optional[Type[Operation]]", input_vars):
        """Apply operation class ``Op`` to ``input_vars`` and wrap the result.

        Operands that are not already tensors are wrapped with ``Tensor``.
        The result requires a gradient whenever any operand does; otherwise
        ``backward`` on a computed tensor would return immediately and the
        operation's ``backward`` would never be reached.

        Returns:
            A new ``Tensor`` whose ``creator`` is the ``Op`` instance, or
            ``None`` when ``Op`` is ``None``.
        """
        if Op is None:
            return None
        tensor_vars = tuple(
            var if isinstance(var, Tensor) else Tensor(var) for var in input_vars
        )
        f = Op()
        op_out = f(*tensor_vars)
        needs_grad = any(var.requires_grad for var in tensor_vars)
        return Tensor(op_out, requires_grad=needs_grad, creator=f)

    def __mul__(self, other):  # multiplication
        return self.op(Mul, (self, other))

    def __rmul__(self, other):  # reflected multiplication
        return self.op(Mul, (other, self))

    def __add__(self, other):  # addition
        return self.op(Add, (self, other))

    def __radd__(self, other):  # reflected addition
        return self.op(Add, (other, self))

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return "Tensor with shape: {}\n{}".format(self.shape, self.data)

    @property
    def shape(self):
        """Shape of the underlying array."""
        return self.data.shape

    def broadcastable(self, grad, ashape):
        """Reduce ``grad`` so that its shape matches ``ashape``.

        Keeps propagated gradient shapes consistent so that the backward
        pass stays compatible with broadcasting in the forward pass.

        :param grad: gradient array, possibly of a broadcast (larger) shape
        :param ashape: target shape to reduce to
        :return: ``grad`` itself when shapes already match, else the
            summed-down gradient
        """
        if grad.shape == ashape:
            return grad
        # Sum away the leading axes that broadcasting prepended.
        extra_axes = tuple(range(grad.ndim - len(ashape)))
        reduced = grad.sum(axis=extra_axes)
        # Sum (keeping the axis) over every remaining axis whose size differs.
        mismatched = tuple(
            axis for axis, size in enumerate(reduced.shape) if size != ashape[axis]
        )
        if mismatched:
            reduced = reduced.sum(axis=mismatched, keepdims=True)
        return reduced

    def backward(self, grad=None):
        """Accumulate ``grad`` into ``self.grad`` and propagate it upstream.

        Does nothing when ``requires_grad`` is false. When ``grad`` is
        omitted a float64 array of ones shaped like ``data`` is used.

        :param grad: incoming gradient (array-like) or ``None``
        """
        if not self.requires_grad:
            return
        if grad is None:
            grad = np.ones_like(self.data, dtype=np.float64)
        else:
            grad = np.asarray(grad)
        if self.grad is None:
            # First contribution: start from zeros so "+=" is well defined.
            self.grad = np.zeros_like(self.data, dtype=np.float64)
        try:
            self.grad += grad
        except ValueError:
            # grad has a broadcast shape that cannot be added in place;
            # reduce it to this tensor's shape first.
            grad = self.broadcastable(grad, self.grad.shape)
            self.grad += grad
        # Each gradient is accumulated exactly once before propagating.
        if self.creator:
            self.creator.backward(grad)

NameError: name 'Operation' is not defined

In [5]:
def fun(a, b, *, c):
    """Print ``a``, ``b`` and ``c`` on one line; ``c`` is keyword-only."""
    values = (a, b, c)
    print(*values)

fun(1, 2, c=3)

1 2 3


In [2]:
# Smoke test: wrap a nested list and a plain scalar in Tensors, multiply
# them with the `*` operator, and print the product.
# NOTE(review): this cell's execution count (2) is lower than that of the
# cell defining Tensor above (6), so the recorded output may come from an
# earlier Tensor definition — re-run after a kernel restart to confirm.
a = Tensor([[2, 4]])
d = Tensor(1)
e = a * d
print(e)
# Disabled backward-pass checks, left commented out:
# e.backward(1)
# print(a.grad, d.grad)
# print(Tensor(1))

Tensor with shape: (1, 2)
[[2 4]]


In [14]:
from nf import Tensor
import numpy as np
import torch
from torch.autograd import *
from time import time


def func(x,y,z):
    """Build a long chain of tensor expressions from x, y, z and return f15.

    The body relies on indexing, ``.T``, ``flatten``, ``reshape``,
    ``transpose``, ``permute``, ``sum``, ``mean``, ``@`` and the arithmetic
    operators (including ``**``), so the arguments must expose that
    interface. In this notebook it is called once with torch tensors
    (th_grad_Test) and once with nf ``Tensor`` objects (nf_grad_Test);
    test1 passes x and y of shape [2,4,6,3,4] and z of shape [2,4,1,1,4].

    Every intermediate f0..f14 feeds, directly or indirectly, into the
    returned value f15.
    """
    f0 = (x[1,0].T * y[0,1].T).T * z * x
    # NOTE(review): the original author marked the next line as erroneous;
    # the meaning of the bracketed numbers is not documented — confirm.
    f1 = f0 * (x + y + z) * y * y * y * (y+z) #! has errors [9,23,29]
    f2 = y[0,3] + x[0,2]
    f3 = y * y - z
    f4 = z - x
    f5 = -x.flatten() + y.flatten() - (x*z).flatten() * 2.0
    f6 = f1[1,3] + f1[0,3] * f2 - z[0,1] ** 2.2
    f7 = f3 + f4 + f6
    f8 = f7 - f3 + f4 * 3.6
    f9 = f8.flatten() / f5 + f7.flatten()
    f10 = -f9 * f5
    f11 = ((x*z) @ x.transpose(3, 4) @ y.permute(0,4,2,3,1)).transpose(0,4)
    f12 = f11.transpose(3,4).flatten() * 5.0 ** x.transpose(1,4).flatten() / y.flatten() * (x/z).flatten() + 2.0
    f13 = f10.reshape(f11.shape) * f11 / f12.reshape(f11.shape)
    f14 = (x.transpose(3,4) @ y).permute(0,2,4,3,1) @ f13.permute(4,2,0,1,3)
    # Scalar total of f14 scaled by its mean over axes 0 and 2.
    f15 = f14.sum() * f14.mean((0,2))
    return f15

def th_grad_Test(x,y,z):
    """Compute reference gradients of ``func`` with PyTorch autograd.

    Args:
        x, y, z: numpy arrays; each is wrapped with ``torch.from_numpy``
            and marked as requiring a gradient.

    Returns:
        ``[dx, dy, dz]`` as numpy arrays, obtained by back-propagating a
        tensor of ones shaped like the output of ``func``.

    Prints ``"th"`` followed by the forward and backward wall-clock times
    in seconds.
    """
    # requires_grad_() replaces the deprecated torch.autograd.Variable
    # wrapper, which was only reachable here through a wildcard import.
    x = torch.from_numpy(x).requires_grad_()
    y = torch.from_numpy(y).requires_grad_()
    z = torch.from_numpy(z).requires_grad_()
    t1 = time()
    f9 = func(x, y, z)
    t2 = time() - t1
    t1 = time()
    # The graph is used for a single backward pass, so it is not retained.
    f9.backward(torch.ones_like(f9))
    print("th", t2, time() - t1)

    return [x.grad.numpy(), y.grad.numpy(), z.grad.numpy()]


def nf_grad_Test(x,y,z):
    """Compute gradients of ``func`` with the nf ``Tensor`` implementation.

    Each input is wrapped in a ``Tensor`` with ``requires_grad=True``,
    ``func`` is evaluated, and ``backward()`` is called on the result
    without an explicit gradient. Prints ``"nf"`` followed by the forward
    and backward wall-clock times in seconds.

    Returns:
        ``[x.grad, y.grad, z.grad]`` of the wrapped tensors.
    """
    tx, ty, tz = (Tensor(arr, requires_grad=True) for arr in (x, y, z))

    started = time()
    result = func(tx, ty, tz)
    forward_elapsed = time() - started

    started = time()
    result.backward()
    backward_elapsed = time() - started

    print("nf", forward_elapsed, backward_elapsed)
    return [tx.grad, ty.grad, tz.grad]

def test1():
    """Compare nf gradients against the PyTorch reference on random input.

    Seeds numpy's global generator with 28, draws x and y of shape
    [2,4,6,3,4] and z of shape [2,4,1,1,4], runs both gradient helpers,
    and prints one ``np.allclose`` result per input (x, y, z in order).
    """
    np.random.seed(28)
    full_shape = [2,4,6,3,4]
    x = np.random.random(full_shape)
    y = np.random.random(full_shape)
    z = np.random.random([2,4,1,1,4])

    reference_grads = th_grad_Test(x,y,z)
    candidate_grads = nf_grad_Test(x,y,z)

    for expected, actual in zip(reference_grads, candidate_grads):
        print(np.allclose(actual, expected))

In [27]:
# Run the torch-vs-nf gradient comparison; prints both timing lines and
# then one boolean per input gradient.
test1()

th 0.0018968582153320312 0.0019490718841552734
nf 0.0017888545989990234 0.011336088180541992
True
True
True
