# 神经网络编程框架
- 参考配置：
    - 系统：Ubuntu 20.04
    - CUDA：12.4
    - gcc：9.5
    - cmake：4.0.1

- 作业要求
    1. 实现 PPT 中列举的 TensorOp 子类并进行测试；
    2. 实现 PPT 中列举的 Module、Optimizer、Scheduler 子类；
    3. 使用该简易框架进行 ResidualMLP 实验，实现模型的存储和读取，且测试阶段不计算梯度。

完成上述要求后，可以通过以下代码进行测试：

In [None]:
#!pip3 install --upgrade --no-deps git+https://github.com/dlsys10714/mugrade.git
!pip3 install pybind11

In [None]:
!make

In [None]:
%set_env PYTHONPATH ./python
%set_env NEEDLE_BACKEND nd

In [None]:
import sys
sys.path.append('./python')
import needle as ndl
import numpy as np

In [None]:
以下是一些简单的测试样例，运行不报错即视为满足作业要求中的第 1 项。其余部分请自行设计测试用例，确保代码的正确性。

In [None]:
# EWiseAdd: element-wise addition of two arrays.
def test_add():
    """Check that ``ndl.add`` of [1, 2] and [3, 4] yields [4, 6].

    The operands are handed over as raw NumPy arrays (not ``ndl.Tensor``).
    NOTE(review): the later forward tests wrap their inputs in ``ndl.Tensor``;
    confirm that ``ndl.add`` is meant to accept plain arrays.
    """
    lhs = np.array([1, 2])
    rhs = np.array([3, 4])
    total = ndl.add(lhs, rhs).numpy()
    np.testing.assert_allclose(total, np.array([4, 6]))
test_add()
# AddScalar: add a Python scalar to every element of an array.
def test_scalar_add():
    """Check that ``ndl.add_scalar`` of [1, 2] and 3 yields [4, 5].

    The array operand is a raw NumPy array (not ``ndl.Tensor``).
    """
    values = np.array([1, 2])
    shifted = ndl.add_scalar(values, 3).numpy()
    np.testing.assert_allclose(shifted, np.array([4, 5]))
test_scalar_add()

In [None]:
def test_mul():
    """Check that ``ndl.multiply`` of [1, 2] and [3, 4] yields [3, 8].

    Both operands are raw NumPy arrays (not ``ndl.Tensor``).
    """
    lhs = np.array([1, 2])
    rhs = np.array([3, 4])
    product = ndl.multiply(lhs, rhs).numpy()
    np.testing.assert_allclose(product, np.array([3, 8]))
def test_scalar_mul():
    """Check that ``ndl.mul_scalar`` of [1, 2] and 3 yields [3, 6].

    The array operand is a raw NumPy array (not ``ndl.Tensor``).
    """
    values = np.array([1, 2])
    scaled = ndl.mul_scalar(values, 3).numpy()
    np.testing.assert_allclose(scaled, np.array([3, 6]))
# Run both multiplication checks; a mismatch surfaces as an AssertionError
# raised by np.testing.assert_allclose inside the test functions.
test_mul()
test_scalar_mul()

In [None]:
def test_power_scalar_forward():
    """Print and verify ``ndl.power_scalar`` with exponent 2.

    Prints the result for a scalar tensor holding 2 and for the row
    [[0.5, 2.0, 3.0]], then asserts the row result equals
    [[0.25, 4.0, 9.0]].
    """
    row = [[0.5, 2.0, 3.0]]
    print(ndl.power_scalar(ndl.Tensor(2), scalar=2).numpy())
    print(ndl.power_scalar(ndl.Tensor(row), scalar=2).numpy())
    # A fresh tensor is built for the assertion, separate from the printed one.
    squared = ndl.power_scalar(ndl.Tensor(row), scalar=2).numpy()
    np.testing.assert_allclose(squared, np.array([[0.25, 4.0, 9.0]]))
test_power_scalar_forward()

In [None]:
def test_divide_forward():
    """Check ``ndl.divide`` on two 2x3 tensors against precomputed quotients."""
    numerator = ndl.Tensor([[3.3, 4.35, 1.2], [2.45, 0.95, 2.55]])
    denominator = ndl.Tensor([[4.6, 4.35, 4.8], [0.65, 0.7, 4.4]])
    quotient = ndl.divide(numerator, denominator).numpy()
    expected = np.array(
        [
            [0.717391304348, 1.0, 0.25],
            [3.769230769231, 1.357142857143, 0.579545454545],
        ]
    )
    np.testing.assert_allclose(quotient, expected)
test_divide_forward()
# DivScalar: divide every element of a tensor by a Python scalar.
def test_divide_scalar_forward():
    """Check ``ndl.divide_scalar`` of [[1.7, 1.45]] by 12 against known values."""
    dividend = ndl.Tensor([[1.7, 1.45]])
    quotient = ndl.divide_scalar(dividend, scalar=12).numpy()
    expected = np.array([[0.141666666667, 0.120833333333]])
    np.testing.assert_allclose(quotient, expected)
test_divide_scalar_forward()

In [None]:
# Transpose: swap two axes of a tensor.
def test_transpose_forward():
    """Check ``ndl.transpose`` on seven inputs, with and without ``axes``.

    Each case is ``(input data, keyword arguments, expected result)``. For the
    cases called without ``axes``, the expected values correspond to swapping
    the last two dimensions of the input.
    """
    cases = [
        (
            [[[1.95]], [[2.7]], [[3.75]]],
            {"axes": (1, 2)},
            [[[1.95]], [[2.7]], [[3.75]]],
        ),
        (
            [[[[0.95]]], [[[2.55]]], [[[0.45]]]],
            {"axes": (2, 3)},
            [[[[0.95]]], [[[2.55]]], [[[0.45]]]],
        ),
        (
            [
                [[[0.4, 0.05], [2.95, 1.3]], [[4.8, 1.2], [1.65, 3.1]]],
                [[[1.45, 3.05], [2.25, 0.1]], [[0.45, 4.75], [1.5, 1.8]]],
                [[[1.5, 4.65], [1.35, 2.7]], [[2.0, 1.65], [2.05, 1.2]]],
            ],
            {},
            [
                [[[0.4, 2.95], [0.05, 1.3]], [[4.8, 1.65], [1.2, 3.1]]],
                [[[1.45, 2.25], [3.05, 0.1]], [[0.45, 1.5], [4.75, 1.8]]],
                [[[1.5, 1.35], [4.65, 2.7]], [[2.0, 2.05], [1.65, 1.2]]],
            ],
        ),
        (
            [[[2.45]], [[3.5]], [[0.9]]],
            {"axes": (0, 1)},
            [[[2.45], [3.5], [0.9]]],
        ),
        (
            [[4.4, 2.05], [1.85, 2.25], [0.15, 1.4]],
            {},
            [[4.4, 1.85, 0.15], [2.05, 2.25, 1.4]],
        ),
        (
            [[0.05, 3.7, 1.35], [4.45, 3.25, 1.95], [2.45, 4.4, 4.5]],
            {},
            [[0.05, 4.45, 2.45], [3.7, 3.25, 4.4], [1.35, 1.95, 4.5]],
        ),
        (
            [
                [[0.55, 1.8, 0.2], [0.8, 2.75, 3.7], [0.95, 1.4, 0.8]],
                [[0.75, 1.6, 1.35], [3.75, 4.0, 4.55], [1.85, 2.5, 4.8]],
                [[0.2, 3.35, 3.4], [0.3, 4.85, 4.85], [4.35, 4.25, 3.05]],
            ],
            {"axes": (0, 1)},
            [
                [[0.55, 1.8, 0.2], [0.75, 1.6, 1.35], [0.2, 3.35, 3.4]],
                [[0.8, 2.75, 3.7], [3.75, 4.0, 4.55], [0.3, 4.85, 4.85]],
                [[0.95, 1.4, 0.8], [1.85, 2.5, 4.8], [4.35, 4.25, 3.05]],
            ],
        ),
    ]
    # Cases run in the listed order; the first mismatch raises AssertionError.
    for data, options, expected in cases:
        transposed = ndl.transpose(ndl.Tensor(data), **options).numpy()
        np.testing.assert_allclose(transposed, np.array(expected))
test_transpose_forward()

In [None]:
def test_reshape_forward():
    """Check ``ndl.reshape`` for a 5x3 -> (15,) and a 3x2x4 -> (2, 3, 4) reshape.

    In both cases the expected array lists the input elements in the same
    row-major order, only regrouped into the new shape.
    """
    # Case 1: flatten a 5x3 matrix into a vector of 15 elements.
    matrix = ndl.Tensor(
        [
            [2.9, 2.0, 2.4],
            [3.95, 3.95, 4.65],
            [2.1, 2.5, 2.7],
            [1.9, 4.85, 3.25],
            [3.35, 3.45, 3.45],
        ]
    )
    flattened = ndl.reshape(matrix, shape=(15,)).numpy()
    expected_flat = np.array(
        [
            2.9, 2.0, 2.4,
            3.95, 3.95, 4.65,
            2.1, 2.5, 2.7,
            1.9, 4.85, 3.25,
            3.35, 3.45, 3.45,
        ]
    )
    np.testing.assert_allclose(flattened, expected_flat)

    # Case 2: regroup a 3x2x4 tensor as 2x3x4.
    cube = ndl.Tensor(
        [
            [[4.1, 4.05, 1.35, 1.65], [3.65, 0.9, 0.65, 4.15]],
            [[4.7, 1.4, 2.55, 4.8], [2.8, 1.75, 2.8, 0.6]],
            [[3.75, 0.6, 0.0, 3.5], [0.15, 1.9, 4.75, 2.8]],
        ]
    )
    regrouped = ndl.reshape(cube, shape=(2, 3, 4)).numpy()
    expected_cube = np.array(
        [
            [
                [4.1, 4.05, 1.35, 1.65],
                [3.65, 0.9, 0.65, 4.15],
                [4.7, 1.4, 2.55, 4.8],
            ],
            [
                [2.8, 1.75, 2.8, 0.6],
                [3.75, 0.6, 0.0, 3.5],
                [0.15, 1.9, 4.75, 2.8],
            ],
        ]
    )
    np.testing.assert_allclose(regrouped, expected_cube)
test_reshape_forward()

In [None]:
def test_broadcast_to_forward():
    """Check ``ndl.broadcast_to`` of a 1x3 row to shape (3, 3, 3).

    The expected result repeats the row [1.85, 0.85, 0.6] in all nine
    positions of the two leading dimensions.
    """
    row = [1.85, 0.85, 0.6]
    broadcasted = ndl.broadcast_to(ndl.Tensor([row]), shape=(3, 3, 3)).numpy()
    expected = np.array([[row, row, row], [row, row, row], [row, row, row]])
    np.testing.assert_allclose(broadcasted, expected)
test_broadcast_to_forward()

In [None]:
def test_summation_forward():
    """Check ``ndl.summation`` without ``axes``, with ``axes=1`` and ``axes=0``.

    Each case is ``(input data, keyword arguments, expected result)``; the
    call without ``axes`` is expected to produce the scalar total 30.5.
    """
    cases = [
        (
            [
                [2.2, 4.35, 1.4, 0.3, 2.65],
                [1.0, 0.85, 2.75, 3.8, 1.55],
                [3.2, 2.3, 3.45, 0.7, 0.0],
            ],
            {},
            30.5,
        ),
        (
            [
                [1.05, 2.55, 1.0],
                [2.95, 3.7, 2.6],
                [0.1, 4.1, 3.3],
                [1.1, 3.4, 3.4],
                [1.8, 4.55, 2.3],
            ],
            {"axes": 1},
            [4.6, 9.25, 7.5, 7.9, 8.65],
        ),
        (
            [[1.5, 3.85, 3.45], [1.35, 1.3, 0.65], [2.6, 4.55, 0.25]],
            {"axes": 0},
            [5.45, 9.7, 4.35],
        ),
    ]
    # Cases run in the listed order; the first mismatch raises AssertionError.
    for data, options, expected in cases:
        reduced = ndl.summation(ndl.Tensor(data), **options).numpy()
        np.testing.assert_allclose(reduced, np.array(expected))
test_summation_forward()

In [None]:
def test_matmul_forward():
    """Check ``ndl.matmul`` for a 3x3 @ 3x3 and a 3x2 @ 2x3 product.

    Each case is ``(left operand, right operand, expected product)``.
    """
    cases = [
        (
            [[4.95, 1.75, 0.25], [4.15, 4.25, 0.3], [0.3, 0.4, 2.1]],
            [[1.35, 2.2, 1.55], [3.85, 4.8, 2.6], [1.15, 0.85, 4.15]],
            [[13.7075, 19.5025, 13.26], [22.31, 29.785, 18.7275], [4.36, 4.365, 10.22]],
        ),
        (
            [[3.8, 0.05], [2.3, 3.35], [1.6, 2.6]],
            [[1.1, 3.5, 3.7], [0.05, 1.25, 1.0]],
            [[4.1825, 13.3625, 14.11], [2.6975, 12.2375, 11.86], [1.89, 8.85, 8.52]],
        ),
    ]
    for left, right, expected in cases:
        product = ndl.matmul(ndl.Tensor(left), ndl.Tensor(right)).numpy()
        np.testing.assert_allclose(product, np.array(expected))
test_matmul_forward()

In [None]:
def test_negate_forward():
    """Check that ``ndl.negate`` of [[1.45, 0.55]] yields [[-1.45, -0.55]]."""
    negated = ndl.negate(ndl.Tensor([[1.45, 0.55]])).numpy()
    expected = np.array([[-1.45, -0.55]])
    np.testing.assert_allclose(negated, expected)
test_negate_forward()

In [None]:
# Differentiation: check gradients produced by backward().
def test_compute_gradient():
    """Verify backward-mode gradients, including a gradient of a gradient.

    The first part runs ``gradient_check`` with ``backward=True`` on three
    composite expressions built from matmul, broadcast_to, reshape, scalar
    division, addition, multiplication and summation, using random inputs.
    The second part differentiates ``y = x2 * x2 + x2 * x3`` twice and asserts
    the first- and second-order results.

    NOTE(review): in the portion of the notebook visible here this function is
    defined but not invoked, unlike the other tests -- confirm whether a call
    is intended.
    """
    # sum((A @ B + C) * (A @ B)) with A: 10x9, B: 9x8, C: 10x8.
    gradient_check(
        lambda A, B, C: ndl.summation((A @ B + C) * (A @ B), axes=None),
        ndl.Tensor(np.random.randn(10, 9)),
        ndl.Tensor(np.random.randn(9, 8)),
        ndl.Tensor(np.random.randn(10, 8)),
        backward=True,
    )
    # sum(broadcast_to(A, (10, 9)) * B) with A: 10x1, B: 10x9.
    gradient_check(
        lambda A, B: ndl.summation(ndl.broadcast_to(A, shape=(10, 9)) * B, axes=None),
        ndl.Tensor(np.random.randn(10, 1)),
        ndl.Tensor(np.random.randn(10, 9)),
        backward=True,
    )
    # sum(reshape(A, (10, 10)) @ B / 5 + C) with A: 100 elements, B and C: 10x5.
    gradient_check(
        lambda A, B, C: ndl.summation(
            ndl.reshape(A, shape=(10, 10)) @ B / 5 + C, axes=None
        ),
        ndl.Tensor(np.random.randn(100)),
        ndl.Tensor(np.random.randn(10, 5)),
        ndl.Tensor(np.random.randn(10, 5)),
        backward=True,
    )

    # check gradient of gradient
    x2 = ndl.Tensor([6])
    x3 = ndl.Tensor([0])
    y = x2 * x2 + x2 * x3
    y.backward()
    # Capture the first-order gradients now: x2.grad and x3.grad are read
    # again after the second backward() call and are expected to differ.
    grad_x2 = x2.grad
    grad_x3 = x3.grad
    # gradient of gradient
    grad_x2.backward()
    grad_x2_x2 = x2.grad
    grad_x2_x3 = x3.grad
    x2_val = x2.numpy()
    x3_val = x3.numpy()
    # Forward value and first-order gradients: dy/dx2 = 2*x2 + x3, dy/dx3 = x2.
    assert y.numpy() == x2_val * x2_val + x2_val * x3_val
    assert grad_x2.numpy() == 2 * x2_val + x3_val
    assert grad_x3.numpy() == x2_val
    # Second-order gradients of dy/dx2 with respect to x2 and x3.
    assert grad_x2_x2.numpy() == 2
    assert grad_x2_x3.numpy() == 1

def gradient_check(f, *args, tol=1e-6, backward=False, **kwargs):
    """Compare analytic gradients of ``f`` against central finite differences.

    Every element of every positional argument is perturbed in place by
    ``+eps`` and ``-eps`` (``eps = 1e-4``), ``f`` is re-evaluated, and the
    numerical derivative ``(f(x + eps) - f(x - eps)) / (2 * eps)`` is recorded.
    The element is then reset to its saved original value, so no rounding
    drift accumulates in the inputs before the analytic gradients are computed.

    Args:
        f: Callable invoked as ``f(*args, **kwargs)``. Its result must provide
            ``.numpy()``; with ``backward=True`` it must also provide
            ``.sum()``, otherwise ``.op.gradient_as_tuple`` and ``.shape``.
        *args: Tensors to differentiate with respect to. Each must provide
            ``shape`` and ``realize_cached_data()`` returning a mutable buffer
            with ``size`` and ``flat`` indexing; with ``backward=True`` each
            must also expose ``grad`` after the backward pass.
        tol: Exclusive upper bound on the summed L2 norms of the differences
            between analytic and numerical gradients.
        backward: If true, gradients are read from ``a.grad`` after calling
            ``f(...).sum().backward()``. Otherwise they come from
            ``out.op.gradient_as_tuple`` with an all-ones upstream gradient.
        **kwargs: Extra keyword arguments forwarded to ``f``.

    Returns:
        The analytic gradients as a list of NumPy arrays, one per argument.

    Raises:
        AssertionError: If the accumulated error is not below ``tol``.
    """
    eps = 1e-4
    numerical_grads = [np.zeros(a.shape) for a in args]
    for arg, numerical in zip(args, numerical_grads):
        # The perturbation only works because this is the tensor's own
        # persistent buffer, so fetching it once per argument is sufficient.
        data = arg.realize_cached_data()
        for j in range(data.size):
            original = data.flat[j]
            data.flat[j] = original + eps
            f1 = float(f(*args, **kwargs).numpy().sum())
            data.flat[j] = original - eps
            f2 = float(f(*args, **kwargs).numpy().sum())
            # Restore the exact saved value; undoing the shift arithmetically
            # (+eps, -2*eps, +eps) does not round-trip in floating point.
            data.flat[j] = original
            numerical.flat[j] = (f1 - f2) / (2 * eps)
    if backward:
        out = f(*args, **kwargs).sum()
        out.backward()
        computed_grads = [a.grad.numpy() for a in args]
    else:
        out = f(*args, **kwargs)
        computed_grads = [
            x.numpy()
            for x in out.op.gradient_as_tuple(ndl.Tensor(np.ones(out.shape)), out)
        ]
    # Indexed by argument position so a missing analytic gradient raises
    # IndexError instead of being silently skipped.
    error = sum(
        np.linalg.norm(computed_grads[i] - numerical_grads[i])
        for i in range(len(args))
    )
    assert error < tol, (
        f"gradient check failed: accumulated error {error} is not below tol {tol}"
    )
    return computed_grads