In [1]:
# Code to set up the assignment
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive
# !git clone https://github.com/AllenWrong/10714-final-proj.git

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive


In [2]:
%cd 10714-final-proj/
!pwd

/content/drive/MyDrive/10714-final-proj
/content/drive/MyDrive/10714-final-proj


In [3]:
!pip install pybind11
# !make

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Import

In [4]:
import sys
sys.path.append("python/")

In [5]:
import needle as ndl
from needle import ops
import numpy as np

In [6]:
# Re-import needle after editing its sources on disk so the kernel picks up
# the changes without a restart. `imp` is deprecated and was removed in
# Python 3.12; `importlib.reload` is the supported replacement.
import importlib
importlib.reload(ndl)
importlib.reload(ndl.backend_ndarray.ndarray)

<module 'needle.backend_ndarray.ndarray' from '/content/drive/MyDrive/10714-final-proj/python/needle/backend_ndarray/ndarray.py'>

In [13]:
# Reload the ops module after editing it on disk. `imp` is deprecated and was
# removed in Python 3.12; `importlib.reload` is the supported replacement.
import importlib
importlib.reload(ops)

<module 'needle.ops' from '/content/drive/MyDrive/10714-final-proj/python/needle/ops.py'>

In [None]:
a = ndl.Tensor([
    [1, 2, 3, 4],
    [2, 3, 4, 5], 
    [6, 7, 8, 9]
])

In [None]:
b = ops.get_item(a, (slice(1, 3, 1), slice(1, 3, 1)))
b

needle.Tensor([[3. 4.]
 [7. 8.]])

In [None]:
c = a[1:3, 1:3]

In [None]:
c.sum().backward()

In [None]:
a.grad

needle.Tensor([[0. 0. 0. 0.]
 [0. 1. 1. 0.]
 [0. 1. 1. 0.]])

In [None]:
grad_c = ndl.Tensor([[2, 3]])
b.sum().backward()

In [None]:
a.grad

needle.Tensor([[0. 0. 0. 0.]
 [0. 1. 1. 0.]
 [0. 1. 1. 0.]])

## Implement the op  `__getitem__` for Tensor

In [7]:
a = np.array([
    [1, 2, 3, 4],
    [2, 3, 4, 5], 
    [6, 7, 8, 9]
]).astype("float32")

### Torch example

In [46]:
import torch

In [47]:
th_tensor = torch.tensor(a, requires_grad=True)
b = th_tensor[1:3, 1:3]

b.sum().backward()
print(th_tensor.grad)

tensor([[0., 0., 0., 0.],
        [0., 1., 1., 0.],
        [0., 1., 1., 0.]])


### Needle example

In [48]:
import needle as ndl

In [49]:
nd_tensor = ndl.Tensor(a)
b_nd = nd_tensor[1:3, 1:3]

b_nd.sum().backward()
print(nd_tensor.grad)

[[0. 0. 0. 0.]
 [0. 1. 1. 0.]
 [0. 1. 1. 0.]]


## Implement `ndl.cat([...], axis=)`

In [8]:
a = np.array([
    [1, 2, 3, 4],
    [2, 3, 4, 5], 
    [6, 7, 8, 9]
]).astype("float32")

### Torch example

In [51]:
th1 = torch.tensor(a[:, 0:2], requires_grad=True)
th2 = torch.tensor(a[:, 2:], requires_grad=True)

print(th1)
print(th2)

tensor([[1., 2.],
        [2., 3.],
        [6., 7.]], requires_grad=True)
tensor([[3., 4.],
        [4., 5.],
        [8., 9.]], requires_grad=True)


In [52]:
# forward
th12 = torch.cat([th1, th2], axis=1)
th12

tensor([[1., 2., 3., 4.],
        [2., 3., 4., 5.],
        [6., 7., 8., 9.]], grad_fn=<CatBackward0>)

In [53]:
# backward
th12.sum().backward()
print(th1.grad)
print(th2.grad)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


### Needle example

In [9]:
nd1 = ndl.Tensor(a[:, 0:2])
nd2 = ndl.Tensor(a[:, 2:])

print(nd1)
print(nd2)

[[1. 2.]
 [2. 3.]
 [6. 7.]]
[[3. 4.]
 [4. 5.]
 [8. 9.]]


In [12]:
# forward
nd12 = ops.cat([nd1, nd2], axis=1)
nd12

needle.Tensor([[1. 2. 3. 4.]
 [2. 3. 4. 5.]
 [6. 7. 8. 9.]])

In [13]:
# backward
nd12.sum().backward()
print(nd1.grad)
print(nd2.grad)

[[1. 1.]
 [1. 1.]
 [1. 1.]]
[[1. 1.]
 [1. 1.]
 [1. 1.]]


## Implement `inv`

The following derivation shows how we obtain the gradient of the `inv` op.

In matrix calculus, we have the following formulas.

$$
d(XY) = d(X)Y + XdY \\
XX^{-1} = I \\
dI = \mathbf{0}
$$

Using the three formulas above, we can relate the differential of $X^{-1}$ to the differential of $X$:

$$
d(X)X^{-1}+XdX^{-1} = \mathbf{0} \\
dX = -XdX^{-1}X \\
dX^{-1} = -X^{-1}d(X)X^{-1}
$$

The differential and the derivative of a scalar function $f: R^{m\times n} \to R$ are related by:

$$
df = tr((\frac{\partial f}{\partial X})^TdX)
$$

Writing the same scalar function $f$ once in terms of $X$ and once in terms of $X^{-1}$, we get the following two formulas:

$$
df = tr((\frac{\partial f}{\partial X})^TdX) \\
df = tr((\frac{\partial f}{\partial X^{-1}})^TdX^{-1})
$$

Substituting $dX^{-1} = -X^{-1}d(X)X^{-1}$ into the second formula and equating it with the first, we get:

$$
tr((\frac{\partial f}{\partial X})^T dX) =
tr((\frac{\partial f}{\partial X^{-1}})^T (-X^{-1})d(X)X^{-1})
$$

The trace is invariant under cyclic permutation: $tr(AB)=tr(BA)$. Using this property to move the trailing $X^{-1}$ to the front, we can rewrite the above formula as:

$$
tr((\frac{\partial f}{\partial X})^T dX) =
tr(X^{-1}(\frac{\partial f}{\partial X^{-1}})^T (-X^{-1})d(X))
$$

Which means:

$$
\frac{\partial f}{\partial X} = (X^{-1}(\frac{\partial f}{\partial X^{-1}})^T (-X^{-1}))^T
$$

Following this formula, we will implement our gradient method for **inv**.

**We implement `inv` using elementary row operations (Gauss-Jordan elimination). The following is a demo program using NumPy.**

In [None]:
import numpy as np

x = np.array([
    [3., 2., 3.],
    [2., 3., 4.],
    [6., 7., 8.]
])

class linalg:
    """Namespace for the NumPy demo of the row-operation matrix inverse."""

    @staticmethod
    def inv(x):
        """Invert a square matrix by Gauss-Jordan elimination with partial pivoting.

        The augmented matrix ``[x | I]`` is reduced with elementary row
        operations until its left half is the identity; the right half is
        then the inverse of ``x``.

        Args:
            x: Square 2-D array-like. The input is not modified.

        Returns:
            A NumPy array with the same shape as ``x`` holding its inverse.

        Raises:
            ValueError: If ``x`` is not a square 2-D matrix.
            numpy.linalg.LinAlgError: If a pivot column is entirely zero,
                i.e. ``x`` is singular.
        """
        x = np.asarray(x)
        if x.ndim != 2 or x.shape[0] != x.shape[1]:
            raise ValueError(f"inv expects a square 2-D matrix, got shape {x.shape}")

        n = x.shape[0]
        # np.concatenate allocates a new array, so the caller's matrix is untouched.
        aug = np.concatenate([x, np.eye(n)], axis=1)

        # Forward elimination: unit diagonal, zeros below it.
        for i in range(n):
            # Partial pivoting: bring the largest remaining entry of column i
            # onto the diagonal. Without this a zero (or tiny) diagonal entry
            # makes the normalisation below divide by zero.
            pivot = i + int(np.argmax(np.abs(aug[i:, i])))
            if aug[pivot, i] == 0:
                raise np.linalg.LinAlgError("Singular matrix")
            if pivot != i:
                aug[[i, pivot]] = aug[[pivot, i]]

            aug[i, :] = aug[i, :] / aug[i, i]
            for j in range(i + 1, n):
                aug[j, :] = aug[j, :] - aug[j, i] * aug[i, :]

        # Back substitution: clear the entries above the diagonal.
        for i in range(n - 1, 0, -1):
            for j in range(i - 1, -1, -1):
                aug[j, :] -= aug[j, i] * aug[i, :]

        return aug[:, n:]


import time
# test inv
# Check the demo inverse against np.linalg.inv on random matrices and time both.
# Timings are accumulated with the monotonic high-resolution clock and reported
# once, in milliseconds per call (seconds * 1000), instead of two lines per trial.
n_trials = 100
np_seconds = 0.0
demo_seconds = 0.0
for _ in range(n_trials):
    # Identity plus uniform noise in [0, 1).
    arr = np.eye(100) + np.random.rand(100, 100)

    start_time = time.perf_counter()
    B = np.linalg.inv(arr)
    np_seconds += time.perf_counter() - start_time

    start_time = time.perf_counter()
    arr_inv = linalg.inv(arr)
    demo_seconds += time.perf_counter() - start_time

    assert np.allclose(B, arr_inv)

print(f"np.inv: {np_seconds / n_trials * 1000:.3f} ms per call")
print(f"nd.inv: {demo_seconds / n_trials * 1000:.3f} ms per call")

### Torch example

In [6]:
import torch

In [7]:
# example array
x = np.array([
    [3., 2., 3.],
    [2., 3., 4.],
    [6., 7., 8.]
])

In [8]:
a_tensor = torch.tensor(x, requires_grad=True)
print(a_tensor.grad)

None


In [9]:
b = torch.inverse(a_tensor)
b.retain_grad()
print(b)

tensor([[ 0.5000, -0.6250,  0.1250],
        [-1.0000, -0.7500,  0.7500],
        [ 0.5000,  1.1250, -0.6250]], dtype=torch.float64,
       grad_fn=<LinalgInvExBackward0>)


In [10]:
b.sum().backward()
th_grad = a_tensor.grad
print(a_tensor.grad)
print(b.grad)

tensor([[-1.8489e-32, -1.1102e-16,  1.1102e-16],
        [-2.0817e-17, -2.5000e-01,  2.5000e-01],
        [ 2.0817e-17,  2.5000e-01, -2.5000e-01]], dtype=torch.float64)
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


In [11]:
# Manual gradient from the derivation: (X^{-1} (df/dX^{-1})^T (-X^{-1}))^T.
# The upstream gradient must be transposed; omitting the transpose only gives
# the right answer when b.grad is symmetric (as it is for sum(), all ones).
print((-b @ b.grad.T @ b).T)

tensor([[-1.8489e-32, -1.1102e-16,  1.1102e-16],
        [-2.0817e-17, -2.5000e-01,  2.5000e-01],
        [ 2.0817e-17,  2.5000e-01, -2.5000e-01]], dtype=torch.float64,
       grad_fn=<PermuteBackward0>)


### Needle example

In [12]:
nd_a = ndl.Tensor(x)

inv_a = ops.inv(nd_a)

In [13]:
print(inv_a)

needle.Tensor([[ 0.5000001  -0.625       0.12499997]
 [-1.         -0.75        0.74999994]
 [ 0.49999994  1.125      -0.62499994]])

In [14]:
inv_a.sum().backward()
print(inv_a.grad)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [17]:
print("nd:\n", nd_a.grad)
print("th:\n", th_grad)

nd:
 [[-5.3290705e-15  5.9604645e-08 -5.9604645e-08]
 [ 2.2351742e-08 -2.5000000e-01  2.5000000e-01]
 [-2.2351740e-08  2.4999994e-01 -2.4999994e-01]]
th:
 tensor([[-1.8489e-32, -1.1102e-16,  1.1102e-16],
        [-2.0817e-17, -2.5000e-01,  2.5000e-01],
        [ 2.0817e-17,  2.5000e-01, -2.5000e-01]], dtype=torch.float64)


In [18]:
a = np.random.rand(10, 10) + np.eye(10)

In [24]:
torch.set_printoptions(precision=6)

In [25]:
th_tensor = torch.tensor(a, requires_grad=True)
th_inv = torch.inverse(th_tensor)
th_inv.sum().backward()
print(th_tensor.grad)

tensor([[-0.279087,  0.024167, -0.123001, -0.042911,  0.045321, -0.111772,
         -0.025394, -0.022768, -0.033002, -0.073750],
        [-0.218708,  0.018938, -0.096390, -0.033627,  0.035516, -0.087590,
         -0.019900, -0.017842, -0.025862, -0.057795],
        [-0.051874,  0.004492, -0.022862, -0.007976,  0.008424, -0.020775,
         -0.004720, -0.004232, -0.006134, -0.013708],
        [-0.278418,  0.024109, -0.122707, -0.042808,  0.045212, -0.111504,
         -0.025333, -0.022713, -0.032923, -0.073574],
        [-0.284610,  0.024645, -0.125435, -0.043760,  0.046218, -0.113984,
         -0.025896, -0.023218, -0.033655, -0.075210],
        [-0.399524,  0.034596, -0.176081, -0.061428,  0.064879, -0.160006,
         -0.036352, -0.032593, -0.047244, -0.105577],
        [ 0.068034, -0.005891,  0.029985,  0.010461, -0.011048,  0.027247,
          0.006190,  0.005550,  0.008045,  0.017978],
        [ 0.393118, -0.034041,  0.173258,  0.060443, -0.063838,  0.157440,
          0.035769,  0

In [22]:
nd_tensor = ndl.Tensor(a)
nd_inv = ndl.inv(nd_tensor)
nd_inv.sum().backward()
print(nd_tensor.grad)

[[-0.2790872   0.02416668 -0.12300128 -0.04291064  0.04532084 -0.1117717
  -0.02539386 -0.02276762 -0.03300214 -0.07375051]
 [-0.21870756  0.01893832 -0.09639043 -0.0336271   0.03551585 -0.08759034
  -0.01990001 -0.01784194 -0.02586227 -0.05779489]
 [-0.05187404  0.00449185 -0.02286229 -0.00797581  0.00842378 -0.02077505
  -0.00471995 -0.00423184 -0.00613412 -0.01370803]
 [-0.27841833  0.02410874 -0.12270655 -0.04280782  0.04521225 -0.11150396
  -0.02533302 -0.0227131  -0.03292308 -0.07357381]
 [-0.28460988  0.02464499 -0.12543544 -0.04375984  0.04621769 -0.11398362
  -0.0258964  -0.02321814 -0.03365524 -0.07520997]
 [-0.39952424  0.03459566 -0.17608133 -0.06142833  0.06487858 -0.16000573
  -0.03635236 -0.03259271 -0.0472439  -0.10557681]
 [ 0.06803431 -0.00589129  0.02998462  0.01046054 -0.01104807  0.02724708
   0.0061904   0.00555013  0.00804508  0.01797849]
 [ 0.39311755 -0.03404105  0.17325786  0.06044335 -0.06383821  0.15743995
   0.03576946  0.03207     0.04648632  0.10388381]
 

## Tries

In [19]:
def concatenate(its: tuple, axis):
    """Join a sequence of arrays along an existing axis.

    Args:
        its: Non-empty sequence of arrays exposing ``.shape``. All inputs must
            have the same number of dimensions and identical sizes on every
            axis except ``axis``.
        axis: Axis to join along. Negative values count from the last axis.

    Returns:
        A new array created with ``empty(out_shape)`` whose size along
        ``axis`` is the sum of the inputs' sizes along that axis.

    Raises:
        ValueError: If ``its`` is empty or ``axis`` is out of range.
        AssertionError: If the input shapes are incompatible.
    """
    if len(its) == 0:
        raise ValueError("need at least one array to concatenate")

    it_shape = its[0].shape
    ndim = len(it_shape)
    if not -ndim <= axis < ndim:
        raise ValueError(f"axis {axis} is out of bounds for arrays of dimension {ndim}")
    if axis < 0:
        # Normalise so the `i == axis` comparison below can match.
        axis += ndim

    # compute out shape
    out_shape = list(it_shape)
    out_shape[axis] = 0
    for tensor in its:
        assert len(tensor.shape) == ndim, \
            "all inputs must have the same number of dimensions!"
        # check shape
        for i in range(ndim):
            if i == axis:
                out_shape[axis] += tensor.shape[i]
            else:
                assert tensor.shape[i] == it_shape[i], \
                    f"shape on axis {i} must be eq, except shape on axis {axis}!"

    # `empty` is resolved from the enclosing module, not defined in this block.
    ret_arr = empty(out_shape)

    # One full-extent slice per output axis, each built from that axis's own
    # size. Only the entry for `axis` changes from one input to the next.
    idxs = [slice(0, dim, 1) for dim in out_shape]
    start_idx = 0
    for tensor in its:
        stop_idx = start_idx + tensor.shape[axis]
        idxs[axis] = slice(start_idx, stop_idx, 1)
        ret_arr[tuple(idxs)] = tensor
        start_idx = stop_idx

    return ret_arr


def _upper_trangle(a):
    """Reduce a copy of ``a`` to upper-triangular form by row elimination.

    ``a`` must expose ``.numpy()``; the result is built with ``array(...)``,
    which is resolved from the enclosing module. For every column the rows
    below the diagonal have a multiple of the pivot row subtracted so that
    their entry in that column is eliminated.

    No row swaps are performed: each row is divided by the diagonal entry as
    it stands, so a zero on the diagonal is divided by as-is.
    """
    mat = array(a.numpy())
    n_rows, n_cols = mat.shape[0], mat.shape[1]

    for pivot in range(n_cols):
        pivot_val = mat[pivot, pivot]
        for row in range(pivot + 1, n_rows):
            # Elimination factor, expanded to the row shape before multiplying.
            factor = (mat[row, pivot] / pivot_val).broadcast_to(mat[pivot, :].shape)
            mat[row, :] = mat[row, :] - factor * mat[pivot, :]

    return mat


def det(x):
    """Return the determinant of ``x``.

    The matrix is first reduced with ``_upper_trangle``; the determinant is
    then the product of the diagonal entries of that triangular form.
    """
    triangular = _upper_trangle(x)
    n_diag = triangular.shape[0]

    # Start from the integer 1 and fold each diagonal entry in, in order.
    prod = 1
    for k in range(n_diag):
        prod *= triangular[k, k]
    return prod


def _adj(x):
    """Return the adjugate (transposed cofactor matrix) of ``x``.

    Entry ``[j, i]`` of the result is ``(-1) ** (i + j) * det(minor)``, where
    the minor is ``x`` with row ``i`` and column ``j`` removed.

    ``full`` and ``det`` are resolved from the enclosing module.
    NOTE(review): the minors are built with ``np.concatenate``, while ``det``
    goes through ``_upper_trangle``, which calls ``.numpy()`` on its argument.
    Confirm the array type passed between them is consistent.
    """
    adj_x = full(x.shape, 0.0)
    n_rows, n_cols = x.shape[0], x.shape[1]

    for i in range(n_rows):
        for j in range(n_cols):
            # Build the minor from the four quadrants around (i, j). Slices
            # that fall off an edge are empty and drop out of the
            # concatenation, so no special-casing of edges is needed.
            top = np.concatenate([x[:i, :j], x[:i, j + 1:]], axis=1)
            bottom = np.concatenate([x[i + 1:, :j], x[i + 1:, j + 1:]], axis=1)
            sub_block = np.concatenate([top, bottom], axis=0)

            # Transposed placement ([j, i]) turns the cofactor matrix into the adjugate.
            adj_x[j, i] = (-1) ** (i + 1 + j + 1) * det(sub_block)

    return adj_x


def inv(x):
    """Return the inverse of ``x`` as adjugate divided by determinant.

    The determinant returned by ``det`` is expanded to the adjugate's shape
    with ``broadcast_to`` before the element-wise division.
    """
    adjugate = _adj(x)
    determinant = det(x)
    denominator = determinant.broadcast_to(adjugate.shape)
    return adjugate / denominator

In [21]:
np.linalg.inv(x)

array([[ 0.5  , -0.625,  0.125],
       [-1.   , -0.75 ,  0.75 ],
       [ 0.5  ,  1.125, -0.625]])