In [1]:
import torch
import numpy as np

In [4]:
x = torch.tensor([[1],
                  [2]])

tensor([1, 2])

In [29]:
x1 = 1
x2 = 2
x = torch.tensor([x1,x2], requires_grad=True, dtype=torch.float)
def h(x):
    return torch.stack([x[0]*x[1], x[0]**2 + x[1], (x[1]**2)*x[0]], dim=0)

h(x).ndim

1

### Understanding autograd in the context of vector-valued and matrix valued functions

- In the below code, we begin with a 1-D vector $x$ as our input to the computational graph
- we next apply vector-valued function $h(x) \in \mathbb{R}^{3 \times 1}$
- and then apply matrix-valued function $F(h(x)) \in \mathbb{R}^{3 \times 2}$
- we thus get $\frac{\partial F}{\partial x} = \frac{\partial F}{\partial h} \cdot \frac{\partial h}{\partial x}$
where $\frac{\partial F}{\partial h} \in \mathbb{R}^{3 \times 2 \times 3}$ and $\frac{\partial h}{\partial x} \in \mathbb{R}^{3 \times 2}$
$\implies \frac{\partial F}{\partial h} \cdot \frac{\partial h}{\partial x} \in \mathbb{R}^{3 \times 2 \times 2}$ as we contract along the depth dimension of
$\frac{\partial F}{\partial h}$ and the row dimension of $\frac{\partial h}{\partial x}$

In [30]:
x1 = 1
x2 = 2
x = torch.tensor([x1,x2], requires_grad=True, dtype=torch.float)

def h(x):
    return torch.stack([x[0]*x[1], x[0]**2 + x[1], (x[1]**2)*x[0]], dim=0)

def F(u):
    return torch.stack([torch.stack([u[0]**2, u[0]+u[1]+u[2]], dim=0),
                        torch.stack([u[2]**2+u[1], u[0]*u[1]*u[2]], dim=0),
                        torch.stack([u[0]+5, u[1]+u[2]], dim=0)],
                       dim=0)

u = h(x)
v = F(u)

v.backward()
x.grad.zero_()


RuntimeError: grad can be implicitly created only for scalar outputs