## 2.1

In [12]:
import torch

### Broadcasting

Refer to: https://docs.pytorch.org/docs/stable/notes/broadcasting.html

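To align the shapes, we pad the shorter tensor's shape on the left (i.e., with leading dimensions) with $1$s until both shapes have the same length; dimensions are then compared starting from the trailing one.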

Denote the shapes of tensors $A$ and $B$ (after padding) as $A=(A_1, A_2, \dots, A_n), B=(B_1, B_2, \dots, B_n)$.

If $A_i = B_i$ or $A_i=1$ or $B_i=1$ for all $i$, then broadcasting is valid with result $C_i = \max(A_i, B_i)$; otherwise, it fails.


In [None]:
# Operands for the broadcasting demo, shapes shown right-aligned:
#   x: (2, 3, 4)
#   y: (   3, 1)
x = torch.arange(24).reshape(2, 3, 4)
y = torch.arange(3).unsqueeze(-1)  # (3,) -> (3, 1)
x, y

(tensor([[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11]],
 
         [[12, 13, 14, 15],
          [16, 17, 18, 19],
          [20, 21, 22, 23]]]),
 tensor([[0],
         [1],
         [2]]))

In [None]:
# Manual broadcast of y: prepend a size-1 axis, (3, 1) -> (1, 3, 1), then
# stretch the size-1 axes to the target shape (2, 3, 4).
y2 = y[None, ...].expand(2, 3, 4)
assert torch.equal(x + y2, x + y)  # matches implicit broadcasting
y2

tensor([[[0, 0, 0, 0],
         [1, 1, 1, 1],
         [2, 2, 2, 2]],

        [[0, 0, 0, 0],
         [1, 1, 1, 1],
         [2, 2, 2, 2]]])

In [None]:
# Implicit broadcasting: y is broadcast against x before the element-wise add.
torch.add(x, y)

tensor([[[ 0,  1,  2,  3],
         [ 5,  6,  7,  8],
         [10, 11, 12, 13]],

        [[12, 13, 14, 15],
         [17, 18, 19, 20],
         [22, 23, 24, 25]]])

### Exercises

#### Q2

In [None]:
# Q2: try a 3-D tensor against different 2-D shapes.
x = torch.arange(24).reshape(2, 3, 4)
# A (3, 3) operand, e.g. torch.arange(9).reshape(3, 3), fails:
# the trailing dims are 4 vs 3 and neither is 1.
# A (3, 1) operand works: 4 vs 1 and 3 vs 3 are compatible, and the
# missing leading dim is treated as 1.
y = torch.arange(3)[:, None]
x + y

tensor([[[ 0,  1,  2,  3],
         [ 5,  6,  7,  8],
         [10, 11, 12, 13]],

        [[12, 13, 14, 15],
         [17, 18, 19, 20],
         [22, 23, 24, 25]]])