In [2]:
import torch

In [3]:
# Two 0-d (scalar) tensors and the four basic arithmetic operations on them.
x, y = torch.tensor(3.0), torch.tensor(2.0)

torch.add(x, y), torch.mul(x, y), torch.div(x, y), torch.pow(x, y)

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

In [4]:
# A 1-D integer tensor (a vector) holding 0, 1, 2.
x = torch.arange(0, 3)
x

tensor([0, 1, 2])

In [5]:
# View the vector as a single column; -1 lets reshape infer the row count.
x.reshape(-1, 1)

tensor([[0],
        [1],
        [2]])

In [6]:
# Indexing a 1-D tensor with an integer returns a 0-d tensor, not a Python number.
x[2]

tensor(2)

In [7]:
# len() reports the size of the first (here: only) axis.
len(x)

3

In [8]:
# The shape lists the length of every axis; a vector has exactly one entry.
x.shape

torch.Size([3])

In [9]:
# A 3x2 matrix filled row by row with 0..5.
A = torch.arange(6).reshape(3, 2)
A

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [10]:
# Transpose: rows and columns swap, so the 3x2 matrix becomes 2x3.
A.T

tensor([[0, 2, 4],
        [1, 3, 5]])

Symmetric matrices are the subset of square matrices that are equal to their own transpose: $\mathbf{A} = \mathbf{A}^T$.

In [11]:
# A symmetric 3x3 matrix: the element-wise comparison with its transpose
# is True in every position.
A = torch.tensor([
    [1, 2, 3],
    [2, 0, 4],
    [3, 4, 5],
])
A, A.t(), torch.eq(A, A.t())

(tensor([[1, 2, 3],
         [2, 0, 4],
         [3, 4, 5]]),
 tensor([[1, 2, 3],
         [2, 0, 4],
         [3, 4, 5]]),
 tensor([[True, True, True],
         [True, True, True],
         [True, True, True]]))

In [12]:
# A 3-D tensor: 2 stacked matrices of shape 3x4 holding 0..23.
torch.arange(24).reshape(2, 3, 4)

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

A handy property of matrices with the same shape is that we can combine them element-wise, for example with the Hadamard product $\mathbf{A} \circ \mathbf{B}$.

In [13]:
# A: 2x3 float32 matrix holding 0..5.
# B: an independent copy of A (separate memory), so A + B doubles every entry.
A = torch.arange(6, dtype=torch.float32).reshape(2, -1)
B = torch.clone(A)
A, torch.add(A, B)

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[ 0.,  2.,  4.],
         [ 6.,  8., 10.]]))

In [14]:
# `*` multiplies two same-shape tensors entry by entry (not a matrix product).
A * B # Hadamard product

tensor([[ 0.,  1.,  4.],
        [ 9., 16., 25.]])

Adding or multiplying a scalar and a tensor produces a new tensor with the same shape as the old tensor

In [15]:
# A Python scalar combined with a tensor is applied to every element,
# so the result keeps the tensor's shape (2, 3, 4).
a = 2
X = torch.arange(24).reshape(2, 3, 4)
X + a, X * a, (X * a).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 tensor([[[ 0,  2,  4,  6],
          [ 8, 10, 12, 14],
          [16, 18, 20, 22]],
 
         [[24, 26, 28, 30],
          [32, 34, 36, 38],
          [40, 42, 44, 46]]]),
 torch.Size([2, 3, 4]))

In [16]:
# sum() with no arguments reduces over every axis and returns a 0-d tensor.
A, A.sum(), A.shape

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor(15.),
 torch.Size([2, 3]))

In [17]:
# Summing over dim=0 collapses the rows: one total per column (length 3).
A.shape, A.sum(dim=0)

(torch.Size([2, 3]), tensor([3., 5., 7.]))

In [18]:
# Summing over dim=1 collapses the columns: one total per row (length 2).
A.shape, A.sum(dim=1)

(torch.Size([2, 3]), tensor([ 3., 12.]))

In [19]:
# Reducing over both axes explicitly gives the same value as the full sum.
A.sum(dim=[0,1]) == A.sum()

tensor(True)

In [20]:
# The mean is the total divided by the number of elements (numel).
A.mean(), A.sum() / A.numel()

(tensor(2.5000), tensor(2.5000))

Again, we can calculate something along a certain dimension

In [21]:
# Averaging over dim=0 yields one mean per column.
A.mean(dim=0), A

(tensor([1.5000, 2.5000, 3.5000]),
 tensor([[0., 1., 2.],
         [3., 4., 5.]]))

In [22]:
# Row totals with the reduced axis kept as size 1, so the result has shape
# (2, 1) and can later be broadcast against A.
sum_A = A.sum(dim=1, keepdim=True)
sum_A, sum_A.size()

(tensor([[ 3.],
         [12.]]),
 torch.Size([2, 1]))

In [23]:
# sum_A has shape (2, 1), so it broadcasts across the columns of A:
# every row is divided by its own total and therefore sums to 1.
A / sum_A


tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])

In [24]:
# Running total down the rows (axis 0); the shape of A is unchanged.
A.cumsum(axis=0)

tensor([[0., 1., 2.],
        [3., 5., 7.]])

Given two vectors $\mathbf{x}, \mathbf{y} \in \mathbb{R}^d$, the *dot product* is $\mathbf{x}^T \mathbf{y} = \sum_{i=1}^d x_i y_i$. This is also known as the inner product $\langle \mathbf{x}, \mathbf{y} \rangle$.

In [25]:
# Dot product of x = [0, 1, 2] with an all-ones vector of the same shape and dtype.
x = torch.arange(3, dtype=torch.float32)
y = torch.ones_like(x)
x, y, x.dot(y)

(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))

In [26]:
# Equivalently: multiply element-wise, then reduce with one tensor-level sum.
# torch.sum is used instead of the Python built-in sum(), which would iterate
# over the tensor and add its elements one at a time.
torch.sum(x * y)

tensor(3.)

For example, given some set of values, denoted by a vector $\mathbf{x} \in \mathbb{R}^n$, and a set of weights, denoted by $\mathbf{w} \in \mathbb{R}^n$, the weighted sum of the values in $\mathbf{x}$ according to the weights $\mathbf{w}$ can be expressed as the dot product $\mathbf{x}^T \mathbf{w}$. When the weights are nonnegative and sum to $1$, the dot product expresses a weighted average. After normalizing two vectors to have unit length, their dot product expresses the cosine of the angle between them. Later in this section, we will formally introduce this notion of length.

In [27]:
# Show the 2x3 matrix and the length-3 vector used in the products below.
A, x

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([0., 1., 2.]))

In [28]:
# Matrix-vector product: (2, 3) times (3,) gives (2,).
# torch.mv and the @ operator produce the same result here.
A.shape, x.shape, torch.mv(A, x), A@x

(torch.Size([2, 3]), torch.Size([3]), tensor([ 5., 14.]), tensor([ 5., 14.]))

In [29]:
# Matrix-matrix product of A (2x3) with an all-ones 3x4 matrix, computed twice:
# once with mm and once with the general matmul (what the @ operator calls).
B = torch.ones((3, 4))
A, B, A.mm(B), torch.matmul(A, B)

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]))

In [30]:
# Shape rule for matrix products: (2, 3) @ (3, 4) -> (2, 4); the inner sizes must match.
A.shape, B.shape, (A@B).shape

(torch.Size([2, 3]), torch.Size([3, 4]), torch.Size([2, 4]))

A norm is a function that maps a vector to a scalar and should satisfy the following properties:
1. $||\alpha \mathbf{x}|| = |\alpha|||\mathbf{x}||$
2. $||\mathbf{x}+\mathbf{y}|| \le ||\mathbf{x}|| + ||\mathbf{y}||$
3. $||\mathbf{x}|| > 0 \text{ for all } \mathbf{x} \ne 0$

There are many norms, but we are most used to the $\ell_2$ norm:
$||\mathbf{x}||_2 = \sqrt{\sum_{i=1}^n x_i^2}$


In [31]:
# Euclidean (l2) norm of u: sqrt(3^2 + (-4)^2) = 5.
# torch.norm is deprecated; torch.linalg.vector_norm is its replacement for
# vector norms and defaults to the 2-norm.
u = torch.tensor([3.0, -4.0])
torch.linalg.vector_norm(u)

tensor(5.)

In [32]:
# l1 norm: the sum of the absolute values, |3| + |-4| = 7.
torch.abs(u).sum()

tensor(7.)

We can also have norms on matrices, for example the Frobenius norm, which behaves like the $\ell_2$ norm of a matrix-shaped vector: $||\mathbf{X}||_F = \sqrt{\sum_{i,j} x_{ij}^2}$.

In [33]:
# Frobenius norm of a 4x9 matrix of ones: sqrt(36 * 1^2) = 6.
# torch.linalg.matrix_norm (default ord='fro') replaces the deprecated torch.norm.
torch.linalg.matrix_norm(torch.ones((4, 9)))

tensor(6.)

In [34]:
# The 4x9 all-ones matrix whose norm was computed above: 36 entries equal to 1.
torch.ones((4, 9))

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1.]])

Exercises

In [44]:
# Two independent 4x4 integer matrices, both holding 0..15 in row-major order.
A = torch.arange(16).reshape(4, 4)
B = A.clone()
A, B

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [12, 13, 14, 15]]),
 tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [12, 13, 14, 15]]))

In [45]:
# Exercise: transposing twice returns the original matrix.
A == (A.T).T

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [46]:
# Exercise: the sum of the transposes equals the transpose of the sum.
A.T + B.T == (A + B).T

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [47]:
# Exercise: for a square matrix A, A + A^T is symmetric.
A + A.T

tensor([[ 0,  5, 10, 15],
        [ 5, 10, 15, 20],
        [10, 15, 20, 25],
        [15, 20, 25, 30]])

In [51]:
# Exercise: len() of a multi-axis tensor is the size of its first axis only.
len(X), X

(2,
 tensor([[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11]],
 
         [[12, 13, 14, 15],
          [16, 17, 18, 19],
          [20, 21, 22, 23]]]))

In [56]:
# Prepend two singleton axes to the (2, 3, 4) tensor, giving shape (1, 1, 2, 3, 4).
# X is rebuilt from scratch rather than via `X = X.unsqueeze(0)`: that form added
# one more axis on every re-run, so the result depended on how many times the
# cell had been executed.
X = torch.arange(24).reshape(-1, 3, 4).unsqueeze(0).unsqueeze(0)
X

tensor([[[[[ 0,  1,  2,  3],
           [ 4,  5,  6,  7],
           [ 8,  9, 10, 11]],

          [[12, 13, 14, 15],
           [16, 17, 18, 19],
           [20, 21, 22, 23]]]]])

In [57]:
# len() still reports only the first axis, whatever the remaining axes are.
len(X), X.shape

(1, torch.Size([1, 1, 2, 3, 4]))

In [59]:
# NOTE: A.sum(axis=1) has shape (4,), so it broadcasts along the last axis:
# element [i, j] is divided by the sum of row j, not of row i. This only runs
# because A is square, and the rows do NOT sum to 1. For a row-wise
# normalization the reduced axis has to be kept: A / A.sum(axis=1, keepdims=True).
A, A / A.sum(axis=1)

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [12, 13, 14, 15]]),
 tensor([[0.0000, 0.0455, 0.0526, 0.0556],
         [0.6667, 0.2273, 0.1579, 0.1296],
         [1.3333, 0.4091, 0.2632, 0.2037],
         [2.0000, 0.5909, 0.3684, 0.2778]]))

In [60]:
# Row totals as a 1-D tensor of length 4 (the reduced axis is dropped).
A.sum(axis=1)

tensor([ 6, 22, 38, 54])

In [63]:
# Select every row of the column at index 2 (the third column).
A[:,2]

tensor([ 2,  6, 10, 14])