# Chapter 3: Linear Algebra

In [1]:
import torch

## 1. Scalars

Scalars are known as 0th-order tensors. They are implemented as tensors that contain only one element.

In [2]:
# Two 0-dimensional (single-element) tensors used by the arithmetic cells below
x, y = torch.tensor(3.0), torch.tensor(2.0)

In [3]:
# Addition of the two scalar tensors (function form of x + y)
torch.add(x, y)

tensor(5.)

In [4]:
# Multiplication of the two scalar tensors (function form of x * y)
torch.mul(x, y)

tensor(6.)

In [5]:
# True division of the two scalar tensors (function form of x / y)
torch.div(x, y)

tensor(1.5000)

In [6]:
# Exponentiation: x raised to the power y (function form of x ** y)
torch.pow(x, y)

tensor(9.)

## 2. Vectors

Vectors are known as 1st order tensors

In [9]:
# 1st-order tensor holding the integers 0, 1, 2
x = torch.arange(0, 3)
x

tensor([0, 1, 2])

In [10]:
# Indexing is 0-based, so index 2 selects the third (here: last) element of x
x[2]

tensor(2)

The dimensionality of a vector is its number of elements, which corresponds to the tensor's length.

In [12]:
# len() of a tensor is the size of its first axis; for a vector that is the element count
len(x)

3

In [13]:
# shape: a torch.Size with one entry per axis; a vector has exactly one axis
x.shape

torch.Size([3])

## 3. Matrices

Matrices are known as 2nd order tensors

In [14]:
# 2nd-order tensor: the integers 0..5 laid out row by row
A = torch.reshape(torch.arange(6), (3, 2))
A  # 3 x 2

tensor([[0, 1],
        [2, 3],
        [4, 5]])

### Transpose

In [15]:
# Transpose: swap the two axes of A, so rows become columns
A.transpose(0, 1)  # 2 x 3

tensor([[0, 2, 4],
        [1, 3, 5]])

## 4. Tensors

Tensors become more important when we work with images. Each image is a 3rd-order tensor with axes (height, width, channel). A collection of images is represented by a 4th-order tensor, where distinct images are indexed along the first axis.

In [16]:
# 3rd-order tensor: 24 consecutive integers arranged as 2 blocks of 3 x 4
torch.reshape(torch.arange(24), (2, 3, 4))

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

## 5. Basic Properties of Tensor Arithmetic

In [18]:
# 2 x 3 float32 matrix and an independent copy of it
A = torch.reshape(torch.arange(0, 6, dtype=torch.float32), (2, 3))
B = torch.clone(A)  # clone() allocates new memory, so B does not alias A
(A, B)

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[0., 1., 2.],
         [3., 4., 5.]]))

In [19]:
# Elementwise (Hadamard) product of A and B; this is not a matrix multiplication
torch.mul(A, B)

tensor([[ 0.,  1.,  4.],
        [ 9., 16., 25.]])

Adding or multiplying a scalar produces a result with the same shape as the original tensor.

In [20]:
# Python scalar a and a 2 x 3 x 4 integer tensor X for the scalar-tensor examples
a, X = 2, torch.reshape(torch.arange(24), (2, 3, 4))
X

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

In [21]:
# The scalar is added to every element of X; the result keeps X's shape
torch.add(X, a)

tensor([[[ 2,  3,  4,  5],
         [ 6,  7,  8,  9],
         [10, 11, 12, 13]],

        [[14, 15, 16, 17],
         [18, 19, 20, 21],
         [22, 23, 24, 25]]])

In [22]:
# Every element of X is scaled by a; the result keeps X's shape
torch.mul(X, a)

tensor([[[ 0,  2,  4,  6],
         [ 8, 10, 12, 14],
         [16, 18, 20, 22]],

        [[24, 26, 28, 30],
         [32, 34, 36, 38],
         [40, 42, 44, 46]]])

## 6. Reduction

In [29]:
# A is the 2 x 3 float matrix that the following cells reduce with sum() and mean()
A

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [30]:
# Sum over every element of A, collapsing all axes to a scalar tensor
torch.sum(A)

tensor(15.)

In [31]:
# Reduce along axis 0 (down the rows): the row axis disappears, leaving one sum per column
torch.sum(A, dim=0)

tensor([3., 5., 7.])

In [32]:
# Reduce along axis 1 (across the columns): the column axis disappears, leaving one sum per row
torch.sum(A, dim=1)

tensor([ 3., 12.])

In [33]:
# Mean over every element of A
torch.mean(A)

tensor(2.5000)

In [34]:
# Mean along axis 0: one average per column
torch.mean(A, dim=0)

tensor([1.5000, 2.5000, 3.5000])

## 7. Non-Reduction Sum

Keeping the number of axes unchanged

In [37]:
# Show A again as the reference for the keepdims comparison in the next cells
A

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [35]:
# Plain reduction along axis 1: the summed axis is dropped, giving a 1-D result
torch.sum(A, dim=1)

tensor([ 3., 12.])

In [36]:
# Same reduction, but the summed axis is kept with size 1, so the result stays 2-D (2 x 1)
torch.sum(A, dim=1, keepdim=True)

tensor([[ 3.],
        [12.]])

In [39]:
# Because the row sums keep a trailing axis of size 1, they broadcast against A,
# so each row of A is divided by its own sum
sum_A = torch.sum(A, dim=1, keepdim=True)
torch.div(A, sum_A)

tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])

In [40]:
# Running total along axis 0 (row by row); the shape of A is preserved
torch.cumsum(A, dim=0)

tensor([[0., 1., 2.],
        [3., 5., 7.]])

## 8. Dot Products

Dot product is the sum over the products of the elements at the same position. $\sum_{i=1}^{d}x_{i}y_{i}$
- When weights are non-negative and sum to one, the dot product expresses a weighted average
- After normalizing two vectors to have unit length, the dot product expresses the cosine of the angle between them

In [41]:
# Re-create x as float32 here. The only earlier definition, `x = torch.arange(3)`,
# is int64, and torch.dot / torch.mv / torch.matmul below require operands of
# matching dtype, so a fresh Restart & Run All would otherwise fail at the dot product.
x = torch.arange(3, dtype=torch.float32)
y = torch.ones(3, dtype=torch.float32)

In [42]:
# First operand of the dot product below
x

tensor([0., 1., 2.])

In [43]:
# Second operand of the dot product below (a vector of ones)
y

tensor([1., 1., 1.])

In [44]:
# Dot product: sum of the elementwise products of x and y
x.dot(y)

tensor(3.)

## 9. Matrix-Vector Products

In [45]:
# Matrix operand (2 x 3) of the matrix-vector product below
A

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [46]:
# Vector operand of the matrix-vector product below; its length matches A's column count
x

tensor([0., 1., 2.])

In [47]:
# Matrix-vector product: each output entry is the dot product of one row of A with x
A.mv(x)

tensor([ 5., 14.])

In [48]:
# Same matrix-vector product written with the @ (matmul) operator
A @ x

tensor([ 5., 14.])

## 10. Matrix-Matrix Multiplication

In [50]:
# 3 x 4 matrix of ones; its row count matches A's column count, so A times B is defined
B = torch.ones((3, 4))
B

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [51]:
# Left operand (2 x 3) of the matrix-matrix product below
A

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [52]:
# Matrix-matrix product: (2 x 3) times (3 x 4) gives a 2 x 4 result
A.mm(B)

tensor([[ 3.,  3.,  3.,  3.],
        [12., 12., 12., 12.]])

In [53]:
# Same matrix-matrix product written with the @ (matmul) operator
A @ B

tensor([[ 3.,  3.,  3.,  3.],
        [12., 12., 12., 12.]])

## 11. Norms

The norm of a vector tells us how big its magnitude is.
- L2 norm = $||X||_{2} = \sqrt{\sum_{i=1}^{n}x_{i}^2}$
- L1 norm = $||X||_{1} = \sum_{i=1}^{n}|x_{i}|$

In [56]:
# Example vector for the norm computations below
u = torch.tensor((3.0, -4.0))
u

tensor([ 3., -4.])

In [57]:
# L2 (Euclidean) norm: square root of the sum of squared elements.
# torch.linalg.vector_norm is the maintained replacement for the deprecated
# torch.norm; its default ord=2 yields the same value.
torch.linalg.vector_norm(u)

tensor(5.)

In [59]:
# L1 norm: sum of the absolute values of the elements
u.abs().sum()

tensor(7.)