# Linear Algebra



In [1]:
import torch

Scalars are implemented as tensors 
that contain only one element

In [2]:
x = torch.tensor(3.0)
y = torch.tensor(2.0)

x + y, x * y, x / y, x**y

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

You can think of vectors
as fixed-length arrays of scalars

In [3]:
x = torch.arange(3)
x

tensor([0, 1, 2])

We access a tensor's elements via indexing

In [4]:
x[2]

tensor(2)

In code, this corresponds to the tensor's length

In [7]:
print(len(x))
print(x.shape)

3
torch.Size([3])


Tensors with just one axis have shapes with just one element

In [6]:
x.shape

torch.Size([3])

We can convert any appropriately sized $m \times n$ tensor 
into an $m \times n$ matrix

In [8]:
A = torch.arange(6).reshape(3, 2)
A

tensor([[0, 1],
        [2, 3],
        [4, 5]])

Matrix's transpose

In [9]:
A.T

tensor([[0, 2, 4],
        [1, 3, 5]])

Symmetric matrices are the subset of square matrices
that are equal to their own transposes:
$\mathbf{A} = \mathbf{A}^\top$
  
 对称矩阵

In [14]:
A = torch.tensor([[1, 2, 3], [2, 0, 4], [3, 4, 5]])
A == A.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

Tensors
give us a generic way to describe 
extensions to $n^{\mathrm{th}}$-order arrays

In [15]:
torch.arange(24).reshape(2, 3, 4)

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

In [16]:
A = torch.arange(6, dtype=torch.float32).reshape(2, 3)
B = A.clone()
A, A + B

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[ 0.,  2.,  4.],
         [ 6.,  8., 10.]]))

Elementwise product of two matrices
is called their *Hadamard product*

In [17]:
A * B

tensor([[ 0.,  1.,  4.],
        [ 9., 16., 25.]])

Adding or multiplying a scalar and a tensor

In [18]:
a = 2
X = torch.arange(24).reshape(2, 3, 4)
a + X, (a * X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

The sum of a tensor's elements

In [19]:
x = torch.arange(3, dtype=torch.float32)
x, x.sum()

(tensor([0., 1., 2.]), tensor(3.))

Sums over the elements of tensors of arbitrary shape

In [20]:
A.shape, A.sum()

(torch.Size([2, 3]), tensor(15.))

Specify the axes 
along which the tensor should be reduced

In [21]:
A.shape, A.sum(axis=0).shape

(torch.Size([2, 3]), torch.Size([3]))

In [22]:
A.shape, A.sum(axis=1).shape

(torch.Size([2, 3]), torch.Size([2]))

In [23]:
A.sum(axis=[0, 1]) == A.sum()

tensor(True)

A related quantity is the *mean*, also called the *average*

In [24]:
A.mean(), A.sum() / A.numel()

(tensor(2.5000), tensor(2.5000))

In [25]:
A.mean(axis=0), A.sum(axis=0) / A.shape[0]

(tensor([1.5000, 2.5000, 3.5000]), tensor([1.5000, 2.5000, 3.5000]))

Keep the number of axes unchanged

In [26]:
sum_A = A.sum(axis=1, keepdims=True)
sum_A, sum_A.shape

(tensor([[ 3.],
         [12.]]),
 torch.Size([2, 1]))

Divide `A` by `sum_A` with broadcasting

In [27]:
A / sum_A

tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])

The cumulative sum of elements of `A` along some axis

In [28]:
A.cumsum(axis=0)

tensor([[0., 1., 2.],
        [3., 5., 7.]])

The *dot product* of two vectors is a sum over the products of the elements at the same position

In [29]:
y = torch.ones(3, dtype = torch.float32)
x, y, torch.dot(x, y)

(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))

We can calculate the dot product of two vectors 
by performing an elementwise multiplication followed by a sum

In [30]:
torch.sum(x * y)

tensor(3.)

The matrix-vector product $\mathbf{A}\mathbf{x}$
is simply a column vector of length $m$,
whose $i^\mathrm{th}$ element is the dot product 
$\mathbf{a}^\top_i \mathbf{x}$

In [31]:
A.shape, x.shape, torch.mv(A, x), A@x

(torch.Size([2, 3]), torch.Size([3]), tensor([ 5., 14.]), tensor([ 5., 14.]))

We can think of the matrix-matrix multiplication $\mathbf{AB}$
as performing $m$ matrix-vector products 
or $m \times n$ dot products 
and stitching the results together 
to form an $n \times m$ matrix

In [32]:
B = torch.ones(3, 4)
torch.mm(A, B), A@B

(tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]))

The $\ell_2$ *norm*
$$\|\mathbf{x}\|_2 = \sqrt{\sum_{i=1}^n x_i^2}$$

In [33]:
u = torch.tensor([3.0, -4.0])
torch.norm(u)

tensor(5.)

The $\ell_1$ norm
$$\|\mathbf{x}\|_1 = \sum_{i=1}^n \left|x_i \right|$$

In [34]:
torch.abs(u).sum()

tensor(7.)

The *Frobenius norm*, 
which is much easier to compute
$$\|\mathbf{X}\|_F = \sqrt{\sum_{i=1}^m \sum_{j=1}^n x_{ij}^2}$$

In [35]:
torch.norm(torch.ones((4, 9)))

tensor(6.)

## 练习
给出两个矩阵A和B，证明“它们转置的和”等于“它们和的转置”，即A⊤+B⊤=(A+B)⊤。  
给定任意方阵A，A+A⊤总是对称的吗?为什么?  
本节中定义了形状(2,3,4)的张量X。len(X)的输出结果是什么？  
对于任意形状的张量X,len(X)是否总是对应于X特定轴的长度?这个轴是什么?  
运行A/A.sum(axis=1)，看看会发生什么。请分析一下原因？  
考虑一个具有形状(2,3,4)的张量，在轴0、1、2上的求和输出是什么形状?  
为linalg.norm函数提供3个或更多轴的张量，并观察其输出。对于任意形状的张量这个函数计算得到什么?  

In [47]:
## 1.
A = torch.randn(3, 4)
print(A)
A == A.T.T

tensor([[-1.3670, -1.5078, -1.2647,  0.2733],
        [ 1.2319,  0.9475,  0.5649,  2.6652],
        [ 0.9261, -1.2197,  0.7675, -0.8550]])


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [48]:
## 2.
A = torch.randn(3, 4)
B = torch.randn(3, 4)
A.T+B.T == (A+B).T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True]])

In [51]:
## 3 因为 (A+A.T).T = A.T + A.T.T(第二个已经证实了)= A.T + A (第一个已经证实了) 
A = torch.randn(3, 3)
A + A.T == (A + A.T).T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

In [58]:
## 4 

print(X.shape)
print(len(X))

torch.Size([2, 3, 4])
2


In [64]:
## 5 总是第一个
X = torch.randn(3,5,4)
print(len(X))
X = torch.randn(100,5,4)
print(len(X))
X = torch.randn(98,5,4)
print(len(X))

3
100
98


In [67]:
## 6 报错，无法broadcast
A = torch.arange(6, dtype=torch.float32).reshape(2, 3)
print(A/A.sum(axis=0))
A/A.sum(axis=1)

tensor([[0.0000, 0.2000, 0.2857],
        [1.0000, 0.8000, 0.7143]])


RuntimeError: The size of tensor a (3) must match the size of tensor b (2) at non-singleton dimension 1

In [72]:
## 7 总是第一个
X = torch.randn(3,5,4)
print(X.sum(axis=0).shape) # 按照0轴压扁所以 剩下5，4
print(X.sum(axis=1).shape) # 按照1轴压扁所以 剩下3，4
print(X.sum(axis=2).shape)

torch.Size([5, 4])
torch.Size([3, 4])
torch.Size([3, 5])


In [86]:
## 8
torch.linalg.norm(torch.randn(3,5,4))
torch.linalg.norm(torch.randn(3,5,4,6,7))
torch.linalg.norm(torch.randn(3,5,4,6))

tensor(19.6066)