# Linear Algebra

In [None]:
import torch
import numpy as np

## Scalars

In [None]:
# Two zero-dimensional (scalar) tensors.
x, y = torch.tensor(3.0), torch.tensor(2.0)

# Scalar arithmetic: sum, difference, product, quotient and power,
# printed one result per line.
print(x + y, x - y, x * y, x / y, x ** y, sep="\n")

tensor(5.)
tensor(1.)
tensor(6.)
tensor(1.5000)
tensor(9.)


## Vectors

In [None]:
# A vector is a 1-D tensor; arange(4) holds the integers 0, 1, 2, 3.
X = torch.arange(4)
X

tensor([0, 1, 2, 3])

In [None]:
# Indexing is zero-based, so index 3 selects the fourth (last) element.
X[3]

tensor(3)

In [None]:
# len() of a vector is its number of elements.
len(X)

4

In [None]:
# .shape lists the size along each axis; a vector has exactly one axis.
X.shape

torch.Size([4])

## Matrix

In [None]:
# A matrix is a 2-D tensor: the integers 0..19 laid out as 5 rows by 4 columns.
A = torch.arange(20).reshape(5, 4)
A

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]])

In [None]:
# .T swaps rows and columns: the 5x4 matrix becomes 4x5.
A.T

tensor([[ 0,  4,  8, 12, 16],
        [ 1,  5,  9, 13, 17],
        [ 2,  6, 10, 14, 18],
        [ 3,  7, 11, 15, 19]])

In [None]:
# A symmetric matrix: entry (i, j) equals entry (j, i).
B = torch.tensor([[1, 2, 3], [2, 0, 4], [3, 4, 5]])
B

tensor([[1, 2, 3],
        [2, 0, 4],
        [3, 4, 5]])

In [None]:
# Elementwise comparison with the transpose; all True means B is symmetric.
B == B.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

## Tensors

In [None]:
# X is rebound from the earlier vector to a 3-axis tensor of shape (2, 3, 4):
# two stacked 3x4 matrices.
X = torch.arange(24).reshape(2, 3, 4)
X

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

In [None]:
# Float 5x4 matrix and an independent copy of it (clone allocates new memory).
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
B = A.clone()

# Show A, then the elementwise sum A + B (every entry doubled).
print(A, A + B, sep="\n")

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])
tensor([[ 0.,  2.,  4.,  6.],
        [ 8., 10., 12., 14.],
        [16., 18., 20., 22.],
        [24., 26., 28., 30.],
        [32., 34., 36., 38.]])


In [None]:
# * multiplies elementwise (Hadamard product), not as a matrix product;
# since B is a copy of A, every entry is squared.
A*B

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.],
        [144., 169., 196., 225.],
        [256., 289., 324., 361.]])

In [None]:
# With no axis given, sum() adds up every element of the tensor.
A.sum()

tensor(190.)

In [None]:
# Axis 0 has size 5 (rows), axis 1 has size 4 (columns).
A.shape

torch.Size([5, 4])

In [None]:
# Summing over axis 0 collapses the 5 rows, leaving one total per column.
A.sum(axis=0)

tensor([40., 45., 50., 55.])

In [None]:
# Summing over axis 1 collapses the 4 columns, leaving one total per row.
A.sum(axis=1)

tensor([ 6., 22., 38., 54., 70.])

In [None]:
# Mean over all elements.
A.mean()

tensor(9.5000)

In [None]:
# The same mean computed by hand: total divided by the number of elements.
A.sum()/A.numel()

tensor(9.5000)

### Understanding Axes

In [None]:
# X is the 3-axis tensor built in the Tensors section.
print(X.shape)

torch.Size([2, 3, 4])


In [None]:
# Reduce over axis 0; keepdims=True keeps the reduced axis with size 1,
# so the shape goes from (2, 3, 4) to (1, 3, 4).
res = X.sum(axis=0, keepdims=True)
print(res.shape)
print(res)

torch.Size([1, 3, 4])
tensor([[[12, 14, 16, 18],
         [20, 22, 24, 26],
         [28, 30, 32, 34]]])


In [None]:
# Reduce over axis 1; keepdims=True keeps the reduced axis with size 1,
# so the shape goes from (2, 3, 4) to (2, 1, 4).
res = X.sum(axis=1, keepdims=True)
print(res.shape)
print(res)

torch.Size([2, 1, 4])
tensor([[[12, 15, 18, 21]],

        [[48, 51, 54, 57]]])


In [None]:
# Reduce over axis 2; keepdims=True keeps the reduced axis with size 1,
# so the shape goes from (2, 3, 4) to (2, 3, 1).
res = X.sum(axis=2, keepdims=True)
print(res.shape)
print(res)

torch.Size([2, 3, 1])
tensor([[[ 6],
         [22],
         [38]],

        [[54],
         [70],
         [86]]])


### Cumulative sum

In [None]:
# Running totals down each column; the shape is unchanged and the last row
# equals A.sum(axis=0).
A.cumsum(axis=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  6.,  8., 10.],
        [12., 15., 18., 21.],
        [24., 28., 32., 36.],
        [40., 45., 50., 55.]])

In [None]:
# Running totals along each row; the shape is unchanged and the last column
# equals A.sum(axis=1).
A.cumsum(axis=1)

tensor([[ 0.,  1.,  3.,  6.],
        [ 4.,  9., 15., 22.],
        [ 8., 17., 27., 38.],
        [12., 25., 39., 54.],
        [16., 33., 51., 70.]])

### Products

In [None]:
# Two length-4 integer vectors used as operands for the products below.
o1 = torch.arange(4)
o2 = 5 * (o1 + 4)

print(o1, o2, sep="\n")

tensor([0, 1, 2, 3])
tensor([20, 25, 30, 35])


In [None]:
# Dot product: sum of elementwise products, 0*20 + 1*25 + 2*30 + 3*35 = 190.
torch.dot(o1, o2)

tensor(190)

In [None]:
# 5x4 integer matrix used as the left operand of the products below.
m1 = torch.arange(20).reshape(5, 4)
print(m1)

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]])


In [None]:
# Matrix-vector product: (5, 4) times (4,) gives (5,). Each output entry is the
# dot product of one row of m1 with o1, so m1's column count must equal len(o1).
print(m1.shape)
print(o1.shape)
torch.mv(m1, o1)

torch.Size([5, 4])
torch.Size([4])


tensor([ 14,  38,  62,  86, 110])

In [None]:
# 4x5 integer matrix; its row count matches m1's column count.
m2 = torch.arange(20).reshape(4, 5)
print(m2)

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]])


In [None]:
# Matrix-matrix product: (5, 4) times (4, 5) gives (5, 5). The inner sizes
# (4 and 4) must agree.
print(m1.shape)
print(m2.shape)

mm = torch.mm(m1, m2)
print(mm.shape)
print(mm)

torch.Size([5, 4])
torch.Size([4, 5])
torch.Size([5, 5])
tensor([[ 70,  76,  82,  88,  94],
        [190, 212, 234, 256, 278],
        [310, 348, 386, 424, 462],
        [430, 484, 538, 592, 646],
        [550, 620, 690, 760, 830]])


## Norms

<p>In linear algebra, a vector norm is a function f that maps a vector to a scalar, satisfying a handful of properties.
</p> 
A norm satisfies the following 4 properties: <br/> 
1. Given any vector x, the first property says that if we scale all the elements of a vector
by a constant factor α, its norm also scales by the absolute value of the same constant factor:
<br />
<center>f (αx) = |α|f (x) </center><br />
2. The second property is the familiar triangle inequality: <br />
<center>f (x + y) ≤ f (x) + f (y) </center><br />
3. The third property simply says that the norm must be non-negative: <br />
<center>f (x) ≥ 0</center> <br />
4. The final property requires that the smallest norm is achieved and only achieved by a vector consisting of all zeros. <br />
<center>∀i, [x]i = 0 ⇔ f (x) = 0 </center>

In [None]:
# Example vector for the norm computations in the following sections.
u = torch.tensor([3.0, -4.0, 5.0])

### L1 Norm

Sum of absolute values of vector elements.<br />
<center>

![L1 Norm](https://drive.google.com/uc?id=1-J6flCaFuEUUCx6ZbROQJgIc4MtgzrfI)

</center>


In [None]:
# L1 norm: add up the magnitudes of the entries, |3| + |-4| + |5| = 12.
L1_NORM = u.abs().sum()
print(L1_NORM)

tensor(12.)


### L2 Norm

Square root of the sum of squared vector elements (the Euclidean length). This is the norm PyTorch computes by default.<br />
<center>

![L2 Norm](https://drive.google.com/uc?id=1UBTM8fDqw_TlFkFVDMRsQe-jegVP7uZZ)

</center>


In [None]:
# L2 (Euclidean) norm: sqrt(3^2 + (-4)^2 + 5^2) = sqrt(50).
# torch.norm is deprecated; torch.linalg.norm is its replacement and, with
# default arguments on a vector, also returns the L2 norm.
L2_NORM = torch.linalg.norm(u)
print(L2_NORM)

tensor(7.0711)


### LP Norm

General form of the vector norm; the L1 and L2 norms are the special cases p = 1 and p = 2.<br />
<center>

![LP Norm](https://drive.google.com/uc?id=1shWPW7ka9qrNZgrIS2ZO9D7tXQIlLfYY)

</center>


### Frobenius Norm

The matrix analogue of the L2 norm: the square root of the sum of the squares of all matrix elements.

<center>

![Frobenius Norm](https://drive.google.com/uc?id=1l2CDaZxpeS0pJOUrub-b-dfzhvNqopKf)

</center>

In [None]:
# Frobenius norm of a 4x9 matrix of ones: sqrt(36 * 1^2) = 6.
# torch.linalg.norm replaces the deprecated torch.norm; with default arguments
# on a matrix it returns the Frobenius norm.
torch.linalg.norm(torch.ones((4, 9)))

tensor(6.)

## Exercises

<pre>
1. Prove that the transpose of a matrix Aʼs transpose is A: (A⊤)⊤ = A.
2. Given two matrices A and B, show that the sum of transposes is equal to the transpose of a
sum: A⊤ + B⊤ = (A + B)⊤.
3. Given any square matrix A, is A + A⊤ always symmetric? Why?
4. We defined the tensor X of shape (2, 3, 4) in this section. What is the output of len(X)?
5. For a tensor X of arbitrary shape, does len(X) always correspond to the length of a certain
axis of X? What is that axis?
6. Run A / A.sum(axis=1) and see what happens. Can you analyze the reason?
7. When traveling between two points in Manhattan, what is the distance that you need to cover
in terms of the coordinates, i.e., in terms of avenues and streets? Can you travel diagonally?
8. Consider a tensor with shape (2, 3, 4). What are the shapes of the summation outputs along
axis 0, 1, and 2?
9. Feed a tensor with 3 or more axes to the linalg.norm function and observe its output. What
does this function compute for tensors of arbitrary shape?
</pre>

In [None]:
# Operands for the exercises: a 4x5 integer matrix and a second matrix whose
# entries are each 4 larger. The addition already yields a new tensor.
A = torch.arange(20).reshape(4, 5)
B = A + 4

In [None]:
# Exercise 1: transposing twice gives back the original matrix.
(A.T.T == A).all()

tensor(True)

In [None]:
# Exercise 2: the sum of the transposes equals the transpose of the sum.
((A.T + B.T)==(A+B).T).all()

tensor(True)

In [None]:
# Exercise 3: A + A^T is symmetric for any square A, because
# (A + A^T)^T = A^T + A = A + A^T. Checked here on a random 5x5 matrix.
sq = torch.rand(5, 5)
s = sq.T + sq
(s == s.T).all()

tensor(True)

In [None]:
# Exercise 4: a random tensor with the same (2, 3, 4) shape as X stands in
# for it; len() reports the size of the first axis.
len(torch.rand(2,3,4))

2

In [None]:
# Exercise 5 answer: len(X) corresponds to the length of axis 0.
print("axis 0")

axis 0


In [None]:
# Exercise 6: this raises a RuntimeError. Assuming A is still the (4, 5)
# matrix from the exercise setup cell, A.sum(axis=1) has shape (4,), and
# broadcasting aligns trailing axes, so size 4 cannot be matched against A's
# last axis of size 5. Summing with keepdims=True would give shape (4, 1),
# which broadcasts and divides each row by its own sum.
A/A.sum(axis=1)

RuntimeError: ignored

In [None]:
# Exercise 7 answer: the Manhattan (L1) distance between the two points,
# and "No" -- diagonal travel is not possible on the street grid.
print("|x1-x2|+|y1-y2|", "No", sep="\n")

|x1-x2|+|y1-y2|
No


In [None]:
# Exercise 8: summing a (2, 3, 4) tensor along axis 0, 1 and 2 removes that
# axis, giving shapes (3, 4), (2, 4) and (2, 3) respectively.
q8 = torch.randn(2, 3, 4)
print(q8.sum(axis=0), q8.sum(axis=1), q8.sum(axis=2), sep="\n")

tensor([[ 2.3063,  0.6032,  0.2663, -0.7617],
        [-0.9980,  0.2149, -0.2618,  1.8162],
        [ 0.8835, -0.5496,  3.2250, -0.6768]])
tensor([[ 1.8520, -1.7091, -0.6258, -0.2245],
        [ 0.3398,  1.9776,  3.8554,  0.6023]])
tensor([[-1.7589, -0.2923,  1.3437],
        [ 4.1730,  1.0636,  1.5385]])


In [None]:
# Exercise 9: with default arguments, linalg.norm flattens a tensor of any
# shape and returns the L2 norm of all its elements (here sqrt(0^2 + ... + 23^2)).
# torch.linalg.norm is used instead of NumPy's so the tensor is not implicitly
# converted to an ndarray. It only accepts floating-point or complex input,
# hence the cast; .item() unwraps the 0-dim result into a Python float.
q9 = torch.arange(24).reshape(2, 3, 4)
q9_norm = torch.linalg.norm(q9.double()).item()
print(q9_norm)

65.75712889109438
