# CB02-4: Introduction to PyTorch
Step Zero: install pytorch through [PyTorch](https://pytorch.org/)

## Part One: Tensors

### 01 Create Tensor

In [1]:
import torch  # core PyTorch package used by every cell in this notebook
torch.__version__  # last expression of the cell: displays the installed version string

'2.3.1+cpu'

In [2]:
# Three 3x4 tensors built by the basic factory functions.
# torch.empty only allocates storage, so its values are arbitrary.
empty = torch.empty((3, 4))
zeros = torch.zeros((3, 4))
ones = torch.ones((3, 4))
empty, zeros, ones

(tensor([[2.4427e-04, 1.6689e-42, 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]]),
 tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]))

In [3]:
# Seed the global random number generator so the draw below is reproducible.
torch.manual_seed(0)
random = torch.rand((3, 4))  # uniform samples between 0 and 1
random

tensor([[0.4963, 0.7682, 0.0885, 0.1320],
        [0.3074, 0.6341, 0.4901, 0.8964],
        [0.4556, 0.6323, 0.3489, 0.4017]])

In [6]:
# Re-seeding with the same value replays the same random sequence,
# so every element matches the `random` tensor drawn earlier.
torch.manual_seed(0)
random2 = torch.rand((3, 4))
random2.eq(random)

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

### Create tensors from existing tensors

In [None]:
# The *_like factories build a new tensor with the same shape as an
# existing one; only the fill values differ.
x = torch.ones(2, 3, 4)

empty_like = torch.empty_like(x)  # arbitrary (uninitialised) values
zeros_like = torch.zeros_like(x)  # name fixed: was misspelled `zeros_liek`
ones_like = torch.ones_like(x)
rand_like = torch.rand_like(x)    # uniform samples between 0 and 1
(
    empty_like, empty_like.shape,
    zeros_like, zeros_like.shape,
    ones_like, ones_like.shape,
    rand_like, rand_like.shape,
)

### 02 shape (intuitive understanding)

In [26]:
# Note the mapping between the shape (2, 3, 4) and the nested printout:
# 2 blocks, each holding 3 rows of 4 values.
x = torch.ones((2, 3, 4))
x.shape, torch.sum(x)  # append .item() to the sum for a plain Python number

(torch.Size([2, 3, 4]), tensor(24.))

In [12]:
x  # Note the mapping: each level of nested brackets in the printout is one entry of x.shape

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

### sum

In [28]:
torch.sum(x)  # no dim given: every element is added into a single scalar tensor

tensor(24.)

In [37]:
# Summing over one axis removes that axis from the result's shape.
torch.sum(x, dim=2), torch.sum(x, dim=1), torch.sum(x, dim=0)

(tensor([[4., 4., 4.],
         [4., 4., 4.]]),
 tensor([[3., 3., 3., 3.],
         [3., 3., 3., 3.]]),
 tensor([[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]]))

In [38]:
# Summing over two axes at once leaves only the remaining axis.
torch.sum(x, dim=(1, 2)), torch.sum(x, dim=(0, 2)), torch.sum(x, dim=(0, 1))

(tensor([12., 12.]), tensor([8., 8., 8.]), tensor([6., 6., 6., 6.]))

### reshape

In [39]:
# reshape keeps the same 8 elements in the same order; only the grouping changes.
a = torch.rand((2, 4))
b = torch.reshape(a, (2, 2, 2))
a.shape, b.shape

(torch.Size([2, 4]), torch.Size([2, 2, 2]))

### 03 DataType-dtype

In [40]:
# Default dtypes inferred from Python literals
float_x = torch.tensor(
    [[2.5, 0.1],
     [1.0, 3.0]]
)  # Python floats become float32
int_x = torch.tensor([2, 0])  # Python ints become int64
float_x.dtype, int_x.dtype

(torch.float32, torch.int64)

In [41]:
# A dtype can be requested explicitly at construction time.
a = torch.ones(3, 4, dtype=torch.int16)
b = torch.rand(3, 4, dtype=torch.float16)
a.dtype, b.dtype

(torch.int16, torch.float16)

In [42]:
# Convert an existing tensor to another dtype; the result is bound to c.
c = b.to(dtype=torch.float32)
c.dtype

torch.float32

### 04 element-wise operations(same dimension)

In [43]:
# Arithmetic between a tensor and a scalar is applied to every element.
ones = 1 + torch.zeros(2, 2)
twos = 2 * torch.ones(2, 2)
threes = (7 * torch.ones(2, 2) - 1) / 2
ones, twos, threes

(tensor([[1., 1.],
         [1., 1.]]),
 tensor([[2., 2.],
         [2., 2.]]),
 tensor([[3., 3.],
         [3., 3.]]))

In [44]:
# Powers are element-wise as well, including fractional exponents.
fours = torch.pow(twos, 2)
sqrt_two = torch.pow(twos, 0.5)
fours, sqrt_two

(tensor([[4., 4.],
         [4., 4.]]),
 tensor([[1.4142, 1.4142],
         [1.4142, 1.4142]]))

In [45]:
# Adding two tensors of the same shape pairs up matching positions.
fives = torch.add(ones, fours)
fives

tensor([[5., 5.],
        [5., 5.]])

In [46]:
# Multiplying two same-shaped tensors is element-wise, not a matrix product.
tens = torch.mul(twos, fives)
tens

tensor([[10., 10.],
        [10., 10.]])

### 05 broadcasting

In [47]:
# Tensor broadcasting: same rules as NumPy's broadcasting.
# The single row of b is reused for each of the two rows of a.
a = torch.rand((2, 4))
b = 2 * torch.ones((1, 4))
c = torch.mul(a, b)
a, b, c, c.eq(a * 2)

(tensor([[0.5932, 0.1123, 0.1535, 0.2417],
         [0.7262, 0.7011, 0.2038, 0.6511]]),
 tensor([[2., 2., 2., 2.]]),
 tensor([[1.1863, 0.2247, 0.3069, 0.4834],
         [1.4525, 1.4022, 0.4076, 1.3021]]),
 tensor([[True, True, True, True],
         [True, True, True, True]]))

Rules:
-   Each tensor must have at least one dimension - no empty tensors.
-   Comparing the dimension sizes of the two tensors, *going from last
    to first:*
    -   Each dimension must be equal, *or*
    -   One of the dimensions must be of size 1, *or*
    -   The dimension does not exist in one of the tensors

In [48]:
# Broadcasting examples: candidate operands for a tensor of shape (4, 3, 2).
# Shapes are compared from the last dimension backwards.
a = torch.ones((4, 3, 2))

b = torch.rand((3, 2))       # broadcasts to (4, 3, 2): missing leading dim
b_ = torch.rand((1, 3, 2))   # broadcasts to (4, 3, 2): size-1 leading dim

c = torch.rand((4, 3))       # does NOT broadcast: last dims are 3 vs 2
c_ = torch.rand((4, 3, 1))   # broadcasts to (4, 3, 2): size-1 last dim

d = torch.rand((1, 2))       # broadcasts to (4, 3, 2)
d_ = torch.rand((1, 1, 2))   # broadcasts to (4, 3, 2)

e = torch.rand((4, 2))       # does NOT broadcast: 4 is compared against 3
e_ = torch.rand((4, 1, 2))   # broadcasts to (4, 3, 2): size-1 middle dim



In [49]:
# a * X: multiply `a` by each candidate and report those that fail.
# Only RuntimeError is caught, so unrelated problems (an undefined name,
# a keyboard interrupt) are no longer silently reported as a broadcasting error.
for label, other in [("b", b), ("b_", b_), ("c", c), ("c_", c_),
                     ("d", d), ("d_", d_), ("e", e)]:
    try:
        a * other
    except RuntimeError:
        print(f"a * {label} error")

a * c error
a * e error


### 06 In-place Operations
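Most binary operations on tensors will return a third, new tensor. When
we say `c = a * b` (where `a` and `b` are tensors), the new tensor `c`
will occupy a region of memory distinct from the other tensors.
In-place variants, marked by a trailing underscore (e.g. `sin_`, `add_`),
instead overwrite the tensor they are called on.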

In [50]:
import torch
a = torch.ones((2, 2))
print(a)
a.sin_()      # trailing underscore: a is overwritten with sin(a)
torch.cos(a)  # no underscore: a new tensor is returned and a is untouched
# functional spellings: torch.sin_(a) (in-place) and torch.sin(a) (new tensor)
a

tensor([[1., 1.],
        [1., 1.]])


tensor([[0.8415, 0.8415],
        [0.8415, 0.8415]])

In [51]:
# More examples: add/sub without the underscore return new tensors,
# so a still holds its original values afterwards.
a = torch.ones((2, 2))
b = torch.ones((2, 2))
torch.add(a, b)
torch.sub(a, b)
a

tensor([[1., 1.],
        [1., 1.]])

In [52]:
# add_ stores the sum in a itself; the value bound to c compares equal to a.
c = a.add_(b)
c.eq(a)

tensor([[True, True],
        [True, True]])

### 07. Three Kinds of  Muls

In [115]:
# Operands for the element-wise and dot-product examples: all ones and all twos.
a = torch.ones((3, 4))
b = 2 * torch.ones((3, 4))
a, b

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]]))

In [117]:
# Element-wise multiplication (Hadamard product): the * operator,
# torch.mul and Tensor.mul all give the same result.
(a * b).eq(torch.mul(a, b)), (a * b).eq(a.mul(b)), a * b

(tensor([[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]]),
 tensor([[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]]),
 tensor([[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]]))

In [123]:
# Dot product of the two tensors flattened to 1-D; same value as (a * b).sum()
torch.dot(a.reshape(-1), b.reshape(-1))


tensor(24.)

In [112]:
# Matrix multiplication on batched inputs: the last two dims are multiplied
# as matrices, (3, 4) @ (4, 5) -> (3, 5), and the result here is (3, 3, 5).
# Note: see also torch.mm and torch.mv.
a = torch.ones((3, 3, 4))
b = torch.ones((1, 4, 5))

a @ b

tensor([[[4., 4., 4., 4., 4.],
         [4., 4., 4., 4., 4.],
         [4., 4., 4., 4., 4.]],

        [[4., 4., 4., 4., 4.],
         [4., 4., 4., 4., 4.],
         [4., 4., 4., 4., 4.]],

        [[4., 4., 4., 4., 4.],
         [4., 4., 4., 4., 4.],
         [4., 4., 4., 4., 4.]]])

### 08 Copying Tensors

In [124]:
# Assignment does not copy: b is just a second label for the same memory,
# so a write made through a is visible through b.
a = torch.ones((3, 4))
b = a
a[0, 0] = 0
b, a

(tensor([[0., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([[0., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]))

In [125]:
# clone() makes an independent copy, so the later write to a does not reach b.
# Note: if the source tensor (a) has autograd enabled, so will the clone (b).
a = torch.zeros((3, 4))
b = torch.clone(a)
a[0, 0] = 1
a, b

(tensor([[1., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]))

In many cases you *don't* want the cloned copy
of your source tensor to track gradients - performance is improved with
autograd's history tracking turned off. For this, you can use the
`.detach()` method on the source tensor before cloning:

In [126]:
# b is cloned straight from a and shows a grad_fn when displayed;
# c is cloned from the detached tensor and shows no gradient information.
a = torch.ones((3, 4), requires_grad=True)
b = torch.clone(a)
c = torch.clone(a.detach())
a, b, c

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]], requires_grad=True),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]], grad_fn=<CloneBackward0>),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]))

## 09 Squeeze & Unsqueeze

### Unsqueeze

In [127]:
# A 10x10x10 tensor used as the starting point for the unsqueeze examples.
x = torch.ones((10, 10, 10))
x.shape

torch.Size([10, 10, 10])

In [128]:
# Add one dimension: unsqueeze(i) inserts a new axis of size 1 at position i.
torch.unsqueeze(x, 0).shape, torch.unsqueeze(x, 1).shape, torch.unsqueeze(x, 2).shape

(torch.Size([1, 10, 10, 10]),
 torch.Size([10, 1, 10, 10]),
 torch.Size([10, 10, 1, 10]))

### Squeeze

In [129]:
# Start from a tensor with a leading size-1 axis.
y = torch.unsqueeze(x, 0)
y.shape

torch.Size([1, 10, 10, 10])

In [130]:
# squeeze(i) drops axis i only when its size is 1; otherwise the shape is unchanged.
torch.squeeze(y, 0).shape, torch.squeeze(y, 1).shape, torch.squeeze(y, 2).shape

(torch.Size([10, 10, 10]),
 torch.Size([1, 10, 10, 10]),
 torch.Size([1, 10, 10, 10]))

### In-place version

In [131]:
x = torch.empty((2, 3, 4))
torch.unsqueeze(x, 0).shape  # computed on a new tensor and discarded
x.shape                      # x itself still has its original shape

torch.Size([2, 3, 4])

In [132]:
# Trailing underscore: x itself gains the new leading axis.
x.unsqueeze_(dim=0)
x.shape

torch.Size([1, 2, 3, 4])

In [133]:
# Without the underscore the squeezed tensor is returned and dropped; x is unchanged.
torch.squeeze(x, 0)
x.shape

torch.Size([1, 2, 3, 4])

In [134]:
# In-place version: the leading size-1 axis is removed from x itself.
x.squeeze_(dim=0)
x.shape

torch.Size([2, 3, 4])

## 10 NumPy Bridge

In [135]:
import numpy as np
import torch
nd_array = np.ones((2, 2))  # NumPy default dtype: float64

# NumPy -> tensor, then cast the result to float32.
tensor = torch.from_numpy(nd_array).to(dtype=torch.float32)

nd_array, tensor

(array([[1., 1.],
        [1., 1.]]),
 tensor([[1., 1.],
         [1., 1.]]))

In [137]:
# Tensor -> NumPy array.
# Note: the array and the tensor are using the same memory.
tensor = torch.ones((2, 3, 4))
nd_array = tensor.numpy()
type(nd_array), nd_array

(numpy.ndarray,
 array([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],
 
        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]], dtype=float32))