# The fundamentals of PyTorch

Resource for reading: https://www.learnpytorch.io/00_pytorch_fundamentals/

In [1]:
import torch
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

# Show which PyTorch version produced the outputs in this notebook
print(torch.__version__)

1.13.1


## PyTorch Tensors
    - fundamental building blocks, usually created behind the scenes

In [2]:
## Creating tensors

# A scalar is a single number stored in a zero-dimensional tensor
scalar = torch.tensor(data=7)

# .item() unwraps a one-element tensor into a plain Python number
# (not displayed here: a cell only echoes its last expression)
scalar.item()
# .ndim is the number of dimensions, i.e. the nesting depth of the brackets
scalar.ndim

0

In [3]:
# A vector is a one-dimensional tensor
vector = torch.tensor(data=[7, 7])

# .shape lists the number of elements along each dimension
vector.shape

torch.Size([2])

In [4]:
# A matrix is a two-dimensional tensor (rows x columns)
MATRIX = torch.tensor([[7, 8], [9, 5]])
# Indexing the first dimension returns the first row
MATRIX[0]

tensor([7, 8])

In [5]:
# A general tensor: here one 3 x 3 matrix wrapped in an extra outer dimension
TENSOR = torch.tensor([
    [[1, 2, 3],
     [3, 6, 7],
     [2, 5, 4]],
])
TENSOR.ndim, TENSOR.shape

(3, torch.Size([1, 3, 3]))

In [6]:
## Random tensors
## - a NN often starts from random numbers and iteratively updates them

# Random tensor of shape (3, 4): one size entry per dimension, so 2 dimensions
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.8364, 0.9553, 0.6310, 0.1325],
        [0.5200, 0.2362, 0.4182, 0.9037],
        [0.4784, 0.7252, 0.4921, 0.5970]])

In [7]:
# Tensors can represent almost any kind of data.
# Example: a random tensor laid out like an image,
# i.e. (height, width, color channels) with channels R, G, B
random_image_size_tensor = torch.rand(224, 224, 3)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and ones

In [8]:
# Tensors filled entirely with 0s or 1s (floating point unless a dtype is given)
zeros = torch.zeros(3, 4)
ones = torch.ones(3, 4)
zeros, ones, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

### Range of tensors 

In [9]:
# Evenly spaced integers from 1 up to (but excluding) 11, with the default step of 1
one_to_ten = torch.arange(1, 11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [10]:
# "*_like" functions create a new tensor modelled on an existing one:
# here an all-zeros tensor matching one_to_ten
ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor Datatypes

In [11]:
# Float tensor: 32 bits is single precision; the bit width sets how much
# detail/precision is kept when computing
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # datatype of the tensor (e.g. float32, float16, float64)
    device=None,          # which device the tensor lives on (default is "cpu")
    requires_grad=False,  # whether to track gradients for operations on this tensor
)
# Convert to half precision (torch.half is the same dtype as torch.float16)
float_16_tensor = float_32_tensor.to(torch.float16)

### Getting information from tensors
**Note:** Wrong tensor datatypes are one of the 3 big sources of errors you'll run into with `PyTorch` & deep learning (often in connection with matrix multiplication):
1. Tensors are not right datatype
2. Tensors are not right shape
3. Tensors are not on the right device - tensors might live on different devices (e.g. gpu & cpu)

How to get that info:
1. `tensor.dtype`
2. `tensor.shape` or `tensor.size()`
3. `tensor.device`

In [12]:
# Report the attributes that most often explain tensor errors:
# datatype, shape and device
tensor_report = (
    ('Tensor: ', (float_32_tensor,)),
    ('Datatype: ', (float_32_tensor.dtype,)),
    ('Shape/Size: ', (float_32_tensor.shape, float_32_tensor.size())),
    ('Device: ', (float_32_tensor.device,)),
)
for label, values in tensor_report:
    print(label, *values)

Tensor:  tensor([3., 6., 9.])
Datatype:  torch.float32
Shape/Size:  torch.Size([3]) torch.Size([3])
Device:  cpu


### Manipulating Tensors (tensor operations)
Tensor operations include:
* Addition
* Subtraction
* Division
* Multiplication (element-wise)
* Matrix multiplication

Tensor aggregations include:
* Min, Max, Mean, Sum etc.


In [13]:
# Small 2 x 3 integer tensor used to demonstrate the basic operations
tensor1 = torch.tensor([[1, 2, 3], [4, 5, 6]])
# Arithmetic with a number is applied to every element:
#   tensor1 + 10
#   tensor1 - 10
#   tensor1 / 10
#   tensor1 * 10   (element-wise multiplication)
# There are also built-in functions for this, e.g. torch.add(), torch.mul()

In [14]:
# Matrix multiplication: (2 x 3) @ (3 x 2) -> (2 x 2)
tensor2 = torch.tensor([[4, 5], [6, 7], [8, 9]])
print(torch.matmul(tensor1, tensor2))  # same as tensor1 @ tensor2 or torch.mm(tensor1, tensor2)

# Two rules for matrix multiplication:
# - the inner dimensions must match:        (m x n) @ (n x k)
# - the result takes the outer dimensions:  (m x k)

tensor([[ 40,  46],
        [ 94, 109]])


In [15]:
# Transposing swaps the two dimensions of a matrix (2 x 3 becomes 3 x 2),
# which is a common way to make inner dimensions match for multiplication
torch.transpose(tensor1, 0, 1)

tensor([[1, 4],
        [2, 5],
        [3, 6]])

In [16]:
# Tensor aggregation: reduce a tensor to fewer elements (here one Python number each).
# The results carry a `_value` suffix so they do not shadow Python's built-in
# min(), max() and sum() for every cell that runs after this one.
min_value  = torch.min(tensor1).item()
max_value  = torch.max(tensor1).item()
# torch.mean needs a floating point tensor, so the integer tensor is cast first
mean_value = torch.mean(tensor1.type(torch.float32)).item()
sum_value  = torch.sum(tensor1).item()

# Indices of the smallest and largest value (useful e.g. together with softmax).
# Called without `dim`, the index refers to the flattened tensor.
ind_min = tensor1.argmin().item()
ind_max = tensor1.argmax().item()


#### Layout operations on tensors:
* **Reshaping** - reshapes an input tensor into a defined shape
* **View** - returns a view of an input tensor in a certain shape while sharing memory with the original tensor
* **Stacking** - combine multiple tensors: 
                   - on top of each other (vstack or dim=0),
                   - side by side (hstack or dim=1) or
                   - with regards to a given dimension (dim=...)
* **Squeeze** - removes all "1" dimensions from an input tensor
* **Unsqueeze** - adds a "1" dimension to a target tensor (dim must be specified)
* **Permute** - rearranges the dimensions of a target tensor in a specified order

In [17]:
# Twelve consecutive floats (1.0 ... 12.0) to use in the layout examples
x = torch.arange(start=1.0, end=13.0)
x, x.shape

(tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]),
 torch.Size([12]))

In [18]:
# Reshape the 12 elements into more dimensions.
# The new shape must be compatible with the original one:
# the product of its sizes (2 * 2 * 3) has to equal the number
# of elements in the input tensor (12)
x_reshaped = torch.reshape(x, (2, 2, 3))
x_reshaped

tensor([[[ 1.,  2.,  3.],
         [ 4.,  5.,  6.]],

        [[ 7.,  8.,  9.],
         [10., 11., 12.]]])

In [19]:
# A view looks like a reshape, but z shares its memory with the
# original tensor x, so modifying z also modifies x
z = x.view((1, 12))
z, z.shape

(tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]]),
 torch.Size([1, 12]))

In [20]:
# Stacking joins tensors along a NEW dimension inserted at position `dim`
x_stacked = torch.stack((x, x, x, x), dim=1)
x_reshaped_stacked = torch.stack((x_reshaped, x_reshaped), dim=2)
x_stacked, x_reshaped_stacked

(tensor([[ 1.,  1.,  1.,  1.],
         [ 2.,  2.,  2.,  2.],
         [ 3.,  3.,  3.,  3.],
         [ 4.,  4.,  4.,  4.],
         [ 5.,  5.,  5.,  5.],
         [ 6.,  6.,  6.,  6.],
         [ 7.,  7.,  7.,  7.],
         [ 8.,  8.,  8.,  8.],
         [ 9.,  9.,  9.,  9.],
         [10., 10., 10., 10.],
         [11., 11., 11., 11.],
         [12., 12., 12., 12.]]),
 tensor([[[[ 1.,  2.,  3.],
           [ 1.,  2.,  3.]],
 
          [[ 4.,  5.,  6.],
           [ 4.,  5.,  6.]]],
 
 
         [[[ 7.,  8.,  9.],
           [ 7.,  8.,  9.]],
 
          [[10., 11., 12.],
           [10., 11., 12.]]]]))

In [21]:
# Squeeze: drop every dimension of size 1, so (1, 12) becomes (12,)
z_squeezed = torch.squeeze(z)
print(z, '\n', z.shape)
print('\n After squeezing:\n')
print(z_squeezed, '\n', z_squeezed.shape)

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]]) 
 torch.Size([1, 12])

 After squeezing:

tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]) 
 torch.Size([12])


In [22]:
# Unsqueeze: insert a new dimension of size 1 at position `dim`,
# so (1, 12) becomes (1, 1, 12)
z_unsqueezed = torch.unsqueeze(z, dim=0)
print(z, '\n', z.shape)
print('\n After unsqueezing:\n')
print(z_unsqueezed, '\n', z_unsqueezed.shape)

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]]) 
 torch.Size([1, 12])

 After unsqueezing:

tensor([[[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]]]) 
 torch.Size([1, 1, 12])


In [23]:
# A permuted tensor is a view: x_original and x_permuted share memory,
# i.e. changing x_original also changes x_permuted
x_original = torch.rand(224, 224, 3)  # image-like data: (height, width, color channel)
# Rearrange the order of the axes (dims): old axis 2 comes first, then axes 0 and 1
x_permuted = torch.permute(x_original, (2, 0, 1))  # now (color channel, height, width)
print(x_original.shape)
print('\n After permuting: \n')
print(x_permuted.shape)

torch.Size([224, 224, 3])

 After permuting: 

torch.Size([3, 224, 224])


#### Indexing
Indexing with `PyTorch` is very similar to indexing with `NumPy`.

In [24]:
# Integers 1..9 arranged as one 3 x 3 matrix inside an outer dimension: shape (1, 3, 3)
tensor3 = torch.reshape(torch.arange(1, 10), (1, 3, 3))
tensor3

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [25]:
# Outer bracket: index 0 of the first dimension gives the whole 3 x 3 matrix
tensor3[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [26]:
# Middle bracket: from that 3 x 3 matrix, index 1 picks the second row
tensor3[0][1]

tensor([4, 5, 6])

In [27]:
# Inner bracket: first matrix, first row, third element (a zero-dimensional tensor)
tensor3[0][0][2]

tensor(3)

In [28]:
# Slicing
# Chained indexing is applied from left to right: tensor3[:] slices the
# whole first dimension (i.e. the full tensor), and [0] then picks the
# first element of that same first dimension. The result is therefore
# identical to tensor3[0]. Selecting along the second dimension requires
# comma indexing instead, e.g. tensor3[:, 0].
tensor3[:][0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [29]:
# tensor3[0] picks the first element of the first dimension (the 3 x 3
# matrix); the following [:] slices all rows of that matrix, so the
# result is again the full 3 x 3 matrix:
tensor3[0][:]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [30]:
# Chained indexing: tensor3[0] gives the 3 x 3 matrix, [:] keeps all of
# its rows, and [1] then picks the second ROW, so this equals tensor3[0][1].
# To get the second element of the third dimension (the second column)
# use comma indexing instead: tensor3[0, :, 1]
tensor3[0][:][1]

tensor([4, 5, 6])

In [31]:
# First element in the first dimension, third element in the
# second dimension and everything in the third dimension
# gives us the third row:
print(tensor3[0][2][:], '\n')

# Note: for this tensor, tensor3[:] and tensor3[0] return almost the
# same thing, since tensor3 has shape [1, 3, 3] (i.e. its first
# dimension holds only one element). The difference: tensor3[0]
# indexes into the first dimension, so the outer brackets disappear,
# while tensor3[:] slices it and therefore keeps the outer brackets:
print(tensor3[0])
print(tensor3[:])

tensor([7, 8, 9]) 

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])


### PyTorch & NumPy
`PyTorch` is designed to work closely with `NumPy`, and so `PyTorch` has functionality to interact with `NumPy`:
* Data in `NumPy` -> `PyTorch` tensor: `torch.from_numpy(ndarray)`
* `PyTorch` tensor -> `NumPy` array: `torch.Tensor.numpy()`

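**Note:** The default datatype for a `NumPy` array is float64 while the default datatype for a `PyTorch` tensor is float32. A conversion keeps the datatype of the source, so a tensor created with `torch.from_numpy()` from a float64 array is float64 as well.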

In [32]:
# From NumPy array to tensor
# (NumPy is already imported as `np` in the imports cell at the top of the
# notebook, so it is not imported a second time here)
nparray  = np.arange(1.0, 10.0)
# from_numpy keeps the array's dtype: float64 in, float64 out
pytensor = torch.from_numpy(nparray)
nparray.dtype, pytensor.dtype

(dtype('float64'), torch.float64)

In [33]:
# From tensor to NumPy array: the array keeps the tensor's dtype (float32 here)
pytensor2 = torch.ones(size=(7,))
numpy_array = pytensor2.numpy()
pytensor2.dtype, numpy_array.dtype

(torch.float32, dtype('float32'))

#### Reproducibility
Briefly, how a NN learns:

"start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> iterate"

We don't want completely random (pseudo-random) numbers in tensors. We need our experiments to be somewhat reproducible.

To reduce randomness of NN and PyTorch we introduce the concept of a **random seed**.

The **random seed** essentially "flavours" the randomness.

Resource: https://en.wikipedia.org/wiki/Random_seed

In [42]:
# Two random tensors drawn without a fixed seed: their values differ
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

# Show both tensors followed by their element-wise comparison
for shown in (random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B):
    print(shown)

tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[0.1053, 0.2695, 0.3588, 0.1994],
        [0.5472, 0.0062, 0.9516, 0.0753],
        [0.8860, 0.5832, 0.3376, 0.8090]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [41]:
# Random but reproducible tensors.
# Each torch.rand call continues the random number stream, so the seed
# is set again right before every call that should repeat the same numbers.
RANDOM_SEED = 42

torch.manual_seed(seed=RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

torch.manual_seed(seed=RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

# Show both tensors followed by their element-wise comparison
for shown in (random_tensor_C, random_tensor_D, random_tensor_C == random_tensor_D):
    print(shown)

# The tensors are still random, but reproducible

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


#### Running objects on GPUs (faster computations)
Getting a GPU:
* Use Google Colab Pro
* Use your own (requires a GPU driver, CUDA + NVIDIA) 
* Use cloud computing - GCP, AWS, Azure (renting)

Resource: https://pytorch.org/get-started/locally/

Resource: https://pytorch.org/docs/stable/notes/cuda.html#best-practices

In [46]:
### Checking for GPU access with PyTorch
# Evaluates to True when PyTorch can use a CUDA GPU, otherwise False
torch.cuda.is_available()

False

In [47]:
# Device-agnostic setup: use the GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Number of devices/GPUs available
torch.cuda.device_count()

0

In [53]:
# Moving a tensor (the same works for models) to the target device; tensors are created on the CPU by default
tensor_on_CPU = torch.tensor([1, 2, 3])
# `device` is the string chosen in the device-agnostic setup cell, so on a
# machine without CUDA this tensor stays on the CPU despite its name
tensor_on_GPU = tensor_on_CPU.to(device)

# Move back to the CPU first to be able to convert to a NumPy array.
# Note: despite its name, tensor_back_on_cpu holds the NumPy array, not a tensor.
tensor_back_on_cpu = tensor_on_GPU.cpu().numpy()