# 00. PyTorch Fundamentals 

## Introduction

In [None]:
print("Hello World!")

Hello World!


In [None]:
!nvidia-smi

Tue May 16 14:53:32 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   74C    P0    30W /  70W |    813MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.0.0+cu118


## Introduction to Tensors

Scalar

In [None]:
#scalar
scalar = torch.tensor(7) 
scalar

tensor(7)

In [None]:
scalar.shape

torch.Size([])

In [None]:
scalar.ndim

0

Vector

In [None]:
#vector
vector = torch.tensor([2, 3]) 
vector

tensor([2, 3])

In [None]:
vector.shape

torch.Size([2])

In [None]:
vector.ndim

1

Matrix

In [None]:
#MATRIX
MATRIX = torch.tensor([[1, 2],
                       [3, 4]])
MATRIX

tensor([[1, 2],
        [3, 4]])

In [None]:
MATRIX.shape

torch.Size([2, 2])

In [None]:
MATRIX.ndim

2

Tensor

In [None]:
#TENSOR
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 4, 5],
                        [6, 7, 8]]]) 
TENSOR

tensor([[[1, 2, 3],
         [3, 4, 5],
         [6, 7, 8]]])

In [None]:
TENSOR.shape

torch.Size([1, 3, 3])

In [None]:
TENSOR.ndim

3

## Random Tensors

In [None]:
random_MATRIX = torch.rand(2, 3)
random_MATRIX

tensor([[0.0290, 0.4019, 0.2598],
        [0.3666, 0.0583, 0.7006]])

In [None]:
random_TENSOR = torch.rand(2, 3, 3)
random_TENSOR

tensor([[[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631],
         [0.7749, 0.8208, 0.2793]],

        [[0.6817, 0.2837, 0.6567],
         [0.2388, 0.7313, 0.6012],
         [0.3043, 0.2548, 0.6294]]])

Image Tensor

In [None]:
random_image_size_tensor = torch.rand(size=(3,224, 224)) #color channels, height, width
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([3, 224, 224]), 3)

## Zeros and ones

In [None]:
zeros = torch.zeros(size=(2, 3))
zeros

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [None]:
random_MATRIX*zeros

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [None]:
ones = torch.ones(2, 3)
ones

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [None]:
ones.dtype, random_MATRIX.dtype

(torch.float32, torch.float32)

## Creating a range of tensors and tensors-like

In [None]:
tensor = torch.arange(start = 1, end= 10, step = 2)
tensor

tensor([1, 3, 5, 7, 9])

In [None]:
tensor_like = torch.zeros_like(tensor)
tensor_like

tensor([0, 0, 0, 0, 0])

## Tensor datatypes


In [None]:
# The three keyword arguments are spelled out with their default values:
#   dtype=None          -> the datatype is inferred from the data (float32 here, see the output)
#   device=None         -> no explicit device requested
#   requires_grad=False -> autograd does not record operations on this tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], dtype = None, device = None, requires_grad = False)
float_32_tensor, float_32_tensor.dtype 

(tensor([3., 6., 9.]), torch.float32)

In [None]:
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype = torch.float16, device = None, requires_grad = False)
float_16_tensor, float_16_tensor.dtype 

(tensor([3., 6., 9.], dtype=torch.float16), torch.float16)

## Getting information from tensors

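Three of the most common problems with tensors, and the attribute to inspect for each:

1. Tensors are not the right datatype - check with *tensor.dtype*
2. Tensors are not the right shape/size - check with *tensor.shape* / *tensor.size()*
3. Tensors are not on the right device - check with *tensor.device*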

In [None]:
tensor = torch.rand(3, 4, dtype = torch.float16, device = None, requires_grad = False)
print(tensor)
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Size of tensor: {tensor.shape}")
print(f"Device tensor is on: {tensor.device}")

tensor([[0.7529, 0.4648, 0.4209, 0.1514],
        [0.7988, 0.2935, 0.8662, 0.0073],
        [0.9775, 0.1924, 0.8052, 0.4419]], dtype=torch.float16)
Datatype of tensor: torch.float16
Size of tensor: torch.Size([3, 4])
Device tensor is on: cpu


## Manipulating Tensors (tensor operations)

- addition
- subtraction
- multiplication (element-wise)
- division
- matrix multiplication

In [None]:
tensor = torch.tensor([1, 2, 3])
tensor

tensor([1, 2, 3])

In [None]:
tensor + 10

tensor([11, 12, 13])

In [None]:
tensor * 10

tensor([10, 20, 30])

In [None]:
tensor - 10

tensor([-9, -8, -7])

In [None]:
torch.mul(tensor, 10)

tensor([10, 20, 30])

In [None]:
torch.add(tensor, 10)

tensor([11, 12, 13])

There are two main ways of performing multiplication on tensors:

- element-wise multiplication
- matrix multiplication

The two main rules when performing matrix multiplication:
1. The **inner dimension** must match.
- (3, 2) @ (3, 2) won't work
- (2, 3) @ (3, 2) will work
- (3, 2) @ (2, 3) will work

2. The resulting matrix has the shape of the **outer dimensions**.
- (2, 3) @ (3, 2) -> (2, 2)
- (3, 2) @ (2, 3) -> (3, 3)





In [None]:
#Element-wise multiplication
tensor * tensor

tensor([1, 4, 9])

In [None]:
#Matrix multiplication
%%time
torch.matmul(tensor, tensor)

CPU times: user 1.1 ms, sys: 8 µs, total: 1.11 ms
Wall time: 1.02 ms


tensor(14)

In [None]:
tensor @ tensor

tensor(14)

In [None]:
%%time
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
print(value)  

tensor(14)
CPU times: user 345 µs, sys: 0 ns, total: 345 µs
Wall time: 352 µs


One of the most common errors in deep learning: shape errors

In [None]:
torch.matmul(torch.rand(3, 3), torch.rand(3, 2)).shape

torch.Size([3, 2])

In [None]:
# Shapes for matrix multiplication: tensor_A and tensor_B are both (3, 2), so
# tensor_B is transposed to (2, 3) to make the inner dimensions match.
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensor_B = torch.tensor([[7, 10], [8, 11], [9, 12]])

torch.mm(tensor_A, tensor_B.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [None]:
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

To fix our tensor shape issues, we can manipulate the shape with **transpose**.

In [None]:
tensor_B

tensor([[ 7, 10],
        [ 8, 11],
        [ 9, 12]])

In [None]:
tensor_B.shape

torch.Size([3, 2])

In [None]:
tensor_B.T

tensor([[ 7,  8,  9],
        [10, 11, 12]])

In [None]:
tensor_B.T.shape

torch.Size([2, 3])

## Tensor aggregation
* min
* max
* mean
* sum


In [None]:
x = torch.arange(1, 100, 10)
x, x.dtype

(tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91]), torch.int64)

In [None]:
torch.min(x), x.min()

(tensor(1), tensor(1))

In [None]:
torch.max(x), x.max()

(tensor(91), tensor(91))

In [None]:
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(46.), tensor(46.))

In [None]:
torch.sum(x), x.sum()

(tensor(460), tensor(460))

Positional min/max
* arg max
* arg min




In [None]:
x

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [None]:
torch.argmin(x), x.argmin(), x[torch.argmin(x)]

(tensor(0), tensor(0), tensor(1))

In [None]:
torch.argmax(x), x.argmax(), x[torch.argmax(x)]

(tensor(9), tensor(9), tensor(91))

## Reshaping, stacking, squeezing and unsqueezing tensors

* **Reshaping** - reshapes an input tensor to a defined shape
* **View** - returns a view of an input tensor of a certain shape but keeps the same memory
* **Stacking** - combines multiple tensors on top of each other (vstack) or side by side (hstack)
* **Squeeze** - removes all `1` dimensions from a tensor
* **Unsqueeze** - adds a `1` dimension to a target tensor
* **Permute** - returns a view of the input with dimensions permuted (swapped) in a certain way





In [None]:
import torch
x = torch.arange(1, 10.)
x, x.shape, x.dtype

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]), torch.float32)

In [None]:
# Add an extra dimension

x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Change the view

z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Changing z changes x (z is a view of x, so both share the same memory)

z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [None]:
# Stack four copies of x along a new dimension.
# dim = 1 places the copies side by side as columns (result shape [9, 4]);
# dim = 0 would stack them on top of each other as rows instead.

x_stacked = torch.stack([x, x, x, x], dim = 1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [None]:
x_reshaped, x_reshaped.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Squeeze

x_squeezed = x_reshaped.squeeze()
x_squeezed, x_squeezed.shape

(tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [None]:
# Unsqueeze

x_unsqueezed = x_squeezed.unsqueeze(dim = 0)
x_unsqueezed, x_unsqueezed.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Permute

x_original = torch.rand(size = (224, 224, 3)) # image - [height, width, colour_channels] 
x_original.shape 

torch.Size([224, 224, 3])

In [None]:
x_permuted = x_original.permute(2, 0, 1) # image - [colour_channels, height, width] 
x_permuted.shape

torch.Size([3, 224, 224])

In [None]:
print(f"Original shape: {x_original.shape}.")
print(f"Permuted shape: {x_permuted.shape}.")

Original shape: torch.Size([224, 224, 3]).
Permuted shape: torch.Size([3, 224, 224]).


In [None]:
x_original[0, 0, 0] = 6969
x_original[0, 0, 0], x_permuted[0, 0, 0]

(tensor(6969.), tensor(6969.))

## Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy.

In [None]:
import torch
x = torch.arange(1, 10).reshape(1, 3 ,3)
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [None]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
x[0, 0], x[0][0]

(tensor([1, 2, 3]), tensor([1, 2, 3]))

In [None]:
x[0, 0, 0], x[0][0][0] 

(tensor(1), tensor(1))

In [None]:
# use " : " to select " all " of a target dimension
x[:, 0]

tensor([[1, 2, 3]])

In [None]:
x[:, :, 1]

tensor([[2, 5, 8]])

In [None]:
x[0, 0, 0]

tensor(1)

In [None]:
x[:, 1, 1]

tensor([5])

In [None]:
x[0, 0, :]

tensor([1, 2, 3])

In [None]:
x[0, 2, 2]

tensor(9)

In [None]:
x[:, :, 2]

tensor([[3, 6, 9]])

## PyTorch tensors & NumPy

**NumPy** is a popular scientific Python numerical computing library. (`import numpy`)

PyTorch has functionality to interact with it.

* Data in NumPy -> want in PyTorch tensor -> `torch.from_numpy(np_array)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

*Default Datatype of NumPy is float64*

*Default Datatype of PyTorch is float32* 


In [None]:
# NumPy array to Tensor

import torch
import numpy as np

array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) # when converting from numpy -> pytorch, the tensor keeps NumPy's dtype (float64 here) instead of PyTorch's default float32
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
array.dtype

dtype('float64')

In [None]:
tensor.dtype

torch.float64

In [None]:
array = array + 1
array, tensor 

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
#Tensor to Numpy array

tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [None]:
tensor = tensor + 1
tensor, numpy_tensor 

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)

To reduce the randomness in neural networks and PyTorch comes the concept of a **random seed**.

In [None]:
import torch

random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A == random_tensor_B)

tensor([[0.9401, 0.4664, 0.5540, 0.2197],
        [0.5556, 0.5158, 0.4683, 0.8639],
        [0.3936, 0.7495, 0.6087, 0.3865]])
tensor([[0.2788, 0.6852, 0.6579, 0.4634],
        [0.7546, 0.8112, 0.5402, 0.7937],
        [0.2781, 0.4120, 0.1031, 0.6997]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [None]:
RANDOM_SEED = 42

# Reset the generator to the same seed before each draw so that both calls to
# torch.rand produce the same values.
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)
torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3, 4)

# Show both tensors followed by their element-wise comparison, one per line.
print(random_tensor_C, random_tensor_D, random_tensor_C == random_tensor_D, sep="\n")

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs

GPUs = faster computation on numbers, thanks to CUDA + NVIDIA hardware + PyTorch working behind the scenes.

In [None]:
!nvidia-smi

Tue May 16 14:53:33 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   75C    P0    30W /  70W |    813MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Check for GPU access with PyTorch

torch.cuda.is_available()

True

In [None]:
# Setup device agnostic code

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [None]:
# Count number of devices

torch.cuda.device_count()

1

## Putting tensors (and models) on the GPU

*You can't convert a tensor that lives on the GPU directly to NumPy; copy it back to the CPU first (e.g. with `tensor.cpu()`).*

In [None]:
tensor = torch.tensor([1, 2, 3], device = "cpu")

print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [None]:
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

In [None]:
# Bring the tensor back to the CPU first; only then is it converted to a NumPy array
tensor_back_on_cpu = tensor_on_gpu.cpu()
tensor_back_on_cpu, tensor_back_on_cpu.device, tensor_back_on_cpu.numpy()

(tensor([1, 2, 3]), device(type='cpu'), array([1, 2, 3]))

## Practice

In [None]:
import torch

1. Read documentation: torch.Tensor and torch.cuda.

In [None]:
# I will

2. Create a random tensor with shape (7, 7)

In [None]:
random_tensor_1 = torch.rand(7, 7)
random_tensor_1, random_tensor_1.shape

(tensor([[0.8694, 0.5677, 0.7411, 0.4294, 0.8854, 0.5739, 0.2666],
         [0.6274, 0.2696, 0.4414, 0.2969, 0.8317, 0.1053, 0.2695],
         [0.3588, 0.1994, 0.5472, 0.0062, 0.9516, 0.0753, 0.8860],
         [0.5832, 0.3376, 0.8090, 0.5779, 0.9040, 0.5547, 0.3423],
         [0.6343, 0.3644, 0.7104, 0.9464, 0.7890, 0.2814, 0.7886],
         [0.5895, 0.7539, 0.1952, 0.0050, 0.3068, 0.1165, 0.9103],
         [0.6440, 0.7071, 0.6581, 0.4913, 0.8913, 0.1447, 0.5315]]),
 torch.Size([7, 7]))

3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).

In [None]:
random_tensor_2 = torch.rand(1, 7)

torch.matmul(random_tensor_1, random_tensor_2.T) 

tensor([[1.9625],
        [1.0950],
        [0.9967],
        [1.8910],
        [1.9205],
        [1.0674],
        [1.6949]])

4. Set the random seed to 0 and do exercises 2 & 3 over again.

In [None]:
# Seed the generator so the (7, 7) tensor is repeatable
torch.manual_seed(0)
random_tensor_1 = torch.rand(7, 7)

# NOTE(review): seeding again with the same value restarts the generator, so
# random_tensor_2 repeats the first 7 values of random_tensor_1 (the first row of
# the comparison printed below is all True). If two independent tensors are
# wanted, seed only once - confirm which reading of the exercise is intended.
torch.manual_seed(0)
random_tensor_2 = torch.rand(1, 7)

print(random_tensor_1 == random_tensor_2)  # the (1, 7) tensor is broadcast against each row of the (7, 7) tensor
print(torch.matmul(random_tensor_1, random_tensor_2.T))


tensor([[ True,  True,  True,  True,  True,  True,  True],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False]])
tensor([[1.5985],
        [1.1173],
        [1.2741],
        [1.6838],
        [0.8279],
        [1.0347],
        [1.2498]])


5. Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one). If there is, set the GPU random seed to 1234.

In [None]:
torch.cuda.manual_seed(1234)

6. Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).

In [None]:
# Device-agnostic setup: use the GPU when one is available, otherwise fall back
# to the CPU so this cell still runs on a machine without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Re-seeding with the same value before each draw makes both tensors identical.
torch.manual_seed(1234)
random_tensor_1 = torch.rand(2, 3, device = device)

torch.manual_seed(1234)
random_tensor_2 = torch.rand(2, 3, device = device)

# or we could create the tensors on the CPU and move them with tensor.to(device)

print(random_tensor_1 == random_tensor_2)

tensor([[True, True, True],
        [True, True, True]], device='cuda:0')


7. Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).

In [None]:
# Both tensors from exercise 6 are (2, 3), so one of them has to be transposed
# before they can be matrix-multiplied.

# Transpose the second tensor: (2, 3) @ (3, 2) -> (2, 2)
tensor_ex_7 = torch.mm(random_tensor_1, random_tensor_2.T)

# Alternative: transpose the first tensor instead: (3, 2) @ (2, 3) -> (3, 3)
tensor_ex_7_2 = torch.mm(random_tensor_1.T, random_tensor_2)


8. Find the maximum and minimum values of the output of 7.

In [None]:
print(f"Maximum value: {torch.max(tensor_ex_7)}") 
print(f"Minimum value: {torch.min(tensor_ex_7)}") 

Maximum value: 1.4577524662017822
Minimum value: 0.8358409404754639


9. Find the maximum and minimum index values of the output of 7.

In [None]:
tensor_ex_7

tensor([[0.9792, 0.8358],
        [0.8358, 1.4578]], device='cuda:0')

In [None]:
print(f"Maximum index value: {torch.argmax(tensor_ex_7)}") 
print(f"Minimum index value: {torch.argmin(tensor_ex_7)}") 

Maximum index value: 3
Minimum index value: 1


10. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and its shape as well as the second tensor and its shape.

In [None]:
# Seed first so the random values are repeatable
torch.manual_seed(7)

# Start with a (1, 1, 1, 10) tensor, then drop every size-1 dimension
random_tensor = torch.rand(size=(1, 1, 1, 10))
random_tensor_squeezed = torch.squeeze(random_tensor)

print(f"Initial tensor: {random_tensor} with the shape: {random_tensor.shape}")
print(f"Final tensor: {random_tensor_squeezed} with the shape: {random_tensor_squeezed.shape}")


Initial tensor: tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]]) with the shape: torch.Size([1, 1, 1, 10])
Final tensor: tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513]) with the shape: torch.Size([10])
