In [None]:
!nvidia-smi

Sun Jul  6 10:00:52 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   50C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## 00. PyTorch Fundamentals

Resource Notebook: https://www.learnpytorch.io/00_pytorch_fundamentals/

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)
print(pd.__version__)

2.6.0+cu124
2.2.2


## Introduction to Tensors

### Creating Tensors

In [None]:
# scalar

scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
scalar.ndim

0

In [None]:
scalar.item()

7

In [None]:
vector = torch.tensor([7, 1])

In [None]:
vector.ndim

1

In [None]:
vector.shape

torch.Size([2])

In [None]:
# MATRIX
MATRIX = torch.tensor([[7, 8],[9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [None]:
MATRIX.ndim

2

In [None]:
MATRIX[0]

tensor([7, 8])

In [None]:
MATRIX.shape

torch.Size([2, 2])

In [None]:
# TENSOR
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 4, 5]]])

TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [None]:
TENSOR.ndim

3

In [None]:
TENSOR.shape  # "1 3x3 Tensor"

torch.Size([1, 3, 3])

In [None]:
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

### Random tensors

Why random tensors?

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

`Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers`


Torch random tensors - https://docs.pytorch.org/docs/stable/generated/torch.rand.html


In [None]:
# Create a random tensor of size (3, 4)

random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.3004, 0.2950, 0.4955, 0.3066],
        [0.4867, 0.6855, 0.2284, 0.8802],
        [0.2658, 0.5789, 0.5020, 0.3160]])

In [None]:
random_tensor.ndim

2

In [None]:
# Create a random tensor with similar shape to an image tensor

random_image_size_tensor = torch.rand(size=(224, 224, 4)) # height, width, colour channels (4 here, e.g. R, G, B, A - a plain RGB image would use 3)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 4]), 3)

### Zeros and ones

In [None]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
zeros*random_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# Create a tensor of all ones
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
ones.dtype

torch.float32

### Create a range of tensors and tensors-like

In [None]:
# use torch.range() - deprecated, instead use torch.arange()

one_to_ten = torch.arange(start=1, end=11, step=1)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
# creating tensors like -> like method creates a tensor with same shape & ndim
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

**Note:** Tensor datatypes are one of the 3 big sources of errors you'll run into with PyTorch & deep learning:

1. Tensors not the right datatype
2. Tensors not the right shape
3. Tensors not on the right device

In [None]:
# float 32 tensor
# Device-agnostic: place the tensor on the GPU when one is available and fall
# back to the CPU otherwise, so the cell also runs on a CPU-only runtime.
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=torch.float32,  # what datatype the tensor is (i.e. float32)
    device="cuda" if torch.cuda.is_available() else "cpu",  # what device the tensor lives on (CPU when no device is given)
    requires_grad=False,  # whether or not to track gradients with this tensor's operations
)
float_32_tensor

tensor([3., 6., 9.], device='cuda:0')

In [None]:
# Convert the float32 tensor to half precision; .type() returns a new tensor
# and leaves float_32_tensor untouched.
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], device='cuda:0', dtype=torch.float16)

In [None]:
float_16_tensor * float_32_tensor # mixing float16 and float32 works here: the result is promoted to float32

tensor([ 9., 36., 81.], device='cuda:0')

In [None]:
int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [None]:
long_tensor = torch.tensor([3, 6, 9], dtype=torch.long)
long_tensor

tensor([3, 6, 9])

### Getting information from tensors (tensor attributes)

1. Tensors not the right datatype - to get datatype from tensor, can use `tensor.dtype`
2. Tensors not the right shape - to get shape from a tensor, can use `tensor.shape`
3. Tensors not on the right device - to get device from a tensor, can use `tensor.device`

In [None]:
# Create a tensor
some_tensor = torch.rand(3, 4)
some_tensor

tensor([[0.1801, 0.7188, 0.9869, 0.9138],
        [0.8422, 0.9264, 0.2782, 0.0510],
        [0.0819, 0.2607, 0.0165, 0.8365]])

In [None]:
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Size of tensor: {some_tensor.size()}")
print(f"Device tensor is on: {some_tensor.device}")

tensor([[0.1801, 0.7188, 0.9869, 0.9138],
        [0.8422, 0.9264, 0.2782, 0.0510],
        [0.0819, 0.2607, 0.0165, 0.8365]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Size of tensor: torch.Size([3, 4])
Device tensor is on: cpu


In [None]:
# Get the number of available GPU devices
num_gpus = torch.cuda.device_count()
print(f"Number of available GPUs: {num_gpus}")

# Get the name of each GPU device
for i in range(num_gpus):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

Number of available GPUs: 1
GPU 0: Tesla T4


### Manipulating Tensors (tensor operations)

Tensor operations include:

* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication

In [None]:
# Create a tensor and add 10 to it
# Device-agnostic: use the GPU when one is available, otherwise fall back to
# the CPU so the cell does not fail on a CPU-only runtime.
tensor = torch.tensor([1, 2, 3], dtype=torch.float32,
                      device="cuda" if torch.cuda.is_available() else "cpu")
tensor = tensor + 10  # the scalar is added to every element
tensor

tensor([11., 12., 13.], device='cuda:0')

In [None]:
# Multiply tensor by 10
tensor = tensor * 10
tensor

tensor([110., 120., 130.], device='cuda:0')

In [None]:
tensor = tensor - 10
tensor

tensor([100., 110., 120.], device='cuda:0')

In [None]:
# Try out PyTorch's built-in functions
torch.mul(tensor, 10) # multiplies every element by a constant and returns a new tensor (`tensor` itself is not modified)

tensor([1000., 1100., 1200.], device='cuda:0')

In [None]:
torch.add(tensor, 9)

tensor([109., 119., 129.], device='cuda:0')

### Matrix Multiplication

Two main ways of performing multiplication in neural networks and deep learning:

1. Element-wise multiplication
2. Matrix multiplication (dot product)

`@` stands for matmul

In [None]:
# Element wise multiplication
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")


tensor([100., 110., 120.], device='cuda:0') * tensor([100., 110., 120.], device='cuda:0')
Equals: tensor([10000., 12100., 14400.], device='cuda:0')


In [None]:
# Matrix multiplication
%%time
torch.matmul(tensor, tensor)

CPU times: user 24.5 ms, sys: 13.7 ms, total: 38.2 ms
Wall time: 127 ms


tensor(36500., device='cuda:0')

In [None]:
%%time
# Matrix multiplication by hand: accumulate the element-wise products one index
# at a time (for a 1-D tensor this is the same value torch.matmul returns)
matmul_ = 0
for i in range(len(tensor)):
  matmul_ += tensor[i]*tensor[i]

print(matmul_)
# A Python-level loop is expected to scale far worse than torch.matmul on large tensors;
# on this 3-element tensor the recorded timings are too small to show that.
# NOTE(review): the earlier torch.matmul timing presumably includes one-off CUDA start-up cost - confirm by re-running.

tensor(36500., device='cuda:0')
CPU times: user 2.09 ms, sys: 0 ns, total: 2.09 ms
Wall time: 4.91 ms


In [None]:
tensor @ tensor

tensor(36500., device='cuda:0')

### One of the most common errors in deep learning: shape errors

In [None]:
# Shapes for matrix multiplications
tensor_A = torch.tensor([[1, 2],
                        [3,4],
                        [5,6]])  # shape (3, 2)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])  # shape (3, 2)

# torch.mm(tensor_A, tensor_B) # left commented out on purpose: (3, 2) @ (3, 2) fails because the inner dimensions (2 and 3) do not match; torch.mm is the matrix-matrix (2-D) counterpart of torch.matmul

To fix our tensor shape issues, we can manipulate the shape of one of our tensors using a transpose (`torch.transpose` or the `.T` attribute).

A **transpose** switches the axes or dimensions of a given tensor

In [None]:
tensor_B.T

tensor([[ 7,  8,  9],
        [10, 11, 12]])

In [None]:
# matrix multiplication works once tensor_B is transposed: (3, 2) @ (2, 3) -> (3, 3)

out = torch.mm(tensor_A, tensor_B.T)
print(out.shape)
print(out)

torch.Size([3, 3])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


## Finding the min, max, sum, etc (tensor aggregation)

In [None]:
# createa a tensor
x = torch.arange(0, 100, 10)
x, x.dtype

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [None]:
# Find the min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [None]:
# Find the max
torch.max(x), x.max()

(tensor(90), tensor(90))

In [None]:
# Find the mean - torch.mean() needs a floating point (or complex) dtype, so it cannot be called directly on the int64 (long) tensor created by arange
print(torch.mean(x, dtype=torch.float32))  # option 1: let mean() cast through its dtype argument
x = x.type(torch.float32)  # option 2: convert x itself - this rebinds x, so the cells below work on float32 values
torch.mean(x)  # not displayed: only the last expression of a cell is echoed
x.mean()

tensor(45.)


tensor(45.)

In [None]:
# Find the sum
torch.sum(x), x.sum()

(tensor(450.), tensor(450.))

### Finding the positional min and max

In [None]:
# find the position in the tensor that has the minimum value with argmin() -> returns the index of the occurrence
x.argmin()

tensor(0)

In [None]:
# similarly for max
x.argmax()

tensor(9)

In [None]:
x[torch.argmax(x)]

tensor(90.)

### Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of a certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack), or defined dimension
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a target tensor
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way

In [None]:
# create a tensor

In [None]:
import torch # just in case we restart from here somehow (idek)
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [None]:
# reshape - the total number of elements must stay the same, i.e. we go from shape (9,) to (1, 9), which both hold 9 elements
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Change the view
z = x.view(9, 1)
z, z.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [None]:
# Changing z changes x (because a view of a tensor shares the same memory as the original input)
z[0, :] = 5
z, x

(tensor([[5.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [None]:
# Stack tensors on top each other
x_stacked = torch.stack([x, x, x, x], dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [None]:
y = torch.randn(2, 3)
print(f"y: {y}")
y_concat = torch.cat((y, y), 1)
y_concat

y: tensor([[ 1.2372, -0.3872, -1.0781],
        [-0.0111,  1.2093,  0.5314]])


tensor([[ 1.2372, -0.3872, -1.0781,  1.2372, -0.3872, -1.0781],
        [-0.0111,  1.2093,  0.5314, -0.0111,  1.2093,  0.5314]])

In [None]:
# Squeeze
x_reshaped, x_reshaped.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
print(f"reshaped: {x_reshaped.squeeze()}") # removes all single dimensions -> basically remove extra dimensions from x_reshaped
x_reshaped.squeeze().shape

reshaped: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])


torch.Size([9])

In [None]:
# torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim
x_unsqueezed = x_reshaped.unsqueeze(2)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")


New tensor: tensor([[[5.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]])
New shape: torch.Size([1, 9, 1])


In [None]:
# torch.permute - rearranges the dimensions of a target tensor in a specified order (useful in images)
x_original = torch.rand(size=(224, 224, 3)) # [height, width, colour_channels]
print(f"x_original: {x_original.shape}")
x_original[0, 0, 0] = 7558 # overwrite one element with an easy-to-spot value (torch.rand only produces values in [0, 1))

# Permute the original tensor to rearrange the axis (or dim) order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0, giving [colour_channels, height, width]
print(f"x_permuted: {x_permuted.shape}")

x_original: torch.Size([224, 224, 3])
x_permuted: torch.Size([3, 224, 224])


In [None]:
# permute() returns a view, so both tensors share the same memory: the 7558
# written to x_original[0, 0, 0] is also visible at x_permuted[0, 0, 0]
# (index [0, 0, 0] is the same element in both layouts, unlike [0, 0, 1]).
print(x_original[0, 0, 0])
print(x_permuted[0, 0, 0])

tensor(7558.)
tensor(7558.)


### Indexing
Indexing with PyTorch is similar to indexing with NumPy

In [None]:
import torch
x = torch.arange(1, 10).reshape(1, 3, 3)
print(f"x: {x}", f"\n {x.shape}")

x: tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]]) 
 torch.Size([1, 3, 3])


In [None]:
# Let's index with our new tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
# Let's index on the middle bracket
x[0][0]

tensor([1, 2, 3])

In [None]:
x[0][0][0]

tensor(1)

In [None]:
# better/easier to type syntax:
x[0, 1, 2]

tensor(6)

In [None]:
# you can as well use ":" to select "all" of a target dimension
x[0, :, 0]

tensor([1, 4, 7])

In [None]:
x[:, :, 1]

tensor([[2, 5, 8]])

In [None]:
x[0, :, 0:2]

tensor([[1, 2],
        [4, 5],
        [7, 8]])

In [None]:
x[0,2,2]

tensor(9)

In [None]:
x[0, :, 2]

tensor([3, 6, 9])

### PyTorch tensors & NumPy

NumPy is a popular scientific Python numerical computing library.

And because of this, PyTorch has functionality to interact with it.

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [None]:
# NumPy array to tensor
import torch
import numpy as np

array = np.arange(1. , 8.)
tensor = torch.from_numpy(array)
array, tensor
# NumPy's default float dtype is float64 (double precision), and torch.from_numpy() keeps that dtype

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
tensor = tensor.type(dtype=torch.float32)
tensor

tensor([1., 2., 3., 4., 5., 6., 7.])

In [None]:
array = np.arange(1. , 8.)
tensor = torch.from_numpy(array).type(torch.float32)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [None]:
# Change the value of array, what will this do to `tensor`?
array = array + 1  # builds a NEW array and rebinds the name; the original buffer is never written to
array, tensor
# `tensor` is unchanged, but not because from_numpy() copies: torch.from_numpy() shares memory
# with its source array. Nothing changes here because `array + 1` does not modify the original
# array in place, and `.type(torch.float32)` in the previous cell already produced a converted copy.
# NOTE(review): the recorded output (3. to 9.) suggests this cell was executed twice - it is not idempotent.

(array([3., 4., 5., 6., 7., 8., 9.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [None]:
# Tensor to NumPy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, tensor.dtype, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 torch.float32,
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [None]:
# Change the tensor - what happens to numpy_tensor?
tensor = tensor + 1  # out-of-place: creates a new tensor and rebinds `tensor`
tensor, numpy_tensor # numpy_tensor is unchanged only because `tensor + 1` made a new tensor; Tensor.numpy() shares memory, so an in-place op such as tensor.add_(1) would show up in numpy_tensor

(tensor([3., 3., 3., 3., 3., 3., 3.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility (trying to take the random out of random)

In short, how a neural network learns:

`start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> again -> again -> again`

To reduce the "randomness" in neural networks and PyTorch, comes the concept of a **random seed**.

A random seed "flavours" the randomness: the same seed reproduces the same sequence of "random" numbers.

In [None]:
# create two random tensors

random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"tensor A:\n {random_tensor_A}")
print(f"tensor B:\n {random_tensor_B}")
print(random_tensor_A == random_tensor_B)

tensor A:
 tensor([[0.3186, 0.8523, 0.8267, 0.0425],
        [0.0770, 0.3695, 0.2232, 0.0668],
        [0.2244, 0.9182, 0.8839, 0.1728]])
tensor B:
 tensor([[0.4244, 0.8516, 0.6999, 0.5567],
        [0.3712, 0.3395, 0.8094, 0.5438],
        [0.9313, 0.4030, 0.9663, 0.8054]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [None]:
# Let's make some random but reproducible tensors

# set the random seed
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)

random_tensor_C = torch.rand(3, 4)

# reset the generator to the same seed again before the second draw
torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3, 4)

print(random_tensor_C)
print(random_tensor_D)

print(random_tensor_C == random_tensor_D)

# Note: the seed is set again right before the second draw - without that second manual_seed()
# call, random_tensor_D would continue the random sequence and differ from random_tensor_C

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs (for faster computation)

GPUs = faster computation on numbers, thanks to CUDA + NVIDIA hardware + PyTorch working behind the scenes to make everything good (hunky dory).

### 1. Getting a GPU

1. Use Google Colab for a free GPU - Easiest
2. Use your own GPU - takes some setup.
3. Use cloud computing - GCP, AWS (AHHHH), Azure

https://pytorch.org/get-started/locally/

In [None]:
torch.cuda.is_available()

True

In [None]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [None]:
torch.cuda.device_count()

1

Since PyTorch is capable of running compute on the GPU or CPU, it's best practice to set up device-agnostic code: https://docs.pytorch.org/docs/stable/notes/cuda.html

E.g. run on the GPU if available, else default to the CPU.

### 3. Putting tensors (and models) on the GPU

The reason we want our tensors/models on the GPU is because using GPU results in faster computations.

In [None]:
# Create a tensor

tensor = torch.tensor([1,2,3])

print(tensor.device)

# Shift the tensor to the target device
tensor = tensor.to(device)
tensor

cpu


tensor([1, 2, 3], device='cuda:0')

In [None]:
# If the tensor is on the GPU, it can't be converted to NumPy directly
tensor.cpu().numpy() # .cpu() first brings the tensor to host (CPU) memory

array([1, 2, 3])

Note that we can also use `cupy`, which can operate on GPU memory natively

```python
import cupy as cp
```

## Exercises and Extra-curriculum