## 00. PyTorch Fundamentals

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Record the installed PyTorch build so the outputs below can be tied to a version
print(torch.__version__)

2.8.0+cu126


### Introduction to Tensors

In [None]:
# A scalar is a single number wrapped in a tensor
scalar = torch.tensor(data=4)
scalar

tensor(4)

In [None]:
# Number of dimensions of the scalar tensor (a single number has none)
scalar.ndim

0

In [None]:
# Pull the single value out of the tensor as a plain Python number
scalar.item()

4

In [None]:
# A vector is a one-dimensional tensor: a single row of numbers
vector = torch.tensor(data=[1, 2])
vector

tensor([1, 2])

In [None]:
# A vector has exactly one dimension
vector.ndim

1

In [None]:
# The shape lists how many elements sit along each dimension
vector.shape

torch.Size([2])

In [None]:
# A matrix is a two-dimensional tensor: rows of equal length
MATRIX = torch.tensor(
    [
        [1, 2],
        [3, 4],
    ]
)
MATRIX

tensor([[1, 2],
        [3, 4]])

In [None]:
# A matrix has two dimensions (rows and columns)
MATRIX.ndim

2

In [None]:
# Two rows of two elements each
MATRIX.shape

torch.Size([2, 2])

In [None]:
# A tensor with three dimensions: one 3x3 matrix wrapped in an outer dimension
TENSOR = torch.tensor(
    [
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ]
    ]
)
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

### Random Tensors

start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers

In [None]:
# Draw a (3, 4) tensor of random values
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.4010, 0.3807, 0.8689, 0.7724],
        [0.4458, 0.4821, 0.4042, 0.2046],
        [0.6702, 0.0251, 0.7102, 0.0139]])

In [None]:
# Random tensor whose shape mimics that of an image tensor
random_image_size_tensor = torch.rand(3, 224, 224)
random_image_size_tensor.shape, random_image_size_tensor.ndim


(torch.Size([3, 224, 224]), 3)

### Zeroes and ones

In [None]:
# A (3, 4) tensor filled with zeros
zeros = torch.zeros(3, 4)
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# A (3, 4) tensor filled with ones; also show its datatype
ones = torch.ones(3, 4)
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

### Creating a range and tensors-like

In [None]:
# `end` is exclusive, so this produces the values 0 through 9.
# NOTE(review): the name `one_to_ten` does not match the contents (0-9); it is kept
# because a later cell refers to it by this name.
one_to_ten = torch.arange(start=0, end=10, step=1)
one_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# Build a zero-filled tensor with the same shape as an existing tensor
ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

In [None]:
# Build a tensor while spelling out the three arguments most often adjusted
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # None -> infer the datatype from the data
    device=None,          # None -> use the default device
    requires_grad=False,  # do not track gradients for this tensor
)
float_32_tensor

tensor([3., 6., 9.])

In [None]:
# With dtype=None the datatype was inferred from the float literals
float_32_tensor.dtype

torch.float32

In [None]:
# Make a half-precision (float16) copy of the float32 tensor
float_16_tensor = float_32_tensor.to(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
# Mixing float16 and float32 operands works here; the result is promoted to float32
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [None]:
# Getting information from a tensor: start with a random (3, 4) tensor to inspect
some_tensor = torch.rand(size=(3, 4))
some_tensor

tensor([[0.3179, 0.9907, 0.5894, 0.3349],
        [0.9543, 0.0502, 0.6032, 0.5362],
        [0.4164, 0.6872, 0.1069, 0.0881]])

In [None]:
# Three attributes worth checking on any tensor: its datatype, its shape and the device it lives on
some_tensor.dtype, some_tensor.shape, some_tensor.device

(torch.float32, torch.Size([3, 4]), device(type='cpu'))

### Manipulating Tensors
* Addition
* Subtraction
* Multiplication
* Division
* Matrix Multiplication

In [None]:
# Small integer tensor reused by the arithmetic examples; adding a scalar applies to every element
tensor = torch.tensor(data=[1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [None]:
# Multiply every element by 10
tensor*10

tensor([10, 20, 30])

In [None]:
# Subtract 10 from every element
tensor - 10

tensor([-9, -8, -7])

In [None]:
# Function form of the multiplication above; gives the same result as `tensor*10`
torch.mul(tensor, 10)

tensor([10, 20, 30])

In [None]:
# Function form of the addition above; gives the same result as `tensor + 10`
torch.add(tensor, 10)

tensor([11, 12, 13])

### Matrix Multiplication
1. Element-wise multiplication
2. Matrix multiplication (dot product)

There are 2 main rules that matrix multiplication needs to satisfy:
1. The inner dimensions should match:
* (3,2) @ (3,2) won't work
* (2,3) @ (3,2) will work
* (3,2) @ (2,3) will work

2. The resulting matrix has the shape of the outer dimensions:
* (2,3) @ (3,2) -> (2,2)

In [None]:
# 1. Element-wise multiplication: each element is multiplied by the element in the same position
tensor * tensor

tensor([1, 4, 9])

In [None]:
# 2. Matrix multiplication (dot product): for two 1-D tensors this sums the
# element-wise products, here 1*1 + 2*2 + 3*3 = 14
torch.matmul(tensor, tensor)

tensor(14)

In [None]:
# (3, 2) @ (2, 3): the inner dimensions match, and the result takes the outer dimensions, (3, 3)
torch.matmul(torch.rand(3,2), torch.rand(2,3))

tensor([[0.0920, 0.9617, 0.3252],
        [0.0723, 0.7013, 0.2141],
        [0.0804, 0.5918, 0.0956]])

* Transpose of a tensor: `tensor_B.T`

In [None]:
# tensor_B is (2, 3); transposing it to (3, 2) lines up the inner dimensions with a (2, 3) operand
tensor_B = torch.rand(size=(2, 3))
torch.matmul(torch.rand(size=(2, 3)), tensor_B.T)

tensor([[0.6203, 0.7636],
        [0.6478, 0.7624]])

## Finding min,max,sum,etc.

In [None]:
# Integers from 1 up to (but not including) 100 in steps of 10
x = torch.arange(start=1, end=100, step=10)
x

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [None]:
# Minimum via the torch function and via the tensor method; both give the same value
torch.min(x), x.min()

(tensor(1), tensor(1))

In [None]:
# Maximum via the torch function and via the tensor method
torch.max(x), x.max()

(tensor(91), tensor(91))

In [None]:
# torch.mean needs a floating-point tensor and x holds integers, so cast to float32 first
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(46.), tensor(46.))

In [None]:
# Sum of all elements via the torch function and via the tensor method
torch.sum(x), x.sum()

(tensor(460), tensor(460))

## Finding positional min and max

In [None]:
# Reminder of the values held in x before looking up positions
x

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [None]:
# Index (not value) of the smallest element
x.argmin()  # position with min value

tensor(0)

In [None]:
# Index 0 is the position argmin reported, so this is the minimum value itself
x[0]

tensor(1)

In [None]:
# Index (not value) of the largest element
x.argmax() # position with max value

tensor(9)

## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a target tensor
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way

In [None]:
# Create a float tensor holding 1.0 through 10.0 (the end value 11. is excluded).
# torch is already imported in the first cell, so it is not re-imported here.
x = torch.arange(1., 11.)
x, x.shape

(tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]), torch.Size([10]))

In [None]:
# Reshape to (1, 10): the same ten elements with an extra leading dimension of size 1
x_reshaped = torch.reshape(x, (1, 10))
x_reshaped, x_reshaped.shape

(tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [None]:
# Create a (1, 10) view of x
z = x.view((1, 10))
z, z.shape

(tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [None]:
# Changing z changes x, because a view shares the same memory as the tensor it was created from
z[:,0] = 5  # overwrite the first column of the view in place
z, x

(tensor([[ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 tensor([ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]))

In [None]:
# Stack four copies of x along a new leading dimension (dim=0), one per row
x_stacked = torch.stack([x] * 4, dim=0)
x_stacked

tensor([[ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]])

In [None]:
# Squeezing: first look at the tensor before the size-1 dimension is removed
x_reshaped, x_reshaped.shape

(tensor([[ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [None]:
# Drop every dimension of size 1 from x_reshaped
x_squeezed = torch.squeeze(x_reshaped)

In [None]:
# After squeezing, the leading size-1 dimension is gone
x_reshaped.squeeze().shape

torch.Size([10])

In [None]:
# Put a size-1 dimension back at position 0
x_unsqueezed = torch.unsqueeze(x_squeezed, dim=0)
x_unsqueezed, x_unsqueezed.shape

(tensor([[ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [None]:
# torch.permute rearranges the dimensions of a tensor into the order given
x_original = torch.rand(224, 224, 3)

# Move the last dimension to the front: old dim 2 -> new dim 0, 0 -> 1, 1 -> 2
x_permuted = torch.permute(x_original, (2, 0, 1))
x_original.shape, x_permuted.shape

(torch.Size([224, 224, 3]), torch.Size([3, 224, 224]))

### Indexing (selecting data from tensors)
Indexing with PyTorch is similar to indexing with NumPy

In [None]:
# Create a (1, 3, 3) tensor holding 1 through 9 for the indexing examples.
# torch is already imported in the first cell, so it is not re-imported here.
x = torch.arange(1,10).reshape(1,3,3)
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [None]:
# Index one dimension at a time: outer dimension, then row, then column
x[0][0][0]

tensor(1)

In [None]:
# Use ":" to select everything along a dimension: all of dim 0, index 0 of dim 1
x[:, 0]

tensor([[1, 2, 3]])

In [None]:
# Get all values of the 0th and 1st dimensions but only index 1 of the 2nd dimension
x[:, :, 1]

tensor([[2, 5, 8]])

## PyTorch tensors and Numpy
PyTorch has functionality to interact with NumPy.
* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [None]:
# NumPy array to tensor.
# torch and numpy are already imported in the first cell, so they are not re-imported here.
array = np.arange(1.0, 8.0)
# The tensor keeps NumPy's default float64 datatype unless it is converted explicitly
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
# `array + 1` builds a new array and rebinds the name `array` to it, so the tensor
# created from the old array keeps its original values
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
# Tensor to NumPy array: the array keeps the tensor's float32 datatype
tensor = torch.ones(size=(7,))
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [None]:
# `tensor + 1` builds a new tensor and rebinds the name `tensor` to it, so the array
# created from the old tensor keeps its original values
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)
How a neural network works:
`start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> again -> again...`


* To reduce the randomness in neural networks and PyTorch, there is the concept of a **random seed**
* Essentially, what the random seed does is "flavour" the randomness

In [None]:
# Two consecutive unseeded draws give different tensors.
# torch is already imported in the first cell, so it is not re-imported here.
random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)

print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A == random_tensor_B)  # element-wise comparison

tensor([[0.1663, 0.3108, 0.8666, 0.7122],
        [0.4156, 0.0564, 0.3395, 0.7203],
        [0.2750, 0.4551, 0.2330, 0.4215]])
tensor([[0.2697, 0.8935, 0.5037, 0.0491],
        [0.7546, 0.6460, 0.0287, 0.0481],
        [0.9989, 0.3679, 0.0758, 0.5615]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [None]:
# Let's make some random but reproducible tensors.
# torch is already imported in the first cell, so it is not re-imported here.
RANDOM_SEED = 42

# Seed immediately before each draw so both draws start from the same generator state
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3,4)

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3,4)

print(random_tensor_C)
print(random_tensor_D)
print(random_tensor_C == random_tensor_D)  # element-wise comparison

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs (and making faster computations)

GPUs = faster computations on numbers

### 1. Getting a GPU

1. Easiest - use Google Colab
2. Use your own GPU
3. Use Cloud Computing

For options 2 and 3, PyTorch + GPU drivers (CUDA) take some setting up

In [None]:
# Shell command: list the NVIDIA GPUs visible to this machine with driver and CUDA versions
!nvidia-smi

Sun Oct 19 10:30:43 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   62C    P8             13W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

### 2. Check for GPU access with PyTorch

In [None]:
# True when PyTorch can use a CUDA device.
# torch is already imported in the first cell, so it is not re-imported here.
torch.cuda.is_available()

True

In [None]:
# Device-agnostic setup: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
# Count the number of CUDA devices PyTorch can see
torch.cuda.device_count()

1

### 3. Putting tensors (and models) on the GPU

The reason we want our tensors/models on the GPU is because using a GPU results in faster computations

In [None]:
# Tensors are created on the CPU unless a device is requested
tensor = torch.tensor(data=[1, 2, 3])

# Confirm where the tensor lives before moving it anywhere
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [None]:
# Send the tensor to the selected device (the GPU when one is available)
tensor_on_gpu = tensor.to(device=device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

### 4. Moving tensors back to the CPU

In [None]:
# A tensor on the GPU can't be converted to NumPy directly. The failure is the point
# of this cell, so catch it and print it; an uncaught exception would stop
# "Restart & Run All" here. (If no GPU was available, the tensor is already on the
# CPU and the call simply succeeds.)
try:
    tensor_on_gpu.numpy()
except TypeError as conversion_error:
    print(f"{type(conversion_error).__name__}: {conversion_error}")

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [None]:
# To fix this, copy the tensor back to the CPU first and then convert it to NumPy

tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])