# 00. PyTorch fundamentals

Resource notebook: https://www.learnpytorch.io/00_pytorch_fundamentals/

In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib as plt

## Introduction to tensors

### Creating a tensor

In [3]:
# A scalar is a zero-dimensional tensor that holds a single value.
scalar = torch.tensor(7)
print(scalar.ndim, scalar, sep="\n")

# .item() unwraps the single value into a plain Python int.
scalar.item()

0
tensor(7)


7

In [4]:
# A vector is a one-dimensional tensor (a single pair of brackets).
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [5]:
# A matrix is a two-dimensional tensor: here 2 rows by 2 columns.
MATRIX = torch.tensor([[7, 8], [9, 10]])

# Show the number of dimensions, the second row and the shape, one per line.
print(MATRIX.ndim, MATRIX[1], MATRIX.shape, sep="\n")

2
tensor([ 9, 10])
torch.Size([2, 2])


In [6]:
# Three-dimensional tensor: one outer block containing a 3x3 grid.
TENSOR = torch.tensor([[[1, 2, 3], [3, 6, 9], [9, 8, 7]]])

# Show the tensor, its number of dimensions, the middle row of block 0 and the shape.
print(TENSOR, TENSOR.ndim, TENSOR[0, 1], TENSOR.shape, sep="\n")

tensor([[[1, 2, 3],
         [3, 6, 9],
         [9, 8, 7]]])
3
tensor([3, 6, 9])
torch.Size([1, 3, 3])


In [7]:
# Three-dimensional tensor made of two 2x2 blocks.
TENSOR = torch.tensor(
    [
        [[7, 8], [9, 10]],
        [[11, 12], [13, 14]],
    ]
)

# Show the tensor, its number of dimensions, the first row of block 1 and the shape.
print(TENSOR, TENSOR.ndim, TENSOR[1, 0], TENSOR.shape, sep="\n")

tensor([[[ 7,  8],
         [ 9, 10]],

        [[11, 12],
         [13, 14]]])
3
tensor([11, 12])
torch.Size([2, 2, 2])


In [8]:
# Three-dimensional tensor: one block holding a 2x3 grid.
TENSOR = torch.tensor([[[1, 2, 3], [4, 5, 6]]])

# Show the tensor, its number of dimensions, the only block and the shape.
print(TENSOR, TENSOR.ndim, TENSOR[0], TENSOR.shape, sep="\n")

tensor([[[1, 2, 3],
         [4, 5, 6]]])
3
tensor([[1, 2, 3],
        [4, 5, 6]])
torch.Size([1, 2, 3])


Note: There are a lot of tensors above because I was practicing.

## Random Tensor

Random tensors are important because many neural networks learn by starting with tensors full of random numbers and then adjusting those random numbers to better represent the data.

`Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers`

In [9]:
# Random tensor with 3 rows and 4 columns.
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.4593, 0.9813, 0.8836, 0.8110],
        [0.9267, 0.9132, 0.1123, 0.5428],
        [0.9238, 0.0061, 0.3982, 0.8754]])

In [10]:
# Random tensor with three dimensions: 3 blocks of 4 rows by 2 columns.
random_tensor = torch.rand(size=(3, 4, 2))
random_tensor

tensor([[[0.8914, 0.6301],
         [0.1992, 0.0191],
         [0.7170, 0.2740],
         [0.0816, 0.4300]],

        [[0.3057, 0.6948],
         [0.1471, 0.5475],
         [0.8702, 0.9457],
         [0.1831, 0.4940]],

        [[0.9579, 0.6229],
         [0.2303, 0.3480],
         [0.5977, 0.9036],
         [0.5759, 0.0309]]])

In [11]:
# Create a random tensor with similar shape to an image tensor

In [12]:
# Random tensor laid out like an image: (height, width, colour_channels).
random_image_size_tensor = torch.rand(224, 224, 3)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

## Zeros and ones

This can be useful when you want to zero out a whole row or column.

In [13]:
# 3x3 tensor filled with zeros (floating point, as the trailing dots in the output show).
zeros = torch.zeros(3, 3)
zeros

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [14]:
# 3x3 tensor filled with ones.
ones = torch.ones(3, 3)
ones

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

## Creating a range of tensors and tensors-like

In [15]:
# torch.arange(start, end, step): `end` is exclusive, like Python's range().
a = torch.arange(10)                      # 0, 1, ..., 9
b = torch.arange(start=5, end=25, step=3)  # 5, 8, ..., 23
a, b

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), tensor([ 5,  8, 11, 14, 17, 20, 23]))

In [16]:
# Create a "tensor-like": zeros with the same shape as `a`
c = torch.zeros_like(a)
c

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor datatypes

**Note:** Tensor datatype is one of the 3 big errors you'll run into with PyTorch and deep learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on right device

In [17]:
# Float32 tensor: with dtype=None the dtype is inferred from the float data,
# which gives torch.float32 here.
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # datatype of the tensor (None -> inferred from the data)
    device=None,          # device the tensor is stored on, e.g. "cpu" or "cuda"
    requires_grad=False,  # whether to track gradients for operations on this tensor
)
float_32_tensor, float_32_tensor.dtype

(tensor([3., 6., 9.]), torch.float32)

In [18]:
# Convert the float32 tensor to half precision (float16)
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor, float_16_tensor.dtype

(tensor([3., 6., 9.], dtype=torch.float16), torch.float16)

In [19]:
# Multiplying float16 by float32 works; the displayed result carries no
# dtype suffix, i.e. it has the default float32 dtype
float_16_tensor*float_32_tensor

tensor([ 9., 36., 81.])

In [20]:
# Integer tensor with an explicitly requested 32-bit dtype
int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
int_32_tensor, int_32_tensor.dtype

(tensor([3, 6, 9], dtype=torch.int32), torch.int32)

In [21]:
# float32 * int32 also works: the result is float32 (see the displayed dtype)
float_32_tensor * int_32_tensor, (float_32_tensor * int_32_tensor).dtype

(tensor([ 9., 36., 81.]), torch.float32)

## Getting information from tensors

1. Tensors not right datatype - to get the datatype from a tensor, use `tensor.dtype`
2. Tensors not right shape - to get the shape from a tensor, use `tensor.shape`
3. Tensors not on right device - to get the device from a tensor, use `tensor.device`

In [22]:
# Random 3x4 tensor used to inspect dtype, shape and device.
some_tensor = torch.rand(size=(3, 4))
some_tensor

tensor([[0.8769, 0.5895, 0.7034, 0.0814],
        [0.2112, 0.0195, 0.7019, 0.4544],
        [0.8096, 0.1554, 0.0549, 0.2422]])

In [23]:
# Find out details about some_tensor: the three attributes behind the
# "datatype / shape / device" errors listed above
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"shape of tensor: {some_tensor.shape}")
print(f"device of tensor: {some_tensor.device}")

tensor([[0.8769, 0.5895, 0.7034, 0.0814],
        [0.2112, 0.0195, 0.7019, 0.4544],
        [0.8096, 0.1554, 0.0549, 0.2422]])
Datatype of tensor: torch.float32
shape of tensor: torch.Size([3, 4])
device of tensor: cpu


## Manipulating tensors (tensor operations)

Tensor operations include:
- Addition
- Subtraction
- Multiplication (element-wise)
- Division (element-wise)
- Matrix multiplication

In [24]:
# Create a tensor, then add, multiply and subtract the scalar 10.
# Each operation is applied to every element and returns a new tensor;
# `tensor` itself is not reassigned here.
tensor = torch.tensor([1, 2, 3])
tensor + 10, tensor * 10, tensor - 10

(tensor([11, 12, 13]), tensor([10, 20, 30]), tensor([-9, -8, -7]))

In [25]:
# Try out PyTorch built-in functions: torch.mul gives the same result as tensor * 10
torch.mul(tensor, 10)

tensor([10, 20, 30])

In [26]:
# torch.add gives the same result as tensor + 10
torch.add(tensor, 10)

tensor([11, 12, 13])

## Matrix multiplication

Two main ways of performing multiplication in neural networks and deep learning

1. Element-wise multiplication
2. Matrix multiplication (dot product)

There are two main rules that performing matrix multiplication needs to satisfy:
1. The **inner dimensions** must match:
* `(3,2) @ (3,2)` won't work
*  `(2,3) @ (3,2)` will work
* `(3,2) @ (2,3)` will work
2. The resulting matrix has the shape of the **outer dimensions**
* `(2, 3) @ (3, 2)` -> `(2,2)`
* `(3, 2) @ (2, 3)` -> `(3,3)`

In [28]:
# (3, 2) @ (2, 2): the inner dimensions match (2 == 2), so this works and
# the result takes the outer dimensions, (3, 2)
torch.matmul(torch.rand(3, 2), torch.rand(2,2))

tensor([[0.4307, 0.6812],
        [0.7560, 0.6691],
        [0.1220, 0.1545]])

In [None]:
# Element-wise multiplication: each element is multiplied by the element at
# the same position, so the result has the same shape as the inputs
print(tensor, "*", tensor)
print(f"Equals: {tensor*tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [None]:
# Matrix multiplication of two 1-D tensors is their dot product:
# 1*1 + 2*2 + 3*3 = 14
torch.matmul(tensor, tensor)

tensor(14)

## One of the most common errors in deep learning: shape errors

In [None]:
# Shapes for matrix multiplication
tensor_a = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])

tensor_b = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])

# torch.mm is the strictly 2-D matrix product (it does not broadcast); for
# 2-D inputs it computes the same result as torch.matmul.
# Both operands are (3, 2), so the inner dimensions (2 and 3) do not match and
# the call fails. The failure is the point of this cell, so it is caught and
# displayed instead of aborting a "Restart & Run All".
try:
    torch.mm(tensor_a, tensor_b)
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [30]:
# Both tensors are (3, 2): the inner dimensions (2 and 3) differ
tensor_a.shape, tensor_b.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

To fix our tensor shape issues, we can manipulate the shape of one of our tensors using a **transpose**

A **transpose** switches the axes or dimensions of a given tensor

In [32]:
# Original (3, 2) tensor next to its (2, 3) transpose
tensor_b, tensor_b.T

(tensor([[ 7, 10],
         [ 8, 11],
         [ 9, 12]]),
 tensor([[ 7,  8,  9],
         [10, 11, 12]]))

In [None]:
# The matrix multiplication works once tensor_b is transposed:
# (3, 2) @ (2, 3) -> (3, 3)
torch.mm(tensor_a, tensor_b.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

## Finding the min, max, mean, sum, etc. (tensor aggregation)

In [None]:
# Integer tensor 0, 10, ..., 90 used for the aggregation examples.
x = torch.arange(start=0, end=100, step=10)
# The function form (torch.min / torch.max) and the method form (x.min / x.max)
# return the same value.
print(f"min: {torch.min(x)}, {x.min()}")
print(f"max: {torch.max(x)}, {x.max()}")

min: 0, 0
max: 90, 90


In [41]:
# torch.mean needs a floating point (or complex) input, but x is an integer
# (Long) tensor, so this call fails. The failure is the point of this cell,
# so it is caught and displayed instead of aborting a "Restart & Run All".
try:
    print(f"mean: {torch.mean(x)}, {x.mean()}")
except RuntimeError as dtype_error:
    print(f"RuntimeError: {dtype_error}")

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

### If we execute the cell above, we get the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[41], line 1
----> 1 print(f"mean: {torch.mean(x)}, {x.mean()}")

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

To solve this problem, we can cast the tensor into another dtype

In [44]:
# Cast x to float32 first so that torch.mean accepts it
print(f"mean: {torch.mean(x.type(torch.float32))}")

mean: 45.0


In [45]:
# Find the sum (function form and method form give the same value)
torch.sum(x), x.sum()

(tensor(450), tensor(450))

## Finding the positional min and max

In [None]:
# Index (position) of the smallest and of the largest value, not the values themselves
x.argmin(), x.argmax()

(tensor(0), tensor(9))

## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - return a view of an input tensor of a certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - adds a `1` dimension to a target tensor
* Permute - return a view of the input with dimensions permuted (swapped) in a certain way

In [52]:
# Float tensor 1.0 ... 9.0 (nine elements) used by the reshaping examples.
x = torch.arange(start=1.0, end=10.0)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [None]:
# Try to add an extra dimension with an incompatible shape: (1, 7) only has
# room for 7 values but x holds 9, so reshape refuses rather than dropping
# elements. The failure is the point of this cell, so it is caught and
# displayed instead of aborting a "Restart & Run All".
try:
    x_reshaped = x.reshape(1, 7)
except RuntimeError as reshape_error:
    print(f"RuntimeError: {reshape_error}")

RuntimeError: shape '[1, 7]' is invalid for input of size 9

In [None]:
# (2, 7) needs 14 values but x only holds 9, so reshape would have to invent
# non-existing elements and fails. The failure is the point of this cell, so
# it is caught and displayed instead of aborting a "Restart & Run All".
try:
    x_reshaped = x.reshape(2, 7)
except RuntimeError as reshape_error:
    print(f"RuntimeError: {reshape_error}")

RuntimeError: shape '[2, 7]' is invalid for input of size 9

In [55]:
# (1, 9) holds exactly the 9 elements of x, adding a leading dimension of size 1
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [56]:
# Change the view: the same values as x, seen with shape (1, 9)
z = x.view(1,9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

Changing z changes x (because a view of a tensor shares the same memory as the original tensor)

In [57]:
# Write into the first column of the view; the displayed pair shows that the
# first element of x changes as well
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [63]:
# Stack tensors on top of each other: four copies of x along a new first
# dimension (dim=0), giving shape (4, 9)
x_stacked = torch.stack([x, x, x, x], dim = 0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [69]:
# hstack on 1-D tensors joins them end to end: four copies of x give a single
# 1-D tensor of 36 elements
x_hstacked = torch.hstack([x, x, x, x])
x_hstacked

tensor([5., 2., 3., 4., 5., 6., 7., 8., 9., 5., 2., 3., 4., 5., 6., 7., 8., 9.,
        5., 2., 3., 4., 5., 6., 7., 8., 9., 5., 2., 3., 4., 5., 6., 7., 8., 9.])

In [76]:
# torch.squeeze - removes all dimensions of size 1 from a target tensor
print(f"previous tensor: {x_reshaped}")
print(f"previous shape: {x_reshaped.shape}")

# Remove the extra size-1 dimension from x_reshaped: (1, 9) -> (9,)
x_squeezed  = x_reshaped.squeeze()
print(f"\nNew Tensor: {x_squeezed}")
print(f"New Shape: {x_squeezed.shape}")


previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
previous shape: torch.Size([1, 9])

New Tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New Shape: torch.Size([9])


In [None]:
# torch.unsqueeze - adds a dimension of size 1 to a target tensor at a specific dim (dimension)
print(f"Previous target: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add an extra dimension at position 1 with unsqueeze: (9,) -> (9, 1)
x_unsqueezed = x_squeezed.unsqueeze(dim=1)
print(f"\nNew Tensor: {x_unsqueezed}")
print(f"New Shape: {x_unsqueezed.shape}")

Previous target: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New Tensor: tensor([[5.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])
New Shape: torch.Size([9, 1])


torch.permute - Rearranges the dimensions of a tensor in a specified order

In [79]:
# Image-like random tensor laid out as [height, width, colour_channels].
x_original = torch.rand(size=(224, 224, 3))

# Reorder the axes so the colour channels come first:
# old axis 2 -> new axis 0, old axis 0 -> new axis 1, old axis 1 -> new axis 2.
x_permuted = x_original.permute((2, 0, 1))
print(
    f"previous shape: {x_original.shape}",
    f"new shape: {x_permuted.shape}",
    sep="\n",
)

previous shape: torch.Size([224, 224, 3])
new shape: torch.Size([3, 224, 224])


In [81]:
# Write into the original tensor and read the same position from the permuted
# one: the displayed value is 123 too, so both share the same data
x_original[0,0,0] = 123
x_permuted[0,0,0] 

tensor(123.)

In [85]:
# Small example that is easy to inspect by eye: (4, 2, 3) -> (3, 4, 2).
# After the permute, b[i, j, k] is the element a[j, k, i].
a = torch.rand(size=(4, 2, 3))
b = a.permute((2, 0, 1))
a, b

(tensor([[[0.5370, 0.4565, 0.7345],
          [0.2075, 0.7273, 0.7944]],
 
         [[0.5298, 0.6381, 0.0391],
          [0.1942, 0.0707, 0.6598]],
 
         [[0.9838, 0.5558, 0.8656],
          [0.1831, 0.3513, 0.9607]],
 
         [[0.8039, 0.6093, 0.4027],
          [0.8302, 0.4658, 0.3394]]]),
 tensor([[[0.5370, 0.2075],
          [0.5298, 0.1942],
          [0.9838, 0.1831],
          [0.8039, 0.8302]],
 
         [[0.4565, 0.7273],
          [0.6381, 0.0707],
          [0.5558, 0.3513],
          [0.6093, 0.4658]],
 
         [[0.7345, 0.7944],
          [0.0391, 0.6598],
          [0.8656, 0.9607],
          [0.4027, 0.3394]]]))

## Indexing (Selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy

In [87]:
# Integers 1..9 arranged as one block of 3 rows by 3 columns.
x = torch.arange(start=1, end=10).reshape((1, 3, 3))
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [88]:
# Index the outermost dimension: x[0] is the whole 3x3 block
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [90]:
# Index the middle bracket: x[0, 0] is the first row of the block
x[0,0]

tensor([1, 2, 3])

In [97]:
# Index the innermost bracket: a single element, then the whole second row
# (":" selects everything along that dimension), then the whole third column
x[0,0,0], x[0,1,:], x[0, :, 2]

(tensor(1), tensor([4, 5, 6]), tensor([3, 6, 9]))

## PyTorch tensors and NumPy

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [100]:
# NumPy array -> PyTorch tensor. from_numpy keeps the array's dtype, so the
# float64 array becomes a float64 tensor.
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
print(array.dtype, tensor.dtype)
array, tensor

float64 torch.float64


(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

**Warning:** NumPy's default float dtype is float64 while PyTorch's is float32. `torch.from_numpy` keeps the array's dtype, so the resulting tensor is float64 unless you convert it (e.g. with `.type(torch.float32)`).

In [102]:
# PyTorch tensor -> NumPy array. The array keeps the tensor's dtype (float32).
tensor = torch.ones(size=(7,))
np_tensor = tensor.numpy()
tensor, np_tensor, np_tensor.dtype

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32),
 dtype('float32'))

## Reproducibility (trying to take random out of random)

In short, how a neural network learns:

`start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> again -> again -> again ...`

To reduce the randomness in neural networks and pytorch comes the concept of a **random seed**.

Essentially what the random seed does is "flavour" the randomness.

In [107]:
# Two random tensors drawn without setting a seed: each call to torch.rand
# continues the generator's sequence, so the two draws differ.
random_tensor_a = torch.rand(3,4)
random_tensor_b = torch.rand(3,4)

print(random_tensor_a)
print(random_tensor_b)
# Element-wise comparison: every position is expected to be False.
print(random_tensor_a == random_tensor_b)

tensor([[0.8989, 0.2993, 0.1270, 0.8594],
        [0.6112, 0.8200, 0.8346, 0.1894],
        [0.2706, 0.4900, 0.8649, 0.7498]])
tensor([[0.1280, 0.0960, 0.5219, 0.9250],
        [0.6542, 0.6569, 0.2445, 0.1107],
        [0.9174, 0.5613, 0.4935, 0.5492]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


### Let's make some random but reproducible tensors

In [None]:
# Seed the generator once, then draw two tensors back to back. The seed fixes
# the whole sequence, so c and d are reproducible across runs but still
# differ from each other.
torch.manual_seed(42)

random_tensor_c, random_tensor_d = torch.rand(3, 4), torch.rand(3, 4)

print(
    random_tensor_c,
    random_tensor_d,
    random_tensor_c == random_tensor_d,
    sep="\n",
)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [113]:
# Keep the seed in one named constant so both draws provably use the same value.
RANDOM_SEED = 42

# Set the random seed, then draw the first tensor.
torch.manual_seed(RANDOM_SEED)
random_tensor_c = torch.rand(3,4)

# Re-seed before the second draw: the generator restarts from the same state,
# so the second tensor repeats the first one exactly.
torch.manual_seed(RANDOM_SEED)
random_tensor_d = torch.rand(3,4)

print(random_tensor_c)
print(random_tensor_d)
print(random_tensor_c == random_tensor_d)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs (and making faster computations)

In [None]:
!nvidia-smi

Thu Feb 13 15:44:42 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.86.16              Driver Version: 570.86.16      CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1650 Ti     Off |   00000000:01:00.0  On |                  N/A |
| N/A   56C    P8              5W /   50W |      36MiB /   4096MiB |     17%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

### 1. Putting tensors (and models) on the GPU

The reason we want our tensors/models on the GPU is that using a GPU results in faster computations.

In [9]:
import torch
import numpy as np

# Create a tensor (it lives on the CPU by default)
tensor = torch.tensor([1,2,3])

# Tensor not on GPU
print(tensor, tensor.device)

# Device-agnostic setup: use the GPU when one is available and fall back to
# the CPU otherwise, so this cell also runs on machines without CUDA instead
# of raising an error.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Move the tensor to the selected device; .to() returns the moved tensor and
# leaves `tensor` itself on the CPU.
tensor_on_gpu = tensor.to(device)
tensor_on_gpu


tensor([1, 2, 3]) cpu


tensor([1, 2, 3], device='cuda:0')

### 2. Moving tensors back to the CPU

NumPy only works on the CPU

In [10]:
# If a tensor is on the GPU it cannot be converted to NumPy directly: NumPy
# only works on the CPU, so .numpy() raises a TypeError for a CUDA tensor.
# The failure is the point of this cell, so it is caught and displayed
# instead of aborting a "Restart & Run All".
try:
    tensor_on_gpu.numpy()
except TypeError as device_error:
    print(f"TypeError: {device_error}")

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

To fix the GPU tensor with NumPy issue, we can first move it to the CPU

In [11]:
# Copy the tensor back to the CPU first, then convert it to a NumPy array
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])