<a href="https://colab.research.google.com/github/Maniacravi/pytorch-deep-learning/blob/main/00_pytorch_fundamentals_mani.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Fundamentals

My code along for the first section

## Import statements


In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.6.0+cu124


## Tensors

### Intro

#### Scalars

In [2]:
scalar = torch.tensor(9)
scalar

tensor(9)

In [3]:
scalar.ndim

0

In [4]:
# Get tensor back as Python int
scalar.item()

9

In [5]:
scalar.shape

torch.Size([])

#### Vectors

In [6]:
vector = torch.tensor([9, 9, 9])
vector

tensor([9, 9, 9])

In [7]:
vector.ndim

1

In [8]:
vector.shape

torch.Size([3])

#### MATRIX

In [9]:
MATRIX = torch.tensor([[9, 9],
                       [10, 11]])
MATRIX

tensor([[ 9,  9],
        [10, 11]])

In [10]:
MATRIX.ndim

2

In [11]:
MATRIX.shape

torch.Size([2, 2])

In [12]:
# slicing

MATRIX[0]

tensor([9, 9])

In [13]:
MATRIX[1, 0]

tensor(10)

#### TENSOR

In [14]:
TENSOR = torch.tensor([[[1, 2, 3],
                        [4, 5, 6],
                        [7, 8, 9]]])
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [15]:
TENSOR.ndim

3

In [16]:
TENSOR.shape

torch.Size([1, 3, 3])

In [17]:
TENSOR[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

### Creating Tensors

#### Random Tensors

Creating random tensors because they are the most usual way to create tensors in code

In [18]:
# Create a random tensor

random_tensor = torch.rand(3, 4)
random_tensor


tensor([[0.7077, 0.7781, 0.1902, 0.4257],
        [0.7641, 0.5849, 0.4739, 0.7800],
        [0.1288, 0.7124, 0.7527, 0.1230]])

In [19]:
random_tensor.shape

torch.Size([3, 4])

In [20]:
random_tensor.ndim

2

In [21]:
# Create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size=(3, 224, 224))
random_image_size_tensor, random_image_size_tensor.ndim, random_image_size_tensor.shape

(tensor([[[7.6697e-01, 4.9085e-01, 1.3167e-01,  ..., 3.8243e-01,
           7.3360e-01, 8.9668e-01],
          [7.2546e-02, 2.3710e-01, 6.8070e-02,  ..., 4.9063e-01,
           2.9886e-02, 4.8407e-01],
          [3.9080e-01, 2.8397e-01, 3.1618e-01,  ..., 1.7805e-01,
           1.7132e-01, 7.9116e-01],
          ...,
          [8.4896e-01, 5.4430e-01, 9.7353e-01,  ..., 5.2671e-01,
           3.0679e-01, 6.9587e-01],
          [2.3110e-01, 4.7140e-01, 7.4766e-01,  ..., 5.7430e-01,
           4.0462e-01, 7.4210e-01],
          [2.7185e-01, 5.1077e-01, 3.2483e-01,  ..., 3.4720e-01,
           1.9655e-01, 5.9354e-01]],
 
         [[5.7052e-01, 2.1252e-01, 8.9314e-01,  ..., 8.7786e-04,
           5.5438e-01, 7.1175e-01],
          [8.9351e-02, 9.8299e-01, 1.7210e-01,  ..., 8.5291e-01,
           6.3982e-01, 1.6953e-01],
          [3.5551e-01, 8.6629e-02, 1.2510e-03,  ..., 4.5128e-01,
           9.1655e-01, 4.4797e-01],
          ...,
          [1.6090e-02, 3.3731e-02, 8.4775e-01,  ..., 5.260

In [22]:
# Create a random tensor again
random_tensor_2 = torch.rand(size=(3, 1920, 1080))
random_tensor_2.shape

torch.Size([3, 1920, 1080])

#### Zeros and Ones

In [23]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [24]:
# All ones
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [25]:
ones.dtype # Explore the datatype of the tensor

torch.float32

#### Creating a Range of tensors and tensors-like

In [26]:
# torch.range
torch.range(0,10) # Deprecated

  torch.range(0,10) # Deprecated


tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [27]:
one_to_ten = torch.arange(1, 11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [28]:
# also can use step
torch.arange(start=0, end=1000, step=77)

tensor([  0,  77, 154, 231, 308, 385, 462, 539, 616, 693, 770, 847, 924])

In [29]:
# Create tensors like
ten_zeros = torch.zeros_like(input = one_to_ten) # Returns a tensor filled with the scalar value 0, with the same size as input
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

In [30]:
# Float_32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None) # What datatype is the tensor


In [31]:
float_32_tensor.dtype

torch.float32

Even when dtype is specified as None - still produces a float32 tensor

In [32]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # What datatype is the tensor
                               device=None, # What device is your tensor on ('cpu', 'gpu' etc)
                               requires_grad=False)
float_32_tensor

tensor([3., 6., 9.])

** Note ** : Learn about tensor datatypes and their differences. May run into errors with this often

In [33]:
float_16_tensor = float_32_tensor.type(torch.float16) # Crearting a float16 tensor from the float32 tensor
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [34]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

This works surprisignly but beware of mixing multiple types of tensors. Sometimes will raise errors

In [35]:
int_32_tensor = float_32_tensor.type(dtype = torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [36]:
int_32_tensor * float_32_tensor

tensor([ 9., 36., 81.])

Still works

#### Getting info from Tensors

In [37]:
# Get dtype
int_32_tensor.dtype

torch.int32

In [38]:
# Get shape
float_32_tensor.shape

torch.Size([3])

In [39]:
# Get device
float_16_tensor.device

device(type='cpu')

In [40]:
# Get ndim
int_32_tensor.ndim

1

#### Manipulating Tensors (tensor operations)

Includes:
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Mat Mult

In [41]:
# Addition
tensor = torch.tensor([1, 2, 3])
tensor + 10


tensor([11, 12, 13])

In [42]:
# Add two tensors
int_32_tensor + float_32_tensor

tensor([ 6., 12., 18.])

In [43]:
# Multiply tensor by 10
tensor * 10

tensor([10, 20, 30])

In [44]:
# Subtract by 10
tensor - 10

tensor([-9, -8, -7])

In [45]:
# Torch inbuilt functions
torch.mul(tensor, 10), torch.add(tensor, 10), torch.sub(tensor, 10)

(tensor([10, 20, 30]), tensor([11, 12, 13]), tensor([-9, -8, -7]))

#### Matrix multiplication

In [46]:
# Mat mult
torch.matmul(float_32_tensor, tensor.type(torch.float32))

tensor(42.)

In [47]:
# Another one
torch.matmul(torch.rand(size=(4,4)), torch.rand(size=(4,1)))

tensor([[1.1382],
        [0.8275],
        [0.6420],
        [1.0834]])

In [48]:
# Do the matmul by forloop and see how long the time difference is
%%time
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
value


CPU times: user 512 µs, sys: 77 µs, total: 589 µs
Wall time: 3.79 ms


tensor(14)

In [49]:
# same using matmul
%%time
torch.matmul(tensor, tensor) # Vectorized version - therefore much faster


CPU times: user 71 µs, sys: 11 µs, total: 82 µs
Wall time: 87.3 µs


tensor(14)

In [50]:
# One of the common errors is a size mismacth error

tensor_A = torch.rand(size=(5,4))
torch.mm(tensor_A, tensor_A) # torch.mm is an alias for torch.matmul

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x4 and 5x4)

In [51]:
# Transpose - we need to transpose the matrix for this to work
torch.mm(tensor_A, tensor_A.T)

tensor([[1.1420, 1.2759, 1.2612, 0.9708, 0.6273],
        [1.2759, 2.1022, 1.5108, 1.6761, 1.4791],
        [1.2612, 1.5108, 1.4957, 1.0834, 0.9736],
        [0.9708, 1.6761, 1.0834, 1.6663, 1.1759],
        [0.6273, 1.4791, 0.9736, 1.1759, 1.5908]])

In [52]:
tensor_B = torch.rand(size=(5, 4))
tensor_B

tensor([[0.8837, 0.3086, 0.3947, 0.5855],
        [0.2076, 0.0512, 0.3011, 0.9263],
        [0.8458, 0.6507, 0.3776, 0.8853],
        [0.7118, 0.2840, 0.0764, 0.6923],
        [0.1752, 0.4710, 0.0115, 0.6621]])

In [53]:
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([5, 4]), tensor_B = torch.Size([5, 4])

New shapes: tensor_A = torch.Size([5, 4]) (same as above), tensor_B.T = torch.Size([4, 5])

Multiplying: torch.Size([5, 4]) * torch.Size([4, 5]) <- inner dimensions match

Output:

tensor([[1.2254, 0.7172, 1.3659, 1.0282, 0.5283],
        [1.5207, 1.0022, 2.0195, 1.4161, 1.0871],
        [1.4124, 1.0084, 1.6585, 1.1968, 0.7303],
        [1.1947, 0.4694, 1.5637, 0.9932, 0.7436],
        [0.9375, 0.9091, 1.4757, 0.8713, 0.9645]])

Output shape: torch.Size([5, 5])


#### Aggregation (min, max, mean, sum)

In [54]:
# Create a tensor
x = torch.arange(-50, 64, step=0.1)
x.shape

torch.Size([1140])

In [55]:
# min
torch.min(x), x.min() # Same

(tensor(-50.), tensor(-50.))

In [56]:
# max
x.max()

tensor(63.9000)

In [57]:
# mean
x.mean()

tensor(6.9500)

In [58]:
# median
x.median()

tensor(6.9000)

In [59]:
# sum
torch.sum(x)

tensor(7923.)

In [60]:
# argmax and argmin
print(f'argmax: {x.argmax()} | x[argmax]: {x[x.argmax()]}')
print(f'argmin: {x.argmin()} | x[argmin]: {x[x.argmin()]}')

argmax: 1139 | x[argmax]: 63.900001525878906
argmin: 0 | x[argmin]: -50.0


#### Reshaping, stacking, squeezing and unsqueezing

Often times you'll want to reshape or change the dimensions of your tensors without actually changing the values inside them.

To do so, some popular methods are:

| Method | One-line description |
| ----- | ----- |
| [`torch.reshape(input, shape)`](https://pytorch.org/docs/stable/generated/torch.reshape.html#torch.reshape) | Reshapes `input` to `shape` (if compatible), can also use `torch.Tensor.reshape()`. |
| [`Tensor.view(shape)`](https://pytorch.org/docs/stable/generated/torch.Tensor.view.html) | Returns a view of the original tensor in a different `shape` but shares the same data as the original tensor. |
| [`torch.stack(tensors, dim=0)`](https://pytorch.org/docs/1.9.1/generated/torch.stack.html) | Concatenates a sequence of `tensors` along a new dimension (`dim`), all `tensors` must be same size. |
| [`torch.squeeze(input)`](https://pytorch.org/docs/stable/generated/torch.squeeze.html) | Squeezes `input` to remove all the dimenions with value `1`. |
| [`torch.unsqueeze(input, dim)`](https://pytorch.org/docs/1.9.1/generated/torch.unsqueeze.html) | Returns `input` with a dimension value of `1` added at `dim`. |
| [`torch.permute(input, dims)`](https://pytorch.org/docs/stable/generated/torch.permute.html) | Returns a *view* of the original `input` with its dimensions permuted (rearranged) to `dims`. |

Why do any of these?

Because deep learning models (neural networks) are all about manipulating tensors in some way. And because of the rules of matrix multiplication, if you've got shape mismatches, you'll run into errors. These methods help you make sure the right elements of your tensors are mixing with the right elements of other tensors.

Let's try them out.

In [61]:
# Create a tensor
x = torch.arange(1., 8.)
x, x.shape, x.ndim

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]), 1)

In [62]:
# Reshape
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape, x_reshaped.ndim

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]), 2)

In [63]:
# View
z = x.view(7,1)
z

tensor([[1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.]])

In [64]:
z[4,0] = 10
z, x

(tensor([[ 1.],
         [ 2.],
         [ 3.],
         [ 4.],
         [10.],
         [ 6.],
         [ 7.]]),
 tensor([ 1.,  2.,  3.,  4., 10.,  6.,  7.]))

This changed both z and x since z is a view of x (shares memory)

In [65]:
# Stack

x_vstacked = torch.stack([x, x, x, x], dim=0)
x_vstacked

tensor([[ 1.,  2.,  3.,  4., 10.,  6.,  7.],
        [ 1.,  2.,  3.,  4., 10.,  6.,  7.],
        [ 1.,  2.,  3.,  4., 10.,  6.,  7.],
        [ 1.,  2.,  3.,  4., 10.,  6.,  7.]])

In [66]:
# same as vstack function
x_vstacked = torch.vstack([x, x, x, x])
x_vstacked

tensor([[ 1.,  2.,  3.,  4., 10.,  6.,  7.],
        [ 1.,  2.,  3.,  4., 10.,  6.,  7.],
        [ 1.,  2.,  3.,  4., 10.,  6.,  7.],
        [ 1.,  2.,  3.,  4., 10.,  6.,  7.]])

In [67]:
x_hstacked = torch.stack([x, x, x, x], dim=1)
x_hstacked.shape

torch.Size([7, 4])

In [68]:
# Same as hstack?? - not really
x_hstacked = torch.hstack((x, x, x, x))
x_hstacked.shape

torch.Size([28])

* torch.stack(..., dim=1) creates a new dimension and places the input tensors along it.

* torch.hstack(...) for 1D tensors, concatenates them into a single longer 1D tensor.

In [69]:
# Squeeze
y = torch.rand(size=(8,1))
print(f'Shape before squeeze: {y.shape}')
y = y.squeeze()
print(f'Shape after squeeze: {y.shape}')


Shape before squeeze: torch.Size([8, 1])
Shape after squeeze: torch.Size([8])


In [70]:
# Unsqueeze
y_unsqueezed = y.unsqueeze(dim=0)
print(f'Shape after unsqueeze: {y_unsqueezed.shape}')

Shape after unsqueeze: torch.Size([1, 8])


In [71]:
y_unsqueezed = y.unsqueeze(dim=1) # Different dim
print(f'Shape after unsqueeze: {y_unsqueezed.shape}')

Shape after unsqueeze: torch.Size([8, 1])


In [72]:
# Permute - fancy word for rearrange
x = torch.rand(size=(10, 20, 3))
x_permuted = x.permute(2, 0, 1)
print(f'Shape before permute: {x.shape}')
print(f'Shape after permute: {x_permuted.shape}') # Keep in mind - x_permuted is a view of x - so changes to permuted will affect x

Shape before permute: torch.Size([10, 20, 3])
Shape after permute: torch.Size([3, 10, 20])


In this case, the original tensor x has shape (10, 20, 3). By calling x.permute(2, 0, 1), you are telling PyTorch to put the dimension at index 2 (size 3) at the first position (index 0), the dimension at index 0 (size 10) at the second position (index 1), and the dimension at index 1 (size 20) at the third position (index 2). This results in a new shape of (3, 10, 20).





### Indexing

In [73]:
# create a tensor
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [74]:
x[0], x[0].shape

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 torch.Size([3, 3]))

In [75]:
# Now on middle bracket (dim=1)
x[0][0], x[0][0].shape

(tensor([1, 2, 3]), torch.Size([3]))

In [76]:
# dim=2
x[0][0][0]

tensor(1)

In [77]:
x[0,0,0] # Same

tensor(1)

#### Slicing

In [78]:
# Get all values of 0th dimension and the 0 index of 1st dimension
x[:, 0]

tensor([[1, 2, 3]])

In [79]:
x[:, :, 0]

tensor([[1, 4, 7]])

In [80]:
x[0,:,:]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [81]:
x[:]

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [82]:
x[:,0,:].squeeze()

tensor([1, 2, 3])

In [83]:
x[0][:][1:]

tensor([[4, 5, 6],
        [7, 8, 9]])

### PyTorch and NumPy

In [84]:
array = np.arange(1,20, dtype=np.float32)
array

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19.], dtype=float32)

In [85]:
tensor = torch.from_numpy(array)
tensor

tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14.,
        15., 16., 17., 18., 19.])

In [86]:
# Changes to numpy array doesn't affect tensor
array = array + 2
array, tensor

(array([ 3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14., 15.,
        16., 17., 18., 19., 20., 21.], dtype=float32),
 tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14.,
         15., 16., 17., 18., 19.]))

In [87]:
numpy_tensor = tensor.numpy()
numpy_tensor

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19.], dtype=float32)

### Randomization

In [3]:
# Setting the seed manually
import torch
rand_A = torch.rand(3, 4)
rand_B = torch.rand(3, 4)

print(rand_A)
print(rand_B)
print(rand_A == rand_B)

tensor([[0.6735, 0.3701, 0.7214, 0.3780],
        [0.6992, 0.9114, 0.9924, 0.1716],
        [0.6936, 0.3188, 0.0591, 0.4788]])
tensor([[0.0063, 0.2648, 0.1222, 0.6034],
        [0.8949, 0.2540, 0.1369, 0.0122],
        [0.6043, 0.2515, 0.7232, 0.1437]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [4]:
# Now we set the seed
torch.manual_seed(0)
rand_A = torch.rand(3, 4)

torch.manual_seed(0)
rand_B = torch.rand(3, 4)

print(rand_A)
print(rand_B)
print(rand_A == rand_B)

tensor([[0.4963, 0.7682, 0.0885, 0.1320],
        [0.3074, 0.6341, 0.4901, 0.8964],
        [0.4556, 0.6323, 0.3489, 0.4017]])
tensor([[0.4963, 0.7682, 0.0885, 0.1320],
        [0.3074, 0.6341, 0.4901, 0.8964],
        [0.4556, 0.6323, 0.3489, 0.4017]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


### Running on GPUs

In [5]:
# check if a GPU is present
!nvidia-smi

Fri Jun 13 01:51:40 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   47C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [6]:
torch.cuda.is_available()

True

In [7]:
# Device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [8]:
torch.cuda.device_count()

1

In [9]:
# Can use this but of code to make sure it will select GPU if one is available
if torch.cuda.is_available():
    device = "cuda" # Use NVIDIA GPU (if available)
elif torch.backends.mps.is_available():
    device = "mps" # Use Apple Silicon GPU (if available)
else:
    device = "cpu" # Default to CPU if no GPU is available

In [10]:
device

'cuda'

#### Putting Tensors on GPU

In [11]:
tensor = torch.tensor([1, 2, 3])
tensor, tensor.device

(tensor([1, 2, 3]), device(type='cpu'))

In [12]:
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

In [13]:
# IF tensor on GPU, can't convert to numpy
# tensor_on_gpu.numpy() - Will Error

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [14]:
# convert back to cpu first
tensor_on_gpu.to('cpu').numpy() # This only returns a copy

array([1, 2, 3])