## Setup

In [40]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

import torch
# Show which PyTorch build this notebook is running against.
print(torch.__version__)

2.2.1+cu118


Course resources:
1. https://github.com/mrdbourke/pytorch-deep-learning
2. https://www.learnpytorch.io
3. https://pytorch.org/get-started/locally/

# 00. PyTorch Fundamentals

## Tensors

Tensors can be created and manipulated much like NumPy arrays, so if you already know NumPy, most of this will feel familiar.

### Creating torch.tensors

In [41]:
# A scalar tensor: torch.tensor() given a single bare Python number.
scalar = torch.tensor(7)
scalar

tensor(7)

In [42]:
# Number of dimensions, shape, and the wrapped Python value, one per line.
print(scalar.ndim, scalar.shape, scalar.item(), sep="\n")

0
torch.Size([])
7


In [43]:
# A vector: one level of brackets gives one dimension.
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [44]:
# Number of dimensions and shape of the vector, one per line.
print(vector.ndim, vector.shape, sep="\n")

1
torch.Size([2])


In [45]:
# Indexing a 1-D tensor yields a 0-D tensor, not a plain Python int.
vector[0]

tensor(7)

By convention, matrices and higher-dimensional tensors are given upper-case variable names, while scalars and vectors stay lower-case.

In [46]:
# A matrix: two levels of brackets give two dimensions (rows, columns).
MATRIX = torch.tensor([
    [1, 2],
    [3, 4],
])
MATRIX

tensor([[1, 2],
        [3, 4]])

In [47]:
# Number of dimensions and shape of the matrix, one per line.
print(MATRIX.ndim, MATRIX.shape, sep="\n")

2
torch.Size([2, 2])


In [48]:
# Indexing the first dimension of a matrix returns its first row as a 1-D tensor.
MATRIX[0]

tensor([1, 2])

In [49]:
# Three levels of brackets: a single 3x3 matrix wrapped in one outer dimension.
TENSOR = torch.tensor([
    [
        [1, 2, 3],
        [3, 4, 5],
        [6, 7, 8],
    ],
])
TENSOR

tensor([[[1, 2, 3],
         [3, 4, 5],
         [6, 7, 8]]])

In [50]:
# Dimensions, shape, and the last row of the only inner matrix, one per line.
print(TENSOR.ndim, TENSOR.shape, TENSOR[0, 2, :], sep="\n")

3
torch.Size([1, 3, 3])
tensor([6, 7, 8])


### Random tensors


These are quite important, as they help with initializing parameters.
https://pytorch.org/docs/stable/generated/torch.rand.html?highlight=torch+rand#torch.rand

In [51]:
# Random tensor built from a NumPy array; the result keeps NumPy's float64 dtype.
LMAO = torch.tensor(np.random.random(size=(3, 3, 3)))
LMAO

tensor([[[0.5248, 0.3534, 0.4839],
         [0.2566, 0.1186, 0.2558],
         [0.1720, 0.9143, 0.3714]],

        [[0.9264, 0.8851, 0.2130],
         [0.4785, 0.5945, 0.1588],
         [0.1296, 0.9536, 0.8643]],

        [[0.1536, 0.7884, 0.3535],
         [0.2415, 0.9085, 0.7094],
         [0.9922, 0.5499, 0.1826]]], dtype=torch.float64)

In [52]:
# The PyTorch-native way to get uniform random values in [0, 1).
LMAO2 = torch.rand(size=(3, 4))
LMAO2

tensor([[0.9121, 0.0594, 0.0303, 0.6972],
        [0.7694, 0.7202, 0.5097, 0.3691],
        [0.0836, 0.8479, 0.8324, 0.9666]])

In [53]:
# Random tensor with an image-like shape: 3 x 256 x 256.
RANDOM_IMAGE_TENSOR = torch.rand(size=(3, 256, 256))
(RANDOM_IMAGE_TENSOR.shape, RANDOM_IMAGE_TENSOR.ndim)

(torch.Size([3, 256, 256]), 3)

### Zero tensors and stuff

In [54]:
# All-zeros and all-ones tensors with the same shape as the random image tensor.
ZEROS, ONES = (
    torch.zeros(size=(3, 256, 256)),
    torch.ones(size=(3, 256, 256)),
)

In [55]:
# Multiplying by zeros must give zeros everywhere.
# .all() is the public reduction; the private _is_all_true() is not part of the documented API.
(ZEROS * RANDOM_IMAGE_TENSOR == ZEROS).all()

tensor(True)

In [56]:
# Multiplying by ones must leave every element unchanged.
# .all() is the public reduction; the private _is_all_true() is not part of the documented API.
(ONES * RANDOM_IMAGE_TENSOR == RANDOM_IMAGE_TENSOR).all()

tensor(True)

### Range of tensors and tensors-like

In [57]:
# Named range_tensor rather than `range` so the Python builtin range() stays usable
# in every later cell of this kernel session.
# Values 1, 3, 5, 7, 9: start is inclusive, end is exclusive.
range_tensor = torch.arange(start=1, end=11, step=2, dtype=torch.int64, requires_grad=False)
range_tensor

tensor([1, 3, 5, 7, 9])

In [58]:
# ones_like builds an all-ones tensor with the same shape as the tensor it is given.
ONES_LIKE_IMAGE = torch.ones_like(RANDOM_IMAGE_TENSOR)
# torch.zeros_like is the all-zeros counterpart.
ONES_LIKE_IMAGE.shape == RANDOM_IMAGE_TENSOR.shape

True

### Dtypes



https://pytorch.org/docs/stable/tensors.html#data-types

Use `torch.<dtype>`, e.g. `torch.float32` or `torch.int16`.

In [59]:
# The three keyword arguments worth knowing when creating a tensor.
f32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # datatype; None lets PyTorch infer it (float32 for these floats)
    device="cpu",         # cpu or gpu
    requires_grad=False,  # whether or not to track the gradients with this tensor
)
f32_tensor.dtype

torch.float32

In [60]:
# An explicit dtype overrides inference: float literals are stored as int32.
i32_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.int32)
i32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [61]:
# Convert an existing tensor to another dtype; the source tensor is left untouched.
i16_tensor = f32_tensor.to(torch.int16)
i16_tensor

tensor([3, 6, 9], dtype=torch.int16)

In [62]:
# Mixed-dtype arithmetic promotes to the wider type: int16 * int32 -> int32, int32 * float32 -> float32.
(i16_tensor * i32_tensor, (i32_tensor * f32_tensor).dtype)

(tensor([ 9, 36, 81], dtype=torch.int32), torch.float32)

### Operations

1. Addition
2. Subtraction
3. Multiplication (element wise)
4. Division (element wise)
5. Matrix Multiplication (dot)

Most of these operations work exactly like they do with numpy arrays

In [63]:
# Small random matrix used by the arithmetic examples that follow.
SOME_TENSOR = torch.rand(size=(3, 3))
SOME_TENSOR

tensor([[0.5488, 0.4018, 0.6715],
        [0.1417, 0.4979, 0.9651],
        [0.4052, 0.8402, 0.1938]])

In [64]:
# Scalars broadcast: every operator here is applied element by element.
((SOME_TENSOR + 100) / 100) ** 7 - 1

tensor([[0.0391, 0.0285, 0.0480],
        [0.0100, 0.0354, 0.0695],
        [0.0287, 0.0603, 0.0136]])

In [65]:
#* Function equivalents of `+` and `/`; generally prefer the normal operators.
torch.div(torch.add(SOME_TENSOR, 100), 100)

tensor([[1.0055, 1.0040, 1.0067],
        [1.0014, 1.0050, 1.0097],
        [1.0041, 1.0084, 1.0019]])

In [66]:
# A single-row random tensor.
ANOTHER_TENSOR = torch.rand(size=(1, 3))
ANOTHER_TENSOR

tensor([[0.6632, 0.9761, 0.8028]])

#### Matrix multiplication

http://matrixmultiplication.xyz

In [67]:
# `*` multiplies element by element; `@` is matrix multiplication.
(SOME_TENSOR * SOME_TENSOR, SOME_TENSOR @ SOME_TENSOR)

(tensor([[0.3012, 0.1615, 0.4509],
         [0.0201, 0.2479, 0.9313],
         [0.1642, 0.7060, 0.0376]]),
 tensor([[0.6302, 0.9848, 0.8864],
         [0.5393, 1.1157, 0.7627],
         [0.4199, 0.7440, 1.1205]]))

In [68]:
torch.matmul(SOME_TENSOR,SOME_TENSOR)
#* Function form of the `@` operator used in the previous cell (identical output).
#* NOTE(review): "more efficient" presumably compares against a hand-written Python loop, not against `@` — confirm.

tensor([[0.6302, 0.9848, 0.8864],
        [0.5393, 1.1157, 0.7627],
        [0.4199, 0.7440, 1.1205]])

---

### Shape errors

In [69]:

TENSOR_A = torch.tensor([
    [1, 2],
    [3, 4],
    [5, 6],
])
TENSOR_B = torch.tensor([
    [7, 10],
    [8, 11],
    [9, 12],
])
#! Both operands are (3, 2), so multiplying them directly gives a shape error:
# torch.mm(TENSOR_A, TENSOR_B)  # torch.mm can also be used as matmul
# Transposing B to (2, 3) lines up the inner dimensions: (3, 2) @ (2, 3) -> (3, 3).
torch.matmul(TENSOR_A, TENSOR_B.T)


tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

### Aggregation

Again, works almost exactly like numpy arrays

In [70]:
# Random 3x3x3 tensor for the aggregation examples.
TENSOR_A = torch.rand(size=(3, 3, 3))

In [71]:
# Whole-tensor reductions, reported as one "label: value" line each.
summary_stats = {
    "Avg": TENSOR_A.mean(),
    "Median": TENSOR_A.median(),
    "Max": TENSOR_A.max(),
    "Min": TENSOR_A.min(),
    "Sum": TENSOR_A.sum(),
}
print("".join(f"{label}: {value}\n" for label, value in summary_stats.items()))

Avg: 0.4974519610404968
Median: 0.4358171820640564
Max: 0.9322761297225952
Min: 0.021444320678710938
Sum: 13.43120288848877



In [72]:
TENSOR_B = torch.rand(size=(3, 4, 5))

# dim=None reduces over every axis (same as .sum()); an integer dim removes just that axis.
for axis in (None, 0, 1, 2):
    print(TENSOR_B.sum(dim=axis).shape)

torch.Size([])
torch.Size([4, 5])
torch.Size([3, 5])
torch.Size([3, 4])


In [73]:
# dim=None gives one index into the flattened tensor; dim=2 gives an index per position along the last axis.
for axis in (None, 2):
    print(TENSOR_B.argmax(dim=axis))

tensor(13)
tensor([[4, 2, 3, 4],
        [0, 2, 1, 2],
        [1, 1, 0, 0]])


### Reshaping, stacking


Exactly like numpy, with a new feature

In [74]:
# Two integer tensors of the same shape with values in [1, 10).
TENSOR_A = torch.randint(low=1, high=10, size=(2, 3, 4))
TENSOR_B = torch.randint(low=1, high=10, size=(2, 3, 4))

#### Reshaping

In [75]:
# Original tensor, the same 24 elements reshaped to 3x8, and flattened to 1-D.
print(TENSOR_A, TENSOR_A.reshape(3, 8), TENSOR_A.flatten(), sep="\n")

tensor([[[7, 5, 1, 5],
         [3, 4, 3, 9],
         [3, 7, 6, 9]],

        [[3, 1, 5, 3],
         [9, 7, 1, 1],
         [3, 2, 6, 2]]])
tensor([[7, 5, 1, 5, 3, 4, 3, 9],
        [3, 7, 6, 9, 3, 1, 5, 3],
        [9, 7, 1, 1, 3, 2, 6, 2]])
tensor([7, 5, 1, 5, 3, 4, 3, 9, 3, 7, 6, 9, 3, 1, 5, 3, 9, 7, 1, 1, 3, 2, 6, 2])


#### View

A view in PyTorch is like a view in a MySQL database: the data stays in sync with the original tensor, but the shape is changed to better fit the use case.

In [76]:
# An 8x3 view over the 24 elements of TENSOR_A.
view_A = TENSOR_A.view((8, 3))
view_A

tensor([[7, 5, 1],
        [5, 3, 4],
        [3, 9, 3],
        [7, 6, 9],
        [3, 1, 5],
        [3, 9, 7],
        [1, 1, 3],
        [2, 6, 2]])

In [77]:
# view_A and TENSOR_A share storage, so a write through either name is visible through the other.
print(TENSOR_A[0,0,0])
TENSOR_A[0,0,0] += 7  # write through the original tensor...
print(TENSOR_A[0,0,0])
print(view_A[0,0])  # ...and the view shows the new value
view_A[0,0] -= 7  # write through the view (this also restores the starting value)...
print(TENSOR_A[0,0,0])  # ...and the original shows it

tensor(7)
tensor(14)
tensor(14)
tensor(7)


#### Stack

In [78]:
# stack() adds a new axis of length 2 at position `dim` (dim=0 when omitted).
tensor_pair = (TENSOR_A, TENSOR_B)
print(torch.stack(tensor_pair).shape)
for axis in (0, 1, 2, 3):
    print(torch.stack(tensor_pair, dim=axis).shape)

torch.Size([2, 2, 3, 4])
torch.Size([2, 2, 3, 4])
torch.Size([2, 2, 3, 4])
torch.Size([2, 3, 2, 4])
torch.Size([2, 3, 4, 2])


### Squeezing and unsqueezing

In [79]:
# A tensor with two size-1 dimensions to squeeze away.
TENSOR_A = torch.randint(low=1, high=10, size=(2, 1, 5, 1))

In [80]:
# Of the requested dims (0, 1, 2) only dim 1 has size 1, so only it is removed;
# unsqueeze(1) inserts a new size-1 axis at position 1.
print(
    TENSOR_A.shape,
    torch.squeeze(TENSOR_A, dim=(0, 1, 2)).shape,
    torch.unsqueeze(TENSOR_A, 1).shape,
    sep="\n",
)

torch.Size([2, 1, 5, 1])
torch.Size([2, 5, 1])
torch.Size([2, 1, 1, 5, 1])


In [81]:
# Squeezing dim 1 and then unsqueezing it again should reproduce the original.
# torch.equal is the public API: it compares both shape and values and returns a Python bool,
# replacing the private Tensor._is_all_true().
torch.equal(torch.squeeze(TENSOR_A, dim=(0, 1, 2)).unsqueeze(1), TENSOR_A)

True

## Stuff

### Permute

Commonly used to manipulate image data.<br>
A permuted tensor shares memory with the original, so, like a view, it reflects any change made to the original tensor.

In [82]:
# Fresh integer tensor for the permute examples.
TENSOR_A = torch.randint(low=1, high=10, size=(2, 3, 4))

In [83]:
# permute(1, 0, 2) swaps the first two axes: (2, 3, 4) -> (3, 2, 4).
print(TENSOR_A.shape, TENSOR_A.permute(1, 0, 2).shape, sep="\n")

torch.Size([2, 3, 4])
torch.Size([3, 2, 4])


In [84]:
# Same elements before and after swapping the first two axes.
print(TENSOR_A, TENSOR_A.permute(1, 0, 2), sep="\n")

tensor([[[9, 8, 6, 4],
         [4, 8, 5, 9],
         [9, 3, 2, 5]],

        [[5, 4, 5, 7],
         [2, 9, 8, 9],
         [6, 9, 1, 5]]])
tensor([[[9, 8, 6, 4],
         [5, 4, 5, 7]],

        [[4, 8, 5, 9],
         [2, 9, 8, 9]],

        [[9, 3, 2, 5],
         [6, 9, 1, 5]]])


In [85]:
# Channels-last random "image" with integer values in [0, 256).
RANDOM_IMAGE_TENSOR = torch.randint(0, 256, size=(224, 224, 3))
# Move the last axis to the front: (224, 224, 3) -> (3, 224, 224).
PERMUTE_IMAGE_TENSOR = RANDOM_IMAGE_TENSOR.permute((2, 0, 1))
print(RANDOM_IMAGE_TENSOR.shape)
print(PERMUTE_IMAGE_TENSOR.shape)
#* this helps us bring the colour channels to the "front"
# torch.equal (public API, returns a Python bool) replaces the private Tensor._is_all_true().
print(torch.equal(RANDOM_IMAGE_TENSOR[:, :, 0], PERMUTE_IMAGE_TENSOR[0]))

torch.Size([224, 224, 3])
torch.Size([3, 224, 224])
True


In [86]:
# 500 cannot occur in randint(0, 256) data, so finding it in the permuted tensor
# shows that the permuted tensor shares memory with the original.
RANDOM_IMAGE_TENSOR[50,74,0] = 500
# Index (50, 74, 0) in the original is (0, 50, 74) after permute((2, 0, 1)).
PERMUTE_IMAGE_TENSOR[0,50,74]

tensor(500)

### PyTorch $\leftrightarrow$ Numpy

In [87]:
# NumPy -> PyTorch
array = np.arange(1, 10)
tensor_C = torch.from_numpy(array)
print(array, tensor_C, sep="\n")

# Change the tensor, then go PyTorch -> NumPy.
tensor_C[0] = 90
array = tensor_C.numpy()
print(array, tensor_C, sep="\n")

[1 2 3 4 5 6 7 8 9]
tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.int32)
[90  2  3  4  5  6  7  8  9]
tensor([90,  2,  3,  4,  5,  6,  7,  8,  9], dtype=torch.int32)


### Reproducibility

In [88]:
# Snapshot the generator state, draw, restore the snapshot, draw again: both draws match.
state_1 = torch.get_rng_state()
print(torch.randint(low=0, high=10, size=(2, 3)))
torch.set_rng_state(state_1)
print(torch.randint(low=0, high=10, size=(2, 3)))

tensor([[8, 7, 5],
        [3, 4, 2]])
tensor([[8, 7, 5],
        [3, 4, 2]])


In [89]:
RANDOM_SEED = 42

# Re-seeding before each draw makes both draws start from the same generator state.
torch.manual_seed(RANDOM_SEED)
tensor_C = torch.randint(0, 10, (2, 3))

torch.manual_seed(RANDOM_SEED)
tensor_D = torch.randint(0, 10, (2, 3))

# .all() is the public reduction; it replaces the private Tensor._is_all_true().
print((tensor_C == tensor_D).all())

tensor(True)


## GPU

1. Get GPU - `done`
2. Get CUDA - `will do later`
https://www.youtube.com/watch?v=19LQRx78QVU

In [90]:
# NOTE(review): torch is already imported in the Setup cell, and torchvision is not used
# in the visible part of this notebook — consider moving both to the top import cell.
import torch
import torchvision
# Reports whether PyTorch can use CUDA on this machine.
torch.cuda.is_available()

True

In [91]:
# Shell escape: print the NVIDIA driver's report of the installed GPU and its current usage.
!nvidia-smi

Mon Apr  1 23:15:23 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 546.26                 Driver Version: 546.26       CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1650 Ti   WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   44C    P8               3W /  30W |    128MiB /  4096MiB |      4%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

### Device-agnostic code

https://pytorch.org/docs/stable/notes/cuda.html#device-agnostic-code

In [92]:
# Pick the GPU when one is available and fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

### Putting stuff on GPU

In [93]:
# Tensors are created on the CPU unless a device is given.
tensor_C = torch.tensor([1, 2, 3])
(tensor_C, tensor_C.device)

(tensor([1, 2, 3]), device(type='cpu'))

In [94]:
# .to(device) returns a tensor on the selected device; tensor_C itself is not modified.
tensor_C_GPU = tensor_C.to(device)
(tensor_C_GPU, tensor_C_GPU.device)

(tensor([1, 2, 3], device='cuda:0'), device(type='cuda', index=0))

**`Numpy only works on a CPU`**

In [95]:
# Bring the tensor back to the CPU before handing it to NumPy.
tensor_C_CPU = tensor_C_GPU.to("cpu")
tensor_C_CPU.numpy()

array([1, 2, 3], dtype=int64)