# PyTorch Fundamentals


In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


2.2.0


In [3]:
!nvidia-smi

Mon Feb 19 14:50:45 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.23                 Driver Version: 551.23         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce 940MX         WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A    0C    P8             N/A /  200W |      60MiB /   2048MiB |     11%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## Intro to Tensors


In [4]:
# Scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [5]:
# Number of dimensions of the tensor
# A scalar has no dimensions, so ndim is 0
scalar.ndim

0

In [6]:
# Get tensor back as python int
scalar.item()

7

In [7]:
# Vector
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [8]:
# ndim matches the number of nested square-bracket pairs: [7, 7] has one
vector.ndim

1

In [9]:
vector.shape

torch.Size([2])

In [10]:
# MATRIX
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [11]:
MATRIX.ndim

2

In [12]:
MATRIX[0]

tensor([7, 8])

In [13]:
MATRIX[1]

tensor([ 9, 10])

In [14]:
MATRIX.shape

torch.Size([2, 2])

In [15]:
# TENSOR
TENSOR = torch.tensor([[[1, 2, 3],
                        [2, 4, 6],
                        [3, 6, 9]]])
TENSOR

tensor([[[1, 2, 3],
         [2, 4, 6],
         [3, 6, 9]]])

In [16]:
TENSOR.ndim

3

In [17]:
TENSOR.shape

torch.Size([1, 3, 3])

In [18]:
TENSOR[0]

tensor([[1, 2, 3],
        [2, 4, 6],
        [3, 6, 9]])

### Random Tensors


In [19]:
# Create a random tensor of size (2, 3, 4, 5)
random_tensor = torch.rand(2, 3, 4, 5)
random_tensor

tensor([[[[0.2109, 0.5677, 0.7804, 0.3594, 0.3047],
          [0.1540, 0.1922, 0.8947, 0.1746, 0.3885],
          [0.9977, 0.2319, 0.1171, 0.5373, 0.7117],
          [0.0618, 0.1471, 0.4563, 0.3400, 0.4047]],

         [[0.7453, 0.2480, 0.9873, 0.3479, 0.6384],
          [0.4293, 0.1410, 0.3274, 0.9817, 0.9650],
          [0.8738, 0.2263, 0.0411, 0.9497, 0.6894],
          [0.7988, 0.8852, 0.0800, 0.0237, 0.8883]],

         [[0.0943, 0.2558, 0.3180, 0.8380, 0.9679],
          [0.4652, 0.5684, 0.7092, 0.4028, 0.8746],
          [0.3660, 0.5713, 0.7845, 0.0015, 0.5539],
          [0.8397, 0.4196, 0.1993, 0.1624, 0.4785]]],


        [[[0.1352, 0.6890, 0.2388, 0.4483, 0.4215],
          [0.5699, 0.7728, 0.2720, 0.2157, 0.5672],
          [0.4788, 0.3827, 0.4763, 0.3400, 0.7240],
          [0.0369, 0.2578, 0.8637, 0.3677, 0.2881]],

         [[0.9341, 0.3984, 0.0950, 0.9957, 0.9929],
          [0.6343, 0.7997, 0.0632, 0.1188, 0.6240],
          [0.5711, 0.0026, 0.6982, 0.0444, 0.4543],
  

In [20]:
random_tensor.ndim

4

In [21]:
# random tensor with similar shape to an image tensor
# height, width, colour channels (R, G, B)
random_image_size_tensor = torch.rand(size=(224, 224, 3))
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [22]:
random_image_size_tensor

tensor([[[0.9573, 0.1889, 0.9244],
         [0.9630, 0.4702, 0.9568],
         [0.6061, 0.9414, 0.1384],
         ...,
         [0.6313, 0.7136, 0.8386],
         [0.4707, 0.1559, 0.7220],
         [0.6303, 0.1593, 0.5279]],

        [[0.0161, 0.6568, 0.4049],
         [0.9442, 0.3862, 0.0710],
         [0.6118, 0.5828, 0.5343],
         ...,
         [0.2984, 0.9558, 0.2626],
         [0.4731, 0.7959, 0.0328],
         [0.9713, 0.8000, 0.2317]],

        [[0.1977, 0.8948, 0.1952],
         [0.2176, 0.6582, 0.9846],
         [0.9245, 0.9902, 0.5685],
         ...,
         [0.3287, 0.6270, 0.0387],
         [0.7548, 0.0189, 0.5728],
         [0.6893, 0.7624, 0.6479]],

        ...,

        [[0.1059, 0.2033, 0.5599],
         [0.1694, 0.8638, 0.1022],
         [0.4990, 0.6521, 0.6182],
         ...,
         [0.3816, 0.8898, 0.7291],
         [0.4903, 0.1026, 0.0935],
         [0.9507, 0.4291, 0.0903]],

        [[0.0680, 0.0313, 0.1312],
         [0.6294, 0.5579, 0.6624],
         [0.

## Zeros and ones


In [23]:
# All zeros
zeros = torch.zeros(3, 4)
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [24]:
# All ones
ones = torch.ones(3, 4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [25]:
ones.dtype

torch.float32

## Creating a range of tensors and tensors-like


In [26]:
torch.arange(0, 10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [27]:
one_to_ten = torch.arange(1, 11)

In [28]:
torch.arange(start=0, end=101, step=5)

tensor([  0,   5,  10,  15,  20,  25,  30,  35,  40,  45,  50,  55,  60,  65,
         70,  75,  80,  85,  90,  95, 100])

In [29]:
# Tensors-like: create a tensor with the same shape as an existing tensor
# zeros_like returns zeros shaped like its input (here, one_to_ten)
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor datatypes

- Potential errors to watch for:
  - tensor is not the right datatype
  - tensor is not the right shape
  - tensor is not on the right device


In [30]:
# Float 32 tensor
# For background, look up "precision" in computer science (float16 vs float32 vs float64)
float_32_tensor = torch.tensor([3.0, 2, 5],
                               dtype=None, # datatype; None lets PyTorch infer it (float32 here, see output)
                               device=None, # what device the tensor is on
                               requires_grad=False) # whether or not to track gradients
float_32_tensor, float_32_tensor.dtype

(tensor([3., 2., 5.]), torch.float32)

In [31]:
float_16_tensor = float_32_tensor.type(torch.float16) # or torch.half
float_16_tensor

tensor([3., 2., 5.], dtype=torch.float16)

In [32]:
# Mixing float16 and float32 does not raise here; the result prints without a
# dtype suffix, i.e. it was promoted to the default float32
float_16_tensor * float_32_tensor

tensor([ 9.,  4., 25.])

### Getting info from tensors

- tensor.dtype
- tensor.shape attribute or .size() function/method
- tensor.device


In [33]:
some_tensor = torch.rand(3, 4)
some_tensor

tensor([[0.4544, 0.9065, 0.8958, 0.5723],
        [0.3222, 0.6413, 0.0328, 0.1671],
        [0.7536, 0.9683, 0.6961, 0.5781]])

In [34]:
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device of tensor: {some_tensor.device}")

tensor([[0.4544, 0.9065, 0.8958, 0.5723],
        [0.3222, 0.6413, 0.0328, 0.1671],
        [0.7536, 0.9683, 0.6961, 0.5781]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device of tensor: cpu


## Manipulating Tensors (Tensor operations)


In [35]:
tensor = torch.tensor([2, 3, 4])
tensor

tensor([2, 3, 4])

In [36]:
tensor + 10

tensor([12, 13, 14])

In [37]:
tensor * 10

tensor([20, 30, 40])

In [38]:
tensor - 10

tensor([-8, -7, -6])

In [39]:
tensor / 10

tensor([0.2000, 0.3000, 0.4000])

In [40]:
tensor // 10

tensor([0, 0, 0])

In [41]:
# PyTorch built-in functions
torch.mul(tensor, 10) # element-wise multiply (alias: torch.multiply); torch.mm is matrix multiplication, not this

tensor([20, 30, 40])

## Matrix multiplication

- The **inner dimensions** must match, e.g. `(3, 2) @ (2, 4)` works, and the result takes the shape of the **outer dimensions**: `(3, 4)`


In [42]:
tensor

tensor([2, 3, 4])

In [43]:
%%time
torch.matmul(tensor, tensor)

CPU times: total: 0 ns
Wall time: 29 ms


tensor(29)

In [44]:
%%time
# Manual dot product with a Python loop, for comparison with torch.matmul above
# NOTE(review): with only 3 elements the timings are likely dominated by call
# overhead; compare on larger tensors before drawing conclusions
value = 0
for i in range(len(tensor)):
    value += tensor[i] * tensor[i]
print(value)

tensor(29)
CPU times: total: 0 ns
Wall time: 4 ms


## Transpose to manipulate tensor shape


In [45]:
tensor_A = torch.rand(3, 2)
tensor_A

tensor([[0.6620, 0.5091],
        [0.9347, 0.1809],
        [0.6342, 0.0544]])

In [46]:
tensor_B = torch.rand(3, 2)
tensor_B

tensor([[0.9033, 0.2160],
        [0.6063, 0.8012],
        [0.4846, 0.7495]])

In [47]:
torch.mm(tensor_A, tensor_B.T) # transpose

tensor([[0.7080, 0.8092, 0.7024],
        [0.8834, 0.7116, 0.5885],
        [0.5847, 0.4282, 0.3482]])

## Tensor aggregation (min, max, mean, sum, etc.)


In [48]:
x = torch.arange(0, 100, 10)
x, x.dtype

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [49]:
x.min(), torch.min(x)

(tensor(0), tensor(0))

In [50]:
x.max(), torch.max(x)

(tensor(90), tensor(90))

In [51]:
# x.mean() and torch.mean(x) raise an error here because x is int64;
# mean needs a floating point (or complex) dtype, so cast first (next cell)

In [52]:
x.type(torch.float32).mean(), torch.mean(x.type(torch.float32))

(tensor(45.), tensor(45.))

In [53]:
# positional min and max
x.argmin() # returns index position where min value occurs

tensor(0)

In [54]:
x.argmax()

tensor(9)

## Reshaping, view, stacking, squeezing, unsqueezing, permute


In [55]:
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [56]:
x_reshaped_1 = x.reshape(1, 9) # 1 * 9 = 9, the same number of elements as x
x_reshaped_1, x_reshaped_1.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [57]:
x_reshaped_2 = x.reshape(9, 1)
x_reshaped_2, x_reshaped_2.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [58]:
# view returns a tensor that shares memory with the original input,
# so changing z will change x
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [59]:
z[:, 0] = 5 # changing first element to 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [60]:
x_stacked = torch.stack([x, x, x, x], dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [61]:
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [62]:
# squeeze removes all dimensions of size 1
# (the first element shows 5. because x was modified through the view z above,
# and this reshape of x reflects that change)
x_reshaped_1, x_reshaped_1.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [63]:
x_squeezed = x_reshaped_1.squeeze()
x_squeezed, x_squeezed.shape

(tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [64]:
# unsqueeze adds a dimension of size 1 at the specified dim
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
x_unsqueezed, x_unsqueezed.shape


(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [65]:
x_unsqueezed = x_squeezed.unsqueeze(dim=1)
x_unsqueezed, x_unsqueezed.shape

(tensor([[5.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [66]:
# permute returns a view with the dimensions rearranged in the specified order
x_original = torch.rand(size=(2, 3, 4))
x_original

tensor([[[0.2454, 0.6036, 0.8721, 0.6286],
         [0.7886, 0.8854, 0.2036, 0.7402],
         [0.5723, 0.1692, 0.4052, 0.3305]],

        [[0.1502, 0.7134, 0.6795, 0.3987],
         [0.0513, 0.7792, 0.9049, 0.1313],
         [0.4448, 0.2418, 0.4662, 0.6603]]])

In [67]:
# Reverse the dimension order: shape (2, 3, 4) -> (4, 3, 2)
x_original_permute = x_original.permute(2, 1, 0)
x_original_permute

tensor([[[0.2454, 0.1502],
         [0.7886, 0.0513],
         [0.5723, 0.4448]],

        [[0.6036, 0.7134],
         [0.8854, 0.7792],
         [0.1692, 0.2418]],

        [[0.8721, 0.6795],
         [0.2036, 0.9049],
         [0.4052, 0.4662]],

        [[0.6286, 0.3987],
         [0.7402, 0.1313],
         [0.3305, 0.6603]]])

In [68]:
# Writing to x_original also changes x_original_permute (see both outputs),
# because permute returned a view sharing the same data
x_original[0, 0, 0] = 2
x_original, x_original_permute

(tensor([[[2.0000, 0.6036, 0.8721, 0.6286],
          [0.7886, 0.8854, 0.2036, 0.7402],
          [0.5723, 0.1692, 0.4052, 0.3305]],
 
         [[0.1502, 0.7134, 0.6795, 0.3987],
          [0.0513, 0.7792, 0.9049, 0.1313],
          [0.4448, 0.2418, 0.4662, 0.6603]]]),
 tensor([[[2.0000, 0.1502],
          [0.7886, 0.0513],
          [0.5723, 0.4448]],
 
         [[0.6036, 0.7134],
          [0.8854, 0.7792],
          [0.1692, 0.2418]],
 
         [[0.8721, 0.6795],
          [0.2036, 0.9049],
          [0.4052, 0.4662]],
 
         [[0.6286, 0.3987],
          [0.7402, 0.1313],
          [0.3305, 0.6603]]]))

## Indexing (selecting data from tensors)


In [69]:
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [70]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [71]:
x[0][0]

tensor([1, 2, 3])

In [72]:
x[0][0][0]

tensor(1)

In [73]:
x[0][1][1]

tensor(5)

In [74]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [75]:
x[:] # everything along dim 0 (here that is the whole tensor)

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [76]:
# all of dim 0 and dim 1, index 0 of dim 2 -> the first column
x[:, :, 0]

tensor([[1, 4, 7]])

## PyTorch tensors and numpy

- NumPy array -> tensor: `torch.from_numpy(ndarray)`
- Tensor -> NumPy array: `torch.Tensor.numpy()`


In [77]:
# NumPy array -> tensor; the tensor keeps the array's float64 dtype (see output)
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [78]:
# Tensor -> NumPy array; the array keeps the tensor's float32 dtype (see output).
# Named numpy_tensor rather than `numpy` so the result cannot be mistaken for
# (or shadow) the NumPy module.
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility

Take the random out of random: setting a seed makes the pseudo-random numbers repeatable, so results can be reproduced.


In [79]:
random_a = torch.rand(3, 3)
random_b = torch.rand(3, 3)
print(random_a)
print(random_b)
print(random_a == random_b)

tensor([[0.4893, 0.1784, 0.0030],
        [0.0567, 0.1342, 0.3764],
        [0.4849, 0.1202, 0.9691]])
tensor([[0.6865, 0.4241, 0.7848],
        [0.3719, 0.5146, 0.0514],
        [0.3177, 0.9194, 0.9675]])
tensor([[False, False, False],
        [False, False, False],
        [False, False, False]])


In [80]:
# Make the randomness reproducible by "flavouring" it with a random seed
RANDOM_SEED = 42
# The seed is set again before each torch.rand call so both calls replay the same values
torch.manual_seed(RANDOM_SEED)
random_c = torch.rand(3, 4)

torch.manual_seed(RANDOM_SEED)
random_d = torch.rand(3, 4)

print(random_c)
print(random_d)
print(random_c == random_d)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running PyTorch objects and tensors on the GPU


In [81]:
# Report whether a CUDA GPU is visible to PyTorch.
# The device name is only queried when CUDA is available, because
# torch.cuda.get_device_name raises on machines without a usable CUDA device,
# which would break a Restart & Run All on CPU-only setups.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce 940MX


In [82]:
# Set up device-agnostic code: use the GPU when available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
device, torch.cuda.device_count()

('cuda', 1)

In [83]:
tensor = torch.tensor([1, 2, 3])
tensor, tensor.device

(tensor([1, 2, 3]), device(type='cpu'))

In [84]:
# Move the tensor to the selected device ("cuda" if available, else "cpu")
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

In [85]:
# .cpu() gives the tensor back on the CPU (see the device in the output)
tensor_back_on_cpu = tensor_on_gpu.cpu()
tensor_back_on_cpu, tensor_back_on_cpu.device

(tensor([1, 2, 3]), device(type='cpu'))