In [1]:
import torch

In [None]:
torch.__version__

'2.3.1+cu121'

In [None]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
scalar.ndim

0

In [None]:
# get the value of a one-element tensor back as a plain Python int
scalar.item()

7

In [None]:
#vector
vector = torch.tensor([7,7, 2])
vector

tensor([7, 7, 2])

In [None]:
# number of dimensions of the vector (one level of square brackets -> 1)
vector.ndim

1

In [None]:
vector.shape

torch.Size([3])

In [None]:
MATRIX = torch.tensor([[7,8],
                       [9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [None]:
MATRIX.ndim

2

In [None]:
MATRIX[1]

tensor([ 9, 10])

In [None]:
MATRIX.shape

torch.Size([2, 2])

In [None]:
TENSOR = torch.tensor([[[1,2,3], [4,5,6], [7,8,9]]])
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [None]:
TENSOR.ndim

3

In [None]:
TENSOR.shape

torch.Size([1, 3, 3])

In [None]:
TENSOR.size() # No difference between using the method size and the attribute shape

torch.Size([1, 3, 3])

In [None]:
TENSOR[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

Nomenclature: scalars and vectors are written in lowercase, matrices and tensors in uppercase

### Random Tensors

In [None]:
# Create a random tensor of size/shape (2,4)
random_tensor = torch.rand(2,4)
random_tensor

tensor([[0.2394, 0.7612, 0.1621, 0.7952],
        [0.8123, 0.3412, 0.9507, 0.9321]])

In [None]:
random_tensor.ndim

2

In [None]:
random_image_size_tensor = torch.rand(size=(3,224, 224)) # presumably (colour channels, height, width); a channels-last layout would be (224, 224, 3)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([3, 224, 224]), 3)

### Zeros and Ones

In [None]:
zeros = torch.zeros(2,4)
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
random_tensor*zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
ones = torch.ones(2,4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
ones.dtype

torch.float32

### Create a range of tensors and tensors-like

In [None]:
# NOTE: arange excludes `end`, so despite the name this holds 0..9 (torch.arange(1, 11) would give 1..10)
one_to_ten = torch.arange(0,10)
one_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
torch.arange(start = 0, end = 1000, step = 88)

tensor([  0,  88, 176, 264, 352, 440, 528, 616, 704, 792, 880, 968])

In [None]:
# Create tensor-like: a tensor filled with the scalar value 0, with the same size as input
ten_zeros = torch.zeros_like(input = one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

In [None]:
float_32_tensor = torch.tensor([3.0, 6.0],
                               dtype = None, # datatype of the tensor; if None it is inferred from the data (float32 for Python floats, as the output shows)
                               device = None, # where the tensor is stored (cpu, cuda...); operations between tensors stored on different devices raise an error
                                              # if None the tensor is constructed on the current default device
                               requires_grad = False) # whether PyTorch tracks gradients for the operations this tensor takes part in
float_32_tensor, float_32_tensor.dtype

(tensor([3., 6.]), torch.float32)

In [None]:
float_16_tensor = torch.tensor([3.0, 6.0], dtype = torch.float16)
float_16_tensor, float_16_tensor.dtype

(tensor([3., 6.], dtype=torch.float16), torch.float16)

In [None]:
cpu_tensor = torch.tensor([3.0, 6.0], device = 'cpu')

In [None]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6.], dtype=torch.float16)

In [None]:
# torch.half is another name for torch.float16 (see the dtype in the output)
float_16_tensor = float_32_tensor.type(torch.half)
float_16_tensor

tensor([3., 6.], dtype=torch.float16)

In [None]:
(float_16_tensor * float_32_tensor).dtype
# here PyTorch automatically promotes one tensor to the right datatype (float16 * float32 -> float32)
# for other operations, we can expect a "not in the correct datatype" error

torch.float32

In [None]:
int_32_tensor = torch.tensor([3,6], dtype = torch.int32)
int_32_tensor

tensor([3, 6], dtype=torch.int32)

In [None]:
float_32_tensor * int_32_tensor

tensor([ 9., 36.])

Most common errors and how to solve them


*   Tensors not right datatype -> `tensor.dtype`
*   Tensors not right shape -> `tensor.shape`
*   Tensors not on the right device -> `tensor.device`



### Manipulating Tensors (tensor operations)
Tensor operations include:
* addition
* subtraction
* multiplication (element-wise)
* division
* matrix multiplication (dot product)

In [19]:
tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [11]:
torch.add(tensor, 10)

tensor([11, 12, 13])

In [8]:
tensor * 10

tensor([10, 20, 30])

In [10]:
tensor.mul(10)

tensor([10, 20, 30])

In [12]:
tensor*tensor # Hadamard product

tensor([1, 4, 9])

In [9]:
tensor - 10

tensor([-9, -8, -7])

There are two main rules that matrix multiplication needs to satisfy:


1.   The **inner dimensions** must match
2.   The resulting matrix has the shape of the **outer dimensions**

An example of correct matrix multiplication: (2,3) @ (3,4)
1. The number of columns in the first matrix matches the number of rows in the second
2. The resulting matrix has shape (2,4)



In [13]:
torch.matmul(tensor, tensor)

tensor(14)

In [22]:
tensor @ tensor

tensor(14)

In [25]:
torch.mm(torch.rand(2,3), torch.rand(3,4)) # mm is the strict 2-D matrix product (no broadcasting); matmul is the more general function, not a plain alias

tensor([[1.1771, 0.8968, 1.0753, 0.7958],
        [0.1704, 0.3257, 0.4585, 0.2789]])

In [26]:
# (2,3) @ (2,3) fails because the inner dimensions (3 and 2) do not match.
# Catch only the RuntimeError PyTorch raises for the shape mismatch: a bare
# `except:` would also hide unrelated problems (typos, KeyboardInterrupt, ...).
try:
  torch.matmul(torch.rand(2,3), torch.rand(2,3))
except RuntimeError:
  print('Shape error')

Shape error


In [27]:
torch.mm(torch.rand(2,3), torch.rand(2,3).T) # transpose of tensor is tensor.T

tensor([[0.3438, 0.9490],
        [0.5137, 0.7725]])

In [28]:
torch.rand(2,3).shape, torch.rand(2,3).T.shape

(torch.Size([2, 3]), torch.Size([3, 2]))

In [20]:
%%time
# manual dot product with a Python loop, timed for comparison with torch.matmul
value = 0
for i in range(len(tensor)):
  value += tensor[i]*tensor[i]
print(value)

tensor(14)
CPU times: user 1.61 ms, sys: 0 ns, total: 1.61 ms
Wall time: 1.62 ms


In [21]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 123 µs, sys: 0 ns, total: 123 µs
Wall time: 129 µs


tensor(14)

### Tensor Aggregation: finding the min, max, mean, sum...

In [49]:
# ten evenly spaced integers 0, 10, ..., 90 (`end` is exclusive)
tensor = torch.arange(start=0, end=100, step=10)
tensor

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [31]:
torch.min(tensor), tensor.min()

(tensor(0), tensor(0))

In [32]:
torch.max(tensor), tensor.max()

(tensor(90), tensor(90))

In [33]:
# mean() needs a floating point (or complex) dtype; `tensor` is int64 here, so this raises.
# Catch and display the error so the notebook still survives "Restart & Run All".
try:
  tensor.mean()
except RuntimeError as err:
  print(f'RuntimeError: {err}')

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [39]:
tensor = tensor.type(torch.float32)

In [41]:
tensor.mean(), torch.mean(tensor)

(tensor(45.), tensor(45.))

In [42]:
torch.sum(tensor), tensor.sum()

(tensor(450.), tensor(450.))

### Positional min and max

In [None]:
tensor = torch.arange(0, 100, 10)

In [50]:
tensor.argmin() # return the index position of the lowest element in the tensor

tensor(0)

In [51]:
tensor.argmax() # return the index position of the highest element

tensor(9)

### Reshaping, stacking, squeezing and unsqueezing tensors

In [73]:
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [75]:
# Add an extra dimension
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [56]:
x_reshaped = x.reshape(9,1)
x_reshaped, x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [57]:
# 9 elements cannot be rearranged into 1*7 = 7 slots, so reshape raises a RuntimeError.
# Only that error is caught: a bare `except:` would also swallow unrelated failures.
try:
  x.reshape(1,7)
except RuntimeError:
  print('Reshape dimension not compatible with input shape')

Reshape dimension not compatible with input shape


In [74]:
# view() returns a new tensor of shape (1,9) over the same data; the shape of x itself is unchanged
z = x.view(1,9)
z, x.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([9]))

In [61]:
z[0,0] = 5 # the view shares its memory with the original tensor, so x changes too
z,x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [69]:
# Stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim = 0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [70]:
# Stack tensors side by side: with dim=1 each input becomes a column
x_stacked = torch.stack([x, x, x, x], dim = 1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [80]:
x_reshaped = x.reshape(1,1,9)
x_reshaped.shape

torch.Size([1, 1, 9])

In [81]:
x_squeezed = torch.squeeze(x_reshaped) # removes all dimensions of size 1 from the target tensor
x_squeezed, x_squeezed.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [82]:
x_unsqueezed = torch.unsqueeze(x_squeezed, dim = 0) # adds a dimension of size 1 to the target tensor at the given position
x_unsqueezed, x_unsqueezed.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [83]:
x_unsqueezed = torch.unsqueeze(x_squeezed, dim = 1)
x_unsqueezed, x_unsqueezed.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [85]:
#return a view of the tensor with re-arranged dimensions in a specified order
x = torch.randn(2,3,5)
torch.permute(x, (2,0,1)).shape

torch.Size([5, 2, 3])

In [86]:
# (224, 224, 3) -> (3, 224, 224): move the last axis to the front
x_original = torch.rand(224, 224, 3)
x_permuted = torch.permute(x_original, (2, 0, 1))
x_permuted.shape

torch.Size([3, 224, 224])

### Indexing

In [87]:
x = torch.arange(1,10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [88]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [89]:
x[0][0]

tensor([1, 2, 3])

In [91]:
x[0, 1, 1]

tensor(5)

In [92]:
x[:, 1, 1] # since this get all values of the 0th dimension, it returns a tensor

tensor([5])

In [90]:
x[:, :, 1]

tensor([[2, 5, 8]])

### From NumPy array to PyTorch tensor

In [93]:
import numpy as np

In [97]:
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) # when converting from numpy to pytorch, pytorch reflects numpy's standard dtype
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [95]:
array.dtype # Numpy default data type is float64

dtype('float64')

In [98]:
# `array + 1` allocates a NEW array, so the tensor built with torch.from_numpy is unaffected.
# The result gets its own name: rebinding `array` here would detach it from `tensor`,
# and the in-place `array += 1` demonstration that follows would no longer update the tensor
# when the notebook is run top to bottom.
array_plus_one = array + 1
array_plus_one, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [96]:
# `+=` modifies the array in place; a tensor created from this same array with
# torch.from_numpy shares its memory, so it shows the new values too
array += 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([2., 3., 4., 5., 6., 7., 8.], dtype=torch.float64))

In [99]:
tensor = torch.ones(7)
numpy_tensor = tensor.numpy() # since PyTorch's default dtype is float32, the NumPy array is float32 too
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility: take the random out of random
Weight initialization usually relies on random numbers, but to make results completely reproducible we'd like these numbers to stay the same in each run.
To control the randomness we use a **random seed**, which "flavours" the random number generator so that it produces the same sequence every time.

In [100]:
x = torch.rand(3,4)
y = torch.rand(3,4)
x == y

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [102]:
# Re-seeding the global generator before each draw makes both draws identical
RANDOM_SEED = 101

torch.manual_seed(RANDOM_SEED)
x = torch.rand(3, 4)

# the seed has to be set again: the first draw advanced the generator state
torch.manual_seed(RANDOM_SEED)
y = torch.rand(3, 4)

print(x, y, x == y, sep='\n')

tensor([[0.1980, 0.4503, 0.0909, 0.8872],
        [0.2894, 0.0186, 0.9095, 0.3406],
        [0.4309, 0.7324, 0.4776, 0.0716]])
tensor([[0.1980, 0.4503, 0.0909, 0.8872],
        [0.2894, 0.0186, 0.9095, 0.3406],
        [0.4309, 0.7324, 0.4776, 0.0716]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


### Run on GPUs

In [1]:
import torch

In [2]:
!nvidia-smi

Sat Aug 10 15:44:57 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
# check for GPU access with PyTorch
torch.cuda.is_available()

True

In [5]:
# setup device agnostic code: we may not always have access to a GPU, but we'd like to use it if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [6]:
# count number of GPUs
torch.cuda.device_count()

1

In [11]:
# put tensors (and models) on the GPU
tensor = torch.tensor([1,2,3], device=device)
tensor, tensor.device

(tensor([1, 2, 3], device='cuda:0'), device(type='cuda', index=0))

In [12]:
# move tensor to GPU (if available)
tensor = torch.tensor([1,2,3])
tensor_on_gpu = tensor.to(device)
tensor_on_gpu, tensor.device, tensor_on_gpu.device


(tensor([1, 2, 3], device='cuda:0'),
 device(type='cpu'),
 device(type='cuda', index=0))

In [13]:
# we might want a tensor on the CPU, e.g. to convert it to NumPy, which is impossible while it lives on the GPU
try:
  tensor_on_gpu.numpy()
except TypeError:
  # PyTorch raises TypeError for a CUDA tensor; a bare `except:` would also hide
  # unrelated errors. On a CPU-only machine (device == 'cpu') the call simply
  # succeeds and nothing is printed.
  print('Tensor not on CPU')

# Tensor.cpu() copies the tensor to CPU memory
tensor_back_on_cpu = tensor_on_gpu.cpu()
array = tensor_back_on_cpu.numpy()
array

Tensor not on CPU


array([1, 2, 3])