In [2]:
import torch
# Record which PyTorch version produced the outputs saved in this notebook
print(torch.__version__)

1.12.1


# Introduction to Tensors
## Creating tensors

PyTorch tensors are created using `torch.tensor()`.

In [3]:
# Scalar: a single number stored in a tensor with no dimensions
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
# Number of dimensions of the scalar (the recorded output is 0)
scalar.ndim

0

In [5]:
# Get the single value back out of the tensor as a plain Python int
scalar.item()

7

In [6]:
# Vector: a one-dimensional tensor holding two elements
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [7]:
# One level of square brackets -> one dimension
vector.ndim

1

In [8]:
# shape lists the length of each dimension: here a single dimension of length 2
vector.shape

torch.Size([2])

In [9]:
# MATRIX: a two-dimensional tensor with 2 rows and 2 columns
MATRIX = torch.tensor([[7, 8], [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [10]:
# Two levels of square brackets -> two dimensions
MATRIX.ndim 

2

In [11]:
# Indexing the first dimension returns the first row of the matrix
MATRIX[0]

tensor([7, 8])

In [12]:
# 2 rows by 2 columns
MATRIX.shape

torch.Size([2, 2])

In [13]:
# TENSOR: a three-dimensional tensor, i.e. one 3x3 matrix wrapped in an outer dimension

TENSOR = torch.tensor([[[1, 2, 3], 
                        [4, 5, 6], 
                        [7, 8, 9]]])

TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [14]:
# Three levels of square brackets -> three dimensions
TENSOR.ndim

3

In [15]:
# One outer entry that contains a 3x3 matrix
TENSOR.shape

torch.Size([1, 3, 3])

In [16]:
# Indexing the outer dimension returns the single 3x3 matrix it contains
TENSOR[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [17]:
# Four-dimensional example. Built with torch.tensor(), the same factory function the
# other cells use: it infers the dtype from the data, so these integers stay integers.
# (The legacy torch.Tensor() constructor would silently convert them to float32.)
EXPANDED_TENSOR = torch.tensor([[[[1, 2], [3, 4], [5, 6]],
                                 [[7, 8], [9, 10], [11, 12]]],
                                [[[1, 2], [3, 4], [5, 6]],
                                 [[7, 8], [9, 10], [11, 12]]]])

# Nesting depth gives the number of dimensions; shape gives the length of each one
print("Ndims:", EXPANDED_TENSOR.ndim)
print("Shape:", EXPANDED_TENSOR.shape)

Ndims: 4
Shape: torch.Size([2, 2, 3, 2])


### Random tensors

Why random tensors?

Random tensors are important because many neural networks learn by starting with tensors full of random numbers and then adjusting those random numbers to better represent the data.

The process is:
`Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers`

In [18]:
# Create a random tensor of shape (3, 4); the values shown below all lie between 0 and 1
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.5850, 0.5363, 0.3874, 0.0532],
        [0.0094, 0.3720, 0.5263, 0.7833],
        [0.8171, 0.5652, 0.4068, 0.8262]])

In [19]:
# Create a random tensor with a shape similar to that of an image tensor
random_image_size_tensor = torch.rand(size=(3, 224, 224)) # colour channels, height, width
random_image_size_tensor.shape, random_image_size_tensor.ndim 

(torch.Size([3, 224, 224]), 3)

### Zeros and ones

In [20]:
# Create a (3, 4) tensor in which every element is zero
zero = torch.zeros(size=(3, 4))
zero

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [21]:
# Create a (3, 4) tensor in which every element is one
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [22]:
# No dtype was requested above, and the result is float32 (see output)
ones.dtype

torch.float32

### Creating a range of tensors and tensors-like

In [23]:
# Use torch.arange(): it is torch.range() that is deprecated (not Python's built-in range())
# NOTE: despite its name, `one_to_ten` holds 13 values running from 1 to 925 in steps of 77
one_to_ten = torch.arange(start=1, end=1000, step=77)
one_to_ten

tensor([  1,  78, 155, 232, 309, 386, 463, 540, 617, 694, 771, 848, 925])

In [24]:
# Create a tensor of zeros that mirrors another tensor: the output has the same
# length (13, not 10) and the same integer type as `one_to_ten`
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

**Note**: A tensor operation might fail because of one of these common errors:
1. Tensors not the right datatype
2. Tensors not the right shape
3. Tensors not on the right device

A more precise datatype takes more memory and is slower in computing time.

In [25]:
# Float32 tensor: with dtype=None the dtype is inferred from the data, so a list of
# Python floats ends up as the default floating-point type (float32), not "always float"

float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # which datatype the tensor stores (None = infer from the data)
                               device=None, # which device the tensor lives on (None = use the default)
                               requires_grad=False) # whether to track gradients for operations on this tensor
float_32_tensor

tensor([3., 6., 9.])

In [26]:
# Cast a copy to 16-bit floats
float_16_tensor = float_32_tensor.type(dtype=torch.float16)

# Multiplying a float16 tensor by a float32 tensor does not raise here (see output)
float_16_tensor*float_32_tensor

tensor([ 9., 36., 81.])

In [27]:
# Cast a copy to 32-bit integers
int_32_tensor = float_32_tensor.type(dtype=torch.int32)

# Mixing int32 and float32 also works; the printed result holds floating-point values
int_32_tensor*float_32_tensor

tensor([ 9., 36., 81.])

### Manipulating Tensor (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication
* Division
* Matrix Multiplication

In [28]:
# Create a tensor and add 10 to every element
tensor = torch.tensor([1, 2, 3])
print(tensor + 10)
# Multiply every element of the tensor by 10
print(tensor * 10)
# The same goes for subtraction and division

tensor([11, 12, 13])
tensor([10, 20, 30])


#### Matrix multiplication

Two main ways of performing multiplication in neural networks and DL:

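1. Element-wise multiplication: multiply each element of one tensor by the element at the same position in the other tensor.
2. Matrix multiplication: take the dot product between the rows of the first matrix and the columns of the second.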

In [29]:
# Element-wise multiplication: each value is multiplied by the value at the same
# position, so multiplying the tensor by itself squares every element
tensor = torch.rand(size=(1, 3))
print(f"{tensor} * {tensor}")
print(f"Equals: {tensor*tensor}")

tensor([[0.6984, 0.2641, 0.5343]]) * tensor([[0.6984, 0.2641, 0.5343]])
Equals: tensor([[0.4878, 0.0698, 0.2854]])


In [30]:
# The "T" attribute transposes the tensor: its shape goes from (1, 3) to (3, 1),
# so the inner dimensions match and the matrix multiplication is valid
torch.matmul(tensor, tensor.T)

tensor([[0.8430]])

In [31]:
# torch.mm gives the same result as torch.matmul for these two 2-D tensors (see output)
torch.mm(tensor, tensor.T)

tensor([[0.8430]])

### Aggregation operations with tensors

* min: returns the smallest value of the tensor
* max: returns the greatest value of the tensor
* mean: returns the mean of the values of the tensor
* sum: adds up all the values of the tensor

In [32]:
# Reduce the whole tensor to a single value in four different ways
print(f"The min of the tensor is: {tensor.min()}")
print(f"The max of the tensor is: {tensor.max()}")
print(f"The mean of the tensor is: {tensor.mean()}")
print(f"The sum of the tensor is: {tensor.sum()}")

The min of the tensor is: 0.2641461491584778
The max of the tensor is: 0.6983973383903503
The mean of the tensor is: 0.4989376962184906
The sum of the tensor is: 1.4968130588531494


### Find the positional min and max of the tensor
* argmin: returns the index (position) of the smallest value in the tensor
* argmax: returns the index (position) of the greatest value in the tensor

In [33]:
# argmin/argmax report where the smallest/greatest value sits (its index), not the value itself
print(f"The position of the smallest argument is: {tensor.argmin()}")
print(f"The position of the greatest argument is: {tensor.argmax()}")

The position of the smallest argument is: 1
The position of the greatest argument is: 0


### Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on the top of each other (vstack) or side by side (hstack)
* Squeeze - remove all dimensions of size `1` from a tensor
* Unsqueeze - add a dimension of size `1` to a target tensor
* Permute - return a view of the input with its dimensions permuted (swapped) in a certain way

In [34]:
# Nine floating-point values, 1 through 9, in a one-dimensional tensor
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [35]:
# Add an extra leading dimension: shape (9,) -> (1, 9)
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape 

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [36]:
# Add an extra trailing dimension instead: shape (9,) -> (9, 1), i.e. a single column
x_reshaped = x.reshape(9, 1)
x_reshaped, x_reshaped.shape 

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [37]:
# Rebuild x with ten values so that it can be reshaped into 5 rows of 2
# (the element count must stay the same: 5 * 2 == 10)
x = torch.arange(1., 11.)
x_reshaped = x.reshape(5, 2)
x_reshaped, x_reshaped.shape 

(tensor([[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.],
         [ 7.,  8.],
         [ 9., 10.]]),
 torch.Size([5, 2]))

In [38]:
# Create a view with a different shape; z shares the same memory as x
z = x.view(1, 10)
z, z.shape

(tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [39]:
# Changing z also changes x, because a view shares its memory with the original tensor.
# NOTE: this permanently overwrites x[0] with 5, so later cells see the modified x.
z[:, 0] = 5
z, x

(tensor([[ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 tensor([ 5.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]))

In [40]:
# Stack three copies of x along a new dimension 1: each copy becomes a column,
# side by side, giving the shape (10, 3) shown in the output
x_stacked = torch.stack([x, x, x], dim=1)
x_stacked

tensor([[ 5.,  5.,  5.],
        [ 2.,  2.,  2.],
        [ 3.,  3.,  3.],
        [ 4.,  4.,  4.],
        [ 5.,  5.,  5.],
        [ 6.,  6.,  6.],
        [ 7.,  7.,  7.],
        [ 8.,  8.,  8.],
        [ 9.,  9.,  9.],
        [10., 10., 10.]])

In [41]:
# Build a small feature matrix and a matching column of labels, then join them
# horizontally so that every row holds its features followed by its label.
x_val = torch.tensor([
    [1, 2, 4],
    [4, 5, 6],
    [7, 8, 9],
])
Y_val = torch.tensor([[1], [2], [3]])

dataset = torch.hstack((x_val, Y_val))
dataset

tensor([[1, 2, 4, 1],
        [4, 5, 6, 2],
        [7, 8, 9, 3]])

In [42]:
# Same features as before, but the labels are now a flat 1-D tensor of length 3
x_val = torch.tensor([[1, 2, 4], [4, 5, 6], [7, 8, 9]])
Y_val = torch.tensor([1, 2, 3])

# vstack appends Y_val as one extra row beneath x_val (see output)
dataset = torch.vstack([x_val, Y_val])
dataset

tensor([[1, 2, 4],
        [4, 5, 6],
        [7, 8, 9],
        [1, 2, 3]])

In [43]:
# A tensor with two size-1 dimensions wrapped around eight values: shape (1, 1, 8)
x_2squeeze = torch.tensor([[[1, 2, 3, 4, 5, 6, 7, 8]]])
print(f"Previous tensor: {x_2squeeze}")
print(f"Tensor shape: {x_2squeeze.shape}")

Previous tensor: tensor([[[1, 2, 3, 4, 5, 6, 7, 8]]])
Tensor shape: torch.Size([1, 1, 8])


In [44]:
# squeeze() with no arguments drops every size-1 dimension: (1, 1, 8) -> (8,)
x_squeezed = x_2squeeze.squeeze() 
print(f"Squeezed tensor: {x_squeezed}")
print(f"Tensor shape: {x_squeezed.shape}")

Squeezed tensor: tensor([1, 2, 3, 4, 5, 6, 7, 8])
Tensor shape: torch.Size([8])


In [51]:
# Now we work back towards the original (1, 1, 8) tensor, starting from the squeezed one
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"Unsqueezed tensor: {x_unsqueezed}")
print(f"Tensor shape: {x_unsqueezed.shape}")

# One dimension is still missing, so we unsqueeze again. Each unsqueeze(dim=0) call inserts
# a new size-1 dimension at position 0 (the front of the shape): (8,) -> (1, 8) -> (1, 1, 8).

x_unsqueezed = x_unsqueezed.unsqueeze(dim=0)
print(f"Unsqueezed tensor: {x_unsqueezed}")
print(f"Unsqueezed tensor shape: {x_unsqueezed.shape}")

Unsqueezed tensor: tensor([[1, 2, 3, 4, 5, 6, 7, 8]])
Tensor shape: torch.Size([1, 8])
Unsqueezed tensor: tensor([[[1, 2, 3, 4, 5, 6, 7, 8]]])
Unsqueezed tensor shape: torch.Size([1, 1, 8])


In [52]:
# Permute: rearrange the dimensions of the tensor in a specified order
x_original = torch.rand(size=(224, 224, 3))
x_permuted = x_original.permute(2, 0, 1) # old dim 2 moves to the front: 0 -> 1, 1 -> 2, 2 -> 0

print(f"Non-permuted tensor shape: {x_original.shape}")
print(f"Permuted tensor shape: {x_permuted.shape}")

Non-permuted tensor shape: torch.Size([224, 224, 3])
Permuted tensor shape: torch.Size([3, 224, 224])


### Indexing (selecting data from tensors)

- What if we want to select specific coordinates from the tensor i.e the 1st row or the 2nd column? We need indexing!

In [54]:
# Integers 1..9 laid out as a single 3x3 matrix inside an outer dimension: shape (1, 3, 3)
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

**NOTE**: Indexing values goes from outer dimensions to inner ones.

In [55]:
# Each extra [0] steps one dimension further in: whole matrix -> first row -> first element
print(f"Let's print the 1st bracket: {x[0]}")
print(f"Let's print the 2nd bracket: {x[0][0]}")
print(f"Let's print the 3rd bracket: {x[0][0][0]}")

Let's print the 1st bracket: tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Let's print the 2nd bracket: tensor([1, 2, 3])
Let's print the 3rd bracket: 1


As we can see, when we index the first dimension, the other two remain. Once we fix the first index with `x[0]`, we get the 3x3 matrix that holds three rows of three values each. Indexing again with `x[0][0]` selects the first of those rows, and indexing once more with `x[0][0][0]` selects the first element of that row, giving us the value 1.

In [59]:
# We can also index with slices; ":" means "take everything along this dimension"

# x[:, 0]    -> index 0 of dimension 1: the first row, [[1, 2, 3]]
# x[:, :, 1] -> index 1 of the last dimension: the second column, [[2, 5, 8]]
# x[:, 1, 1] -> row index 1, column index 1: the centre value, [5]
print(f"Getting the first list of the tensor: {x[:, 0]}")
print(f"Getting all the values from the 2nd tensor {x[:, :, 1]}")
print(f"Getting the 2nd value from the 2nd tensor {x[:, 1, 1]}")

Getting the first list of the tensor: tensor([[1, 2, 3]])
Getting all the values from the 2nd tensor tensor([[2, 5, 8]])
Getting the 2nd value from the 2nd tensor tensor([5])


### PyTorch tensors & NumPy

As NumPy is so popular, PyTorch has functionality to interact with it.

* Data in NumPy, want it in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [60]:
# NumPy array -> PyTorch tensor
import torch
import numpy as np

# The array holds float64 values, and the resulting tensor keeps that dtype
# (the output shows dtype=torch.float64 rather than float32)
array = np.arange(1., 8.)
tensor = torch.from_numpy(array)

array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [61]:
# PyTorch tensor -> NumPy array; the array keeps the tensor's float32 dtype (see output)
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility

Neural networks are initialized by assigning random values to the model's parameters. Now, what if we want to reproduce results? We need reproducibility.

To do this, we need to fix the seed of the randomness. The seed is the starting value from which the random numbers are generated (computers are deterministic, so pseudo-random is the more accurate term). Normally the seed changes from run to run, but if it is fixed, the values produced are always the same.

Let's see some code:

In [62]:
# Draw two independent (3, 4) random tensors without fixing any seed
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

# Show both tensors, each followed by a blank line, then compare them element by element
print("Tensor A:", random_tensor_A, "", sep="\n")
print("Tensor B:", random_tensor_B, "", sep="\n")
print("Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.2070, 0.5792, 0.5622, 0.9885],
        [0.9637, 0.6667, 0.2693, 0.0045],
        [0.4841, 0.4158, 0.5762, 0.0939]])

Tensor B:
tensor([[0.3645, 0.9921, 0.8637, 0.4291],
        [0.7247, 0.9274, 0.9582, 0.2932],
        [0.6865, 0.2334, 0.6502, 0.3221]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

As expected, the values of the two random tensors are different. This is where `torch.manual_seed(seed)` comes into play: we fix the seed to some value, and then we generate the tensors:

In [63]:
# Set the random seed so that the pseudo-random sequence is reproducible
RANDOM_SEED = 42  # try changing this to different values and see what happens to the numbers below
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

# The seed has to be reset before every rand() call whose output should repeat.
# Without this, tensor_D would continue the sequence and differ from tensor_C.
# (Same function as above: both seeding calls now use the one spelling, torch.manual_seed.)
torch.manual_seed(seed=RANDOM_SEED)  # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print("Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

Now, with the seed fixed, we have two equal tensors.