<a href="https://colab.research.google.com/github/GiX7000/deep-learning-with-pytorch/blob/main/learn-pytorch-for-deep-learning-in-a-day/Part1_pytorch_fundamentals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook is the first of a series of notebooks introducing deep learning with Pytorch, following [Daniel's](https://github.com/mrdbourke/pytorch-deep-learning) tutorial. The groundtruth for everything in Pytorch is here: https://pytorch.org/.

## Introduction to Tensors.

### Creating tensors.

In [None]:
import torch 
print(torch.__version__)

1.13.0+cu116


In [None]:
# scalar
scalar = torch.tensor(7)
print(scalar)

# get the dimensions of a tensor
print(scalar.ndim)

# get the tensor back as python int
print(scalar.item())

tensor(7)
0
7


In [None]:
# vector
vector = torch.tensor([7, 7])
print(vector)

print(vector.ndim) # think ndim as the number of pairs of square brackets []

# get the total number of elements of a vector
print(vector.shape) 

tensor([7, 7])
1
torch.Size([2])


In [None]:
# MATRIX
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])

print(MATRIX)
print(MATRIX.ndim) # 2 pairs of [] here

#get the shape of the matrix
print(MATRIX.shape)

# get what we have in the first dimension
print(MATRIX[0])

# get what we have in the second dimension
print(MATRIX[1])

tensor([[ 7,  8],
        [ 9, 10]])
2
torch.Size([2, 2])
tensor([7, 8])
tensor([ 9, 10])


In [None]:
# TENSOR
TENSOR = torch.tensor([[[1, 2, 3, 4],
                        [10, 11, 12, 13],
                        [14, 15, 16, 17]]])
print(TENSOR)
print(TENSOR.ndim)  # 3 pairs of [] here

#get the shape of the tensor
print(TENSOR.shape)

# get what we have in the first dimension
print(TENSOR[0]) # one 3x4 matrix(defined by outer [])

tensor([[[ 1,  2,  3,  4],
         [10, 11, 12, 13],
         [14, 15, 16, 17]]])
3
torch.Size([1, 3, 4])
tensor([[ 1,  2,  3,  4],
        [10, 11, 12, 13],
        [14, 15, 16, 17]])


In [None]:
# TENSOR 2
TENSOR2 = torch.tensor([ [[1, 2, 3, 4],
                          [10, 11, 12, 13],
                          [14, 15, 16, 17]],
                        
                          [[1, 2, 3, 4],
                          [0, 1, 2, 3],
                          [14, 15, 16, 17]]
                        
                                              
                        ])
print(TENSOR2)
print(TENSOR2.ndim)  # 3 pairs of [] here

#get the shape of the tensor
print(TENSOR2.shape)

# get what we have in the first dimension
print(TENSOR2[0]) # the first 3x4 matrix
print(TENSOR2[1]) # the second 3x4 matrix

tensor([[[ 1,  2,  3,  4],
         [10, 11, 12, 13],
         [14, 15, 16, 17]],

        [[ 1,  2,  3,  4],
         [ 0,  1,  2,  3],
         [14, 15, 16, 17]]])
3
torch.Size([2, 3, 4])
tensor([[ 1,  2,  3,  4],
        [10, 11, 12, 13],
        [14, 15, 16, 17]])
tensor([[ 1,  2,  3,  4],
        [ 0,  1,  2,  3],
        [14, 15, 16, 17]])


In [None]:
# get the shape of the first element of the first dimension
print(TENSOR2[0].shape)

# get the last down right element
print(TENSOR2[1][2][3].item())

torch.Size([3, 4])
17


### Random tensors.



They are important because the way many neural networks learn is that they start with tensors full of random numbers and they adjust those random numbers to better represent the data.

In [None]:
# create a random tensor of 3x4
random_tensor = torch.rand(3, 4)
print(random_tensor)

print(random_tensor.ndim)

tensor([[0.8027, 0.8421, 0.1014, 0.2576],
        [0.0975, 0.4812, 0.7226, 0.4276],
        [0.3530, 0.8394, 0.6771, 0.3653]])
2


In [None]:
random_tensor2 = torch.rand(2, 3, 4)
print(random_tensor2)

print(random_tensor2.ndim)

tensor([[[0.7233, 0.2361, 0.4983, 0.4425],
         [0.5190, 0.3832, 0.1927, 0.1821],
         [0.8219, 0.8055, 0.9892, 0.7574]],

        [[0.3381, 0.9666, 0.5234, 0.2230],
         [0.6783, 0.9407, 0.9849, 0.8303],
         [0.2158, 0.6144, 0.4595, 0.6001]]])
3


In [None]:
# create a random tensor with similar shape to an image shape
random_image_size_tensor = torch.rand(size=(224, 224, 3)) # height, width, colour channels(R, G, B)
print(random_image_size_tensor)

# get the dimension and the shape of the tensor
print(random_image_size_tensor.ndim)
print(random_image_size_tensor.shape)

tensor([[[0.9260, 0.6969, 0.6044],
         [0.7244, 0.1014, 0.5450],
         [0.6907, 0.7149, 0.8383],
         ...,
         [0.4287, 0.1588, 0.3174],
         [0.2687, 0.5764, 0.0609],
         [0.1395, 0.4178, 0.5278]],

        [[0.8642, 0.7616, 0.0406],
         [0.1864, 0.1088, 0.5755],
         [0.6925, 0.6071, 0.6993],
         ...,
         [0.0192, 0.4841, 0.1221],
         [0.4892, 0.4463, 0.3491],
         [0.4247, 0.9103, 0.6918]],

        [[0.1622, 0.8785, 0.1020],
         [0.8696, 0.2062, 0.2962],
         [0.2343, 0.7535, 0.0211],
         ...,
         [0.3757, 0.2316, 0.9493],
         [0.2751, 0.3816, 0.5613],
         [0.7411, 0.2636, 0.2588]],

        ...,

        [[0.9148, 0.5333, 0.2292],
         [0.8363, 0.2439, 0.9148],
         [0.1019, 0.6728, 0.2030],
         ...,
         [0.3838, 0.8605, 0.7658],
         [0.9580, 0.8427, 0.4478],
         [0.8827, 0.5180, 0.2561]],

        [[0.7455, 0.9106, 0.4406],
         [0.7405, 0.0289, 0.4540],
         [0.

### Zeros and Ones tensors.

We use them to create some forms of masks.

In [None]:
# create a tensor of all zeros
zeros = torch.zeros(size=(3,4))
print(zeros, zeros.dtype)

# create a tensor of all ones
ones = torch.ones(size=(2, 3, 4))
print(ones, ones.dtype)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]) torch.float32
tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]]) torch.float32


### Create a range of tensors and tensors-like.

In [None]:
torch.range(0, 5)

  torch.range(0, 5)


tensor([0., 1., 2., 3., 4., 5.])

In [None]:
# instead of torch.range, we use torch.arange after we got the above message
one_to_five = torch.arange(1, 5) 
one_to_five

tensor([1, 2, 3, 4])

In [None]:
# we can also define the step
one_to_thousand = torch.arange(start=0, end=1000, step=100)
one_to_thousand

tensor([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

In [None]:
# creating tensors like: when we want to create tensors with the same shape like another tensor's shape
five_zeros = torch.zeros_like(one_to_five)
five_zeros

tensor([0, 0, 0, 0])

In [None]:
one_zeros = torch.ones_like(one_to_five)
one_zeros

tensor([1, 1, 1, 1])

### Tensor datatypes.

In [None]:
# float 32 tensor: the default datatype of tensors
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=None)
print(float_32_tensor)
print(float_32_tensor.dtype)

tensor([3., 6., 9.])
torch.float32


In [None]:
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)
print(float_16_tensor)
print(float_16_tensor.dtype)

tensor([3., 6., 9.], dtype=torch.float16)
torch.float16


Why to use datatype float 16 instead of default float 36?

Because we can sacrifice/reduce the preciasion('detail') of the input in order to become faster in computing.

3 big errors while we working with pytorch and deep learning: 

1) tensors have not the right datatype, 

2) tensors have not the right shape and 

3) tensors are not on the right device.

In [None]:
# 3 of the most important parameters when we create tensors: dtype, device and requires_grad
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=None, # what datatype is the tensor
                                                device=None,  # what device is your tensor on. default is cpu. 
                                                # device='cuda',  # enable gpu. operations between tensors must run on the same device
                                                requires_grad=False)  # whether or not to track gradients
print(float_32_tensor)
print(float_32_tensor.dtype)

tensor([3., 6., 9.])
torch.float32


In [None]:
# create a tensor with a different data type
float_16_tensor_2 = float_32_tensor.type(torch.half)
float_16_tensor_2

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
# let's multiple tensors of different data type
c = float_16_tensor_2 * float_32_tensor
print(c)

# the datatype of the result of multiplication of tensors of different data types
print(c.dtype)

tensor([ 9., 36., 81.])
torch.float32


### Getting information from tensors.

Data type, shape and the device on which is running a tensor are the most important attributes of a tensor.

But, we said that we can't do operations with tensors of different data types! 

In [None]:
# let's try multiple tensors with int, float data types
int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)  # replace int32 with: int64, long
print(int_32_tensor)

d = float_32_tensor * int_32_tensor
print(d, d.dtype)

tensor([3, 6, 9], dtype=torch.int32)
tensor([ 9., 36., 81.]) torch.float32


In [None]:
# create a tensor
some_tensor = torch.rand((3, 4), dtype=torch.float16) # replace float16 with int16 !!

# find out details about it - tensor attributes!
print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Shape of tensor using the size funnction: {some_tensor.size()}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}") # will default to CPU

tensor([[0.6226, 0.3965, 0.4126, 0.3257],
        [0.8613, 0.4858, 0.1504, 0.8657],
        [0.0386, 0.0532, 0.8345, 0.9546]], dtype=torch.float16)
Shape of tensor: torch.Size([3, 4])
Shape of tensor using the size funnction: torch.Size([3, 4])
Datatype of tensor: torch.float16
Device tensor is stored on: cpu


### Manipulating Tensors.

Tensor operations: addition, subtraction, element-wise multiplication, division and matrix multiplication.

In [None]:
 # create a tensor
 tensor = torch.tensor([1, 2 , 3])

 # add a scalar to a tensor
 print(tensor + 10)

 # multiple a scalar to a tensor
 print(tensor * 10)

 # subtract a scalar from a tensor
 print(tensor - 10)

tensor([11, 12, 13])
tensor([10, 20, 30])
tensor([-9, -8, -7])


In [None]:
# build-in functions 

# multiplication
print(torch.mul(tensor, 10))

# addition
print(torch.add(tensor, 10))

# subtraction
print(torch.sub(tensor, 10))

tensor([10, 20, 30])
tensor([11, 12, 13])
tensor([-9, -8, -7])


In [None]:
# element wise multiplication 1st way
print(tensor, "*", tensor, "equals: ", tensor * tensor )

# element wise multiplication 2nd way
print(tensor, "*", tensor, "equals: ", torch.mul(tensor, tensor) )

tensor([1, 2, 3]) * tensor([1, 2, 3]) equals:  tensor([1, 4, 9])
tensor([1, 2, 3]) * tensor([1, 2, 3]) equals:  tensor([1, 4, 9])


In [None]:
# matrix multiplication using a torch method
%%time
print(torch.matmul(tensor, tensor))

tensor(14)
CPU times: user 2.88 ms, sys: 0 ns, total: 2.88 ms
Wall time: 2.78 ms


In [None]:
# matrix multiplication using a for-loop and compare computing time with the above  
%%time
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]  # "tensor is not callable" as an error if you put () instead of [] 
print(value)

tensor(14)
CPU times: user 3.09 ms, sys: 4 µs, total: 3.09 ms
Wall time: 3.11 ms


In [None]:
# matmul is also equivalent with @
print(tensor @ tensor )

tensor(14)


Matrix multiplication rules: 

1) the inner dimensions must match, eg (3,2) @ (2,3).

2) the resulting matrix has the shape of the outer dimensions, eg for a (3,2) @ (2,3), the resulting shape will be (3,3).

In [None]:
# a right matrix multiplication
c = torch.matmul(torch.rand(4,3) ,torch.rand(3,4) )
print(c)
print(c.shape)

tensor([[0.5924, 0.3160, 0.7056, 0.4596],
        [1.0213, 0.5807, 0.3533, 0.7266],
        [0.9731, 0.6465, 0.9755, 0.6812],
        [0.9735, 0.7013, 0.9302, 0.6517]])
torch.Size([4, 4])


In [None]:
# a right matrix multiplication2
c = torch.matmul(torch.rand(4,10000) ,torch.rand(10000,4) )
print(c)
print(c.shape)

tensor([[2512.8240, 2529.8667, 2486.0686, 2552.2571],
        [2490.3604, 2501.7554, 2452.2800, 2499.5657],
        [2515.2690, 2530.7410, 2478.6091, 2513.1550],
        [2510.8469, 2527.5989, 2477.4954, 2528.8931]])
torch.Size([4, 4])


In [None]:
# Shapes need to be in the right way when matrix multiplication
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11], 
                         [9, 12]], dtype=torch.float32)

torch.matmul(tensor_A, tensor_B) # (this will error because we have a multiplication between two (3,2) matrices)

RuntimeError: ignored

To fix our tensor shape issues, we can manipulate the shape of one of our tensors using a transpose. A transpose switches the axes or dimensions of a given tensor.

In [None]:
print(tensor_B)
print(tensor_B.shape)

# transpose of B tensor
print(tensor_B.T)
print(tensor_B.T.shape)

tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])
torch.Size([3, 2])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])
torch.Size([2, 3])


In [None]:
# The operation works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
# torch.mm is a shortcut for matmul
output = torch.mm(tensor_A, tensor_B.T)
print(output) 
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


### Finding the min, max, mean, sum, etc (tensor aggregation).

In [None]:
x = torch.arange(0, 100, 10)
print(x)

# find the min/max
print(torch.min(x), x.min())
print(torch.max(x), x.max().item())

# find the mean
print(torch.mean(x))

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
tensor(0) tensor(0)
tensor(90) 90


RuntimeError: ignored

In [None]:
# let's see what is the data type of x
print(x.dtype)

# due to the error message, we need to change the data type to float32 as it is a requirement for using mean()
print(torch.mean(x.type(torch.float32)))


torch.int64
tensor(45.)


In [None]:
# find the sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

In [None]:
# finding the positional min, the index of minimum value occured in a tensor
print(x.argmin(), torch.argmin(x), x.argmin().item())

# find the positional max
print(x.argmax(), torch.argmax(x), x.argmax().item())

tensor(0) tensor(0) 0
tensor(9) tensor(9) 9


### Reshaping, stacking, squeezing and unsqueezing tensors.

Here, we will see some ways to change tensor's shape or dimensions.

In [None]:
# reshape an input tensor to a defined shape
x = torch.arange(1., 10.)
print(x, x.shape)

# add an extra dimension(an extra [] pair to tensor)
x_reshaped = x.reshape(1, 9)
print(x_reshaped, x_reshaped.shape)

x_reshaped2 = x.reshape(9, 1)
print(x_reshaped2, x_reshaped2.shape)

# this will produce an error because we want to add a dimension without any new elements
# try changing the (1,9)
x_reshaped3 = x.reshape(9, 2)
print(x_reshaped3, x_reshaped3.shape)

tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]) torch.Size([9])
tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]) torch.Size([1, 9])
tensor([[1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]]) torch.Size([9, 1])


RuntimeError: ignored

In [None]:
# when reshaping, we can only change the dimensions for the specific number of elements we have
x2 = torch.arange(1., 11.)  # now we have 10 elements
print(x2, x2.shape)

x2_reshaped = x2.reshape(5, 2)
print(x2_reshaped, x2_reshaped.shape)

x3 = torch.arange(1., 13.)  # now we have 12 elements
print(x3, x3.shape)

x3_reshaped = x3.reshape(4, 3)
print(x3_reshaped, x3_reshaped.shape)

tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]) torch.Size([10])
tensor([[ 1.,  2.],
        [ 3.,  4.],
        [ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]]) torch.Size([5, 2])
tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]) torch.Size([12])
tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]]) torch.Size([4, 3])


In [None]:
# view the input tensor of a certain shape but keep the same memory as the original tensor
z = x.view(1, 9)  # z is a different view of x, bt has the same memory
print(z, z.shape)

# changing z changes x (because a view of a tensor shares the same memory as the original input)
z[:, 0] = 5 # just change the first element of z
print(z, x)

# and if we change an element of x, the same happens to z, because z is a view of x !!
x[1] = 10
print(x, z)

# but if we change an element of x_reshaped, the same happens!
x_reshaped[:, 3] = 30
print(x_reshaped, x)

tensor([[ 5., 10.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]) torch.Size([1, 9])
tensor([[ 5., 10.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]) tensor([ 5., 10.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])
tensor([ 5., 10.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]) tensor([[ 5., 10.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]])
tensor([[ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.]]) tensor([ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.])


In [None]:
# stacking: combine multiple tensors on top of each other(vstack) or side by side(hstack)
x_stacked = torch.stack([x, x, x, x], dim=0)
print(x_stacked)

# if we change dim to 1
x_stacked = torch.stack([x, x, x, x], dim=1)
print(x_stacked)

tensor([[ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.],
        [ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.],
        [ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.],
        [ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.]])
tensor([[ 5.,  5.,  5.,  5.],
        [10., 10., 10., 10.],
        [ 3.,  3.,  3.,  3.],
        [30., 30., 30., 30.],
        [ 5.,  5.,  5.,  5.],
        [ 6.,  6.,  6.,  6.],
        [ 7.,  7.,  7.,  7.],
        [ 8.,  8.,  8.,  8.],
        [ 9.,  9.,  9.,  9.]])


In [None]:
# squeezing: remove all single dimensions from a target tensor
print(f"Previous tensor: {x_reshaped}")
print(f"Previous tensor shape: {x_reshaped.shape}")

x_squeezed = x_reshaped.squeeze() # notice we have 1 [] pair less

print(f"\nNew tensor: {x_squeezed}") 
print(f"New tensor's shape: {x_squeezed.shape}")

#torch.squeeze

Previous tensor: tensor([[ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.]])
Previous tensor shape: torch.Size([1, 9])

New tensor: tensor([ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.])
New tensor's shape: torch.Size([9])


In [None]:
# unsqueezing: add a single dimension to a target tensor
print(f"Previous target: {x_squeezed}")
print(f"Previous target's shape: {x_squeezed.shape}")

x_unsqueezed = x_squeezed.unsqueeze(dim=0) # notice we have a [] pair more

print(f"\nNew tensor: {x_unsqueezed}")
print(f"New tensor's shape: {x_unsqueezed.shape}")

# set dim=1 here
x_unsqueezed2 = x_squeezed.unsqueeze(dim=1)
print(f"\nNew tensor: {x_unsqueezed2}")
print(f"New tensor's shape: {x_unsqueezed2.shape}")

Previous target: tensor([ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.])
Previous target's shape: torch.Size([9])

New tensor: tensor([[ 5., 10.,  3., 30.,  5.,  6.,  7.,  8.,  9.]])
New tensor's shape: torch.Size([1, 9])

New tensor: tensor([[ 5.],
        [10.],
        [ 3.],
        [30.],
        [ 5.],
        [ 6.],
        [ 7.],
        [ 8.],
        [ 9.]])
New tensor's shape: torch.Size([9, 1])


In [None]:
# permute: rearrange the dimensions of a target in a specified order(a different view of the target)
x_original = torch.rand(size=(224, 224, 3))
print(f"Previous shape: {x_original.shape}")

# permute the original tensor
x_permuted = x_original.permute(2, 1, 0)
print(f"New shape: {x_permuted.shape}")

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [None]:
# let's change a value in x_original and see if this also changes in x_permuted
x_original[0, 0, 0] = 21
print(x_permuted[0, 0, 0])  
# if we change the value of an element in x_original, this also changes in x_permuted, as x_permuted is a view of x_original=> has the same memory 

tensor(21.)


### Indexing: selecting data from tensors.

In [None]:
# create a tensor
x = torch.arange(1, 10).reshape(1, 3, 3)
print(x, x.shape)

# index on the first dimension
print(x[0])

# index on the middle dimension
print(x[0][0])

# index on the last dimension
print(x[0][0][1])
print(x[0][1][1]) # try 1 instead of 0 here

# get number 9
print(x[0][2][2].item())

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]]) torch.Size([1, 3, 3])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([1, 2, 3])
tensor(2)
tensor(5)
9


In [None]:
# use : to select all of a target dimension

# get all values of the first row
print(x[:, 0])

# get all values of 0th and 1st dimensions but only index 1 of the 2nd dim
print(x[:, :, 1])

# get all values of the 0th dim but only the first index of the 1st and 2nd dims
print(x[:, 1, 1]) # see the difference in output x[0][1][1]

# get index 0 of 0th and 1st dims and all values of the 2nd dim
print(x[0, 0, :]) 

# return 3,6,9
print(x[0, :, 2])

tensor([[1, 2, 3]])
tensor([[2, 5, 8]])
tensor([5])
tensor([1, 2, 3])
tensor([3, 6, 9])


### Pytorch tensors and Numpy.

In [None]:
# numpy array to tensor
import numpy as np

array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
# notice that the datatype is float64 here by default when using from_numpy
tensor = torch.from_numpy(array).type(torch.float32)
print(tensor.dtype)

torch.float32


In [None]:
# if we change the value of the array, what happens to the tensor?
array = array + 10
array, tensor

(array([11., 12., 13., 14., 15., 16., 17.]),
 tensor([1., 2., 3., 4., 5., 6., 7.]))

In [None]:
# tensor to numpy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [None]:
# if we change the tensor, what happens to the array?
tensor = tensor * 10
tensor, numpy_tensor

(tensor([1000., 1000., 1000., 1000., 1000., 1000., 1000.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility: trying to take the random out of random.

To reduce the randomness in neural networks and Pytorch comes the concept of a random seed. What a random seed does is "flavour" the randomness.

In [5]:
import torch

# create random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)
print(random_tensor_A)
print(random_tensor_B)
# compare the tensors
print(random_tensor_A == random_tensor_B)

tensor([[0.2802, 0.2360, 0.4059, 0.7437],
        [0.8550, 0.9044, 0.3369, 0.4495],
        [0.3740, 0.8168, 0.5070, 0.4200]])
tensor([[0.3593, 0.0939, 0.5315, 0.4974],
        [0.7778, 0.4926, 0.0943, 0.7226],
        [0.2670, 0.8699, 0.9745, 0.4452]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [8]:
# let's make some random but reproducible tensors

# set random seed and see below how to use it
RANDOM_SEED = 42  # different flavours of randomness

torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

# random seed makes the above random numbers reproducable
torch.manual_seed(RANDOM_SEED)  
random_tensor_D = torch.rand(3, 4)

print(random_tensor_C)
print(random_tensor_D)
# compare the tensors
print(random_tensor_C == random_tensor_D)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


### Running on GPUs.

GPUs = faster computation on numbers, thanks to cuda + NVIDIA hardware + Pytorch working behind the scenes.

Getting a GPU:

1. Use Google Colab.

2. Use your own GPU.

3. Use cloud computing(GCP, AWS, Azure, Alibaba)

For 2, 3 we need to do some settings. More on Pytorch documentation.

The first way is the easiest, as all the connections between gpu and Pytorch are already set up for us.

In [1]:
# set GPU as runtime
!nvidia-smi

Tue Jan 10 12:20:47 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   70C    P0    32W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [1]:
# check for gpu access with Pytorch
import torch
torch.cuda.is_available()

True

For Pytorch since it's capable of running compute on the GPU or CPU, it's best practice to setup agnostic code.

In [2]:
# setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

If you have to run many models and also have many gpus, it is a good strategy to run one model on one gpu, the other model on the other gpu, ...

In [5]:
# count number of devices
torch.cuda.device_count()

1

### Putting tensors and models on GPUs -> faster computations.

We have set runtime as GPU.

In [3]:
# create a tensor (default on CPU)
import torch
tensor = torch.tensor([1, 2, 3], device="cpu")

# tensor not on GPU
print(tensor, tensor.device)

tensor2 = torch.tensor([4, 5, 3])

# tensor not on GPU
print(tensor2, tensor2.device)

tensor([1, 2, 3]) cpu
tensor([4, 5, 3]) cpu


In [4]:
# move tensor to GPU if available
# device enables gpu, if available. see above how is defined
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

If a tensor is on GPU, can't transform it to numpy, as numpy doesn't work on GPUs.

This is one of the 3 most important issues in Pytorch: device errors.

In [5]:
tensor_on_gpu.numpy()

TypeError: ignored

In [7]:
# move tensor back to cpu 
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

In [8]:
# and, of course, tensor on gpu remains unchanged
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

## Exercises.

Solutions to these [exercises](https://github.com/mrdbourke/pytorch-deep-learning/blob/main/extras/exercises/00_pytorch_fundamentals_exercises.ipynb).

1. Create a random tensor with shape (7, 7)

In [10]:
import torch
random_tensor = torch.rand(7, 7)
random_tensor, random_tensor.shape

(tensor([[0.0251, 0.6630, 0.8773, 0.7993, 0.6948, 0.0811, 0.7473],
         [0.6359, 0.7707, 0.8754, 0.4303, 0.2250, 0.5583, 0.4704],
         [0.8060, 0.2025, 0.1594, 0.1986, 0.7511, 0.4720, 0.1603],
         [0.2936, 0.8581, 0.9388, 0.9392, 0.2446, 0.5322, 0.8970],
         [0.5316, 0.4789, 0.5509, 0.3647, 0.1375, 0.5066, 0.5260],
         [0.6056, 0.0126, 0.2266, 0.3725, 0.9370, 0.6815, 0.8122],
         [0.5350, 0.4653, 0.6274, 0.8224, 0.3070, 0.2177, 0.8336]]),
 torch.Size([7, 7]))

2. Perform a matrix multiplication on the above tensor with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).

In [11]:
random_tensor2 = torch.rand(1, 7)
random_tensor2, random_tensor2.shape

(tensor([[0.0263, 0.0517, 0.4308, 0.3235, 0.4139, 0.8976, 0.1298]]),
 torch.Size([1, 7]))

In [12]:
tensor = torch.matmul(random_tensor, random_tensor2.T)
tensor, tensor.shape

(tensor([[1.1288],
         [1.2282],
         [0.9199],
         [1.4556],
         [0.9739],
         [1.3397],
         [1.0051]]), torch.Size([7, 1]))

3. Set the random seed to 0 and do 1 & 2 over again.

In [17]:
RANDOM_SEED = 0

torch.manual_seed(RANDOM_SEED)
random_tensor3 = torch.rand(7, 7)
random_tensor4 = torch.rand(1, 7)

tensor2 = torch.matmul(random_tensor3, random_tensor4.T)

print(random_tensor3)
print(random_tensor4)
print(random_tensor3 == random_tensor4)

print(tensor2)

tensor([[0.4963, 0.7682, 0.0885, 0.1320, 0.3074, 0.6341, 0.4901],
        [0.8964, 0.4556, 0.6323, 0.3489, 0.4017, 0.0223, 0.1689],
        [0.2939, 0.5185, 0.6977, 0.8000, 0.1610, 0.2823, 0.6816],
        [0.9152, 0.3971, 0.8742, 0.4194, 0.5529, 0.9527, 0.0362],
        [0.1852, 0.3734, 0.3051, 0.9320, 0.1759, 0.2698, 0.1507],
        [0.0317, 0.2081, 0.9298, 0.7231, 0.7423, 0.5263, 0.2437],
        [0.5846, 0.0332, 0.1387, 0.2422, 0.8155, 0.7932, 0.2783]])
tensor([[0.4820, 0.8198, 0.9971, 0.6984, 0.5675, 0.8352, 0.2056]])
tensor([[False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False]])
tensor([[1.8542],
        [1.9611],
        [2.2884],
  

4. Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one)

If there is, set the GPU random seed to 1234.

In [18]:
# check for gpu access with Pytorch
import torch
torch.cuda.is_available()

True

In [19]:
# Set random seed on the GPU
torch.cuda.manual_seed(1234)

5. Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).

In [20]:
# Set random seed
torch.manual_seed(1234)

# Check for access to GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# Create two random tensors on GPU
tensor_A = torch.rand(size=(2,3)).to(device)
tensor_B = torch.rand(size=(2,3)).to(device)
tensor_A, tensor_B

Device: cuda


(tensor([[0.0290, 0.4019, 0.2598],
         [0.3666, 0.0583, 0.7006]], device='cuda:0'),
 tensor([[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631]], device='cuda:0'))

6. Perform a matrix multiplication on the tensors you created in 5 (again, you may have to adjust the shapes of one of the tensors).

In [21]:
tensor_C = torch.mm(tensor_A, tensor_B.T)
tensor_C, tensor_C.shape

(tensor([[0.3647, 0.4709],
         [0.5184, 0.5617]], device='cuda:0'), torch.Size([2, 2]))

7. Find the maximum and minimum values of the output of 6.

In [22]:
torch.min(tensor_C), torch.max(tensor_C)

(tensor(0.3647, device='cuda:0'), tensor(0.5617, device='cuda:0'))

8. Find the maximum and minimum index values of the output of 6.

In [23]:
torch.argmin(tensor_C), torch.argmax(tensor_C)

(tensor(0, device='cuda:0'), tensor(3, device='cuda:0'))

9. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape.

In [24]:
random_tensor_D = torch.rand(1, 1, 1, 10)
print(random_tensor_D, random_tensor_D.shape)

random_tensor_E = random_tensor_D.squeeze()
print(random_tensor_E, random_tensor_E.shape)

tensor([[[[0.7749, 0.8208, 0.2793, 0.6817, 0.2837, 0.6567, 0.2388, 0.7313,
           0.6012, 0.3043]]]]) torch.Size([1, 1, 1, 10])
tensor([0.7749, 0.8208, 0.2793, 0.6817, 0.2837, 0.6567, 0.2388, 0.7313, 0.6012,
        0.3043]) torch.Size([10])
