## PyTorch Fundamentals

In [3]:
# Notebook-wide imports, kept together in the first cell.
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Show which PyTorch version this notebook was run with.
print(torch.__version__)

2.2.0


## Introduction to Tensors

### Creating Tensors

In [14]:
# Scalar: a tensor built from a single number, so it has zero dimensions
scalar = torch.tensor(1)
# Show the tensor, its number of dimensions, and the plain Python value
print(scalar, scalar.ndim, scalar.item(), sep="\n")

tensor(1)
0
1


In [16]:
# Vector: a one-dimensional tensor
Vector = torch.tensor([1, 1])
# .size() (method) and .shape (attribute) report the same torch.Size
print(Vector, Vector.ndim, Vector.size(), Vector.shape, sep="\n")

tensor([1, 1])
1
torch.Size([2])
torch.Size([2])


In [25]:
# Matrix: a two-dimensional tensor (2 rows, 2 columns)
Matrix = torch.tensor([[1, 2], [3, 4]])
print(Matrix, Matrix.ndim, Matrix.size(), Matrix.shape, sep="\n")
# Row 1, then the element at row 1 / column 1 using two equivalent index forms
print(Matrix[1], Matrix[1, 1], Matrix[1][1], sep="\n")
# .item() converts the single-element tensor into a Python number
print(Matrix[1, 1].item())

tensor([[1, 2],
        [3, 4]])
2
torch.Size([2, 2])
torch.Size([2, 2])
tensor([3, 4])
tensor(4)
tensor(4)
4


In [32]:
# Tensor: a three-dimensional example with shape (4, 2, 3)
Tensor = torch.tensor([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
    [[13, 14, 15], [16, 17, 18]],
    [[19, 20, 21], [22, 23, 24]],
])
# Dimensions are counted from the outermost bracket to the innermost:
# purple = dim 0 (size 4)
# blue = dim 1 (size 2)
# yellow = dim 2 (size 3)

print(Tensor, Tensor.ndim, Tensor.size(), Tensor.shape, sep="\n")
# Each extra index removes one dimension: (2, 3) block -> row of 3 -> single element
print(Tensor[0], Tensor[0, 1], Tensor[0, 1, 1], sep="\n")
print(Tensor[0, 1, 1].item())

tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]],

        [[13, 14, 15],
         [16, 17, 18]],

        [[19, 20, 21],
         [22, 23, 24]]])
3
torch.Size([4, 2, 3])
torch.Size([4, 2, 3])
tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([4, 5, 6])
tensor(5)
5


### Random Tensors

Why random tensors?  
  
Random tensors are important because many neural networks learn by starting with tensors full of random numbers and then adjusting those numbers to better represent the data.  

`Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers`

In [35]:
# Random tensor with 3 rows and 4 columns
random_tensor = torch.rand(size=(3, 4))
print(random_tensor, random_tensor.ndim, sep="\n")

tensor([[0.0125, 0.1918, 0.0802, 0.0327],
        [0.2697, 0.8742, 0.3134, 0.9401],
        [0.7069, 0.7610, 0.2499, 0.7811]])


In [36]:
# Random tensor shaped like an image: (height, width, colour channels)
random_image_size_tensor = torch.rand(224, 224, 3)
print(random_image_size_tensor.shape, random_image_size_tensor.ndim, sep="\n")

torch.Size([224, 224, 3])
3


### Zeros and Ones

In [38]:
# Tensor of zeros with the same (3, 4) shape as random_tensor
zeros = torch.zeros(size=(3, 4))
# Multiplying element-wise by zeros gives zeros everywhere
print(zeros, zeros * random_tensor, sep="\n")

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


In [39]:
# Tensor of ones with shape (3, 4)
ones = torch.ones(size=(3, 4))
# Both torch.ones and torch.rand produce float32 tensors by default
print(ones, ones.dtype, random_tensor.dtype, sep="\n")

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
torch.float32
torch.float32


### Creating a range of values and tensors like other tensors

In [46]:
# Use torch.arange() (torch.range() is deprecated)
one_to_ten = torch.arange(1, 11, 1)
print(one_to_ten)

# Arguments are (start, end, step); the end value itself is excluded
print(torch.arange(start=3, end=14, step=2))

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
tensor([ 3,  5,  7,  9, 11, 13])


In [49]:
# "Like" constructors copy the shape (and dtype) of an existing tensor
ten_zeros = torch.zeros_like(one_to_ten)
ten_ones = torch.ones_like(one_to_ten)
print(one_to_ten.shape, ten_zeros, ten_zeros.shape, ten_ones, ten_ones.shape, sep="\n")

torch.Size([10])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([10])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([10])


### Tensor Datatypes

**Note:** Tensor datatypes are one of the 3 big sources of errors you can run into with PyTorch & deep learning:
1. Tensors not the right datatype
2. Tensors not the right shape
3. Tensors not on the right device

In [52]:
# Float 32 tensor, spelling out the three most important constructor arguments
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # data type; None leaves the default, float32 for float data
    device=None,          # CPU or GPU (cuda); None leaves the default, CPU
    requires_grad=False,  # whether to track gradients; False is the default
)

print(float_32_tensor)

# Same values stored in half precision
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)

tensor([3., 6., 9.])


In [54]:
# Integer tensor with an explicit 32-bit dtype
int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
print(int_32_tensor, float_32_tensor, sep="\n")

# Mixing float32 and int32 works here; the product comes back as a float tensor
print(float_32_tensor * int_32_tensor)

tensor([3, 6, 9], dtype=torch.int32)
tensor([3., 6., 9.])
tensor([ 9., 36., 81.])


### Getting information from tensors

1. To get data type of tensor - `tensor.dtype`
2. To get shape of tensor - `tensor.shape`
3. To get device tensor is on - `tensor.device`

In [55]:
# Inspect the three attributes listed above: dtype, shape and device
some_tensor = torch.rand(size=(3, 4))
print(some_tensor, some_tensor.dtype, some_tensor.shape, some_tensor.device, sep="\n")

tensor([[0.6383, 0.3010, 0.4302, 0.3041],
        [0.6724, 0.4168, 0.0155, 0.6574],
        [0.8865, 0.6433, 0.9280, 0.2102]])
torch.float32
torch.Size([3, 4])
cpu


In [57]:
# size is a method (call it: some_tensor.size()), shape is an attribute
# attributes are read without parentheses, methods are called with parentheses
# Without the parentheses, size displays the method object itself
some_tensor.size, some_tensor.shape

(<function Tensor.size>, torch.Size([3, 4]))

### Manipulating Tensors - Tensor Operations

Tensor operations include:  
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix Multiplication

In [58]:
# Create a small integer tensor; adding a scalar is applied to every element
tensor = torch.tensor([1, 2, 3])
print(10 + tensor)

tensor([11, 12, 13])


In [59]:
# Multiply each element by 10 (returns a new tensor; `tensor` is not modified)
print(tensor*10)

tensor([10, 20, 30])


In [60]:
# The operations above returned new tensors, so `tensor` still holds its original values
tensor

tensor([1, 2, 3])

In [61]:
# Subtract 10 from each element (again returns a new tensor)
print(tensor-10)

tensor([-9, -8, -7])


In [63]:
# PyTorch also provides function forms of the arithmetic operators.
# The Python operators (*, +, -, /) are usually the more readable choice.
# Note that torch.div returns a float tensor even for integer input.
print(
    torch.mul(tensor, 10),
    torch.add(tensor, 10),
    torch.sub(tensor, 10),
    torch.div(tensor, 10),
    sep="\n",
)

tensor([10, 20, 30])
tensor([11, 12, 13])
tensor([-9, -8, -7])
tensor([0.1000, 0.2000, 0.3000])


### Matrix multiplication

Two main ways of performing multiplication in NNs and DL
1. Element-wise multiplication
2. Matrix Multiplication (or dot product)

There are two main rules that performing matrix multiplication needs to satisfy:
1. The inner dimensions must match, e.g. `(3, 2) @ (2, 3)` works but `(3, 2) @ (3, 2)` does not
2. The resulting matrix has the shape of the outer dimensions, e.g. `(3, 2) @ (2, 3)` -> `(3, 3)`


In [71]:
# Element-wise multiplication: each element is multiplied by the element
# at the same position in the other tensor
print(tensor)
print(tensor, "*", tensor)
print(f"Equals {tensor * tensor}")

tensor([1, 2, 3])
tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals tensor([1, 4, 9])


In [75]:
# Matrix multiplication
# torch.matmul and the @ operator are the same operation
print(torch.matmul(tensor, tensor))
# torch.mm only accepts 2-D matrices; on this 1-D tensor it fails with
# "RuntimeError: self must be a matrix", so the call is left commented out
# print(torch.mm(tensor, tensor))
print(tensor @ tensor)

tensor(14)


RuntimeError: self must be a matrix

In [68]:
%%time
# Time a single torch.matmul call on the 1-D tensor
torch.matmul(tensor, tensor)

CPU times: user 151 µs, sys: 205 µs, total: 356 µs
Wall time: 275 µs


tensor(14)

### One of the most common errors in DL: shape errors

In [76]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])

tensor_B = torch.tensor([[7, 8],
                         [9, 10],
                         [11, 12]])

# Both tensors have shape (3, 2), so the inner dimensions (2 and 3) do not
# match and torch.mm raises a RuntimeError. The error is the point of this
# cell, so it is caught and displayed instead of stopping the notebook.
try:
    print(torch.mm(tensor_A, tensor_B))
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [79]:
# Manipulate shape of the tensor for matmul
# .T transposes tensor_B from (3, 2) to (2, 3) so the inner dimensions
# line up with tensor_A's (3, 2)

tensor_B_transpose = tensor_B.T
print(tensor_B)
print(tensor_B_transpose)

tensor([[ 7,  8],
        [ 9, 10],
        [11, 12]])
tensor([[ 7,  9, 11],
        [ 8, 10, 12]])


In [80]:
# (3, 2) @ (2, 3) -> (3, 3): inner dimensions match, result takes the outer ones
print(torch.mm(tensor_A, tensor_B_transpose))

tensor([[ 23,  29,  35],
        [ 53,  67,  81],
        [ 83, 105, 127]])


## Finding the min, max, mean, sum, etc (Tensor Aggregations)

In [90]:
# Create a tensor (integer arguments give an int64 tensor)
x = torch.arange(1, 100, 10)
print(x)

# Find the min (function form and method form are equivalent)
print(torch.min(x))
print(x.min())

# Find the max
print(torch.max(x))
print(x.max())

# Find the mean
# mean() only accepts floating point or complex input, so calling it on this
# int64 tensor raises a RuntimeError. The error is the point of this step, so
# it is caught and displayed instead of stopping the notebook.
try:
    print(torch.mean(x))
    print(x.mean())
except RuntimeError as dtype_error:
    print(f"RuntimeError: {dtype_error}")


tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])
tensor(1)
tensor(1)
tensor(91)
tensor(91)


RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [82]:
print(x.dtype) # int64 - long
# torch.mean() needs a floating point (or complex) input, so an int64 tensor
# has to be converted to a float dtype first

torch.int64


In [91]:
# Four equivalent ways of converting x to float32 before taking the mean
print(
    torch.mean(x.type(torch.float32)),
    x.type(torch.float32).mean(),
    x.float().mean(),
    x.to(torch.float32).mean(),
    sep="\n",
)

tensor(46.)
tensor(46.)
tensor(46.)
tensor(46.)


In [92]:
# Find the sum (works directly on the integer tensor, no conversion needed)
print(torch.sum(x), x.sum(), sep="\n")

tensor(460)
tensor(460)


## Finding the positional min and max

In [93]:
# Show x again so the positions returned below can be checked by eye
print(x)

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])


In [94]:
# argmin / argmax return the index (position) of the smallest / largest value,
# not the value itself
print(x.argmin(), x.argmax(), sep="\n")

tensor(0)
tensor(9)


## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top (vstack) or side (hstack) of each other
* Squeeze - removes all dimensions of size `1` from a tensor
* Unsqueeze - adds a `1` dimension to a tensor
* Permute - return a view of the input with dimensions permuted (swapped) in a certain way

In [103]:
# Create a float tensor holding 1.0 ... 9.0 (a float start value gives float32)
x = torch.arange(1.0, 10.0)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [104]:
# Add an extra leading dimension: (9,) -> (1, 9)
x_reshaped = torch.reshape(x, (1, 9))
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [105]:
# Reshape the same 9 elements into a column: (9,) -> (9, 1)
x_reshaped = torch.reshape(x, (9, 1))
x_reshaped, x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [106]:
# Change the view
# z has shape (1, 9) but is backed by the same memory as x
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [107]:
# Changing z will change x
# because z is a view of x, and shares the same memory
# NOTE: this modifies x in place, so every later cell sees 5. as x's first element
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [108]:
# Stack four copies of x along a new first dimension: each copy becomes a row, (9,) -> (4, 9)
x_stacked = torch.stack([x] * 4, dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [109]:
# Stack along a new second dimension instead: each copy becomes a column, (9,) -> (9, 4)
x_stacked = torch.stack([x] * 4, dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [110]:
# Squeeze and Unsqueeze
# squeeze() with no arguments drops every dimension of size 1: (9, 1) -> (9,)
print(x_reshaped.shape, x_reshaped.squeeze().shape, sep="\n")

torch.Size([9, 1])
torch.Size([9])


In [111]:
# unsqueeze() must be told where to insert the new dimension; calling it
# without `dim` raises a TypeError. The error is the point of this cell, so it
# is caught and displayed instead of stopping the notebook.
try:
    print(x_reshaped.unsqueeze().shape)
except TypeError as missing_dim_error:
    print(f"TypeError: {missing_dim_error}")

TypeError: unsqueeze() missing 1 required positional arguments: "dim"

In [112]:
# x_reshaped has shape (9, 1); `dim` is the index at which the new size-1 dimension is inserted
print(x_reshaped.unsqueeze(dim=0).shape) # inserts a dimension at index 0 (the beginning)
print(x_reshaped.unsqueeze(dim=1).shape) # inserts a dimension at index 1 (between the 9 and the 1)

torch.Size([1, 9, 1])
torch.Size([9, 1, 1])


In [116]:
# torch.permute() rearranges the dimensions of a tensor into the given order
# and returns a VIEW of the original tensor (no data is copied)
x_original = torch.rand(size=(224, 224, 3))
print(x_original.shape)

# Move the last dimension to the front: (224, 224, 3) -> (3, 224, 224)
x_permuted = torch.permute(x_original, (2, 0, 1))
print(x_permuted.shape)

# Value before the write, then the same element seen through both tensors
# after writing to x_original: the view reflects the change
print(x_permuted[0, 0, 0])
x_original[0, 0, 0] = 1000
print(x_original[0, 0, 0], x_permuted[0, 0, 0], sep="\n")

torch.Size([224, 224, 3])
torch.Size([3, 224, 224])
tensor(0.3817)
tensor(1000.)
tensor(1000.)


## Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy

In [117]:
# Create the integers 1..9 arranged as one 3x3 block: shape (1, 3, 3)
x = torch.reshape(torch.arange(1, 10), (1, 3, 3))
print(x)

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])


In [119]:
# Let's index on our new tensor
# Shape of the tensor is (1, 3, 3)
# Indexing on the first dimension leaves (3, 3)
# i.e. a two-dimensional tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [120]:
# Let's index on the second dimension as well
# This leaves shape (3)
# i.e. a one-dimensional tensor
x[0, 0] # or x[0][0]

tensor([1, 2, 3])

In [121]:
# Let's index on the third dimension as well
# This leaves a scalar
# i.e. a zero-dimensional tensor
x[0, 0, 0] # or x[0][0][0]

tensor(1)

In [125]:
# Block 0, row 2, column 1
x[0, 2, 1]

tensor(8)

In [126]:
# You can use ":" to select all elements in a dimension
# The dimension selected with ":" is kept, so the result has shape (1, 3)
x[:, 0]

tensor([[1, 2, 3]])

In [127]:
# Get all values of the 0th and 1st dimensions but only index 1 of the 2nd dimension
# (i.e. the middle column)
x[:, :, 1]

tensor([[2, 5, 8]])

In [128]:
# Get all values of the 0th dimension but only the 1 index value of the 1st and 2nd dimensions
# The ":" keeps dim 0, so the result is a one-element tensor rather than a scalar
x[:, 1, 1]

tensor([5])

In [129]:
# Same element selected with an integer for dim 0 instead of ":"
x[0, 1, 1] # an integer index drops the dimension; ":" keeps it as an extra dimension

tensor(5)

In [130]:
# Get index 0 of 0th and 1st dimension and all values of the 2nd dimension
# (i.e. the first row)
x[0, 0, :]

tensor([1, 2, 3])

In [131]:
# Get number 9 (row 2, column 2); ":" keeps dim 0 so the result is a one-element tensor
x[:, 2, 2]

tensor([9])

In [132]:
# Get numbers 3, 6, 9 (the last column of every row)
x[:, :, 2]

tensor([[3, 6, 9]])

## PyTorch tensors & Numpy

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy()`
* PyTorch tensor, want in NumPy -> `torch.Tensor.numpy()`

In [150]:
# NumPy array to Torch tensor
# NumPy is already imported as `np` in the first cell of the notebook, so no
# additional import is needed here.
array = np.arange(1., 8.)
# torch.from_numpy keeps the array's dtype (float64 here)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [137]:
# NumPy's default dtype is float64
# Torch's default dtype is float32 - CAREFUL: a tensor made with
# torch.from_numpy keeps NumPy's float64 instead of Torch's default
array.dtype, tensor.dtype

(dtype('float64'), torch.float64)

In [149]:
# Change the value of array, will it change tensor?
# `+=` modifies the array in place, and the output shows the tensor changes too
# NOTE: re-running this cell adds 1 again each time
array += 1
array, tensor 

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([2., 3., 4., 5., 6., 7., 8.], dtype=torch.float64))

In [151]:
# Torch tensor to NumPy array
tensor = torch.ones(size=(7,))
numpy_tensor = tensor.numpy()
tensor, numpy_tensor  # the array keeps PyTorch's default dtype, float32

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [152]:
# `+=` modifies the tensor in place, and the output shows numpy_tensor changes too
# NOTE: re-running this cell adds 1 again each time
tensor += 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([2., 2., 2., 2., 2., 2., 2.], dtype=float32))

## Reproducibility (trying to take the random out of random)

In short how a NN learns:  
`start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers to better represent data -> repeat`  
  
To reduce the randomness in NNs and PyTorch comes the concept of a random seed.

In [154]:
# Two random tensors drawn one after the other, without setting a seed
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

# The element-wise comparison shows where (if anywhere) the two draws agree
print(random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B, sep="\n")

tensor([[0.2208, 0.1223, 0.9225, 0.7190],
        [0.6135, 0.9755, 0.6509, 0.5861],
        [0.4879, 0.2846, 0.8121, 0.7609]])
tensor([[0.5904, 0.5993, 0.7337, 0.8679],
        [0.3659, 0.9921, 0.7706, 0.8595],
        [0.5719, 0.3035, 0.3883, 0.0259]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [157]:
# Random but reproducible: seed once, then draw twice
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)

# The two draws continue the same seeded sequence, so they differ from each
# other, but both are the same on every run of this cell
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

print(random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B, sep="\n")

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [158]:
# Random but reproducible: reset the seed before each draw
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)
random_tensor_A = torch.rand(size=(3, 4))

# The seed has to be set again before every draw that should repeat the same
# numbers; otherwise the generator simply continues its sequence
torch.manual_seed(RANDOM_SEED)
random_tensor_B = torch.rand(size=(3, 4))

print(random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B, sep="\n")

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on GPUs (and making faster computations)

GPUs = faster computation on numbers

In [160]:
# Check whether a CUDA GPU can be used
# (False on the machine this was run on, which has no NVIDIA GPU)
torch.cuda.is_available()

False

In [161]:
# Device-agnostic setup: use the GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cpu'

In [162]:
# Count the number of CUDA GPUs visible to PyTorch
torch.cuda.device_count()

0

In [163]:
# Putting Tensors (and models) on the GPU
# Create a tensor (default is CPU)
tensor = torch.tensor([1, 2, 3])

# Tensor on CPU
print(tensor, tensor.device)

# Move tensor to the selected device; `device` comes from the device-agnostic
# setup cell, so on a machine without a GPU the tensor simply stays on the CPU
tensor = tensor.to(device)


tensor([1, 2, 3]) cpu


In [None]:
# Moving tensors back to the CPU
# A tensor that lives on the GPU cannot be converted to NumPy directly;
# it has to be moved back to the CPU first
tensor = tensor.cpu() # or tensor.to('cpu')