# Dependencies

In [71]:
# Dependencies
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.2.2


# 0 - PyTorch fundamentals
Following along with Daniel Bourke's "Learn PyTorch for deep learning in a day. Literally." video on YouTube.

## 0.1 - Introduction to Tensors

In [72]:
# A scalar: a single number wrapped in a tensor.
# Tensors are one of the most common data classes in PyTorch;
# see the documentation on tensors for more.
scalar = torch.tensor(data=7)
scalar

tensor(7)

In [73]:
scalar.ndim # a scalar has no dimensions, it's just the number 7

0

In [74]:
# we can get the number back as a python int with:
scalar.item()

7

In [75]:
# Vectors
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [76]:
vector.ndim # a vector has 1 dimension

1

In [77]:
vector.shape # torch.Size([2]): a single dimension that holds 2 elements

torch.Size([2])

In [78]:
# MATRIX (capitalized for a reason)
MATRIX = torch.tensor([[7,8],
                       [9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [79]:
MATRIX.ndim # a matrix has 2 dimensions

2

In [80]:
MATRIX.shape # torch.Size([2, 2]): 2 rows, each holding 2 elements

torch.Size([2, 2])

In [81]:
# TENSOR (also capitalized for a reason)
TENSOR = torch.tensor([[[1,2,3],
                        [3,4,5],
                        [4,5,6]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 4, 5],
         [4, 5, 6]]])

In [82]:
TENSOR.ndim # this tensor has 3 dimensions

3

In [83]:
# The shape is 1x3x3: a single 3x3 matrix wrapped in one outer dimension.
# Count the brackets and think about what indexing each level returns,
# e.g. index [0] returns a single item -- the entire 3x3 matrix.
TENSOR.shape

torch.Size([1, 3, 3])


 | name | dimensions | lower or upper case |
| -- | -- | -- |
| scalar | 0 | lower case (a) |
| vector | 1 | lower case (y) |
| matrix | 2 | upper case (Q) |
| tensor | any | upper case (X) |



## 0.2 - Random tensors


why random tensors?
Random tensors are a big part of PyTorch because the way many neural networks (NN) learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

`Start with random numbers -> loop ad infinitum:`


`(look at data -> update random numbers -> look at data -> update random numbers ->)`

In [84]:
# torch.rand builds a tensor of the requested shape filled with random
# numbers; the shape asked for here is (3, 4).
random_tensor = torch.rand(size=(3, 4))
random_tensor # 3 rows, each one a 4-element vector

tensor([[0.6805, 0.5524, 0.7945, 0.2576],
        [0.9794, 0.1941, 0.5377, 0.7933],
        [0.4224, 0.9443, 0.9274, 0.3598]])

In [85]:
# Same idea with three dimensions: shape (1, 10, 10).
random_tensor = torch.rand(size=(1, 10, 10))
random_tensor # 1 outer element holding 10 vectors of 10 values each

tensor([[[0.6364, 0.0663, 0.1963, 0.2635, 0.9553, 0.7958, 0.9118, 0.2964,
          0.0061, 0.7412],
         [0.5688, 0.1555, 0.1630, 0.6196, 0.9512, 0.6794, 0.6971, 0.2679,
          0.2692, 0.6257],
         [0.4463, 0.6241, 0.0757, 0.2507, 0.2752, 0.1184, 0.7453, 0.8574,
          0.9678, 0.0983],
         [0.0958, 0.3229, 0.8760, 0.3111, 0.7218, 0.7683, 0.1087, 0.9923,
          0.3560, 0.4227],
         [0.8560, 0.2167, 0.9006, 0.7461, 0.7490, 0.6860, 0.4288, 0.3845,
          0.7519, 0.7475],
         [0.0408, 0.2080, 0.9331, 0.1151, 0.1764, 0.1510, 0.3464, 0.4892,
          0.8101, 0.5210],
         [0.8248, 0.0430, 0.8541, 0.7296, 0.0055, 0.5293, 0.1847, 0.3258,
          0.4641, 0.9918],
         [0.6583, 0.0898, 0.9128, 0.8830, 0.5350, 0.1027, 0.7965, 0.0496,
          0.7980, 0.0511],
         [0.6344, 0.8710, 0.3288, 0.0629, 0.5193, 0.0769, 0.7967, 0.7998,
          0.8244, 0.2102],
         [0.7879, 0.7952, 0.3704, 0.4859, 0.3702, 0.3559, 0.0215, 0.0189,
          0.8877,

In [86]:
# Random tensor with a shape similar to an image tensor.
# Dimension order used here: height, width, color channels (R, G, B);
# this order is not necessarily always the one in use.
random_image_size_tensor = torch.rand(224, 224, 3)

(random_image_size_tensor.shape, random_image_size_tensor.ndim)

(torch.Size([224, 224, 3]), 3)

In [87]:
# Random tensors can have any size and shape. Reading the shape
# (1, 2, 3, 4, 5) from the outside in:
#   1 element composed of
#     2 elements, each composed of
#       3 elements, each composed of
#         4 elements, each of which is a
#           5-element vector
my_random_tensor = torch.rand(1, 2, 3, 4, 5)
my_random_tensor

tensor([[[[[0.1926, 0.3112, 0.5714, 0.6479, 0.5418],
           [0.5288, 0.5626, 0.1920, 0.8414, 0.5355],
           [0.3492, 0.0990, 0.2362, 0.8896, 0.6123],
           [0.6687, 0.2505, 0.6890, 0.0069, 0.3693]],

          [[0.8972, 0.4404, 0.2713, 0.1588, 0.5622],
           [0.7863, 0.8578, 0.1259, 0.8301, 0.0264],
           [0.8544, 0.9668, 0.9897, 0.7940, 0.6163],
           [0.3618, 0.9629, 0.6518, 0.4611, 0.7780]],

          [[0.1799, 0.0212, 0.8506, 0.1346, 0.7098],
           [0.4207, 0.8786, 0.4291, 0.1639, 0.3011],
           [0.8862, 0.0941, 0.0500, 0.0892, 0.6325],
           [0.8116, 0.7563, 0.7733, 0.9669, 0.7805]]],


         [[[0.7845, 0.7647, 0.2717, 0.3462, 0.3721],
           [0.9759, 0.2582, 0.6814, 0.2667, 0.8110],
           [0.7834, 0.8318, 0.4948, 0.7349, 0.9664],
           [0.4082, 0.3307, 0.7469, 0.9627, 0.4184]],

          [[0.4656, 0.4783, 0.0155, 0.8484, 0.3202],
           [0.9060, 0.8003, 0.9755, 0.8250, 0.7258],
           [0.1189, 0.8899, 0.7305, 

### Zeroes and Ones tensors

In [88]:
# A tensor of all zeros is useful as a mask: multiplying element-wise by it
# zeroes out every entry of another tensor of the same shape.
zero = torch.zeros(size=(3, 4))
random_tensor = torch.rand(size=(3, 4))

# Display the masked result so the cell actually shows the masking it
# describes (every entry of the product is zero).
zero * random_tensor

### Creating a range of tensors and tensors-like


In [89]:
# torch.range() is deprecated in favor of torch.arange().
# As the output shows, it includes the end value (10) and returns
# floating point values.
torch.range(0,10)

  torch.range(0,10)


tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [90]:
# instead use torch.arange()
torch.arange(0,10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [91]:
# torch.arange() also accepts a step size between consecutive values
stepped_range = torch.arange(0, 1000, 77)
stepped_range

tensor([  0,  77, 154, 231, 308, 385, 462, 539, 616, 693, 770, 847, 924])

In [92]:
# A slightly fancier use: exactly 10 multiples of 77.
# `end` is exclusive, so stopping at 11 * step_size yields the multiples
# 1x through 10x and leaves the 11th out.
step_size = 77
stepped_range = torch.arange(step_size, 11 * step_size, step_size)
stepped_range

tensor([ 77, 154, 231, 308, 385, 462, 539, 616, 693, 770])

Tensors-like
Say you have a shape that you want to replicate, but don't want to explicitly define what that shape should be

In [93]:
# The "*_like" constructors take their shape from an existing tensor,
# so the shape never has to be spelled out a second time.
one_to_ten = torch.arange(1, 11)
print(one_to_ten.shape)
print(one_to_ten)

# zeros with the same shape as one_to_ten
ten_zeros = torch.zeros_like(one_to_ten)
print(f'\n{ten_zeros.shape}')
print(ten_zeros)

# ones with the same shape as one_to_ten
ten_ones = torch.ones_like(one_to_ten)
print(f'\n{ten_ones.shape}')
print(ten_ones)

torch.Size([10])
tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

torch.Size([10])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

torch.Size([10])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


## 0.3 - Tensor datatypes


Tensor datatypes are one of the 3 big sources of errors you'll run into with PyTorch and deep learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device

float32 is referred to as single precision floating point


float16 is referred to as half precision floating point


more precision = more computing power required


less precision = less computing power required

In [94]:
# Float 32 tensor
# when unspecified, the default dtype is float32
float_32_tensor = torch.tensor([3.0, 6.0, 9.0])
float_32_tensor.dtype

torch.float32

In [95]:
# It's good practice to set the dtype, device and requires_grad explicitly
# instead of relying on the defaults.
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=torch.float32, # explicit, so the tensor matches its name even if the default dtype is changed
                               device=None, # what device the tensor should be on (None -> the current default device)
                               requires_grad=True # whether or not to track gradients
                               )


In [96]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor.dtype

torch.float16

In [98]:
# If you try to do tensor operations on tensors that are not on the same
# device, you'll get a RuntimeError.
# That error is the point of this cell, so it is caught and displayed rather
# than left to halt a "Run All". The GPU half is skipped on machines without
# CUDA, where creating the 'cuda' tensor would fail for an unrelated reason.
cpu_tensor = torch.tensor([1,2,3], device='cpu')

if torch.cuda.is_available():
    gpu_tensor = torch.tensor([1,2,3], device='cuda')
    try:
        cpu_tensor + gpu_tensor
    except RuntimeError as err:
        print(f'RuntimeError: {err}')
else:
    print('CUDA is not available: skipping the cpu + gpu device-mismatch demo')

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

some mismatched datatypes will result in an error

## 0.4 - Getting information from tensors (tensor attributes)

In [99]:
some_tensor = torch.rand(3, 4)
some_tensor

tensor([[0.2116, 0.5383, 0.4137, 0.9415],
        [0.0590, 0.1405, 0.4789, 0.1742],
        [0.1329, 0.6018, 0.8005, 0.7299]])

In [103]:
# Find out details about some_tensor: its values, dtype, shape and device.
# sep='' stops print() from inserting a space between its arguments, which
# would otherwise indent the Shape and Device lines by one character
# (each preceding argument ends in '\n').
print(some_tensor)
print(f'Datatype: {some_tensor.dtype}\n',
      f'Shape: {some_tensor.shape}\n',
      f'Device: {some_tensor.device}',
      sep='')



tensor([[0.2116, 0.5383, 0.4137, 0.9415],
        [0.0590, 0.1405, 0.4789, 0.1742],
        [0.1329, 0.6018, 0.8005, 0.7299]])
Datatype: torch.float32
 Shape: torch.Size([3, 4])
 Device: cpu


In [104]:
# Change the device a PyTorch tensor is on with .to().
# Fall back to the CPU when CUDA is unavailable so the cell still runs on
# machines without a GPU (cuda_some_tensor then simply stays on the CPU).
print(some_tensor.device)
cuda_some_tensor = some_tensor.to(device='cuda' if torch.cuda.is_available() else 'cpu')
print(cuda_some_tensor.device)

cpu
cuda:0


## 0.5 - Manipulating tensors (tensor operations)
https://pytorch.org/docs/stable/generated/torch.matmul.html


Tensor operations include:
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication

### 0.51 - Basic Arithmetic

In [109]:
# create a tensor
tensor = torch.tensor([1, 2, 3], dtype=torch.int32)

In [110]:
# addition
tensor + 10

tensor([11, 12, 13], dtype=torch.int32)

In [111]:
# subtraction
tensor - 10

tensor([-9, -8, -7], dtype=torch.int32)

In [112]:
# element-wise multiplication
tensor * 10

tensor([10, 20, 30], dtype=torch.int32)

In [113]:
# There are also inbuilt functions for tensor operations
torch.mul(tensor, 10) # multiplication (element-wise)

tensor([10, 20, 30], dtype=torch.int32)

In [114]:
# Division
tensor / 2

tensor([0.5000, 1.0000, 1.5000])

### 0.52 Matrix multiplication
Two main ways of performing multiplication in neural networks and deep learning:
- Element-wise multiplication (x*y)
- Matrix multiplication

In [115]:
# Element-wise multiplication
print(tensor, "*", tensor)
print(f'Equals: {tensor*tensor}')

tensor([1, 2, 3], dtype=torch.int32) * tensor([1, 2, 3], dtype=torch.int32)
Equals: tensor([1, 4, 9], dtype=torch.int32)


In [116]:
# Matrix multiplication
print(tensor)
print(f'tensor*tensor with matrix multiplication: {torch.matmul(tensor, tensor)}')

tensor([1, 2, 3], dtype=torch.int32)
tensor*tensor with matrix multiplication: 14


In [117]:
# The result is 14 because:
1*1 + 2*2 + 3*3

14

In [118]:
%%time
# or as an explicit Python loop, accumulating one squared element at a time:
value = 0
for element in tensor:
    value += element * element
print(value)

tensor(14, dtype=torch.int32)
CPU times: user 1.6 ms, sys: 389 µs, total: 1.99 ms
Wall time: 1.47 ms


In [119]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 319 µs, sys: 77 µs, total: 396 µs
Wall time: 321 µs


tensor(14, dtype=torch.int32)

Even with such a small tensor, the for loop is roughly 5x slower than the PyTorch matrix multiplication function in this run (about 1.5 ms vs 0.3 ms wall time)

#### Rules that must be satisfied with matrix multiplication
1. The **inner dimensions** must match: (@ is the symbol for matrix multiplication)
* `(3, 2) @ (3, 2)` won't work
* `(2, 3) @ (3, 2)` will work
* `(3, 2) @ (2, 3)` will work


`torch.matmul(torch.rand(3, 2), torch.rand(3, 2))`

RuntimeError: ***mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)***

This happens because each row of mat1 is multiplied element-by-element with a column of mat2: a row of mat1 has 2 values, but a column of mat2 has 3, so they cannot be paired up


2. The resulting matrix has the shape of the **outer dimensions**:

`(2, 3) @ (3, 2)` --> (2, 2)

**inner dimensions** match; rule 1 satisfied.

so the resulting matrix will have dimensions (2, 2) from the **outer dimensions**

In [120]:
# (2, 3) @ (3, 2)
torch.matmul(torch.rand(2, 3), torch.rand(3, 2))

tensor([[0.4234, 0.2344],
        [1.0673, 0.6612]])

One of the most common errors in deep learning: shape errors

In [121]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])

In [122]:
# torch.mm performs matrix multiplication on 2-D tensors (torch.matmul
# additionally handles other ranks and broadcasting).
# tensor_A and tensor_B are both (3, 2), so the inner dimensions (2 and 3)
# don't match and the call raises. The error is the lesson here, so it is
# caught and displayed instead of halting a "Run All".
try:
    torch.mm(tensor_A, tensor_B)
except RuntimeError as err:
    print(f'RuntimeError: {err}')

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [123]:
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

We were creating tensors on the fly before, but these already exist, how can we change their shape?

transposition!

In [124]:
tensor_B

tensor([[ 7, 10],
        [ 8, 11],
        [ 9, 12]])

In [125]:
tensor_B.T

tensor([[ 7,  8,  9],
        [10, 11, 12]])

In [126]:
tensor_B.T, tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

Now, having transposed tensor_B (tensor_B.T), we have:

In [127]:
# Summarise why multiplying by the transposed tensor works, then show the result.
# print() separates its arguments with a single space, which is why the first
# f-string starts with a space: it keeps the printed lines aligned.
# Note: the "Inner dimensions..." f-string and the '\n\n' f-string that follows
# have no comma between them, so Python concatenates them into one argument.
print(f' tensor_A.shape: {tensor_A.shape}\n',
      f'tensor_B.T.shape: {tensor_B.T.shape}\n',
      f'    or {tensor_A.shape} @ {tensor_B.T.shape}',
      f'\n\n Inner dimensions match ((3, 2) @ (2, 3)),\n resulting matrix has shape of outer dimensions (3, 3)'
      f'\n\n',
      f'torch.mm(tensor_A, tensor_B.T):\n {torch.mm(tensor_A, tensor_B.T)}')

 tensor_A.shape: torch.Size([3, 2])
 tensor_B.T.shape: torch.Size([2, 3])
     or torch.Size([3, 2]) @ torch.Size([2, 3]) 

 Inner dimensions match ((3, 2) @ (2, 3)),
 resulting matrix has shape of outer dimensions (3, 3)

 torch.mm(tensor_A, tensor_B.T):
 tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


## 0.6 - Finding the min, max, mean, sum, etc. (Tensor Aggregation)

In [130]:
# Sample tensor for the aggregation examples: 0, 10, ..., 90
x = torch.arange(start=0, end=100, step=10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [132]:
# find the min (function form and method form return the same value)
torch.min(x), x.min()

(tensor(0), tensor(0))

In [133]:
# find the max
torch.max(x), x.max()

(tensor(90), tensor(90))

In [134]:
# find the mean
# x holds 64-bit integers, and mean() only accepts floating point or complex
# dtypes, so this call raises. The error is the point of this cell, so it is
# caught and displayed instead of halting a "Run All".
try:
    torch.mean(x), x.mean()
except RuntimeError as err:
    print(f'RuntimeError: {err}')

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

Here, finally, is a "Tensor not the right datatype" error.

The tensor we created is of type:

In [136]:
x.type()
# torch.int64 or torch.LongTensor, or in english it's a 64-bit signed integer

'torch.LongTensor'

The documentation doesn't explicitly state the dtype the data should be in, but from the RuntimeError we can see this should either be a floating point or a complex dtype

The floating point dtypes are listed below (complex dtypes hold numbers with an imaginary part and are likely unnecessary here):

| Data Type | dtype | 
| --- | --- |
| 32-bit float | torch.float32 or torch.float |
| 64-bit float | torch.float64 or torch.double |
| 16-bit float | torch.float16 or torch.half |



In [137]:
# torch.mean() requires a tensor of float numbers
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [138]:
# find the sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

## 0.7 - Finding the positional min ( argmin() ) and max ( argmax() )

Positional max and min return the index value of the maximum and minimum value within a tensor

In [150]:
# Sample tensor for the argmin/argmax examples: 1, 11, ..., 101
x = torch.arange(start=1, end=111, step=10)
x

tensor([  1,  11,  21,  31,  41,  51,  61,  71,  81,  91, 101])

In [151]:
# argmin() returns the index of the min value
x.argmin()

tensor(0)

In [152]:
# we can then use that argmin() value to get the actual min value
x[x.argmin()]

tensor(1)

In [153]:
# argmax() returns the index of the max value
x.argmax()

tensor(10)

In [154]:
# and similarly we can use this index to get the actual max value
x[x.argmax()]

tensor(101)

This is helpful for things like the softmax activation function, because we really don't care about **what** the maximum value is, as much as ***where*** the maximum value is

## 0.8 - Reshaping, stacking, squeezing and unsqueezing

* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of a certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack for vertical stack; there are other types of stacks)