In [1]:
import torch
torch.__version__

'2.1.2.post303'

Tensors are the fundamental building block of machine learning.

Their job is to represent data in a numerical way.

For example, you could represent an image as a tensor with shape [3, 224, 224] which would mean [colour_channels, height, width], as in the image has 3 colour channels (red, green, blue), a height of 224 pixels and a width of 224 pixels.

In [2]:
x = torch.tensor([[1., -1.], [1., 1.]], requires_grad=True)
out = x.pow(2).sum()
out.backward()
x.grad
# from the 10 min doc on tensor exercice

tensor([[ 2., -2.],
        [ 2.,  2.]])

A scalar is a single number and in tensor-speak it's a zero dimension tensor.

In [9]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [10]:
scalar.ndim

0

In [11]:
scalar.shape

torch.Size([])

In [14]:
int_from_scalar = scalar.item()
int_from_scalar

7

A vector is a single dimension tensor but can contain many numbers.

In [15]:
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [16]:
vector.ndim

1

In [17]:
vector.shape

torch.Size([2])

You can tell the number of dimensions a tensor in PyTorch has by the number of square brackets on the outside ( [ ) and you only need to count one side.

In [18]:
MATRIX = torch.tensor([[7,8],
                       [9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [19]:
MATRIX.ndim

2

In [20]:
MATRIX.shape

torch.Size([2, 2])

When the first rank is higher than the second (3, 2), its a higher-dimensional tensor.
Matrice is often used to define a rank-2 tensor with two equal dimension, but basically they all can be refered as a tensor

In [21]:
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 4, 5]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

Tensor can represent almost anything, first row could be day of the week, snd could bbe steak sales, trd could me almond butter sales

In [22]:
TENSOR.shape

torch.Size([1, 3, 3])

In [23]:
TENSOR.ndim

3

1 dimension of 3 by 3

In practice, you'll often see scalars and vectors denoted as lowercase letters such as y or a. And matrices and tensors denoted as uppercase letters such as X or W.

Tensor are basically when there is multiples dimensions of matrices (3, 3, 3)

In [24]:
random_tensor = torch.rand(size=(3,4))
random_tensor, random_tensor.dtype

(tensor([[0.5326, 0.5352, 0.0033, 0.1846],
         [0.9410, 0.0705, 0.2008, 0.3463],
         [0.5243, 0.8211, 0.0576, 0.4913]]),
 torch.float32)

In [25]:
random_image_size_tensor = torch.rand(size=(224,224,3))
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [26]:
random_image_size_tensor

tensor([[[0.2052, 0.6004, 0.7202],
         [0.7979, 0.3628, 0.6645],
         [0.9539, 0.9038, 0.0342],
         ...,
         [0.5378, 0.5794, 0.1041],
         [0.1811, 0.9960, 0.8762],
         [0.8865, 0.7775, 0.7062]],

        [[0.0153, 0.2631, 0.6181],
         [0.5503, 0.5494, 0.2004],
         [0.1290, 0.3686, 0.4253],
         ...,
         [0.8024, 0.3706, 0.2126],
         [0.6861, 0.8002, 0.2603],
         [0.1912, 0.2645, 0.4407]],

        [[0.9306, 0.0800, 0.2869],
         [0.0044, 0.7270, 0.9442],
         [0.9927, 0.9040, 0.7381],
         ...,
         [0.6263, 0.7559, 0.3630],
         [0.3406, 0.2156, 0.3027],
         [0.7415, 0.8358, 0.2161]],

        ...,

        [[0.4976, 0.1631, 0.9708],
         [0.3068, 0.7845, 0.7598],
         [0.3811, 0.1952, 0.5948],
         ...,
         [0.9786, 0.9677, 0.1044],
         [0.5374, 0.5927, 0.5220],
         [0.1230, 0.3933, 0.5625]],

        [[0.0942, 0.0673, 0.4279],
         [0.5942, 0.7212, 0.3334],
         [0.

In [28]:
zeros = torch.zeros(size=(3,4))
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [29]:
ones = torch.ones(size=(3,4))
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

In [32]:
zero_to_ten_deprecated = torch.arange(0,10)
zero_to_ten_deprecated

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [34]:
ten_zeros = torch.zeros_like(zero_to_ten_deprecated) #zeros_like is the data, (zero_to_ten..) is the input shape
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

Generally if you see torch.cuda anywhere, the tensor is being used for GPU (since Nvidia GPUs use a computing toolkit called CUDA).

The higher the precision value (8, 16, 32), the more detail and hence data used to express a number.

This matters in deep learning and numerical computing because you're making so many operations, the more detail you have to calculate on, the more compute you have to use

In [38]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # defaults to None, which is torch.float32 or whatever datatype is passed
                               device=None, # defaults to None, which uses the default tensor type
                               requires_grad=False) # if True, operations performed on the tensor are recorded 

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [39]:
float_16_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=torch.float16) # torch.half would also work

float_16_tensor.dtype

torch.float16

In [40]:
some_tensor = torch.rand(3,4)

print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}") # will default to CPU

tensor([[0.8878, 0.8821, 0.7122, 0.2653],
        [0.2106, 0.1166, 0.4197, 0.3480],
        [0.6399, 0.2031, 0.8593, 0.0933]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


When you run into issues in PyTorch, it's very often one to do with one of the three attributes above. So when the error messages show up, sing yourself a little song called "what, what, where"

"what shape are my tensors? what datatype are they and where are they stored? what shape, what datatype, where where where"

In [41]:
tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [42]:
tensor * 10

tensor([10, 20, 30])

In [44]:
tensor # don't change unless reassigned

tensor([1, 2, 3])

In [47]:
tensor - 10

tensor([-9, -8, -7])

In [48]:
torch.sub(tensor, 10)

tensor([-9, -8, -7])

In [49]:
sub_tensor = tensor - 10
sub_tensor

tensor([-9, -8, -7])

In [50]:
tensor + 10

tensor([11, 12, 13])

In [51]:
torch.multiply(tensor, 10)

tensor([10, 20, 30])

In [52]:
print(tensor, "*", tensor)
print("Equals:", tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


The main two rules for matrix multiplication to remember are:

    The inner dimensions must match:
        (3, 2) @ (3, 2) won't work
        (2, 3) @ (3, 2) will work
        (3, 2) @ (2, 3) will work

    The resulting matrix has the shape of the outer dimensions:
        (2, 3) @ (3, 2) -> (2, 2)
        (3, 2) @ (2, 3) -> (3, 3)

In [54]:
tensor = torch.tensor([1,2,3])
tensor.shape

torch.Size([3])

Element-wise matrix multiplication:

        Also known as the Hadamard product, is a binary operation beetwen two samedimensions   matrix, returning a matrix of the multiplied ocrresponding elements, 
        
[1 * 1 , 2 * 2 , 3 * 3] = [1, 4, 9]

In [55]:
tensor * tensor

tensor([1, 4, 9])

In [56]:
torch.matmul(tensor, tensor)

tensor(14)

Matrix multiplication:

        Multiplying every corresponding element and adding them together, 

[1 * 1 + 2 * 2 + 3 * 3] = [14] 

In [57]:
tensor @ tensor

tensor(14)

In [58]:
%%time
value = 0
for i in range(len(tensor)):
    value += tensor[i] * tensor[i]
value

CPU times: user 5.07 ms, sys: 241 µs, total: 5.31 ms
Wall time: 158 ms


tensor(14)

In [None]:
%%time

torch.matmul(tensor, tensor)

CPU times: user 65 µs, sys: 17 µs, total: 82 µs
Wall time: 93 µs


tensor(14)

Manual = 158 ms

Matmul = 93 µs

In [3]:
tensor_A = torch.tensor([[1,2],
                         [3,4],
                         [5,6]],dtype=torch.float32)
tensor_B = torch.tensor([[7, 10],
                         [8, 11], 
                         [9, 12]], dtype=torch.float32)
torch.matmul(tensor_A, tensor_B) # (this will error)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [64]:
tensor_A, tensor_B

(tensor([[1., 2.],
         [3., 4.],
         [5., 6.]]),
 tensor([[ 7., 10.],
         [ 8., 11.],
         [ 9., 12.]]))

In [66]:
tensor_B.T #Transpose

tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])

In [67]:
torch.matmul(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

In [68]:
# The operation works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output) 
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


In [70]:
torch.mm(tensor_A,tensor_B.T) #short for matmul

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

To visualise matrix mul : http://matrixmultiplication.xyz/

Matrix multiplication is also referred as `dot product`

In [77]:
torch.manual_seed(42)

linear = torch.nn.Linear(in_features=2, # in_features = matches inner dimension of input 
                         out_features=6) # out_features = describes outer value 
x = tensor_A
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")
# 2 is the inner of x, 6 is the outer of output

Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


`Matrice multiplicaion is all you need`

https://marksaroufim.substack.com/p/working-class-deep-learner

In [78]:
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [80]:
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
print(f"Mean: {x.type(torch.float32).mean()}") # won't work without float datatype
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [81]:
torch.max(x), torch.min(x), torch.mean(x.type(torch.float32)), torch.sum(x)

(tensor(90), tensor(0), tensor(45.), tensor(450))

In [82]:
tensor = torch.arange(10, 100, 10)
print(f"Tensor: {tensor}")

# Returns index of max and min values
print(f"Index where max value occurs: {tensor.argmax()}")
print(f"Index where min value occurs: {tensor.argmin()}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


In [87]:
tensor = torch.arange(10., 100., 10.)
tensor, tensor.dtype

(tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.]), torch.float32)

In [84]:
tensor_float16 = tensor.type(torch.float16)
tensor_float16

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

In [88]:
tensor_int8 = tensor.type(torch.int8)
tensor_int8

tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)

Syntax is torch.Tensor.type(dtype=None)

The lower the number (e.g. 32, 16, 8), the less precise a computer stores the value. And with a lower amount of storage, this generally results in faster computation and a smaller overall model.

In [93]:
x = torch.arange(1.,8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [94]:
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [95]:
z = x.view(1, 7)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [96]:
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

`Changing the view of a tensor with torch.view() changes the orginal tensor too`

In [97]:
x_stacked = torch.stack([x, x, x, x,], dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.]])

In [98]:
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimension from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


In [99]:
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

## Add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [100]:
x_original = torch.rand(size=(224, 224, 3))

# Permute the original tensor to rearrange the axis order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [103]:
x = torch.arange(1,10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [104]:
print(f"First square bracket:\n{x[0]}")
print(f"Second square bracket: {x[0][0]}")
print(f"Third square bracket: {x[0][0][0]}")

First square bracket:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket: tensor([1, 2, 3])
Third square bracket: 1


In [107]:
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [105]:
x[:, 0]

tensor([[1, 2, 3]])

In [117]:
x[:,:,2]

tensor([[3, 6, 9]])

In [118]:
x[0,0,:]

tensor([1, 2, 3])

In [121]:
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array).type(torch.float32)
array, tensor, tensor.dtype

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.]),
 torch.float32)

In [122]:
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [123]:
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [124]:
numpy_tensor += 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([2., 2., 2., 2., 2., 2., 2.], dtype=float32))

In [125]:
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.8016, 0.3649, 0.6286, 0.9663],
        [0.7687, 0.4566, 0.5745, 0.9200],
        [0.3230, 0.8613, 0.0919, 0.3102]])

Tensor B:
tensor([[0.9536, 0.6002, 0.0351, 0.6826],
        [0.3743, 0.5220, 0.1336, 0.9666],
        [0.9754, 0.8474, 0.8988, 0.1105]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [129]:
import random

torch.manual_seed(42)
random_tensor_C = torch.rand(3, 4)

# Have to reset the seed every time a new rand() is called
torch.manual_seed(42)
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D


Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [5]:
torch.cuda.device_count()
torch.cuda.is_available()

True

In [6]:
tensor = torch.tensor([1, 2, 3])
print(tensor, tensor.device)

tensor_on_gpu = tensor.to("cuda")
tensor_on_gpu.device

tensor([1, 2, 3]) cpu


device(type='cuda', index=0)

In [8]:
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()

In [9]:
tensor_on_gpu, tensor_back_on_cpu

(tensor([1, 2, 3], device='cuda:0'), array([1, 2, 3]))