In [92]:
import torch
torch.__version__

'2.2.1+cpu'

In [93]:
# Scalar: a single number wrapped in a tensor
scalar = torch.tensor(7)
scalar

tensor(7)

In [94]:
# .ndim gives the number of dimensions of a tensor (a scalar has none, so this is 0)
scalar.ndim

0

In [95]:
# item() pulls the value out of a 0-dimensional tensor as a plain Python number
scalar.item()

7

In [96]:
# Vector: a tensor with a single dimension, built from a flat list
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [97]:
# A vector has exactly one dimension, so ndim should be 1
vector.ndim

1

In [98]:
# .shape is an attribute (not a method call) holding the size of each dimension;
# here there is one dimension with 2 elements
vector.shape

torch.Size([2])

In [99]:
# Matrix: a tensor built from a nested (two-level) list, one inner list per row
MATRIX = torch.tensor([
    [7, 8],
    [9, 10],
])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [100]:
# Matrices are as flexible as vectors, they just have one extra dimension.
# Checking the dimensionality again: it should now be 2.
MATRIX.ndim

2

In [101]:
# The matrix has 2 rows and 2 columns, so its shape is [2, 2]
MATRIX.shape

torch.Size([2, 2])

In [102]:
# A 3-dimensional tensor: one 3x3 block of numbers
# (tensors are general containers and can represent almost anything)
TENSOR = torch.tensor([
    [
        [1, 2, 3],
        [3, 6, 9],
        [2, 4, 5],
    ],
])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [103]:
# Three levels of nested brackets means three dimensions
TENSOR.ndim

3

In [104]:
# The shape lists the dimensions from the outermost bracket to the innermost.
# There is 1 block containing 3 rows of 3 elements, so the shape is [1, 3, 3].
TENSOR.shape

torch.Size([1, 3, 3])

To summarize, a tensor is an n-dimensional array of numbers: a 0-dimensional tensor is a scalar, a 1-dimensional tensor is a vector, and a 2-dimensional tensor is a matrix.

In [105]:
# A 3x4 tensor filled with random values; show it together with its datatype
random_tensor = torch.rand(3, 4)
random_tensor, random_tensor.dtype

(tensor([[0.1592, 0.5585, 0.7851, 0.0329],
         [0.9231, 0.9967, 0.0342, 0.6473],
         [0.4587, 0.1659, 0.7269, 0.3543]]),
 torch.float32)

In [106]:
# torch.rand() accepts any size, so the shape can be whatever we need.
# For example, a random tensor with the common image shape [224, 224, 3]:
random_image_size_tensor = torch.rand(224, 224, 3)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [107]:
# torch.zeros() creates a tensor filled with zeros.
# Zero tensors are handy for masking, i.e. marking values that should be ignored.
zeros = torch.zeros(3, 4)
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [108]:
# torch.ones() works the same way, but fills the tensor with ones
ones = torch.ones(3, 4)
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

In [109]:
# torch.arange(start, end, step) creates a range of values; end is exclusive.
# For example, the numbers 0 up to (but not including) 10:
zero_to_ten = torch.arange(0, 10, 1)
zero_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [110]:
# torch.zeros_like() builds a zero tensor with the same shape as an existing tensor
ten_zeros = torch.zeros_like(zero_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [111]:
# Tensors can be given a specific datatype through the dtype argument.
# Floating-point data defaults to float32.
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # None: infer the datatype from the data (float32 for these floats)
    device=None,          # None: use the default device (CPU in this run)
    requires_grad=False,  # if True, operations on this tensor are recorded
)
float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

Common mistakes are shape issues (tensor shapes that don't match up).
Other common mistakes are datatype and device issues (tensors with different datatypes, or stored on different devices).

In [112]:
# A half-precision tensor: request float16 explicitly via dtype
float_16_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=torch.float16,  # torch.half can be used instead
)
float_16_tensor.dtype

torch.float16

It is important to know how to get information from tensors.
Common attributes are:
1. shape - what shape is the tensor?
2. dtype - what datatype are the elements within the tensor stored in?
3. device - what device is the tensor stored on? (usually GPU or CPU)

In [113]:
# Try the three attributes out on a random 3x4 tensor
some_tensor = torch.rand(size=(3, 4))

# Report its contents, shape, datatype and device
print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}")  # CPU unless another device is requested

tensor([[0.0499, 0.6257, 0.0403, 0.9301],
        [0.9557, 0.3143, 0.0426, 0.0733],
        [0.9959, 0.0860, 0.3763, 0.8411]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [114]:
# Basic tensor operations: addition, subtraction and multiplication.
tensor = torch.tensor([1, 2, 3])
# Adding a number adds it to every element
tensor + 10

tensor([11, 12, 13])

In [115]:
# Multiplying by a number multiplies every element
tensor * 10

tensor([10, 20, 30])

In [116]:
# The operations above returned new tensors; tensor itself is unchanged
# because the results were never assigned back to it
tensor

tensor([1, 2, 3])

In [117]:
# Subtract 10 and assign the result back to the same name, so tensor now changes
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [118]:
# Add 10 and reassign, which brings tensor back to its original values
tensor = tensor + 10
tensor

tensor([1, 2, 3])

In [119]:
# PyTorch also has built-in functions for the basic operations.
# torch.mul() multiplies (same result as tensor * 10)
torch.mul(tensor, 10)

tensor([10, 20, 30])

In [120]:
# torch.add() adds (same result as tensor + 10)
torch.add(tensor, 10)

tensor([11, 12, 13])

In [121]:
# Neither function call modified the original tensor
tensor

tensor([1, 2, 3])

In [122]:
# The operator symbols are more commonly used than the function forms.
# Multiplying two tensors with * works element by element:
print(tensor, "*", tensor)
print("Equals:", tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


Matrix multiplication is very common in ML and DL algorithms.

We can use torch.matmul() or simply use the @ operator.

Two rules to watch out for: the inner dimensions must match, and the resulting matrix has the shape of the outer dimensions. For example, (3, 2) @ (2, 3) works and yields a (3, 3) matrix, whereas (3, 2) @ (3, 2) fails.

In [123]:
# Create a tensor to compare element-wise and matrix multiplication on:
tensor = torch.tensor([1, 2, 3])
tensor.shape

torch.Size([3])

In [124]:
# Element-wise multiplication: each element is multiplied by its counterpart
tensor * tensor

tensor([1, 4, 9])

In [125]:
# Matrix multiplication: for two 1-dimensional tensors this is the dot product (1*1 + 2*2 + 3*3 = 14)
torch.matmul(tensor, tensor) # tensor @ tensor gives the same result

tensor(14)

In [126]:
# %%timeit
# ^ Uncomment the cell magic above to measure the average run time.
# Baseline for comparison: the same dot product written as a Python loop.
# torch.matmul() is much faster, so prefer it over a loop like this.
value = 0
for element in tensor:
    value += element * element
value

27.6 µs ± 838 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [127]:
# %%timeit
# ^ Uncomment the cell magic above to time the @ operator.
tensor @ tensor

1.91 µs ± 10.2 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [128]:
# %%timeit
# ^ Uncomment the cell magic above to time torch.matmul().
torch.matmul(tensor, tensor)

1.91 µs ± 41.4 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [None]:
# One of the most common errors is a shape mismatch.
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

# Both tensors have shape (3, 2): the inner dimensions (2 and 3) don't match,
# so the matrix multiplication raises an error. Catch it and show the message,
# so that the demonstration doesn't stop the rest of the notebook from running.
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as shape_error:
    print(f"Matrix multiplication failed: {shape_error}")

We can make this matrix multiplication work by transposing (switching the dimensions of a given tensor).

We can do this either with torch.transpose(input, dim0, dim1), where input is the desired tensor and dim0 and dim1 are the dimensions to be swapped. 

Or we can use tensor.T, which gives the transposed version of tensor.


In [130]:
# Print tensor_A and tensor_B: both have 3 rows and 2 columns
print(tensor_A)
print(tensor_B)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])


In [131]:
# Print tensor_A next to the transpose of tensor_B (.T swaps rows and columns: 3x2 becomes 2x3)
print(tensor_A)
print(tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [136]:
# The earlier matrix multiplication failed on a shape mismatch;
# with tensor_B transposed the inner dimensions match and it works.
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")

# (3, 2) @ (2, 3) -> (3, 3)
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")
print("\nDimensions from resulting matrix are the outer dimensions from the original tensors")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])

Dimensions from resulting matrix are the outer dimensions from the original tensors


In [137]:
# torch.mm() also performs matrix multiplication and gives the same result as torch.matmul() here.
# Note that torch.mm() only accepts 2-dimensional matrices and does not broadcast.
torch.mm(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

The torch.nn.Linear() module, also known as a feed-forward layer or fully connected layer, implements a matrix multiplication between an input x and a weights matrix A, plus a bias term b: $y = xA^T + b$.

In [144]:
# Set the seed first so the layer created below is reproducible.
torch.manual_seed(42)
# Create a fully connected (linear) layer
linear = torch.nn.Linear(in_features=2, # must match the size of the input's last dimension (tensor_A is 3x2)
                         out_features=8) # size of the output's last dimension (the output is 3x8)
x = tensor_A
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")
# Matrix multiplication is literally everywhere :)

Input shape: torch.Size([3, 2])

Output:
tensor([[ 2.2595,  1.2380, -0.1997,  0.6665, -0.7400,  0.7964,  0.4267,  0.6104],
        [ 4.5145,  2.2058, -0.2241,  0.8086, -0.5308,  2.2903,  1.6631,  1.0926],
        [ 6.7696,  3.1736, -0.2486,  0.9506, -0.3216,  3.7842,  2.8995,  1.5748]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 8])


In [145]:
# After manipulating tensors, let's aggregate them (going from more values to fewer).
# Start from the multiples of 10 from 0 up to 90:
x = torch.arange(start=0, end=100, step=10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [146]:
# Compute the min, max, mean and sum of x
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# mean() needs a floating-point tensor: calling x.mean() directly on this
# integer tensor raises an error, so convert to float32 first.
print(f"Mean: {x.float().mean()}")
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [147]:
# The same aggregations are available as torch.* functions (mean again needs a float tensor):
torch.min(x), torch.max(x), torch.mean(x.type(torch.float32)), torch.sum(x)

(tensor(0), tensor(90), tensor(45.), tensor(450))

In [149]:
# argmax() and argmin() return the index (position) of the largest and
# smallest value, rather than the value itself.
tensor = torch.arange(10, 100, 10)
max_index = tensor.argmax()
min_index = tensor.argmin()
print(f"Tensor: {tensor}")
print(f"index of maximum: {max_index}")
print(f"index of minimum: {min_index}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
index of minimum: 8
index of minimum: 0


In [150]:
# Operations can run into problems when tensors have different datatypes.
# torch.Tensor.type(dtype=None) converts a tensor, where dtype is the datatype you'd like to use.
# Start from a tensor of floats, which is float32 by default:
tensor = torch.arange(10.0, 100.0, 10.0)
tensor.dtype

torch.float32

In [151]:
# Create a float16 version of the float32 tensor
tensor_float16 = tensor.type(torch.float16)
tensor_float16

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

In [153]:
# Or create an int8 version (the values are now integers)
tensor_int8 = tensor.type(torch.int8)
tensor_int8

tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)

Different datatypes might be a bit confusing, but the number in the name (e.g., 32, 16, 8) is the number of bits used to store each value. More bits means more precision, so a 32-bit float is more precise than a 16-bit or 8-bit float.

Let's now look at some ways of reshaping and manipulating tensors. 

In [163]:
# Create a tensor with the values 1 to 7 to reshape and manipulate
x = torch.arange(1.0, 8.0)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [164]:
# reshape() can add an extra dimension: the 7 elements become a 1 x 7 tensor
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [165]:
# Tensor.view() keeps the same underlying tensor but presents it with a different shape
z = x.view(1, 7) # x.view(1, 1, 7) would work too
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [166]:
# z is a view of x, so writing through z changes x as well
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

In [172]:
# torch.stack() joins tensors along a new dimension; here x is stacked on itself four times
x_stacked = torch.stack([x] * 4, dim=0)  # experiment with the dim argument
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.]])

In [174]:
# torch.squeeze() removes all dimensions of size 1
# (literally squeezing the extra dimensions out of it :)
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Drop the size-1 dimension from x_reshaped: shape [1, 7] becomes [7]
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


tensor([5., 2., 3., 4., 5., 6., 7.])

In [175]:
# torch.unsqueeze() adds a dimension of size 1 at the given index
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add a new leading dimension (dim=0): shape [7] becomes [1, 7]
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")


Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [177]:
# torch.permute(input, dims) rearranges the order of the axes.
# The result is a view of the input with the new axis order.
x_original = torch.rand(224, 224, 3)

# (2, 1, 0) reverses the axis order: [224, 224, 3] becomes [3, 224, 224]
x_permuted = x_original.permute(2, 1, 0)

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])
