In [1]:
import torch
torch.__version__

'2.3.1+cpu'

In [None]:
#Scalars (zero dimension tensor)
scalar = torch.tensor(7)
print(scalar)
print(scalar.ndim) #dimension

tensor(7)
0


In [5]:
#Get the python number within a tensor (for one-element tensors)
scalar.item() 

7

In [7]:
#Vectors
vector = torch.tensor([7,8])
print(vector)
print(vector.ndim)

tensor([7, 8])
1


In [9]:
#The shape of a vector tell how the elemts inside them are arranged. This has a shape of 2 (two entires) and is one dimentional
vector.shape

torch.Size([2])

In [16]:
#Matrix
matrix = torch.tensor([[7,8],
                       [9,10]])
print(matrix)
print('Dimesion:',matrix.ndim,'(because its a 2x2 matrix)')
print('Shape:',matrix.shape)

tensor([[ 7,  8],
        [ 9, 10]])
Dimesion: 2 (because its a 2x2 matrix)
Shape: torch.Size([2, 2])


In [23]:
#Tensors
tensor = torch.tensor([[[1,2,3],
                        [4,5,6],
                        [7,8,9]]])
print(tensor)
print('Dimesion:',tensor.ndim)
print('Shape:',tensor.shape,'(1 dimension of a 3x3 matrix)')

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
Dimesion: 3
Shape: torch.Size([1, 3, 3]) (1 dimension of a 3x3 matrix)


Machine learning model often starts out with large random tensors of numbers and adjusts these random numbers as it works through data to better represent it.

In essence:

Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers...

As a data scientist, you can define how the machine learning model starts (initialization), looks at data (representation) and updates (optimization) its random numbers

In [26]:
#Tensor of random numbers
random_tensor = torch.rand(size=(3,4))
random_tensor, random_tensor.dtype

(tensor([[0.0846, 0.5276, 0.5176, 0.1685],
         [0.7727, 0.1149, 0.3130, 0.0834],
         [0.7409, 0.5332, 0.1218, 0.0974]]),
 torch.float32)

Sometimes you'll just want to fill tensors with zeros or ones.

This happens a lot with masking (like masking some of the values in one tensor with zeros to let a model know not to learn them)

In [27]:
#Tensor of zeros
zeros = torch.zeros(size=(4,5))
zeros, zeros.dtype

(tensor([[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]),
 torch.float32)

In [28]:
#Also a tensor of ones
ones = torch.ones(size=(2,3))
ones, ones.dtype

(tensor([[1., 1., 1.],
         [1., 1., 1.]]),
 torch.float32)

Sometimes you might want a range of numbers, such as 1 to 10 or 0 to 100.

In [31]:
zero_to_ten = torch.arange(start=0,end=10, step=1)
zero_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [32]:
#Sometimes you might want one tensor of a certain type with the same shape as another tensor. For example, a tensor of all zeros with the same shape as a previous tensor.
ten_zeros = torch.zeros_like(input=zero_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [34]:
#The default datatype for tensors is float32
float_32_tensor = torch.tensor([3.0,6.0,9.0],
                            dtype=None,
                            device=None,
                            requires_grad=False) # if True, operations performed on the tensor are recorded 
float_32_tensor,float_32_tensor.dtype, float_32_tensor.device

(tensor([3., 6., 9.]), torch.float32, device(type='cpu'))

In deep learning, data (images, text, video, audio, protein structures, etc) gets represented as tensors.

A model learns by investigating those tensors and performing a series of operations (could be 1,000,000s+) on tensors to create a representation of the patterns in the input data.

In [49]:
#Tensor operations
tensor = torch.tensor([3,4,5])
tensor+10 

tensor([13, 14, 15])

In [50]:
#Note that the values inside the tensor don't change unless they're reassigned
tensor *10
print('The original tensors is unchanged:', tensor)

The original tensors is unchanged: tensor([3, 4, 5])


In [None]:
tensor = tensor - 10 #Subtract and reassign
tensor

tensor([-7, -6, -5])

In [None]:
tensor = tensor + 10 #Add and reassign
tensor


tensor([3, 4, 5])

In [55]:
#Multiplication can be done as:
print(torch.multiply(tensor,10))
#but the values are not reassigned:
tensor

tensor([30, 40, 50])


tensor([3, 4, 5])

In [57]:
#also
print(tensor*tensor)


tensor([ 9, 16, 25])


In [58]:
#Matrix multiplication (matmul)
tensormul = torch.tensor([1,2,3])
print('Element wise:', tensormul* tensormul)
print('matrix multiplication', torch.matmul(tensormul,tensormul))
print('same as using @', tensormul @ tensormul) 

Element wise: tensor([1, 4, 9])
matrix multiplication tensor(14)
same as using @ tensor(14)


In [67]:
%%time

# See the difference in computational time between manual and authomatic method
#Manually

value = 0 
for i in range(len(tensormul)):
    value  =+ tensormul[i] * tensormul[i]
value

CPU times: total: 0 ns
Wall time: 0 ns


tensor(9)

In [68]:
%%time
torch.matmul(tensormul,tensormul)

CPU times: total: 0 ns
Wall time: 0 ns


tensor(14)

One of the most common errors in deep learning (shape errors)

In [77]:
#Shapes need to be in the right way
tensor_A = torch.tensor([[1,2],
                        [4,5],
                        [7,8]], dtype=torch.float32)

tensor_B = torch.tensor([[7,5],
                         [8,4],
                         [4,8]], dtype=torch.float32)

tensor_A.shape, tensor_B.shape #cannot multiply (3x2)(3x2)

torch.matmul(tensor_A, tensor_B) #shape error

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [79]:
# To multiply A and B we need to use the transpose so taht (3x2)(2x3)
print(tensor_A,'\n')
print(tensor_B.T)

tensor([[1., 2.],
        [4., 5.],
        [7., 8.]]) 

tensor([[7., 8., 4.],
        [5., 4., 8.]])


In [81]:
#So now we can see:
print(torch.matmul(tensor_A, tensor_B.T)) #same with torch.mm(tensor_A,tensor_B)


tensor([[17., 16., 20.],
        [53., 52., 56.],
        [89., 88., 92.]])



Neural networks are full of matrix multiplications and dot products.

The torch.nn.Linear() module (we'll see this in action later on), also known as a feed-forward layer or fully connected layer, implements a matrix multiplication between an input x and a weights matrix A.

In [86]:
#Choose a seed for reproducibility
torch.manual_seed(42)
#Linear uses matrix multiplication
linear = torch.nn.Linear(in_features=2,
                         out_features=6)

x = tensor_A
output = linear(x)

print(f"Input shape of x: {x.shape}\n")
print(f"Output: \n {output}\n\nOutput shape:{output.shape}")

Input shape of x: torch.Size([3, 2])

Output: 
 tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [5.6194, 2.6809, 0.4347, 0.5995, 0.4447, 3.2247],
        [9.0019, 4.1326, 0.3980, 0.8125, 0.7585, 5.4656]],
       grad_fn=<AddmmBackward0>)

Output shape:torch.Size([3, 6])


Finding the min, max, mean, sum, etc (aggregation)

In [87]:
#Create a tensor starting from 0 to 100 with 10 steps
x = torch.arange(0,100,10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [93]:
#Some aggregation
print(f"Minimum: {x.min()}")
print(f"Minimum: {x.max()}")
print(f"Mean: {x.type(torch.float32).mean()}") #x.mean won't work
print(f"Sum: {x.sum()}")

print('\n Same as:\n')
torch.min(x), torch.max(x), torch.mean(x.type(torch.float32)), torch.sum(x)

Minimum: 0
Minimum: 90
Mean: 45.0
Sum: 450

 Same as:



(tensor(0), tensor(90), tensor(45.), tensor(450))

In [97]:
#It is useful to find the index of such (or any) value
tens = torch.arange(0,100,10)
print(f"Tensoor: {tens}")

#Return the index of min and max
print(f"Index of min value: {tens.argmin()}")
print(f"Index of max value: {tens.argmax()}")

Tensoor: tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
Index of min value: 0
Index of max value: 9


Change tensor datatype:
A common issue with deep learning operations is having your tensors in different datatypes.
Mobile-based neural networks often operate with 8-bit integers, smaller and faster to run but less accurate than their float32 counterparts

In [99]:
tensor32 = torch.arange(10.,100.,10.)
tensor32.dtype

torch.float32

In [100]:
tensor16 = tensor32.type(torch.float16)
tensor16.dtype

torch.float16

In [102]:
tensor8 = tensor32.type(torch.int8)
tensor8.dtype

torch.int8

Reshaping, stacking, squeezing and unsqueezing: Reshape or change the dimensions of your tensors without actually changing the values inside them.

In [103]:
#Create a tensor
x = torch.arange(1.,8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [113]:
#Add an extra dimension
x_reshaped = x.reshape(1,7)
x_reshaped, x_reshaped.shape    

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [121]:
#Change the view
z = x.view(1,7)
z, z.shape

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [122]:
#But changing the view changes the original tensor
z[:, 0] = 5
z,x

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

In [123]:
#We can stack tensosrs on top
x_stacked = torch.stack([x,x,x,x], dim=0) #0 along x and 1 along y
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.]])

In [125]:
#Remove all single dimensions
x_squeezed = x_reshaped.squeeze()
x_squeezed.shape

torch.Size([7])

In [127]:
#Can also unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
x_unsqueezed.shape

torch.Size([1, 7])

In [None]:
#Rearrange the order of axes values
x_original = torch.rand(size=(224,224,3))

x_permuted = x_original.permute(2,0,1) #Return a view
x_original.shape, x_permuted.shape

(torch.Size([224, 224, 3]), torch.Size([3, 224, 224]))