# Introduction to Pytorch

### Creating tensors

In [1]:
import torch

# scalars, vectors and matrices

In [2]:
# Four tensors of increasing rank: 0-D scalar, 1-D vector, 2-D matrix, 3-D tensor.
scalar = torch.tensor(3.14159)
vector = torch.tensor([1, 2, 3])
matrix = torch.tensor([[1, 2, 3],
                       [4, 5, 6]])
tensor = torch.tensor([[[1, 2, 3]]])
# One print call with a newline separator shows each tensor on its own line(s).
print(scalar, vector, matrix, tensor, sep="\n")

tensor(3.1416)
tensor([1, 2, 3])
tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[[1, 2, 3]]])


Check the number of dimensions of a tensor with ndim. A quick trick: the number of dimensions equals the number of opening square brackets at the start of the printed tensor

In [3]:
# ndim is the number of dimensions (the rank) of each tensor, printed one per line.
print(*(t.ndim for t in (scalar, vector, matrix, tensor)), sep="\n")

0
1
2
3


In [4]:
# shape lists the size of every dimension as a torch.Size, printed one per line.
print("\n".join(str(t.shape) for t in (scalar, vector, matrix, tensor)))

torch.Size([])
torch.Size([3])
torch.Size([2, 3])
torch.Size([1, 1, 3])


get the float/int value from the tensor

In [5]:
# .item() pulls the single value out of the tensor as a plain Python number;
# the extra digits in the output come from the value having been stored as float32
scalar.item()

3.141590118408203

Random Tensors

In [6]:
# A 3x4 tensor of random values; the sizes are passed positionally here.
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.5561, 0.4331, 0.9004, 0.3433],
        [0.3908, 0.8312, 0.3254, 0.2894],
        [0.5037, 0.1795, 0.6925, 0.5698]])

We will usually instantiate our models with random tensors and have them be updated by gradient descent or another optimization algorithm. However, we might also want to start with all ones or all zeros.

In [7]:
# Constant-filled 3x4 tensors: all ones and all zeros.
ones_tensor, zeros_tensor = torch.ones(3, 4), torch.zeros(3, 4)

also worth noting that the default type in pytorch is float32

In [8]:
# ones_tensor was created without an explicit dtype, so this displays the default one
ones_tensor.dtype

torch.float32

We use arange instead of the range function in pytorch

In [9]:
# With a single argument arange starts at 0 and stops before the given end value.
zero_to_ten = torch.arange(10)
zero_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

To create a tensor with the same shape as another tensor (useful for masking) we can use ones_like or zeros_like

In [10]:
# zeros_like builds a zero-filled tensor that mirrors the shape and dtype of zero_to_ten
ten_zeros = torch.zeros_like(zero_to_ten)

torch.cuda is the PyTorch package that provides GPU (CUDA) support; a tensor's device attribute tells you whether it lives on the CPU or on a GPU

Common attributes we may want to access are the dtype, device and shape

In [11]:
# Where the tensor lives, what element type it holds, and its size per dimension.
print(ten_zeros.device, ten_zeros.dtype, ten_zeros.shape, sep="\n")

cpu
torch.int64
torch.Size([10])


# Basic Tensor Operations

Neural networks are really just a bunch of interconnected matrices/vectors with some mathematical expression connecting them. We can add, subtract, multiply (element-wise & matrix multiplication), and divide tensors. You can use the regular Python operators or the equivalent built-in torch functions (torch.add, torch.sub, torch.mul, torch.div).

In [12]:
tensor = torch.tensor([1, 2, 3])
tensor_2 = torch.tensor([4, 5, 6])
# Each Python operator has a torch function that does the same thing.
print(
    tensor + 10,  # == torch.add(tensor, 10)
    tensor - 10,  # == torch.sub(tensor, 10)
    tensor * 10,  # == torch.mul(tensor, 10)
    tensor / 10,  # == torch.div(tensor, 10); the result is a float tensor
    sep="\n",
)

tensor([11, 12, 13])
tensor([-9, -8, -7])
tensor([10, 20, 30])
tensor([0.1000, 0.2000, 0.3000])


Matrix Multiplication is one of the most important concepts in Neural networks. Some basic rules of Matrix multiplication are:
1) The inner dimensions of the two matrices must match. 

        (3,2) @ (3,2) will not work
        (3,2) @ (2,3) will work
2) The resulting matrix has the shape of the outer dimensions of the two matrices

        (3,2) @ (2,3) will result in a matrix with dimensions (3,3)

Keep in mind that matrix multiplication and element-wise matrix multiplication will return different results on the same matrices 

In [13]:
# Element-wise product keeps the shape; the matrix product of two 1-D tensors
# collapses to a single number (the dot product).
print(tensor * tensor, tensor @ tensor, sep="\n")

tensor([1, 4, 9])
tensor(14)


One of the most common errors in deep learning is trying to multiply 2 matrices that cannot be multiplied, and one of the easiest fixes is to transpose one of the matrices

In [14]:
tensor = torch.tensor([[1, 2, 3],
                       [4, 5, 6],
                       [3, 2, 1],
                       [7, 8, 9]])
# tensor @ tensor would throw an error: (4, 3) @ (4, 3) has mismatched inner dimensions
print(tensor.shape, tensor.T.shape, sep="\n")
# .T is an attribute (not a method call) that gives the transpose,
# which makes the inner dimensions line up: (4, 3) @ (3, 4)
mul = tensor @ tensor.T

torch.Size([4, 3])
torch.Size([3, 4])


Matrix multiplication is one of the most important topics to understand neural networks

We can use some aggregate functions to find out information about our tensors

In [15]:
# Smallest and largest values in the tensor.
print(mul.min(), mul.max(), sep="\n")
# argmin and argmax give the position of those values, counted over the
# flattened tensor (e.g. 15 is the last element of a 4x4 tensor).
print(mul.argmin(), mul.argmax(), sep="\n")

tensor(10)
tensor(194)
tensor(2)
tensor(15)


There are a lot of useful methods to reshape tensors so they can be manipulated in other ways

In [19]:
#reshape takes an input tensor and a new shape and returns a new tensor with the new shape
torch.reshape(tensor, (3, 4))
# stack concatenates a sequence of tensors along a new dimension
torch.stack([tensor, tensor])
# squeeze removes dimensions of size 1, so shape (1, 1, 3) becomes (3,)
torch.squeeze(torch.tensor([[[1, 2, 3]]]))
# Only the last expression of the cell is displayed; the reshape and stack results above are discarded.
# There are a couple of other methods that are useful for manipulating tensors, such as torch.cat, torch.chunk, torch.split, and torch.unbind

tensor([1, 2, 3])

The main use for these methods is to make tensors compatible for matrix multiplication for use in deep learning models

# Tensor Indexing

Tensor indexing works pretty much the same as numpy array and list indexing with a couple differences

In [23]:
#To get all elements of one dimension we can use : and , to separate dimensions
tensor = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
# ":" keeps every row; ":2" keeps only the first two columns
tensor[:, :2]

tensor([[1, 2],
        [4, 5],
        [7, 8]])

Pytorch has built in functionality to convert tensors to and from numpy arrays

In [24]:
import numpy as np

use from_numpy to convert numpy arrays to tensors, and .numpy() to convert from tensors to numpy arrays

In [25]:
array = np.array([1, 2, 3])
# NumPy array -> tensor; the tensor's dtype follows the array's dtype
tensor = torch.from_numpy(array)
tensor.dtype

torch.int64

In [26]:
tensor = torch.tensor([1, 2, 3])
# tensor -> NumPy array; the dtype carries over in this direction too
array = tensor.numpy()
array.dtype

dtype('int64')

### Randomness

We can use a manual seed to create reproducible random numbers for testing in pytorch using the manual seed function

In [28]:
import random  # NOTE(review): not used in this cell; kept in case other cells rely on it

# Seed PyTorch's random number generator so the "random" values are reproducible
RANDOM_SEED = 65  # try changing this to different values and see what happens to the numbers below
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

# Re-seed before the second draw: every rand() call advances the generator,
# so without this tensor D would be different from tensor C
torch.manual_seed(RANDOM_SEED)  # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
# Plain string: there is nothing to interpolate, so no f-prefix is needed
print("Does Tensor C equal Tensor D? (anywhere)")
# Element-wise comparison: one boolean per position
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.9731, 0.5162, 0.0391, 0.1524],
        [0.5041, 0.2316, 0.1970, 0.5104],
        [0.6644, 0.0129, 0.7153, 0.3509]])

Tensor D:
tensor([[0.9731, 0.5162, 0.0391, 0.1524],
        [0.5041, 0.2316, 0.1970, 0.5104],
        [0.6644, 0.0129, 0.7153, 0.3509]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

# Running Pytorch on a GPU

Deep learning models take a long time to run, so  we usually want to use a GPU to run them. We can either use a virtual machine (google colab, AWS, etc...) or our own local GPU

In [30]:
#this command will show you if you have a GPU available, I have a GTX 1050 on my laptop with 4gb of VRAM
!nvidia-smi

Thu Feb 15 10:08:12 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.112                Driver Version: 537.42       CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1050        On  | 00000000:01:00.0 Off |                  N/A |
| N/A   33C    P8              N/A / ERR! |      0MiB /  4096MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [32]:
#check if we can run on our GPU
# True means PyTorch can use a CUDA GPU; False means we stay on the CPU
torch.cuda.is_available()

True

In [35]:
#set our device to the GPU if it is available
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
torch.cuda.device_count()  # evaluated mid-cell, so the count is not displayed
# .to(device) gives us the tensor on whichever device was picked above
tensor = torch.tensor([1, 2, 3]).to(device)
tensor.device
# when a GPU was found, our tensor is now on it!

device(type='cuda', index=0)

Other libraries like numpy and sklearn don't support running on the GPU, so we can use the .cpu() method to move our tensors back to the CPU before handing them to those libraries

In [37]:
#make a copy of our tensor that can run on the CPU
# .cpu() hands back a CPU version of the tensor; `tensor` itself stays where it was
tensor_cpu = tensor.cpu()
print(tensor.device, tensor_cpu.device, sep="\n")

cuda:0
cpu


# Exercises

In [40]:
# A random (7, 7) matrix times the transpose of a random (1, 7) row gives a (7, 1) column.
tensor = torch.rand(7, 7)
tensor_2 = torch.rand(1, 7)
tensor @ tensor_2.T

tensor([[2.0175],
        [2.3386],
        [1.9782],
        [1.6978],
        [1.9161],
        [2.0249],
        [1.9303]])

In [43]:
RANDOM_SEED = 0
# Seeding first makes both draws (and therefore the product) reproducible.
torch.manual_seed(RANDOM_SEED)
# Draw order matters: the (7, 7) matrix comes first, then the (1, 7) row.
tensor, tensor_2 = torch.rand(7, 7), torch.rand(1, 7)
tensor @ tensor_2.T

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

In [48]:
# Seed once, then draw two (2, 3) tensors; each is created with torch.rand and
# afterwards moved to `device` with .to().
torch.manual_seed(1234)
tensor = torch.rand(2, 3).to(device)
tensor_2 = torch.rand(2, 3).to(device)
# (2, 3) @ (3, 2) -> (2, 2); the transpose makes the inner dimensions match
tensor @ tensor_2.T

tensor([[0.3647, 0.4709],
        [0.5184, 0.5617]], device='cuda:0')