<a href="https://colab.research.google.com/github/Sourish168/Python/blob/main/PyTorch/PyTorch_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Introduction and Installation
- From https://pytorch.org/ we can download the required package of PyTorch in our machine(Linux/Mac/Windows).
- If gpu is available then the cuda version is preferable for NVIDIA GPUs only.
- In local mechine we have to create a virtual environment and install the appropiate version of python.
- In Google Colab we have to install PyTorch by "!pip install torch" command.
- By "torch.cuda.is_available()" we can check if the cuda is there or not.

In [1]:
!pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-

In [2]:
import torch
torch.cuda.is_available() # False means there is no cuda in the machine

False

## Tensor Basics
- In NumPy we have used vectors, matrices, tensors. In PyTorch everything starts with tensors i.e. 1D, 2D, nD arrays.

In [3]:
import torch
x = torch.empty(1) # Scalar
print(x)

tensor([2.8665e+32])


In [4]:
x = torch.empty(2, 3, 4) # 3D array
print(x)

tensor([[[ 5.7344e-30,  4.4355e-41,  5.7344e-30,  4.4355e-41],
         [        nan,  0.0000e+00,  1.8728e+31,  1.4153e-43],
         [ 3.2892e+14,  4.4354e-41,  0.0000e+00,  0.0000e+00]],

        [[ 8.9683e-44,  0.0000e+00,  2.6905e-43,  0.0000e+00],
         [-6.3723e+09,  4.4354e-41,  0.0000e+00,  1.4013e-45],
         [ 9.2327e-26,  3.2695e-41,  0.0000e+00,  0.0000e+00]]])


In [5]:
x = torch.zeros(2, 2, 3) # 3D array of zeros
print(x)

tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])


In [8]:
x = torch.ones(2, 3, dtype = torch.float16) # 2D array of ones
print(x)
print("\n", x.dtype) # Prints the datatype of x
print("\n", x.size()) # Prints the size of x
print("\n", x.shape) # Prints the shape of x

tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float16)

 torch.float16

 torch.Size([2, 3])

 torch.Size([2, 3])


In [9]:
x = torch.tensor([2.5, 0.1, 3.6]) # Creates a tensor from a list
print(x)

tensor([2.5000, 0.1000, 3.6000])


In [13]:
x = torch.rand(2, 2) # 2D array of random values
y = torch.rand(2, 2)
print("x =\n", x)
print("\ny =\n", y)
print("\nx+y =\n", x+y) # Addition
print("\ntorch.add(x, y) =\n", torch.add(x, y)) # Addition
print("\nx-y =\n", x-y) # Subtraction
print("\ntorch.sub(x, y) =\n", torch.sub(x, y)) # Subtraction
print("\nx*y =\n", x*y) # Multiplication
print("\ntorch.mul(x, y) =\n", torch.mul(x, y)) # Multiplication
print("\nx/y =\n", x/y) # Division
print("\ntorch.div(x, y) =\n", torch.div(x, y)) # Division

x =
 tensor([[0.7870, 0.4952],
        [0.1715, 0.4954]])

y =
 tensor([[0.5973, 0.2423],
        [0.5274, 0.0733]])

x+y =
 tensor([[1.3843, 0.7376],
        [0.6989, 0.5687]])

torch.add(x, y) =
 tensor([[1.3843, 0.7376],
        [0.6989, 0.5687]])

x-y =
 tensor([[ 0.1896,  0.2529],
        [-0.3559,  0.4221]])

torch.sub(x, y) =
 tensor([[ 0.1896,  0.2529],
        [-0.3559,  0.4221]])

x*y =
 tensor([[0.4701, 0.1200],
        [0.0904, 0.0363]])

torch.mul(x, y) =
 tensor([[0.4701, 0.1200],
        [0.0904, 0.0363]])

x/y =
 tensor([[1.3175, 2.0436],
        [0.3251, 6.7598]])

torch.div(x, y) =
 tensor([[1.3175, 2.0436],
        [0.3251, 6.7598]])


In [15]:
x = torch.rand(2, 2)
y = torch.rand(2, 2)
print("x =\n", x)
print("\ny =\n", y)
print("\nx+y =\n", x+y) # Addition
y.add_(x) # Inplace addition
print("\nNow, y =\n", y)

x =
 tensor([[0.6547, 0.3169],
        [0.1159, 0.8991]])

y =
 tensor([[0.6328, 0.8502],
        [0.6011, 0.8069]])

x+y =
 tensor([[1.2875, 1.1671],
        [0.7170, 1.7060]])

Now, y =
 tensor([[1.2875, 1.1671],
        [0.7170, 1.7060]])


In [16]:
# Slicing of a tensor in PyTorch
x = torch.rand(5, 4)
print(x)
print("\n", x[:, 0]) # Prints the first column
print("\n", x[1, :]) # Prints the second row

tensor([[0.2758, 0.0263, 0.4842, 0.3037],
        [0.9383, 0.4854, 0.5855, 0.4944],
        [0.5045, 0.8050, 0.6152, 0.1146],
        [0.6972, 0.2365, 0.1701, 0.5679],
        [0.1362, 0.5831, 0.3701, 0.5951]])

 tensor([0.2758, 0.9383, 0.5045, 0.6972, 0.1362])

 tensor([0.9383, 0.4854, 0.5855, 0.4944])


In [20]:
# Reshaping of a tensor in PyTorch
x = torch.rand(4, 4)
print("x =", x)
y = x.view(2, 8) # Reshapes the tensor to 2D of shape (2, 8)
print("\ny =", y)
z = x.view(16) # Reshapes the tensor to 1D of shape (16)
print("\nz =", z)

x = tensor([[0.9847, 0.5975, 0.7559, 0.1402],
        [0.5898, 0.6594, 0.3962, 0.7136],
        [0.9404, 0.0475, 0.6655, 0.4802],
        [0.1402, 0.3459, 0.9550, 0.7598]])

y = tensor([[0.9847, 0.5975, 0.7559, 0.1402, 0.5898, 0.6594, 0.3962, 0.7136],
        [0.9404, 0.0475, 0.6655, 0.4802, 0.1402, 0.3459, 0.9550, 0.7598]])

z = tensor([0.9847, 0.5975, 0.7559, 0.1402, 0.5898, 0.6594, 0.3962, 0.7136, 0.9404,
        0.0475, 0.6655, 0.4802, 0.1402, 0.3459, 0.9550, 0.7598])


In [21]:
x = torch.rand(4, 4)
print("x =", x)
y = x.view(-1, 8) # Reshapes the tensor to 2D of shape (2, 8), Negative value by default choose the proper dimension
print("\ny =", y)

x = tensor([[0.0124, 0.9629, 0.6070, 0.7891],
        [0.5116, 0.8248, 0.1646, 0.9194],
        [0.5410, 0.4994, 0.0865, 0.5310],
        [0.9660, 0.4909, 0.0985, 0.1097]])

y = tensor([[0.0124, 0.9629, 0.6070, 0.7891, 0.5116, 0.8248, 0.1646, 0.9194],
        [0.5410, 0.4994, 0.0865, 0.5310, 0.9660, 0.4909, 0.0985, 0.1097]])


In [22]:
# If there is no GPU memory then PyTorch and NumPy shares the same memory location thus the output changes
import numpy as np
a = torch.ones(5)
print("a =", a)
b = a.numpy() # Creates a numpy array from a tensor
print("\nb =", b)
print("\nType of a =", type(a))
print("\nType of b =", type(b))
a.add_(1)
print("\nNow, a =", a)
print("\nNow, b =", b)

a = tensor([1., 1., 1., 1., 1.])

b = [1. 1. 1. 1. 1.]

Type of a = <class 'torch.Tensor'>

Type of b = <class 'numpy.ndarray'>

Now, a = tensor([2., 2., 2., 2., 2.])

Now, b = [2. 2. 2. 2. 2.]


In [23]:
import numpy as np
a = np.ones(5)
print("a =", a)
b = torch.from_numpy(a) # Creates a tensor from a numpy array
print("\nb =", b)
print("\nType of a =", type(a))
print("\nType of b =", type(b))

a = [1. 1. 1. 1. 1.]

b = tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

Type of a = <class 'numpy.ndarray'>

Type of b = <class 'torch.Tensor'>


In [27]:
# Since in this case, torch.cuda.is_available() = False
if torch.cuda.is_available():
  device = torch.device("cuda") # Creates a device object
  print(device)
  x = torch.ones(5, device = device) # Creates a tensor on the GPU
  y = torch.ones(5)
  y = y.to(device) # Moves the tensor to the GPU
  z = x + y
  print(z)
  z = z.to("cpu") # Moves the tensor to the CPU
  print(z)
else:
  print("No GPU available")

No GPU available


In [29]:
x = torch.ones(5, requires_grad = True) # Creates a tensor with requires_grad = True
print(x) # Later for optimization steps the tensor needs to calculate the gradient

tensor([1., 1., 1., 1., 1.], requires_grad=True)


## Gradient Calculation with Autograd in PyTorch
- In Deep Learning, gradient calculation is a very crutial step for optimization.

In [31]:
import torch
x = torch.randn(3, requires_grad = True) # Creates a tensor with requires_grad = True
print("x =", x)
y = x + 2 # Creates a tensor with requires_grad = False
print("\ny =", y)
z = y*y*2 # Creates a tensor with requires_grad = False
print("\nz =", z)
p = z.mean() # Creates a tensor with requires_grad = False
print("\np =", p)
p.backward() # Calculates the gradient of p with respect to x i.e. dp/dx
print("\nx.grad =", x.grad)

x = tensor([0.5580, 0.4624, 0.7128], requires_grad=True)

y = tensor([2.5580, 2.4624, 2.7128], grad_fn=<AddBackward0>)

z = tensor([13.0869, 12.1266, 14.7182], grad_fn=<MulBackward0>)

p = tensor(13.3106, grad_fn=<MeanBackward0>)

x.grad = tensor([3.4107, 3.2832, 3.6170])


In [39]:
x = torch.randn(3, requires_grad = False) # Creates a tensor with requires_grad = True
print("x =", x)
y = x + 2 # Creates a tensor with requires_grad = False
print("\ny =", y)
z = y*y*2 # Creates a tensor with requires_grad = False
print("\nz =", z)
p = z.mean() # Creates a tensor with requires_grad = False
print("\np =", p)
p.backward() # Calculates the gradient of p with respect to x i.e. dp/dx
print("\nx.grad =", x.grad) # Since requires_grad = False

x = tensor([-1.5262,  0.2790,  1.0887])

y = tensor([0.4738, 2.2790, 3.0887])

z = tensor([ 0.4490, 10.3881, 19.0807])

p = tensor(9.9726)


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [41]:
x = torch.randn(3, requires_grad = True) # Creates a tensor with requires_grad = True
print("x =", x)
y = x + 2 # Creates a tensor with requires_grad = False
print("\ny =", y)
z = y*y*2 # Creates a tensor with requires_grad = False
print("\nz =", z)
v = torch.tensor([0.1, 10.0, 0.001], dtype = torch.float32)
z.backward(v) # Calculates the gradient of p with respect to x i.e. dp/dx
print("\nx.grad =", x.grad) # Vector Jacobian Product in the background

x = tensor([-0.0687, -0.8293, -0.0447], requires_grad=True)

y = tensor([1.9313, 1.1707, 1.9553], grad_fn=<AddBackward0>)

z = tensor([7.4598, 2.7409, 7.6466], grad_fn=<MulBackward0>)

x.grad = tensor([7.7252e-01, 4.6826e+01, 7.8213e-03])


In [44]:
# Preventing gradient history
x = torch.randn(3, requires_grad = True) # Creates a tensor with requires_grad = True
print("x =", x)
x.requires_grad_(False) # Prevents gradient history
print("\nx =", x)
print("\n", 50*"-")
x = torch.randn(3, requires_grad = True) # Creates a tensor with requires_grad = True
print("\nx =", x)
y = x.detach() # Detaches the gradient history
print("\ny =", y)
print("\n", 50*"-")
x = torch.randn(3, requires_grad = True) # Creates a tensor with requires_grad = True
print("\nx =", x)
with torch.no_grad(): # Prevents gradient history
  y = x + 2
  print("\ny =", y)

x = tensor([ 0.1956, -0.0141, -0.1218], requires_grad=True)

x = tensor([ 0.1956, -0.0141, -0.1218])

 --------------------------------------------------

x = tensor([0.6713, 0.0724, 0.5710], requires_grad=True)

y = tensor([0.6713, 0.0724, 0.5710])

 --------------------------------------------------

x = tensor([ 0.7072, -0.6595, -0.1999], requires_grad=True)

y = tensor([2.7072, 1.3405, 1.8001])


In [51]:
# Dummy Trainning Example
weight = torch.ones(4, requires_grad = True)
print("x =", weight, "\n")
for epoch in range(3):
  model_output = (weight*3).sum() # Dummy model output
  print(model_output)
  model_output.backward()
  print(weight.grad)
  weight.grad.zero_() # Resets the gradient to zero, optimization step

x = tensor([1., 1., 1., 1.], requires_grad=True) 

tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])


In [55]:
# Optimization Steps
weight = torch.ones(4, requires_grad=True)
print("x =", weight)
optimizer = torch.optim.SGD([weight], lr=0.01)  # Pass [weight] as a single-element list, Stochastic Gradient Descent
optimizer.step()  # Optimization step
optimizer.zero_grad()  # Resets the gradient to zero, optimization step
print("\nOptimizer =", optimizer)

x = tensor([1., 1., 1., 1.], requires_grad=True)

Optimizer = SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)


## Backpropagation in PyTorch
-

In [None]:
import torch
