<a href="https://colab.research.google.com/github/SarwarSaif/Learn-PyTorch/blob/main/PyTorch_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Tensors
Similar to Numpy

In [None]:
import torch

In [None]:
# Create empty tensors
w = torch.empty(3) # empty 1D Tensor
x = torch.empty(3, 2) # empty 2D Tensor
y = torch.empty(3, 2, 4) # empty 3D Tensor
z = torch.empty(3, 2, 4, 4) # empty 1D Tensor
#print(z)

In [None]:
# Create tensors with random values
x = torch.rand(2, 2)
print(x)
print(x.dtype)
# Change type of the tensor
x = torch.rand(2,2, dtype=torch.double)
print(x)
# Create tensors with arrays
x = torch.tensor([2.5, 0.1])

tensor([[0.2950, 0.8424],
        [0.1169, 0.5482]])
torch.float32
tensor([[0.5086, 0.1574],
        [0.9693, 0.4536]], dtype=torch.float64)


In [None]:
x = torch.rand(2,2)
y = torch.rand(2,2)
z = x + y # Element wise add
z = torch.add(x,y) # Same as Element wise
print(z)

# Inplace addition
y.add_(x) # Modify only y
# N.B: Every function with a trail sign '_' does an inplace operation
print(y)

tensor([[0.4582, 1.3041],
        [0.3599, 0.2333]])
tensor([[0.4582, 1.3041],
        [0.3599, 0.2333]])


In [None]:
# Slicing operations on tensors
x = torch.rand(5,3)
print(x[1,2:4])
# To get item or value from a single tensor
print(x[1,1].item())

tensor([0.2339])
0.1390628218650818


In [None]:
# Reshape the Tensors
x = torch.rand(5, 4)
print(x)
y = x.view(20)
print(y)

tensor([[0.9900, 0.8861, 0.4973, 0.3721],
        [0.6714, 0.8887, 0.6617, 0.1334],
        [0.9683, 0.7950, 0.6091, 0.9559],
        [0.9372, 0.8076, 0.4903, 0.1536],
        [0.2611, 0.6432, 0.6865, 0.3693]])
tensor([0.9900, 0.8861, 0.4973, 0.3721, 0.6714, 0.8887, 0.6617, 0.1334, 0.9683,
        0.7950, 0.6091, 0.9559, 0.9372, 0.8076, 0.4903, 0.1536, 0.2611, 0.6432,
        0.6865, 0.3693])


In [None]:

# Cnvert tensor to a numpy array
x = torch.ones(5) # <class 'torch.Tensor'>
print(x, type(x)) 
y = x.numpy() # <class 'numpy.ndarray'>
print(y, type(y))
# N.B: If the tensor and numpy array is in running in the CPU than they will have same memory location. So changing x will change b and vice-versa

tensor([1., 1., 1., 1., 1.]) <class 'torch.Tensor'>
[1. 1. 1. 1. 1.] <class 'numpy.ndarray'>


In [None]:
# Convert numpy array to tensor
import numpy as np
x = np.ones([2, 3])
print(x)
y = torch.from_numpy(x)
print(y)


[[1. 1. 1.]
 [1. 1. 1.]]
tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


In [None]:
# Use CUDA if available
if torch.cuda.is_available():
  print("Yes CUDA is available")
  device = torch.device("cuda")
  print(device)
  x = torch.ones(5, device=device) # Move x to the GPU [Way: 1]
  y = torch.ones(5)
  y = y.to(device) # Move y to the GPU [Way: 2]
  z = x + y
  print(z) 
  # N.B: now the GPU tensor can not be transformed into numpy using .numpy() method as numpy doesn't work on GPU and can only be used when using CPU
  z = z.to("cpu")


Yes CUDA is available
cuda
tensor([2., 2., 2., 2., 2.], device='cuda:0')


In [None]:
# When  you want to Optimize a varibale in future using gradient then you need to specify "requires_grad=True" which is by default False
x = torch.ones(5, requires_grad=True)
print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


# Calculate Gradients using Autograd package in PyTorch

In [None]:
import torch
x = torch.randn(3, requires_grad=True)
print(x)
y = x+2
print(y)
z = y.mean()
z.backward()
print(x.grad)
# The Autograd function uses Jacobian Product to calculate the gradient
# if the value is not scalar, we can use a vector to produce a scalar value and then can calculate the gradient
v = torch.tensor([0.1, 1.0, 0.2], dtype=torch.float)
y.backward(v)
print(x.grad)

tensor([-1.0381,  0.0724, -0.5242], requires_grad=True)
tensor([0.9619, 2.0724, 1.4758], grad_fn=<AddBackward0>)
tensor([0.3333, 0.3333, 0.3333])
tensor([0.4333, 1.3333, 0.5333])


In [None]:
# To stop pytorch from calculating Grad_fn function or tracking the history, wed can suse the following
# x.requires_grad_(False)
# x.detach()
# with torch.no_grad():
x = torch.randn(3, requires_grad=True)
print(x)

with torch.no_grad():
  y = x + 2
  print(y)

z = x + 3
z.detach_()
print(z)
x.requires_grad_(False)
print(x)



tensor([-0.8296,  0.1619,  1.2709], requires_grad=True)
tensor([1.1704, 2.1619, 3.2709])
tensor([2.1704, 3.1619, 4.2709])
tensor([-0.8296,  0.1619,  1.2709])


In [None]:
# Dummy training example
weights = torch.ones(4, requires_grad=True)

for epoch in range(4):
  model_output = (weights*3).sum()

  model_output.backward()

  print(weights.grad)

  # Before moving to next iteration we must empty our gradient
  weights.grad.zero_()


tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


# Backpropagation

In [None]:
import torch
weights = torch.ones(4, requires_grad=True)
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

# froward pass and compute the loss
y_hat = w * x
loss = (y_hat - y) ** 2
print(loss)

# backward pass
loss.backward()
print(w.grad)

# Update weights
# Next forward and backward pass


tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


# Gradient Descent using AutoGrad and Backpropagation
> Steps used in 1st Process:
> * Prediction: Manually
> * Gradients Computation: Manually
> * Loss Computation: Manually
> * Parameter updates: Manually

In [1]:
import numpy as np

# f = w * x # Neglect Bias Here
# f = 2 * x # Goal
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0

# model prediction
def forward(x: int) -> np.float32:
  return w * x

# loss = MSE (Mean Squarred Error)
def loss(y:np.float32, y_predicted:np.float32) -> np.float32:
  return ((y_predicted-y)**2).mean()

# gradient 
# MSE = 1/N * (w*x - y)**2 
# dJ/dw = 1/N * 2*x * (w*x -y) 
def gradient(x:int, y:np.float32, y_predicted:np.float32) -> np.float32:
  return np.dot(2*x, (y_predicted-y)).mean()

print(f'Prediction before training: f(5) = {forward(5): .3f}')

# Training
learning_rate = 0.01
n_iters = 10

for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = forward(X)

  # loss
  l = loss(Y, y_pred)

  # gradients
  dw = gradient(X,Y,y_pred)

  # update weights
  w -= learning_rate * dw
  if epoch % 1 == 0:
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5): .3f}')



Prediction before training: f(5) =  0.000
epoch 1: w = 1.200, loss = 30.00000000
epoch 2: w = 1.680, loss = 4.79999924
epoch 3: w = 1.872, loss = 0.76800019
epoch 4: w = 1.949, loss = 0.12288000
epoch 5: w = 1.980, loss = 0.01966083
epoch 6: w = 1.992, loss = 0.00314574
epoch 7: w = 1.997, loss = 0.00050331
epoch 8: w = 1.999, loss = 0.00008053
epoch 9: w = 1.999, loss = 0.00001288
epoch 10: w = 2.000, loss = 0.00000206
Prediction after training: f(5) =  9.999


> Steps used in 2nd Process:
> * Prediction: Manually
> * Gradients Computation: Autograd
> * Loss Computation: Manually
> * Parameter updates: Manually

In [2]:
import torch

# f = w * x # Neglect Bias Here
# f = 2 * x # Goal
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model prediction
def forward(x: int) -> np.float32:
  return w * x

# loss = MSE (Mean Squarred Error)
def loss(y:np.float32, y_predicted:np.float32) -> np.float32:
  return ((y_predicted-y)**2).mean()

# gradient is replaced by autograd 

print(f'Prediction before training: f(5) = {forward(5): .3f}')

# Training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = forward(X)

  # loss
  l = loss(Y, y_pred)

  # gradients = backward_pass
  l.backward() # Calculate the gradient dl/dw

  # update weights
  with torch.no_grad(): # Grad history tracking is not required here
    dw = w.grad
    w -= learning_rate * dw

  # zero gradients // Or the Gradients will be summed in the next iteration
  w.grad.zero_()

  if epoch % 2 == 0:
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5): .3f}')



Prediction before training: f(5) =  0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 3: w = 0.772, loss = 15.66018772
epoch 5: w = 1.113, loss = 8.17471695
epoch 7: w = 1.359, loss = 4.26725292
epoch 9: w = 1.537, loss = 2.22753215
epoch 11: w = 1.665, loss = 1.16278565
epoch 13: w = 1.758, loss = 0.60698116
epoch 15: w = 1.825, loss = 0.31684780
epoch 17: w = 1.874, loss = 0.16539653
epoch 19: w = 1.909, loss = 0.08633806
Prediction after training: f(5) =  9.612


> Steps used in 3rd Process:
> * Prediction: Manually
> * Gradients Computation: Autograd
> * Loss Computation: PyTorch Loss
> * Parameter updates: PyTorch Optimizer

In [4]:
# 1 ) Design model (input, output size, forward pass)
# 2 ) Construct loss and optimizer
# 3 ) Training loop
#     - forward pass: compute prediction
#     - backward pass: gradients
#     - update weights
import torch
import torch.nn as nn

# f = w * x # Neglect Bias Here
# f = 2 * x # Goal
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model prediction
def forward(x: int) -> torch.float32:
  return w * x


# gradient is replaced by autograd 

print(f'Prediction before training: f(5) = {forward(5): .3f}')

# Training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = forward(X)

  # loss
  l = loss(Y, y_pred)

  # gradients = backward_pass
  l.backward() # Calculate the gradient dl/dw

  # update weights
  optimizer.step()

  # zero gradients // Or the Gradients will be summed in the next iteration
  optimizer.zero_grad()

  if epoch % 10 == 0:
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5): .3f}')


Prediction before training: f(5) =  0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) =  10.000


> Steps used in 4th Process:
> * Prediction: PyTorch Model
> * Gradients Computation: Autograd
> * Loss Computation: PyTorch Loss
> * Parameter updates: PyTorch Optimizer

In [19]:
# 1 ) Design model (input, output size, forward pass)
# 2 ) Construct loss and optimizer
# 3 ) Training loop
#     - forward pass: compute prediction
#     - backward pass: gradients
#     - update weights
import torch
import torch.nn as nn

# f = w * x # Neglect Bias Here
# f = 2 * x # Goal
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)
n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
output_size = n_features
# model prediction
model = nn.Linear(input_size, output_size)


# gradient is replaced by autograd 

print(f'Prediction before training: f(5) = {model(X_test).item(): .3f}')

# Training
learning_rate = 0.01
n_iters = 1000

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = model(X)

  # loss
  l = loss(Y, y_pred)

  # gradients = backward_pass
  l.backward() # Calculate the gradient dl/dw

  # update weights
  optimizer.step()

  # zero gradients // Or the Gradients will be summed in the next iteration
  optimizer.zero_grad()

  if epoch % 10 == 0:
    [w, b] = model.parameters()
    #print(w)
    print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {model(X_test).item(): .3f}')


4 1
Prediction before training: f(5) = -4.417
Parameter containing:
tensor([[-0.2634]], requires_grad=True)
epoch 1: w = -0.263, loss = 67.48521423
Parameter containing:
tensor([[1.6186]], requires_grad=True)
epoch 11: w = 1.619, loss = 1.74661422
Parameter containing:
tensor([[1.9218]], requires_grad=True)
epoch 21: w = 1.922, loss = 0.04576396
Parameter containing:
tensor([[1.9711]], requires_grad=True)
epoch 31: w = 1.971, loss = 0.00172560
Parameter containing:
tensor([[1.9795]], requires_grad=True)
epoch 41: w = 1.979, loss = 0.00055470
Parameter containing:
tensor([[1.9813]], requires_grad=True)
epoch 51: w = 1.981, loss = 0.00049472
Parameter containing:
tensor([[1.9821]], requires_grad=True)
epoch 61: w = 1.982, loss = 0.00046521
Parameter containing:
tensor([[1.9826]], requires_grad=True)
epoch 71: w = 1.983, loss = 0.00043811
Parameter containing:
tensor([[1.9831]], requires_grad=True)
epoch 81: w = 1.983, loss = 0.00041261
Parameter containing:
tensor([[1.9836]], requires_gr

## Custom Linear Regression Model in Step 4

In [28]:
# 1 ) Design model (input, output size, forward pass)
# 2 ) Construct loss and optimizer
# 3 ) Training loop
#     - forward pass: compute prediction
#     - backward pass: gradients
#     - update weights
import torch
import torch.nn as nn

# f = w * x # Neglect Bias Here
# f = 2 * x # Goal
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)
n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
output_size = n_features
# model prediction
# Turn this "model = nn.Linear(input_size, output_size)"  into a custom class 
class LinearRegression(nn.Module):

  def __init__(self, input_dim, output_dim):
    super(LinearRegression, self).__init__()
    # define layers
    self.lin = nn.Linear(input_dim, output_dim)

  def forward(self, x):
    return self.lin(x)

model = LinearRegression(input_size, output_size)

print(f'Prediction before training: f(5) = {model(X_test).item(): .3f}')

# Training
learning_rate = 0.1
n_iters = 400

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = model(X)

  # loss
  l = loss(Y, y_pred)

  # gradients = backward_pass
  l.backward() # Calculate the gradient dl/dw

  # update weights
  optimizer.step()

  # zero gradients // Or the Gradients will be summed in the next iteration
  optimizer.zero_grad()

  if epoch % 50 == 0:
    [w, b] = model.parameters()
    #print(w)
    print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {model(X_test).item(): .3f}')


4 1
Prediction before training: f(5) = -2.031
epoch 1: w = 3.005, loss = 41.11968231
epoch 51: w = 1.913, loss = 0.01163712
epoch 101: w = 1.981, loss = 0.00055687
epoch 151: w = 1.996, loss = 0.00002665
epoch 201: w = 1.999, loss = 0.00000127
epoch 251: w = 2.000, loss = 0.00000006
epoch 301: w = 2.000, loss = 0.00000000
epoch 351: w = 2.000, loss = 0.00000000
Prediction after training: f(5) =  10.000
