In [2]:
import torch
import numpy as np

In [3]:
#With random or constant values:
#shape is a tuple of tensor dimensions. In the functions below, it determines the dimensionality of the output tensor.

shape = (2,3,)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.8725, 0.8792, 0.9792],
        [0.9985, 0.2746, 0.9548]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [4]:
#Tensor attributes 
tensor = torch.rand(3,4)
print(tensor.shape)
print(tensor.dtype)
print(tensor.device)

torch.Size([3, 4])
torch.float32
cpu


In [5]:
#Tensor Operations
# https://pytorch.org/docs/stable/torch.html

#Standard numpy-like indexing and slicing:
tensor = torch.ones(4, 4)
tensor[:,1] = 0
print(tensor)

#Join tensor
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])
tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


In [6]:
#Multiplying tensors
# This computes the element-wise product
print(f"tensor.mul(tensor) \n {tensor.mul(tensor)} \n")
# Alternative syntax:
print(f"tensor * tensor \n {tensor * tensor}")

tensor.mul(tensor) 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor * tensor 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [7]:
#This computes the matrix multiplication between two tensors

print(f"tensor.matmul(tensor.T) \n {tensor.matmul(tensor.T)} \n")
# Alternative syntax:
print(f"tensor @ tensor.T \n {tensor @ tensor.T}")

tensor.matmul(tensor.T) 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]]) 

tensor @ tensor.T 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])


TORCH.AUTOGRAD

torch.autograd tracks operations on all tensors which have their requires_grad flag set to True. For tensors that don’t require gradients, setting this attribute to False excludes it from the gradient computation DAG.

The output tensor of an operation will require gradients even if only a single input tensor has requires_grad=True.

In [8]:
'''TORCH.AUTOGRAD
torch.autograd is PyTorch’s automatic differentiation engine that powers neural 
network training. In this section, you will get a conceptual understanding of 
how autograd helps a neural network train. '''

a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)


$$
Q = 3a^3 - b^2
$$

In [9]:
Q = 3*a**3 - b**2
external_grad = torch.tensor([1., 1.])
Q.backward(gradient=external_grad)

#Gradients are now deposited in a.grad and b.grad
# check if collected gradients are correct
print(9*a**2 == a.grad)
print(-2*b == b.grad)

tensor([True, True])
tensor([True, True])


Neural Network

Neural networks can be constructed using the torch.nn package.

*Now that you had a glimpse of autograd, nn depends on autograd to define models and differentiate them. An nn.Module contains layers, and a method forward(input) that returns the output.*


A typical training procedure for a neural network is as follows:

- Define the neural network that has some learnable parameters (or weights)
- Iterate over a dataset of inputs
- Process input through the network
- Compute the loss (how far is the output from being correct)
- Propagate gradients back into the network’s parameters
- Update the weights of the network, typically using a simple update rule: weight = weight - learning_rate * gradient

---

Let’s understand PyTorch through a more practical lens. Learning theory is good, but it isn’t much use if you don’t put it into practice!

A PyTorch implementation of a neural network looks exactly like a NumPy implementation. The goal of this section is to showcase the equivalent nature of PyTorch and NumPy. For this purpose, let’s create a simple three-layered network having 5 nodes in the input layer, 3 in the hidden layer, and 1 in the output layer. We will use only one training example with one row which has five features and one target.

In [10]:
#Basic Neural net
n_input , n_hidden , n_output = 5 , 3 , 1

The first step is to do parameter initialization. Here, the weights and bias parameters for each layer are initialized as the tensor variables. Tensors are the base data structures of PyTorch which are used for building different types of neural networks. They can be considered as the generalization of arrays and matrices; in other words, tensors are N-dimensional matrices

In [11]:
## initialize tensor for inputs, and outputs 
x = torch.randn((1, n_input))
y = torch.randn((1, n_output))
print(x)
print(y)
## initialize tensor variables for weights 
w1 = torch.randn(n_input, n_hidden) # weight for hidden layer
w2 = torch.randn(n_hidden, n_output) # weight for output layer
print(w1)
print(w2)
## initialize tensor variables for bias terms 
b1 = torch.randn((1, n_hidden)) # bias for hidden layer
b2 = torch.randn((1, n_output)) # bias for output layer
print(b1)
print(b2)

tensor([[-0.0110,  0.4247,  0.6480,  3.1905, -0.1734]])
tensor([[-0.7587]])
tensor([[-2.5228,  0.4819,  0.0272],
        [ 0.6909, -1.7849, -0.2146],
        [ 0.7728,  1.4393,  1.0957],
        [ 0.4781,  1.6440,  0.8901],
        [-0.4864, -0.1480, -1.3468]])
tensor([[ 1.2191],
        [-0.7388],
        [ 2.2757]])
tensor([[-0.1492, -2.5270,  0.5076]])
tensor([[2.2772]])


After the parameter initialization step, a neural network can be defined and trained in four key steps:

- Forward Propagation
- Loss computation
- Backpropagation
- Updating the parameters

Let’s see each of these steps in a bit more detail.

***Forward Propagation:*** In this step, activations are calculated at every layer using the two steps shown below. These activations flow in the forward direction from the input layer to the output layer in order to generate the final output.

1. *z = weight * input + bias*
2. *a = activation_function (z)*

The following code blocks show how we can write these steps in PyTorch. Notice that most of the functions, such as exponential and matrix multiplication, are similar to the ones in NumPy.

In [19]:
#Sigmoid function using pytorch

def sigmoid_activation(z):
    return 1 / (1 + torch.exp(-z))

In [39]:
#activation of hidden layers
#https://pytorch.org/docs/stable/generated/torch.mm.html
z1 = torch.mm(x , w1) + b1
a1 = sigmoid_activation(z1)

print(z1)
print(a1)


tensor([[2.2795, 2.9123, 4.1975]])
tensor([[0.9072, 0.9485, 0.9852]])


In [40]:
#Activation for out put layer
z2 = torch.mm(a1 , w2) + b2
output = sigmoid_activation(z2)

print(z2)
print(output)

tensor([[4.9210]])
tensor([[0.9928]])



**Loss Computation:** In this step, the error (also called loss) is calculated in the output layer. A simple loss function can tell the difference between the actual value and the predicted value. Later, we will look at different loss functions available in PyTorch

In [41]:
loss = y - output
print(loss)

tensor([[-1.7514]])


**Backpropagation**: The aim of this step is to minimize the error in the output layer by making marginal changes in the bias and the weights. These marginal changes are computed using the derivatives of the error term.

Based on the Calculus principle of the Chain rule, the delta changes are back passed to hidden layers where corresponding changes in their weights and bias are made. This leads to an adjustment in the weights and bias until the error is minimized.

In [31]:
#Function to calculate the derivative
def sigmoid_delta(x):
  return x*(1-x)

In [42]:
#Computes derivative of error terms
delta_output = sigmoid_delta(output)
delta_hidden =  sigmoid_delta(a1)

print(delta_output)
print(delta_hidden)

tensor([[0.0072]])
tensor([[0.0842, 0.0489, 0.0146]])


In [43]:
##backpass the changes to previous layers
d_outp = loss * delta_output
loss_h = torch.mm(d_outp, w2.t())
d_hidn = loss_h*delta_hidden

print(d_outp)
print(loss_h)
print(d_hidn)

tensor([[-0.0126]])
tensor([[-0.0153,  0.0093, -0.0286]])
tensor([[-0.0013,  0.0005, -0.0004]])


Updating the Parameters: Finally, the weights and bias are updated using the delta changes received from the above backpropagation step.



In [34]:
learning_rate = 0.1

In [44]:
w2 += torch.mm(a1.t(), d_outp) * learning_rate
w1 += torch.mm(x.t(), d_hidn) * learning_rate

print(w2)
print(w1)

tensor([[ 1.2168],
        [-0.7412],
        [ 2.2732]])
tensor([[-2.5228,  0.4819,  0.0272],
        [ 0.6907, -1.7848, -0.2147],
        [ 0.7726,  1.4393,  1.0956],
        [ 0.4772,  1.6443,  0.8898],
        [-0.4864, -0.1480, -1.3468]])


In [46]:
b2 +=d_outp.sum()*learning_rate
b1 +=d_hidn.sum()*learning_rate
print(b2)
print(b1)

tensor([[2.2760]])
tensor([[-0.1521, -2.5299,  0.5047]])


Finally, when these steps are executed for a number of epochs with a large number of training examples, the loss is reduced to a minimum value. The final weight and bias values are obtained which can then be used to make predictions on the unseen data

**ConvNet**

In [73]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    #1 inpuut image channel , 6 output,3x3 square Convolution kernel
    self.conv1 = nn.Conv2d(1,6,3)
    self.conv2 = nn.Conv2d(6,16,3)

    self.fc1 = nn.Linear(16 * 6 * 6 , 120)
    self.fc2 = nn.Linear(120 , 84)
    self.fc3 = nn.Linear(84 , 10)

    def forward(self,x):
      #Max pooling over (2,2) window
      x = F.max_pool2d(F.relu(self.conv1(x)),(2,2))
      x = F.max_pool2d(F.relu(self.conv2(x)),2)
      x = x.view(-1 , self.num_flat_features(x))
      x = F.relu(self.fc1(x))
      x = F.relu(self.fc2(x))
      x = self.fc3(x)
      return x
    
    def num_flat_features(self ,x):
      size = x.size()[1:]
      num_features = 1
      for s in size:
        num_features *= s
      return num_features

net = Net()
print(net)


Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [50]:
params = list(net.parameters())
print(len(params))
print(params[0].size())

10
torch.Size([6, 1, 3, 3])


Let’s try a random 32x32 input. Note: expected input size of this net (LeNet) is 32x32. To use this net on the MNIST dataset, please resize the images from the dataset to 32x32.

---

- torch.Tensor - A multi-dimensional array with support for autograd operations like backward(). Also holds the gradient w.r.t. the tensor.
- nn.Module - Neural network module. Convenient way of encapsulating parameters, with helpers for moving them to GPU, exporting, loading, etc.
- nn.Parameter - A kind of Tensor, that is automatically registered as a parameter when assigned as an attribute to a Module.
- autograd.Function - Implements forward and backward definitions of an autograd operation. Every Tensor operation creates at least a single Function node that connects to functions that created a Tensor and encodes its history.


In [None]:
'''
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss
'''