In [168]:
# import libraries
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
import os
import matplotlib.pyplot as plt
from time import perf_counter

### PyTorch is an open source deep learning framework built to be flexible framework ###

- Most pytorch codes are imperative that tell what and tell how. Imperative program performs computation at runtime
- Imperative programs are more flexiblee because of python
- Dynamic computation graph are built and rebuilt as necessary at runtime.


## Using Torch backend ##

In [169]:
root = './MNIST/raw'
if not os.path.exists(root):
    os.mkdir(root)

The torchvision dataset images are in the range [0,1], and each channel of the tensor images needs to be normalized inthe range [-1,1] using mean and standard deviation 

In [None]:
# PyTorch's DataLoader contain a few interesting options other than the dataset and batch size. For example we could use num_workers > 1 to use subprocesses to asynchronously load data or using pinned RAM
#DataLoader combines a dataset and a sampler to provides an iterable over MNIST dataset.
#train set is an instance of  MNIST in this calss that also lives inside torchvision package

 For dataloader we are using dataloader constructor and passing train set along with batch size. Batch_size, which denotes the number of samples contained in each generated batch
num_workers, which denotes the number of processes that generate batches in parallel. 

In [170]:

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20

#To pass our data into our PyTorch models we need to convert it to a PyTorch Dataset. A Tensor Dataset in this case.
# convert data to torch.FloatTensor
transform = transforms.ToTensor()

# choose the training and test datasets
train_data = datasets.MNIST(root=root, train=True,
                                   download=True, transform=transform)
test_data = datasets.MNIST(root=root, train=False,
                                  download=True, transform=transform)

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
    num_workers=num_workers)

## Build the Model using PyTorch ##

Here we are inheriting from nn.Module. Combined with super().init() this creates a class that tracks the architecture and provides a lot of useful methods and attributes. 
It is mandatory to inherit from nn.Module when you are creating a class for your network
 The module automatically creates the weight and bias tensors which we will use in the forward method


In [173]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        #input Image -28 * 28 * 1               
        self.conv1=nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        #self.conv1 = nn.Conv2d(1, 32, 5)
        self.pool=nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2=nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)
        self.conv3=nn.Conv2d(in_channels=32,out_channels= 64, kernel_size=3)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1=nn.Linear(in_features=1*1*64, out_features=120)
        self.fc2=nn.Linear(in_features=120, out_features=84)
        self.fc3=nn.Linear(in_features=84, out_features=10)
        
    def forward(self,x):
        x=self.pool(F.relu(self.conv1(x)))
        x=self.pool(F.relu(self.conv2(x)))
        x=self.pool(F.relu(self.conv3(x))) 

        x = x.view(-1, 64 * 1 * 1)

        x = self.dropout(F.relu(self.fc1(x)))
        x=F.relu(self.fc2(x))
        x=self.fc3(x)
        return x
       
net=Net()
net

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout): Dropout(p=0.2, inplace=False)
  (fc1): Linear(in_features=64, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

## Define the Loss function and Optimizer ## 

 Calculate the loss between the true label and the predicted label from the neural network

In [174]:
import torch.optim as optim

criterion= nn.CrossEntropyLoss()
optimizer=optim.Adam(net.parameters(), lr=0.001)


## Train the Model ##

 Perform gradient calculations using backward() and then update the weights.

In [183]:
t1_start = perf_counter()
for epoch in range(2):
    running_loss=0.0
    for i, data in enumerate(train_loader,0):
        images, labels=data
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward 
        outputs= net(images)
        #calculate the loss between the target and the actuals
        loss= criterion(outputs, labels)
        #Gradient calculation uisng backward pass
        loss.backward()
        # update the weights
        optimizer.step()
#calculate loss
        running_loss+=loss.item()
        if i%1875==1874:
            print('epoch %d - image count %5d - Loss %.3f' % (epoch +1, i+1, running_loss/1875))      
            running_loss = 0.0
t1_end = perf_counter()
print("Time for training using PyTorch %f" %(t1_end-t1_start))

epoch 1 - image count  1875 - Loss 0.049
epoch 2 - image count  1875 - Loss 0.036
Time for training using PyTorch 47.722747


In [184]:
torch.save(net.state_dict(), 'mnist_pyt.pt')

## Evaluate the Model ##

In [185]:
correct=0
total=0
t1_start=perf_counter()

with torch.no_grad():
    
    '''Disabling gradient calculation is useful for inference, when you are sure that you will not call Tensor.backward(). 
    It will reduce memory consumption for computations that would otherwise have requires_grad=True.'''
for  data in test_loader:
        images. lables=data
        outputs= net(images)
        _,pred= torch.max(outputs,1)
        total+=labels.size(0)
        correct += (pred == labels).sum().item()
    
t1_end=perf_counter()
print("Eval accuracy using PyTorch is %.2f and execution time %.2f seconds" %((100 * (correct / total)), (t1_end-t1_start)))

Eval accuracy using PyTorch is 99.98 and execution time 2.48 seconds
