# Architecture 

In [47]:
import random
import torch
import torch.nn
import torch.optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import sys
import time
#
# Note: You must go to Runtime -> Change Runtime Type -> Select GPU for GPU acceleration
if torch.cuda.is_available():
  print("GPU acceleration available!")
  device = torch.device('cuda')
else:
  print("Using CPU - no GPU acceleration available")
  # Set default tensor type to CPU
  device = torch.device('cpu')
# Neural Network Model
# MNIST images are 1 channel 28x28 images
class MNIST_CNN( torch.nn.Module ):
  # Constructor
  def __init__(self):
    # Invoke constructor of nn.Model
    super(MNIST_CNN, self).__init__()


    # Initialize layers of network
    self.conv1	  = torch.nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
    self.bn1	  = torch.nn.BatchNorm2d(16)
    self.maxpool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
    

    ### Task 1: Create conv2 that the number of input channel is 16, output channel is 24, kernel size is 3, stide is 1 and padding is 1.
    ###

    ###
    self.bn2	  = torch.nn.BatchNorm2d(24)
    self.maxpool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2)


    self.conv3	  = torch.nn.Conv2d(in_channels=24, out_channels=32, kernel_size=3, stride=1, padding=1)
    ### Task 2: Create a batch normalization layer, please figure out what the num_features should be.
    ###

    ###

    self.conv4	  = torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
    self.bn4	  = torch.nn.BatchNorm2d(32)

    self.conv5	  = torch.nn.Conv2d(in_channels=32, out_channels=48, kernel_size=3, stride=1, padding=1)
    self.bn5	  = torch.nn.BatchNorm2d(48)

    # Final Output Layers
    # Flatten 1x28x28 to 1x768
    # This is done in the forward(...) function
    # Create a fully connected layer followed by softmax to interpet the output of the neural network as confidences for the classes
    self.fully_connected  = torch.nn.Linear(in_features=4*4*48, out_features=10)  # tensor of length 10
    self.softmax		  = torch.nn.LogSoftmax(dim=1) # tensor of length 10, but entries are interpreted as probabilities

  def predict(self, input):
    # Do a forward pass to get a 1x10 tensor of class predcitions 
    x = self.forward( input )
    # The final layer is LogSoftmax(...), so just use argmax to get the prediction
    x = torch.argmax( x, dim=1 )
    return x

  def forward(self, input):
    x = self.conv1( input )
    x = self.bn1( x )
    x = torch.nn.functional.relu6( x )
    x = self.maxpool1( x )

    x = self.conv2( x )
    x = self.bn2( x )
    x = torch.nn.functional.relu6( x )
    x = self.maxpool2( x )

    x = self.conv3( x )
    x = self.bn3( x )
    x = torch.nn.functional.relu6( x )

    x = self.conv4( x )
    x = self.bn4( x )
    x = torch.nn.functional.relu6( x )

    x = self.conv5( x )
    x = self.bn5( x )
    x = torch.nn.functional.relu6( x )

    ### Automatically infer batch size
    x = x.view(-1, 4*4*48 )
    x = self.fully_connected( x )
    x = self.softmax( x )
    return x

GPU acceleration available!


# Dummy Input and Inference Timing

In [48]:
#
# Helper functions for inference timing
#
def StartInferenceTiming( device : torch.device ):
    return time.time_ns()

def StopInferenceTiming( start_time_ns : int, device : torch.device, ):
    if device == torch.device('cuda'):
        ### Synchronize before recording time if we are using GPU
        torch.cuda.current_stream(device).synchronize()
    ### Get duration
    duration = time.time_ns() - start_time_ns
    ### Return
    return duration

def TorchDeviceToText( device : torch.device ):
    return 'CPU' if device == torch.device('cpu') else 'GPU'

In [49]:
# Create an instance of our model
model = MNIST_CNN().to(device)

# Generate a couple dummy images and compute inference time
batch_size              = 16
number_of_image_batches = 100
number_of_images        = batch_size * number_of_image_batches
# Print number of images we are trying to process
print('Attempting to process {} images on {} device'.format(number_of_images, TorchDeviceToText(device)))


### Task 3: Please use functions start_time_ns() and StopInferenceTiming() to measure and calculate the throughput and average inference time of the whole model
### hint: The average inference time should <= 0.1 ms/image and Throughput should be >= 10000 images/s in google colab
###

# Pass random inputs through the model
for k in range(number_of_image_batches):
    # Generate a dummy input in the same shape as an MNIST image (28 x 28)
    dummy_input = torch.randn(size=(batch_size, 1, 28, 28), device=device)
    # Pass through the model
    forward_pass = model( dummy_input )

###
print('Throughput on {}: {} images/s'.format(TorchDeviceToText(device), throughput))
print('Average Inference Time on {}: {} ms/image'.format(TorchDeviceToText(device), avg_inference_time))

Attempting to process 1600 images on GPU device
Throughput on GPU: 9386.966548958018 images/s
Average Inference Time on GPU: 0.00010653068749999999 s/image
