
# Multi-Layer Perceptron 2

#### 2 hidden layers, designed for the MNIST dataset

Import libraries

In [None]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

Define hyperparameters

In [None]:
input_size = 784 # img_size = (28,28) ---> 28*28=784 in total
hidden_size = 500 # number of nodes at hidden layer
num_classes = 10 # number of output classes discrete range [0,9]
num_epochs = 10 # number of times which the entire dataset is passed throughout the model
batch_size = 100 # the size of input data took for one iteration
lr = 1e-3 # size of step

Download and load MNIST data

In [None]:
train_data = dsets.MNIST(root = './data', train = True,
                        transform = transforms.ToTensor(), download = True)

test_data = dsets.MNIST(root = './data', train = False,
                       transform = transforms.ToTensor())

train_gen = torch.utils.data.DataLoader(dataset = train_data,
                                             batch_size = batch_size,
                                             shuffle = True)

test_gen = torch.utils.data.DataLoader(dataset = test_data,
                                      batch_size = batch_size, 
                                      shuffle = False)

Define model class

In [None]:
class Net(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes):
    super(Net,self).__init__()
    self.fc1 = nn.Linear(input_size, hidden_size[0])
    self.fc2 = nn.Linear(hidden_size[0], hidden_size[1])
    self.fc3 = nn.Linear(hidden_size[1], num_classes)
    self.relu = nn.ReLU()
  
  def forward(self,x):
    out = self.relu(self.fc1(x))
    out = self.relu(self.fc2(out))
    out = self.fc3(out)
    return out

Build the model

In [None]:
hidden_size=[300,100]
net = Net(input_size, hidden_size, num_classes)

if torch.cuda.is_available():
  net.cuda()

Define loss function and the optimizer

In [None]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam( net.parameters(), lr=lr)

Train the model

In [None]:
for epoch in range(num_epochs):
  for i ,(images,labels) in enumerate(train_gen):
    images = Variable(images.view(-1,28*28)).cuda()
    labels = Variable(labels).cuda()
    
    optimizer.zero_grad()
    outputs = net(images)
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()
    
    if (i+1) % 100 == 0:
      print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                 %(epoch+1, num_epochs, i+1, len(train_data)//batch_size, loss.data.item()))

Evaluate the accuracy of the model

In [None]:
correct = 0
total = 0
for images,labels in test_gen:
  images = Variable(images.view(-1,28*28)).cuda()
  labels = labels.cuda()
  
  output = net(images)
  _, predicted = torch.max(output,1)
  correct += (predicted == labels).sum()
  total += labels.size(0)

print('Accuracy of the model: %.3f %%' %((100*correct)/(total+1)))

Evaluate memory consumption and inference time

In [None]:

import torch
import torchvision.models as models
from torch.profiler import profile, record_function,ProfilerActivity

with profile(activities=[
        ProfilerActivity.CPU, ProfilerActivity.CUDA],profile_memory=True, record_shapes=True) as prof:
    with record_function("model_inference"):
      for i ,(images,labels) in enumerate(train_gen):
        outputs = net(Variable(images.view(-1,28*28)).cuda())

print(prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=10))