In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import random

In [None]:
use_cuda = torch.cuda.is_available() # check if cuda is available
device = torch.device("cuda" if use_cuda else "cpu")

**Loading the Dataset**


In [None]:

train_transforms = transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,)) 
                                       ])


test_transforms = transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                       ])


# Load and transform data(MNIST)
MNIST_trainset = torchvision.datasets.MNIST('/tmp', train=True, download=True, transform=train_transforms)
MNIST_testset = torchvision.datasets.MNIST('/tmp', train=False, download=True, transform=test_transforms)

In [None]:
len(MNIST_trainset),len(MNIST_testset)

(60000, 10000)

The dataset contains 70,000 images, 60,000 for training and 10,000 for testing

### **Creating random number data point**

In [None]:
class RandomNumber(Dataset):
  def __init__(self, MNISTDataset):
    self.MNISTDataset = MNISTDataset

  def __getitem__(self, index):
    image = self.MNISTDataset[index][0]
    label = self.MNISTDataset[index][1]
    randomNo = random.randint(0,9)

    
    one_hotrandomNo = torch.nn.functional.one_hot(torch.arange(0, 10))#one hot encoding for random number 

    
    sum = label + randomNo
    return image, label, one_hotrandomNo[randomNo], sum

  def __len__(self):
    return len(self.MNISTDataset)

In [None]:
train_dataset = RandomNumber(MNIST_trainset)
test_dataset = RandomNumber(MNIST_testset)

In [None]:
train_loader = DataLoader(train_dataset,batch_size=128,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=128)

In [None]:
for i in train_loader:
    image, label, randomNo, sum = i
    print("IMAGES:",image.shape)
    print("Labels:", label)
    print("random_number:", randomNo)
    print("sum:", sum)
    break

IMAGES: torch.Size([128, 1, 28, 28])
Labels: tensor([6, 7, 1, 0, 8, 8, 0, 0, 4, 4, 3, 5, 3, 2, 8, 7, 4, 9, 9, 4, 1, 1, 9, 7,
        7, 9, 4, 5, 8, 1, 8, 5, 6, 0, 5, 4, 0, 8, 1, 8, 4, 8, 2, 7, 0, 5, 2, 2,
        7, 2, 4, 5, 6, 5, 4, 0, 1, 0, 1, 6, 6, 7, 0, 0, 9, 0, 3, 2, 3, 1, 7, 7,
        3, 0, 2, 1, 9, 6, 6, 4, 0, 0, 3, 4, 7, 2, 2, 9, 8, 3, 6, 3, 3, 9, 8, 3,
        6, 7, 1, 9, 9, 4, 2, 1, 0, 5, 3, 7, 9, 2, 6, 3, 2, 9, 8, 8, 4, 2, 3, 9,
        9, 4, 1, 1, 7, 9, 7, 0])
random_number: tensor([[1, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 1],
        [0, 0, 1,  ..., 0, 0, 0],
        ...,
        [1, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 1, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0]])
sum: tensor([ 6, 16,  3,  2, 10, 10,  4,  1,  9,  9, 11, 13,  7,  7, 10, 11,  7, 14,
        15, 10,  6, 10, 13, 11,  7, 16,  9, 13, 14,  7, 13,  8,  9,  0,  9, 12,
         0, 11,  6, 11,  5,  8,  6, 16,  4,  9, 10,  5, 12,  2, 11, 10, 15, 12,
         4,  2,  2,  6,  7, 12,  8, 16,  4, 

In [None]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)       #Input: 28*28*1    Output:28 * 28 * 32    RF:3 * 3  
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)      #Input: 28*28*32   Output:28 * 28 * 64    RF:5 * 5
        self.pool1 = nn.MaxPool2d(2, 2)                   #Input: 28*28*64   Output:14 * 14 * 64    RF:10*10 
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)     #Input: 14*14*64   Output:14 * 14 * 128   RF:12*12
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)    #Input: 14*14*128  Output:14 * 14 * 256   RF:14*14
        self.pool2 = nn.MaxPool2d(2, 2)                   #Input: 14*14*256  Output: 7 * 7 * 256    RF:28*28
        self.conv5 = nn.Conv2d(256, 512, 3)               #Input: 7*7*256    Output: 5 * 5 * 12     RF:30*30
        self.conv6 = nn.Conv2d(512, 1024, 3)              #Input: 5*5*12     Output: 3 * 3 * 1024   RF:32*32
        self.conv7 = nn.Conv2d(1024, 10, 3)               #Input: 3*3*1024   Output: 1 * 1 * 10     RF:34*34


        self.fc1 = nn.Linear(10+10, 128)                  #Concatenate two inputs 
        # self.fc2 = nn.Linear(128, 30)
        self.fc2 = nn.Linear(128, 19)

    def forward(self, image, randomNumber):
        x = self.conv1(image)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool2(x)
        x = self.conv5(x)
        x = F.relu(x)
        x = self.conv6(x)
        x = self.conv7(x)  
        x = x.view(-1, 10)
        
        #concatenate second input to the output from above convolution
        x1 = torch.cat((x, randomNumber), dim=1)
        
        
        x1 = F.relu(self.fc1(x1))
        x1 = self.fc2(x1)  

        

        return F.log_softmax(x), F.log_softmax(x1)

In [None]:
network = Network()

for name, param in network.named_parameters():
    print(name, '\t\t',)

conv1.weight 		
conv1.bias 		
conv2.weight 		
conv2.bias 		
conv3.weight 		
conv3.bias 		
conv4.weight 		
conv4.bias 		
conv5.weight 		
conv5.bias 		
conv6.weight 		
conv6.bias 		
conv7.weight 		
conv7.bias 		
fc1.weight 		
fc1.bias 		
fc2.weight 		
fc2.bias 		


# MODEL

In [None]:
print(network)

Network(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
  (conv6): Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1))
  (conv7): Conv2d(1024, 10, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=20, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=19, bias=True)
)


## TRAINING

In [None]:
def train(network, device, train_loader,optimizer, epoch):
    network.train()

    epoch_loss = 0

    for batch_idx, (data, target, random_number, sum) in enumerate(train_loader):

        # fetch the data and the target values
        target = target.type(torch.LongTensor)
        sum = sum.type(torch.LongTensor)

        data, target, sum = data.to(device), target.to(device), sum.to(device)
        optimizer.zero_grad()

        # forward pass the model with the data
        output, sum_output = network(data,random_number.to(device))

        # compute the loss occured
        mnist_loss = F.nll_loss(output, target)
        addition_loss = F.nll_loss(sum_output, sum)
        loss= (mnist_loss + addition_loss)/2

        epoch_loss += loss.item()

        # compute the backward gradients
        loss.backward()

        optimizer.step()

    print('Train set: Average loss: {:.2f}'.format(loss.item()))

    train_loss = epoch_loss / len(train_loader)
    return train_loss



## TESTING

In [None]:
def test(network, device, test_loader):
    # set the network in evaluation model
    network.eval()

    test_loss = 0
    
    actual_mnist = 0
    actual_addition = 0

    with torch.no_grad():
        for (data, target, random_number, sum) in test_loader:

            target = target.type(torch.LongTensor)
            sum = sum.type(torch.LongTensor)
            data, target,sum = data.to(device), target.to(device), sum.to(device)

            # forward pass the model and get the output
            output, sum_output = network(data,random_number.to(device))

            # accumulate the loss from both the network outputs
            test_loss += (F.nll_loss(output, target, reduction='sum').item() + F.nll_loss(sum_output, sum, reduction='sum').item())/2

            # get the index of max log-probability
            mnist_pred = output.argmax(dim=1, keepdim=True)
            addition_pred = sum_output.argmax(dim=1, keepdim=True)

            # increment the actual prediction count if prediction is correct
            actual_mnist += mnist_pred.eq(target.view_as(mnist_pred)).sum().item()
            actual_addition += addition_pred.eq(sum.view_as(addition_pred)).sum().item()


    test_loss /= len(test_loader.dataset)
    print(f'Test set: Average loss: {test_loss:.3f}, MNist Accuracy:{100. * actual_mnist/len(test_loader.dataset)}, Sum_Accuracy:{100. * actual_addition/len(test_loader.dataset)}')

    return test_loss

## Train and Test the network

In [44]:
network = Network().to(device)
optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=0.9)


# set the number of epochs to train for
num_epoch = 10

train_loss_values = []
valid_loss_values = []

# run it for epoch number of times
for epoch in range(1, num_epoch+1):
    print('\nEpoch {} : '.format(epoch))
    # train the model
    train_loss = train(network, device, train_loader,optimizer, epoch)
    # test the model
    test_loss = test(network, device, test_loader)

    train_loss_values.append(train_loss)
    valid_loss_values.append(test_loss)


Epoch 1 : 


  return F.log_softmax(x), F.log_softmax(x1)


Train set: Average loss: 1.20
Test set: Average loss: 1.151, MNist Accuracy:98.2, Sum_Accuracy:20.07

Epoch 2 : 
Train set: Average loss: 0.95
Test set: Average loss: 0.963, MNist Accuracy:98.95, Sum_Accuracy:44.4

Epoch 3 : 
Train set: Average loss: 0.67
Test set: Average loss: 0.659, MNist Accuracy:98.99, Sum_Accuracy:88.68

Epoch 4 : 
Train set: Average loss: 0.35
Test set: Average loss: 0.351, MNist Accuracy:99.2, Sum_Accuracy:97.44

Epoch 5 : 
Train set: Average loss: 0.18
Test set: Average loss: 0.182, MNist Accuracy:99.24, Sum_Accuracy:98.75

Epoch 6 : 
Train set: Average loss: 0.09
Test set: Average loss: 0.114, MNist Accuracy:99.17, Sum_Accuracy:98.97

Epoch 7 : 
Train set: Average loss: 0.07
Test set: Average loss: 0.084, MNist Accuracy:99.12, Sum_Accuracy:98.9

Epoch 8 : 
Train set: Average loss: 0.04
Test set: Average loss: 0.062, MNist Accuracy:99.37, Sum_Accuracy:99.22

Epoch 9 : 
Train set: Average loss: 0.03
Test set: Average loss: 0.051, MNist Accuracy:99.4, Sum_Accura