<a href="https://colab.research.google.com/github/aish0606/DeepLearning/blob/main/Applications/App_2_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [48]:
# Since we know that the size of input image is 28x28 so input features= 28x28 = 784
input_size = 784     #Number of input neurons (image pixels)
hidden_size = 400    # hidden_neuron=input_neuron+output_neuron/2 (approx.)
output_size = 10     #Number of classes (0-9)
epochs = 10          #How many times we pass our entire dataset into our network 
batch_size = 100       #Input size of the data during one iteration
learning_rate = 0.001   #How fast we are learning

In [49]:
train_data = datasets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_data = datasets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

In [50]:
# When we have to load the data in batches, we use dataloader
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=batch_size,
                                          shuffle=True)

In [51]:
print(len(train_data))
print(len(train_loader))

60000
600


In [52]:
class Net(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(Net, self).__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.fc2 = nn.Linear(hidden_size, hidden_size) # Here we have 2 hidden layers
    self.fc3 = nn.Linear(hidden_size, output_size)
    self.relu = nn.ReLU()
    self.init_weight()
  
  def init_weight(self):
    nn.init.kaiming_normal_(self.fc1.weight)
    nn.init.kaiming_normal_(self.fc2.weight)

  def forward(self, x):
    out = self.fc1(x)
    out = self.relu(out)
    out = self.fc2(out)
    out = self.relu(out)
    out = self.fc3(out)
    return out

In [53]:
# Create an object of the class
net = Net(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
CUDA = torch.cuda.is_available()
if CUDA:
  net = net.cuda()
#The loss function. The Cross Entropy loss comes along with Softmax. 
#Therefore, no need to specify Softmax as well
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [54]:
# What is in net.parameters
print(len(list(net.parameters())))
print(net.parameters)

6
<bound method Module.parameters of Net(
  (fc1): Linear(in_features=784, out_features=400, bias=True)
  (fc2): Linear(in_features=400, out_features=400, bias=True)
  (fc3): Linear(in_features=400, out_features=10, bias=True)
  (relu): ReLU()
)>


In [55]:
# Visualize the train_loader
for i, (images, labels) in enumerate(train_loader):
  #print(images.size())
  # changing the dimension of tensors from 4D to 2D.
  images = images.view(-1, 784)
  #print(images.size())

In [56]:
# print(labels.shape)
# labels = labels.unsqueeze(1)
# print(labels.shape)
# print(labels.dtype)
# print(predicted.dtype)

In [62]:
# train the network
for epoch in range(epochs):
  correct_train = 0
  running_loss = 0
  for i, (images, labels) in enumerate(train_loader):
    #Flatten the image from size (batch,1,28,28) --> (100,1,28,28) 
    #where 1 represents the number of channels (grayscale-->1),
    # to size (100,784)
    images = images.view(-1, 784)
    if CUDA:
      images = images.cuda()
      labels = labels.cuda()
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1)  # returns max_value & index of max_val

    correct_train += (labels == predicted).sum()
    #predicted = predicted.unsqueeze(1)
    loss = criterion(outputs, labels)
    running_loss += loss.item()
    # Clear the param_grad so it won't be accumulated
    optimizer.zero_grad()
    loss.backward()    #Backpropogation
    optimizer.step()   # Weight updation

  print('Epoch: {} / {}. Training Loss: {}. Training Accuracy: {}'.format(
      epoch, epochs, running_loss/len(train_loader), 100 * correct_train/len(train_data)))

print("TRAINING DONE")

Epoch: 0 / 10. Training Loss: 0.2383550605022659. Training Accuracy: 93.03500366210938
Epoch: 1 / 10. Training Loss: 0.08361979553941637. Training Accuracy: 97.42833709716797
Epoch: 2 / 10. Training Loss: 0.057043617534606406. Training Accuracy: 98.22833251953125
Epoch: 3 / 10. Training Loss: 0.03952181260644769. Training Accuracy: 98.74166870117188
Epoch: 4 / 10. Training Loss: 0.031252037803642455. Training Accuracy: 98.95000457763672
Epoch: 5 / 10. Training Loss: 0.0216145265243055. Training Accuracy: 99.28166961669922
Epoch: 6 / 10. Training Loss: 0.020103458532394144. Training Accuracy: 99.34166717529297
Epoch: 7 / 10. Training Loss: 0.01715963010124445. Training Accuracy: 99.40166473388672
Epoch: 8 / 10. Training Loss: 0.014033533431841837. Training Accuracy: 99.53666687011719
Epoch: 9 / 10. Training Loss: 0.015138558352373366. Training Accuracy: 99.5


In [68]:
print(labels)
print(len(labels))
print(predicted)
print(len(outputs))

tensor([0, 9, 1, 1, 7, 6, 7, 1, 9, 3, 7, 8, 8, 0, 9, 1, 3, 8, 3, 8, 4, 4, 5, 9,
        9, 8, 0, 7, 2, 1, 1, 4, 0, 0, 6, 7, 0, 3, 4, 8, 8, 2, 6, 6, 1, 7, 0, 0,
        1, 9, 8, 0, 4, 7, 2, 9, 0, 3, 9, 1, 9, 9, 6, 4, 0, 7, 9, 3, 4, 5, 3, 1,
        6, 5, 3, 4, 8, 5, 7, 8, 8, 7, 9, 3, 8, 7, 5, 8, 4, 6, 4, 4, 7, 8, 6, 1,
        3, 1, 3, 9], device='cuda:0')
100
tensor([0, 9, 1, 1, 7, 6, 7, 1, 9, 3, 7, 8, 8, 0, 9, 1, 3, 8, 3, 8, 4, 4, 5, 9,
        9, 8, 0, 7, 2, 1, 1, 4, 0, 0, 6, 7, 0, 3, 4, 1, 8, 2, 6, 6, 1, 7, 0, 0,
        1, 9, 8, 0, 4, 7, 2, 9, 0, 3, 9, 1, 9, 9, 6, 4, 0, 7, 9, 3, 4, 5, 3, 1,
        6, 5, 3, 4, 8, 5, 7, 8, 8, 7, 9, 3, 8, 7, 5, 8, 4, 6, 4, 4, 7, 8, 6, 1,
        3, 1, 3, 9], device='cuda:0')
100


In [69]:
# Testing 
with torch.no_grad():
  correct = 0
  for images, labels in test_loader:
    if CUDA:
      images = images.cuda()
      labels = labels.cuda()
    images = images.view(-1, 784) # 28*28 = 784
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    correct += (predicted == labels).sum()
  
  print('Accuracy for 10000 test images are {}'.format(100 * (correct / len(test_data))))

Accuracy for 10000 test images are 98.20999145507812
