In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets
from tqdm import tqdm
import sys

In [2]:
device = ("cuda:0" if torch.cuda.is_available() else "cpu:0")

train_mnist = datasets.MNIST(root=".", download=True, train=True)
test_mnist = datasets.MNIST(root=".", download=True, train=False)

x_train, y_train = train_mnist.data.unsqueeze(1).to(device) / 255., train_mnist.targets.to(device)
x_test, y_test = test_mnist.data.unsqueeze(1).to(device) / 255., test_mnist.targets.to(device)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

train_size, test_size = x_train.shape[0], x_test.shape[0]
BATCH_SIZE = 32
TRAIN_ITER_SIZE, TEST_ITER_SIZE = train_size // BATCH_SIZE, test_size // BATCH_SIZE

train_data = DataLoader(TensorDataset(x_train,y_train), batch_size=BATCH_SIZE, shuffle=True)
test_data = DataLoader(TensorDataset(x_test,y_test), batch_size=BATCH_SIZE, shuffle=True)

train_size, test_size, TRAIN_ITER_SIZE, TEST_ITER_SIZE

torch.Size([60000, 1, 28, 28]) torch.Size([60000]) torch.Size([10000, 1, 28, 28]) torch.Size([10000])


(60000, 10000, 1875, 312)

In [3]:
batch_no = 0
for x,y in train_data:
  #print(x.shape, y.shape)
  batch_no += 1
print("batch_no =", batch_no, "\t60000 / 32 =", 60000 / 32)

batch_no = 1875 	60000 / 32 = 1875.0


# Initializing the Network:

In [4]:
class Net(nn.Module):
  def __init__(self):
    super(Net,self).__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
    self.pooling1 = nn.MaxPool2d(kernel_size=2)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
    self.pooling2 = nn.MaxPool2d(kernel_size=2)
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
    self.flatten = nn.Flatten()
    self.fc1 = nn.Linear(in_features=128*7*7, out_features=128)
    self.drop = nn.Dropout(0.2)
    self.fc_out = nn.Linear(in_features=128, out_features=10)
  def forward(self,x):
    x = self.conv1(x)
    x = F.relu(x)
    x = self.pooling1(x)
    x = self.conv2(x)
    x = F.relu(x)
    x = self.pooling2(x)
    x = self.conv3(x)
    x = F.relu(x)
    x = self.flatten(x)
    x = self.fc1(x)
    x = F.relu(x)
    x = self.drop(x)
    x = self.fc_out(x)
    x = F.log_softmax(x, dim=-1)
    return x

# Testing the Network:

In [5]:
net = Net().to(device)
net(torch.FloatTensor(2, 1, 28,28).normal_().to(device))

tensor([[-2.3185, -2.2562, -2.2732, -2.3738, -2.3899, -2.2853, -2.2469, -2.3622,
         -2.2140, -2.3215],
        [-2.2940, -2.2609, -2.2859, -2.3920, -2.3656, -2.2802, -2.2549, -2.3714,
         -2.2020, -2.3355]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)

In [6]:
net(x_train[0:2,...])

tensor([[-2.3114, -2.2375, -2.2805, -2.3709, -2.4043, -2.2771, -2.2639, -2.3396,
         -2.2205, -2.3356],
        [-2.3219, -2.2380, -2.2799, -2.3792, -2.4061, -2.2637, -2.2588, -2.3382,
         -2.2290, -2.3272]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)

In [7]:
net = Net().to(device)
criterion = nn.NLLLoss()
optim = torch.optim.Adam(net.parameters(), lr=0.001)

def evaluate(net, test_data, BATCH_SIZE, epoch_accuracies):
  with torch.no_grad():
    net.eval()
    for x_test_batch,y_test_batch in test_data:
      y_test_pred = net(x_test_batch)
      accuracy = torch.sum(y_test_batch == torch.argmax(y_test_pred,dim=-1)).item()
      epoch_accuracies.append(accuracy / BATCH_SIZE)
      break

for epoch in range(10):
  epoch_loss = 0.0
  epoch_accuracies = []
  print("Epoch ", epoch, end="")
  for i,(x_batch,y_batch) in enumerate(train_data):
    net.train()
    y_pred = net(x_batch)
    loss = criterion(y_pred, y_batch)
    epoch_loss += loss.item()
    optim.zero_grad()
    loss.backward()
    optim.step()

    if i % 10 == 0:
      evaluate(net, test_data, BATCH_SIZE, epoch_accuracies)

  print(" | Epoch_loss: "+str(epoch_loss)+" | Epoch_Accuracy: "+str(100*sum(epoch_accuracies)/len(epoch_accuracies)))

Epoch  0 | Epoch_loss: 264.4747666452313 | Epoch_Accuracy: 96.27659574468085
Epoch  1 | Epoch_loss: 93.21236207432776 | Epoch_Accuracy: 98.76994680851064
Epoch  2 | Epoch_loss: 65.56749240449062 | Epoch_Accuracy: 99.15226063829788
Epoch  3 | Epoch_loss: 49.77416495290527 | Epoch_Accuracy: 99.20212765957447
Epoch  4 | Epoch_loss: 39.455786682607595 | Epoch_Accuracy: 99.10239361702128
Epoch  5 | Epoch_loss: 34.147032016590174 | Epoch_Accuracy: 99.18550531914893
Epoch  6 | Epoch_loss: 27.92339105601087 | Epoch_Accuracy: 99.1688829787234
Epoch  7 | Epoch_loss: 26.991504425089516 | Epoch_Accuracy: 99.00265957446808
Epoch  8 | Epoch_loss: 22.622694585979872 | Epoch_Accuracy: 99.38497340425532
Epoch  9 | Epoch_loss: 17.892860483425608 | Epoch_Accuracy: 99.21875


In [8]:
with torch.no_grad():
  for x_test_batch,y_test_batch in test_data:
    y_test_pred = net(x_test_batch)
    accuracy = torch.sum(y_test_batch == torch.argmax(y_test_pred,dim=-1))
    print("Accuracy:", accuracy/BATCH_SIZE)
    break

Accuracy: tensor(1., device='cuda:0')
