In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets
from tqdm import tqdm
import sys

In [2]:
device = ("cuda:0" if torch.cuda.is_available() else "cpu:0")

train_mnist = datasets.MNIST(root=".", download=True, train=True)
test_mnist = datasets.MNIST(root=".", download=True, train=False)

x_train, y_train = train_mnist.data.unsqueeze(1).to(device) / 255., train_mnist.targets.to(device)
x_test, y_test = test_mnist.data.unsqueeze(1).to(device) / 255., test_mnist.targets.to(device)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

train_size, test_size = x_train.shape[0], x_test.shape[0]
BATCH_SIZE = 32
TRAIN_ITER_SIZE, TEST_ITER_SIZE = train_size // BATCH_SIZE, test_size // BATCH_SIZE

train_data = DataLoader(TensorDataset(x_train,y_train), batch_size=BATCH_SIZE, shuffle=True)
test_data = DataLoader(TensorDataset(x_test,y_test), batch_size=BATCH_SIZE, shuffle=True)

train_size, test_size, TRAIN_ITER_SIZE, TEST_ITER_SIZE

torch.Size([60000, 1, 28, 28]) torch.Size([60000]) torch.Size([10000, 1, 28, 28]) torch.Size([10000])


(60000, 10000, 1875, 312)

In [3]:
batch_no = 0
for x,y in train_data:
  #print(x.shape, y.shape)
  batch_no += 1
print("batch_no =", batch_no, "\t60000 / 32 =", 60000 / 32)

batch_no = 1875 	60000 / 32 = 1875.0


# Initializing the Network:

In [4]:
class Net(nn.Module):
  def __init__(self):
    super(Net,self).__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
    self.pooling1 = nn.MaxPool2d(kernel_size=2)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
    self.pooling2 = nn.MaxPool2d(kernel_size=2)
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
    self.flatten = nn.Flatten()
    self.fc1 = nn.Linear(in_features=128*7*7, out_features=128)
    self.drop = nn.Dropout(0.2)
    self.fc_out = nn.Linear(in_features=128, out_features=10)
  def forward(self,x):
    x = self.conv1(x)
    x = F.relu(x)
    x = self.pooling1(x)
    x = self.conv2(x)
    x = F.relu(x)
    x = self.pooling2(x)
    x = self.conv3(x)
    x = F.relu(x)
    x = self.flatten(x)
    x = self.fc1(x)
    x = F.relu(x)
    x = self.drop(x)
    x = self.fc_out(x)
    x = F.log_softmax(x, dim=-1)
    return x

# Testing the Network:

In [5]:
net = Net().to(device)
net(torch.FloatTensor(2, 1, 28,28).normal_().to(device))

tensor([[-2.2301, -2.2583, -2.3244, -2.3132, -2.2591, -2.3718, -2.3408, -2.3537,
         -2.4294, -2.1710],
        [-2.2655, -2.2716, -2.3072, -2.2975, -2.2235, -2.3447, -2.3314, -2.3489,
         -2.4236, -2.2286]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)

In [6]:
net(x_train[0:2,...])

tensor([[-2.2694, -2.2524, -2.2872, -2.3369, -2.2148, -2.3318, -2.3476, -2.3842,
         -2.3777, -2.2399],
        [-2.2701, -2.2657, -2.2987, -2.3351, -2.2231, -2.3254, -2.3417, -2.3795,
         -2.3761, -2.2251]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)

In [7]:
from torch.utils.tensorboard import SummaryWriter

# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter('runs/mnist_experiment_1')

In [8]:
net = Net().to(device)
criterion = nn.NLLLoss()
optim = torch.optim.Adam(net.parameters(), lr=0.001)

def evaluate(net, test_data, BATCH_SIZE, epoch_accuracies):
  with torch.no_grad():
    net.eval()
    for x_test_batch,y_test_batch in test_data:
      y_test_pred = net(x_test_batch)
      accuracy = torch.sum(y_test_batch == torch.argmax(y_test_pred,dim=-1)).item()
      epoch_accuracies.append(accuracy / BATCH_SIZE)
      break

for epoch in range(10):
  epoch_loss = 0.0
  epoch_accuracies = []
  print("Epoch ", epoch, end="")
  for i,(x_batch,y_batch) in enumerate(train_data):
    net.train()
    y_pred = net(x_batch)
    loss = criterion(y_pred, y_batch)
    epoch_loss += loss.item()
    optim.zero_grad()
    loss.backward()
    optim.step()

    if i % 10 == 0:
      evaluate(net, test_data, BATCH_SIZE, epoch_accuracies)

  print(" | Epoch_loss: "+str(epoch_loss)+" | Epoch_Accuracy: "+str(100*sum(epoch_accuracies)/len(epoch_accuracies)))
  writer.add_scalar("loss", epoch_loss, epoch)
  writer.add_scalar("Accuracy", 100*sum(epoch_accuracies)/len(epoch_accuracies), epoch)
writer.close()

Epoch  0 | Epoch_loss: 271.00884941924596 | Epoch_Accuracy: 96.09375
Epoch  1 | Epoch_loss: 90.9115871508111 | Epoch_Accuracy: 98.88630319148936
Epoch  2 | Epoch_loss: 66.83527442317427 | Epoch_Accuracy: 98.90292553191489
Epoch  3 | Epoch_loss: 49.27120221757468 | Epoch_Accuracy: 99.13563829787235
Epoch  4 | Epoch_loss: 44.43723610584493 | Epoch_Accuracy: 99.2686170212766
Epoch  5 | Epoch_loss: 29.995088642527037 | Epoch_Accuracy: 99.13563829787235
Epoch  6 | Epoch_loss: 27.822682137853917 | Epoch_Accuracy: 99.28523936170212
Epoch  7 | Epoch_loss: 24.708456984821083 | Epoch_Accuracy: 99.20212765957447
Epoch  8 | Epoch_loss: 20.87110854722765 | Epoch_Accuracy: 99.23537234042553
Epoch  9 | Epoch_loss: 19.32233928172807 | Epoch_Accuracy: 99.15226063829788


In [16]:
# Uncomment to see the plots
#%load_ext tensorboard

#%tensorboard --logdir="runs/"

In [10]:
with torch.no_grad():
  for x_test_batch,y_test_batch in test_data:
    y_test_pred = net(x_test_batch)
    accuracy = torch.sum(y_test_batch == torch.argmax(y_test_pred,dim=-1))
    print("Accuracy:", accuracy/BATCH_SIZE)
    break

Accuracy: tensor(1., device='cuda:0')


In [12]:
# del model
model = Net()

In [13]:
torch.save(net, "mnist_net.h5")

In [14]:
model = torch.load("mnist_net.h5")

In [15]:
with torch.no_grad():
  for x_test_batch,y_test_batch in test_data:
    y_test_pred = model(x_test_batch)
    accuracy = torch.sum(y_test_batch == torch.argmax(y_test_pred,dim=-1))
    print("Accuracy:", accuracy/BATCH_SIZE)
    break

Accuracy: tensor(0.9688, device='cuda:0')
