### 0. Import packages

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.datasets
import torchvision.transforms as transforms

### 1. Define the transform

In [4]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(0.5, 0.5)])

### 2. Load the datasets

In [5]:
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
test_loader = DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

### 3. Build the NN model

In [9]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        num_features = 1
        size = x.size()[1:]
        for s in size:
            num_features *= s
        return num_features
    
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


### 4. Set the loss function and optimizer

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

### 5. Train the model


In [10]:
for epoch in range(2):
    running_loss = 0.0

    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
        if i % 100 == 99:
            print("Epoch: {}, Iter: {}, Loss: {}".format(epoch+1, i+1, running_loss/100))
            running_loss = 0.0


Epoch: 1, Iter: 100, Loss: 2.307419183254242
Epoch: 1, Iter: 200, Loss: 2.3077608823776243
Epoch: 1, Iter: 300, Loss: 2.307215065956116
Epoch: 1, Iter: 400, Loss: 2.3073448657989504
Epoch: 2, Iter: 100, Loss: 2.3066208004951476
Epoch: 2, Iter: 200, Loss: 2.307808995246887
Epoch: 2, Iter: 300, Loss: 2.307646415233612
Epoch: 2, Iter: 400, Loss: 2.3074153232574464


### 6. Evaluate the model

In [62]:
total = 0
correct = 0

with torch.no_grad():
    for data in test_loader:
        inputs, labels = data

        outputs = net(inputs)
        # print(outputs.data.size())
        # break

        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print (100 * correct / total)

84.53
