In [50]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary

In [51]:
# Device configuration: use the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Reproducibility: seed PyTorch's random number generators so that weight
# initialisation and training-set shuffling repeat across "Restart & Run All".
# (Some GPU kernels may still be non-deterministic; see the PyTorch
# reproducibility notes.)
# Kept ahead of the hyper-parameters so the cell's last statement is an
# assignment and the Generator returned by manual_seed is not echoed as output.
seed = 42
torch.manual_seed(seed)

# Hyper-parameters
num_classes = 10       # width of the classifier's output layer
learning_rate = 0.001  # step size handed to the optimizer
batch_size = 100       # samples per mini-batch in the data loaders
epochs = 5             # number of full passes over the training set

cuda:0


In [52]:
# MNIST train/test splits; every image is converted to a tensor on access.
train_data = torchvision.datasets.MNIST(
    root='../sample_data/MNIST/',
    train=True,
    download=True,  # fetch the files into `root` if they are not there yet
    transform=transforms.ToTensor(),
)
test_data = torchvision.datasets.MNIST(
    root='../sample_data/MNIST/',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators: the training set is reshuffled, the test set keeps its order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [53]:
class ConvNet(nn.Module):
  """Small CNN classifier: two conv stages followed by one linear layer.

  Each stage is Conv2d(5x5, stride 1, padding 2) -> BatchNorm2d -> ReLU ->
  MaxPool2d(2x2, stride 2). The convolution keeps the spatial size and the
  pooling halves it, so a 1x28x28 input becomes 32x7x7 before the classifier.

  Args:
    num_classes: number of output logits produced by the final linear layer.
  """

  def __init__(self, num_classes):
    super().__init__()
    self.conv1 = self._conv_stage(1, 16)
    self.conv2 = self._conv_stage(16, 32)
    # The classifier input is hard-wired to the 32x7x7 map produced from 28x28 images.
    self.fc = nn.Linear(7 * 7 * 32, num_classes)

  @staticmethod
  def _conv_stage(in_channels, out_channels):
    """Build one Conv -> BatchNorm -> ReLU -> MaxPool stage."""
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    )

  def forward(self, x):
    """Return the class logits, one row per sample in the batch `x`."""
    features = self.conv2(self.conv1(x))
    # Collapse (channels, height, width) into one feature vector per sample.
    flat = features.reshape(features.size(0), -1)
    return self.fc(flat)

In [61]:
# Build the network and move its parameters to the selected device.
net = ConvNet(num_classes)
net = net.to(device)

# Cross-entropy loss on the logits, optimised with Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)

# Print the layer-by-layer summary for a single 1x28x28 input.
summary(net, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             416
       BatchNorm2d-2           [-1, 16, 28, 28]              32
              ReLU-3           [-1, 16, 28, 28]               0
         MaxPool2d-4           [-1, 16, 14, 14]               0
            Conv2d-5           [-1, 32, 14, 14]          12,832
       BatchNorm2d-6           [-1, 32, 14, 14]              64
              ReLU-7           [-1, 32, 14, 14]               0
         MaxPool2d-8             [-1, 32, 7, 7]               0
            Linear-9                   [-1, 10]          15,690
Total params: 29,034
Trainable params: 29,034
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.47
Params size (MB): 0.11
Estimated Total Size (MB): 0.58
---------------------------------------------

In [62]:
# Training loop: `epochs` passes over train_loader, one optimizer step per mini-batch.
total_step = len(train_loader)  # mini-batches per epoch, used only in the log line
log_every = 100                 # report the current batch loss every this many steps

# Switch to training mode explicitly. The evaluation cell calls net.eval(), so
# without this a re-run of this cell would train with the BatchNorm layers
# frozen on their running statistics.
net.train()
for epoch in range(epochs):
  for step, (img, label) in enumerate(train_loader, start=1):
    img = img.to(device)
    label = label.to(device)

    # Forward pass: logits and their loss against the labels.
    out = net(img)
    loss = criterion(out, label)

    # Backward pass: clear old gradients, back-propagate, update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step % log_every == 0:
      # '{:.4f}' = four decimals (the previous '{:4f}' set a field width, not a precision).
      print('Epoch[{}/{}], Step[{}/{}], Loss:{:.4f}'.
         format(epoch+1, epochs, step, total_step, loss.item()))
print("finish traing!")


Epoch[1/5], Step[100/600], Loss:0.148777
Epoch[1/5], Step[200/600], Loss:0.079981
Epoch[1/5], Step[300/600], Loss:0.130312
Epoch[1/5], Step[400/600], Loss:0.034636
Epoch[1/5], Step[500/600], Loss:0.100923
Epoch[1/5], Step[600/600], Loss:0.028652
Epoch[2/5], Step[100/600], Loss:0.116672
Epoch[2/5], Step[200/600], Loss:0.015857
Epoch[2/5], Step[300/600], Loss:0.102708
Epoch[2/5], Step[400/600], Loss:0.081026
Epoch[2/5], Step[500/600], Loss:0.058997
Epoch[2/5], Step[600/600], Loss:0.043007
Epoch[3/5], Step[100/600], Loss:0.011813
Epoch[3/5], Step[200/600], Loss:0.008520
Epoch[3/5], Step[300/600], Loss:0.018788
Epoch[3/5], Step[400/600], Loss:0.073382
Epoch[3/5], Step[500/600], Loss:0.025965
Epoch[3/5], Step[600/600], Loss:0.040716
Epoch[4/5], Step[100/600], Loss:0.007690
Epoch[4/5], Step[200/600], Loss:0.024133
Epoch[4/5], Step[300/600], Loss:0.008501
Epoch[4/5], Step[400/600], Loss:0.043896
Epoch[4/5], Step[500/600], Loss:0.045060
Epoch[4/5], Step[600/600], Loss:0.004891
Epoch[5/5], Step

In [63]:
# Evaluation: measure top-1 accuracy over every batch of test_loader.
# eval() puts the model in inference mode; no_grad() skips gradient tracking.
net.eval()
correct, total = 0, 0
with torch.no_grad():
  for img, label in test_loader:
    img, label = img.to(device), label.to(device)
    out = net(img)
    # The predicted class is the index of the largest logit in each row.
    pred = torch.max(out, dim=1).indices
    correct += (pred == label).sum().item()
    total += label.size(0)
print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 99.04 %
