In [0]:
import torch
import torchvision

import torchvision.transforms as transforms
import torchvision.models as models

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import matplotlib.pyplot as mlt

In [0]:
# Transform
transform = transforms.Compose([transforms.Resize((32, 32)),
                               transforms.ToTensor()])

In [0]:
# Data
trainset = torchvision.datasets.MNIST(root='./data', train = True, download = True, transform = transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 256, shuffle = True, num_workers=8)

testset = torchvision.datasets.MNIST(root='./data', train = False, download = True, transform = transform)
testloader = torch.utils.data.DataLoader(testset, batch_size = 1024, shuffle = True, num_workers=8)


In [0]:
# Network

class LeNet(nn.Module):
    """
    Input - 1x32x32
    C1 - 6@28x28 (5x5 kernel)
    tanh
    S2 - 6@14x14 (2x2 kernel, stride 2) Subsampling
    C3 - 16@10x10 (5x5 kernel, complicated shit)
    tanh
    S4 - 16@5x5 (2x2 kernel, stride 2) Subsampling
    C5 - 120@1x1 (5x5 kernel)
    F6 - 84
    tanh
    F7 - 10 (Output)
    """
    def __init__(self):
      
      super(LeNet, self).__init__()
      '''
      self.conv= nn.Sequential(OrderedDict([
        ('c1', nn.Conv2d(1, 6, kernel_size=(5, 5))),
        ('relu1', F.relu()),
        ('s2', nn.MaxPool2d(kernel_size(2, 2), stride=2)),
        ('c3', nn.Conv2d(6, 16, kernel_size=(5, 5))),
        ('relu3', F.relu()),
        ('s4', nn.MaxPool2d(kernel_size(2, 2), stride=2)),
        ('c5', nn.Conv2d(16, 120, kernel_size=(5, 5))),
        ('relu5', F.relu())
      ]))
      
      self.fc = nn.Sequential(OrderedDict([
        ('fc6', nn.Linear(120, 84)),
        ('relu6', F.relu()),
        ('fc7', nn.Linear(84, 10)),
        ('sig7', nn.LogSoftmax(dim=-1))
      ]))
      '''
      
      self.conv1 = nn.Conv2d(1, 6, kernel_size=(5, 5))
      self.conv2 = nn.Conv2d(6, 16, kernel_size=(5, 5))
      self.conv3 = nn.Conv2d(16, 120, kernel_size=(5, 5))
      
      self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=2)
      
      self.fc1 = nn.Linear(120, 84)
      self.fc2 = nn.Linear(84, 10)
      
      
    def forward(self, x):
      '''
      x = self.conv(x)
      x = x.view(x.size(0), -1)
      x = self.fc(x)
      return x
      '''
      x = self.pool(F.relu(self.conv1(x)))
      x = self.pool(F.relu(self.conv2(x)))
      x = F.relu(self.conv3(x))
      
      x = x.view(x.size(0), -1)
      x = F.relu(self.fc1(x))
      sf = nn.LogSoftmax(dim=-1)
      
      return sf(x)
      
model = LeNet()

In [0]:
# Loss function & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=2e-3)

In [0]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

model.to(device)

cuda:0


LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=120, out_features=84, bias=True)
  (fc2): Linear(in_features=84, out_features=10, bias=True)
)

In [0]:
# Training
epochs = 2

for epoch in range(epochs):
  print('\n===> Epoch [%d/%d]' % (epoch+1, epochs))
  
  running_loss = 0.0
  
  for i, data in enumerate(trainloader,0):
    
    # Data Load
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    
    # Zero the parameter gradients
    optimizer.zero_grad()
    
    # Forward
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    
    # Backward
    loss.backward()
    
    # Optimize
    optimizer.step()
    
    # Print statistics
    running_loss += loss.item()
    
    # Print every 2000 mini-batches
    if i % 10 == 0 :
      print('      - Iteration [%5d / %5d] --- Loss: %.3f' %
            (i, len(trainloader), running_loss / 2000))
      running_loss = 0.0


===> Epoch [1/2]
      - Iteration [    0 /   235] --- Loss: 0.000
      - Iteration [   10 /   235] --- Loss: 0.003
      - Iteration [   20 /   235] --- Loss: 0.002
      - Iteration [   30 /   235] --- Loss: 0.003
      - Iteration [   40 /   235] --- Loss: 0.002
      - Iteration [   50 /   235] --- Loss: 0.002
      - Iteration [   60 /   235] --- Loss: 0.002
      - Iteration [   70 /   235] --- Loss: 0.002
      - Iteration [   80 /   235] --- Loss: 0.002
      - Iteration [   90 /   235] --- Loss: 0.002
      - Iteration [  100 /   235] --- Loss: 0.002
      - Iteration [  110 /   235] --- Loss: 0.003
      - Iteration [  120 /   235] --- Loss: 0.003
      - Iteration [  130 /   235] --- Loss: 0.002
      - Iteration [  140 /   235] --- Loss: 0.002
      - Iteration [  150 /   235] --- Loss: 0.003
      - Iteration [  160 /   235] --- Loss: 0.002
      - Iteration [  170 /   235] --- Loss: 0.002
      - Iteration [  180 /   235] --- Loss: 0.003
      - Iteration [  190 /   235