In [1]:
from __future__ import print_function, division

import torch

In [2]:
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import numpy as np

plt.ion()

In [3]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms

In [5]:
from matplotlib.pyplot import imshow, imsave
%matplotlib inline

In [7]:
class MLP_MNIST(nn.Module):
    def __init__(self, num_classes):
        super(MLP_MNIST, self).__init__()
        self.mlp = nn.Sequential(
                    nn.Linear(28*28, 300),
                    nn.ReLU(),
                    nn.Linear(300,100),
                    nn.ReLU(),
                    nn.Linear(100, num_classes),
        )
        
    def forward(self, x):
        y_ = x.view(x.size(0), -1)
        y_ = self.mlp(y_)
        return y_

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [9]:
transform = transforms.Compose([transforms.ToTensor(),
                                   transforms.Normalize(mean=(.5,), std=(.5,))])

In [10]:
mnist_train = datasets.MNIST(root='data/', train=True, transform=transform, download=True)
mnist_test = datasets.MNIST(root='data/', train=False, transform=transform, download=True)

In [11]:
mnist_train[0][0].shape

torch.Size([1, 28, 28])

In [12]:
batch_size=20

In [13]:
train_loader = DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False, drop_last=True)

In [22]:
model = MLP_MNIST(num_classes=10).to(device)
print(device)

cuda:0


In [23]:
print(model)

MLP_MNIST(
  (mlp): Sequential(
    (0): Linear(in_features=784, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=100, bias=True)
    (3): ReLU()
    (4): Linear(in_features=100, out_features=10, bias=True)
  )
)


In [24]:
learning_rate=0.001
epochs=5
step=0

In [25]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)

In [28]:
%%time
best_acc = 0
best_model_weights = copy.deepcopy(model.state_dict())

for i in range(epochs):
    for idx, (images, labels) in enumerate(train_loader):
        x, y = images.to(device), labels.to(device)
        yhat = model(x)
        
        loss = criterion(yhat, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if step==0 or (step+1)%500 ==0:
            print('{}th epoch, {}th step: \n - loss: {}'.format(i+1, step+1, loss.item()))
            
        if step==0 or (step+1)%1000 ==0:
            model.eval()
            correct = 0
            with torch.no_grad():
                for idx, (images, labels) in enumerate(test_loader):
                    x, y = images.to(device), labels.to(device)
                    yhat = model(x)
                    _, predicted = torch.max(yhat, dim=1)
                    correct += torch.sum(predicted==y).item()
            acc = correct/len(mnist_test)*100
            print('*'*30)
            print("Test Accuracy = {:.2f}%".format(acc))
            print("*"*30)
            if acc>best_acc:
                best_acc = acc
                best_model_weights = copy.deepcopy(model.state_dict())
            model.load_state_dict(best_model_weights)
            
            model.train()
        
        step += 1

1th epoch, 30500th step: 
 - loss: 0.17700496315956116
1th epoch, 31000th step: 
 - loss: 0.0370250940322876
******************************
Test Accuracy = 96.75%
******************************
1th epoch, 31500th step: 
 - loss: 0.008609580807387829
1th epoch, 32000th step: 
 - loss: 0.03285803645849228
******************************
Test Accuracy = 96.55%
******************************
1th epoch, 32500th step: 
 - loss: 0.2144518345594406
1th epoch, 33000th step: 
 - loss: 0.03577850013971329
******************************
Test Accuracy = 97.07%
******************************
2th epoch, 33500th step: 
 - loss: 0.03011343441903591
2th epoch, 34000th step: 
 - loss: 0.03922879695892334
******************************
Test Accuracy = 96.78%
******************************
2th epoch, 34500th step: 
 - loss: 0.04423928260803223
2th epoch, 35000th step: 
 - loss: 0.03430182859301567


KeyboardInterrupt: 

In [29]:
model.eval()

correct = 0

with torch.no_grad():
    for idx, (images, labels) in enumerate(test_loader):
        x, y = images.to(device), labels.to(device)
        yhat = model(x)
        
        _, predicted = torch.max(yhat, dim=1)
        correct += torch.sum(predicted==y).item()
        
acc = correct/len(mnist_test)*100

print("Final Test Accuracy: {:.2f}%".format(acc))

Final Test Accuracy: 97.11%
