In [1]:
# Lab 10 MNIST and softmax
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [2]:
# Select Target Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [3]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [4]:
# MNIST dataset
mnist_train = dsets.MNIST(root='../MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='../MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [5]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [6]:
# Multi-Layer NN
linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()

In [16]:
# Initialization
torch.nn.init.xavier_normal_(linear1.weight)
torch.nn.init.xavier_normal_(linear2.weight)
torch.nn.init.xavier_normal_(linear3.weight)

Parameter containing:
tensor([[ 0.0388, -0.0962, -0.1696,  ..., -0.0914,  0.0589, -0.1261],
        [ 0.0305, -0.0397, -0.0082,  ...,  0.0223,  0.0059,  0.0501],
        [-0.0090, -0.0045,  0.1778,  ..., -0.0443,  0.0629, -0.0214],
        ...,
        [ 0.1113,  0.0039, -0.0598,  ...,  0.0399, -0.2394,  0.1599],
        [-0.0815,  0.1989, -0.1192,  ..., -0.1321,  0.2411, -0.0383],
        [-0.0071, -0.0031, -0.1351,  ...,  0.0471,  0.0823, -0.1021]],
       device='cuda:0', requires_grad=True)

In [17]:
#model
model = torch.nn.Sequential(linear1,relu,
                            linear2,relu,
                            linear3).to(device)

In [18]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [19]:
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X,Y in data_loader:
        # reshape input image into [Batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis,Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('Epoch {:d}, cost {:.9f}'.format(epoch+1,avg_cost))
print('Learning finished')

Epoch 1, cost 0.242054522
Epoch 2, cost 0.091852538
Epoch 3, cost 0.060248423
Epoch 4, cost 0.044403650
Epoch 5, cost 0.033454735
Epoch 6, cost 0.025483338
Epoch 7, cost 0.022449100
Epoch 8, cost 0.018636249
Epoch 9, cost 0.016029257
Epoch 10, cost 0.015881643
Epoch 11, cost 0.011563175
Epoch 12, cost 0.012039029
Epoch 13, cost 0.012200069
Epoch 14, cost 0.011019747
Epoch 15, cost 0.009390112
Learning finished


In [22]:
with torch.no_grad():
    X_test = mnist_test.data.view(-1,28*28).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy : {:7.4f} %'.format(accuracy.item()*100))
    
    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_singleData = mnist_test.data[r:r+1].view(-1,28*28).float().to(device)
    Y_singleData = mnist_test.targets[r:r+1].to(device)
    
    print('Label : ',Y_singleData.item())
    singlePrediction = model(X_singleData)
    print('Prediction : ', torch.argmax(singlePrediction, 1).item())

Accuracy : 97.7100 %
Label :  3
Prediction :  3
