In [11]:
# Lab 10 MNIST and softmax
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [12]:
# Select Target Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [13]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [14]:
# MNIST dataset
mnist_train = dsets.MNIST(root='../MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='../MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [15]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [16]:
# MNIST data image of shape 28 * 28 = 784
linear = torch.nn.Linear(784, 10, bias=True).to(device)

In [17]:
# Initialization
torch.nn.init.normal_(linear.weight)

Parameter containing:
tensor([[-1.2901,  0.3930, -0.1891,  ..., -0.1626, -0.3275,  0.5608],
        [-1.6757, -0.2454,  0.9221,  ...,  0.9559,  0.6160, -0.4226],
        [ 0.2444,  1.4810, -2.0906,  ...,  0.1206,  1.0044, -0.3737],
        ...,
        [ 0.6234,  1.8019, -2.7563,  ..., -0.5889, -0.5576,  0.7360],
        [-0.2871, -1.3313, -2.2248,  ...,  0.0309,  0.9180, -0.1482],
        [ 0.7678,  0.6624, -0.5362,  ...,  0.2338,  0.3688, -0.7182]],
       device='cuda:0', requires_grad=True)

In [18]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(linear.parameters(), lr=learning_rate)

In [19]:
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X,Y in data_loader:
        # reshape input image into [Batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = linear(X)
        cost = criterion(hypothesis,Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('Epoch {:d}, cost {:.9f}'.format(epoch+1,avg_cost))
print('Learning finished')

Epoch 1, cost 5.656054497
Epoch 2, cost 1.699803472
Epoch 3, cost 1.121566176
Epoch 4, cost 0.883358002
Epoch 5, cost 0.750486851
Epoch 6, cost 0.663246810
Epoch 7, cost 0.601343870
Epoch 8, cost 0.554267704
Epoch 9, cost 0.518746793
Epoch 10, cost 0.489619672
Epoch 11, cost 0.465846598
Epoch 12, cost 0.446371138
Epoch 13, cost 0.429083347
Epoch 14, cost 0.414353102
Epoch 15, cost 0.401432812
Learning finished


In [20]:
with torch.no_grad():
    X_test = mnist_test.data.view(-1,28*28).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    prediction = linear(X_test)
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy : {:7.4f} %'.format(accuracy.item()*100))
    
    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_singleData = mnist_test.data[r:r+1].view(-1,28*28).float().to(device)
    Y_singleData = mnist_test.targets[r:r+1].to(device)
    
    print('Label : ',Y_singleData.item())
    singlePrediction = linear(X_singleData)
    print('Prediction : ', torch.argmax(singlePrediction, 1).item())

Accuracy : 89.0600 %
Label :  8
Prediction :  3
