In [1]:
# Lab 10 MNIST and softmax
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [2]:
# Select Target Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [3]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [4]:
# MNIST dataset
mnist_train = dsets.MNIST(root='../MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='../MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [5]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [6]:
# Multi-Layer NN
linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()

In [7]:
# Initialization
torch.nn.init.normal_(linear1.weight)
torch.nn.init.normal_(linear2.weight)
torch.nn.init.normal_(linear3.weight)

Parameter containing:
tensor([[-0.6645,  0.6646,  0.7159,  ..., -0.3040, -0.8945, -0.7977],
        [-1.2045,  2.4545, -1.8073,  ..., -1.7139, -0.0451,  0.3864],
        [ 0.3414,  0.3114, -1.9218,  ..., -0.4525, -0.6849,  0.9663],
        ...,
        [ 1.3915, -0.4048,  0.2338,  ..., -0.8257, -0.8397,  1.9816],
        [ 1.0414,  0.2130, -0.0417,  ...,  1.7541, -0.6454, -0.0821],
        [-0.1162, -1.2692,  0.8201,  ..., -0.8303, -0.5022,  0.0583]],
       requires_grad=True)

In [8]:
#model
model = torch.nn.Sequential(linear1,relu,
                            linear2,relu,
                            linear3).to(device)

In [9]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X,Y in data_loader:
        # reshape input image into [Batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis,Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('Epoch {:d}, cost {:.9f}'.format(epoch+1,avg_cost))
print('Learning finished')

Epoch 1, cost 129.338562012
Epoch 2, cost 36.177036285
Epoch 3, cost 22.967609406
Epoch 4, cost 16.025194168
Epoch 5, cost 11.624404907
Epoch 6, cost 8.616628647
Epoch 7, cost 6.342143536
Epoch 8, cost 4.774716854
Epoch 9, cost 3.567008257
Epoch 10, cost 2.752093315
Epoch 11, cost 2.117019892
Epoch 12, cost 1.631757498
Epoch 13, cost 1.238214016
Epoch 14, cost 0.947955966
Epoch 15, cost 0.823421955
Learning finished


In [13]:
with torch.no_grad():
    X_test = mnist_test.data.view(-1,28*28).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy : {:7.4f} %'.format(accuracy.item()*100))
    
    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_singleData = mnist_test.data[r:r+1].view(-1,28*28).float().to(device)
    Y_singleData = mnist_test.targets[r:r+1].to(device)
    
    print('Label : ',Y_singleData.item())
    singlePrediction = model(X_singleData)
    print('Prediction : ', torch.argmax(singlePrediction, 1).item())

Accuracy : 94.4100 %
Label :  5
Prediction :  5
