In [1]:
# Lab 10 MNIST and softmax
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [2]:
# Select Target Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [3]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
drop_prob = 0.3

In [4]:
# MNIST dataset
mnist_train = dsets.MNIST(root='../MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='../MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [5]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [6]:
# Multi-Layer NN
linear1 = torch.nn.Linear(784, 512, bias=True)
linear2 = torch.nn.Linear(512, 512, bias=True)
linear3 = torch.nn.Linear(512, 512, bias=True)
linear4 = torch.nn.Linear(512, 512, bias=True)
linear5 = torch.nn.Linear(512, 10, bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=drop_prob)

In [7]:
# Initialization
torch.nn.init.xavier_normal_(linear1.weight)
torch.nn.init.xavier_normal_(linear2.weight)
torch.nn.init.xavier_normal_(linear3.weight)
torch.nn.init.xavier_normal_(linear4.weight)
torch.nn.init.xavier_normal_(linear5.weight)

Parameter containing:
tensor([[-0.0180, -0.0595,  0.0064,  ..., -0.0023, -0.0860,  0.0191],
        [-0.0545,  0.0292, -0.0036,  ..., -0.0279,  0.1061, -0.0536],
        [-0.0312, -0.0075,  0.0382,  ..., -0.0651,  0.0227, -0.1196],
        ...,
        [ 0.0175,  0.0823, -0.0075,  ..., -0.0867,  0.0492,  0.0200],
        [-0.0306, -0.0145, -0.0415,  ...,  0.0258,  0.1316, -0.0382],
        [ 0.0292,  0.0643, -0.1190,  ...,  0.0634, -0.1017,  0.0353]],
       requires_grad=True)

In [8]:
#model
model = torch.nn.Sequential(linear1,relu,
                            linear2,relu,
                            linear3,relu,
                            linear4,relu,
                            linear5).to(device)

In [9]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X,Y in data_loader:
        # reshape input image into [Batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis,Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('Epoch {:04d}, cost {:.9f}'.format(epoch+1,avg_cost))
print('Learning finished')

Epoch 1, cost 0.210628167
Epoch 2, cost 0.094539858
Epoch 3, cost 0.061651159
Epoch 4, cost 0.047363535
Epoch 5, cost 0.040801957
Epoch 6, cost 0.034543749
Epoch 7, cost 0.029978471
Epoch 8, cost 0.025865972
Epoch 9, cost 0.024721120
Epoch 10, cost 0.022390937
Epoch 11, cost 0.020143313
Epoch 12, cost 0.015671570
Epoch 13, cost 0.016479092
Epoch 14, cost 0.012650740
Epoch 15, cost 0.016869124
Learning finished


In [11]:
with torch.no_grad():
    X_test = mnist_test.data.view(-1,28*28).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy : {:7.4f} %'.format(accuracy.item()*100))
    
    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_singleData = mnist_test.data[r:r+1].view(-1,28*28).float().to(device)
    Y_singleData = mnist_test.targets[r:r+1].to(device)
    
    print('Label : ',Y_singleData.item())
    singlePrediction = model(X_singleData)
    print('Prediction : ', torch.argmax(singlePrediction, 1).item())

Accuracy : 97.9300 %
Label :  8
Prediction :  8
