In [1]:
from core import *
import torch
import torchvision
import torch.utils.data as Data
from torch.autograd import Variable
import time

use_cuda = torch.cuda.is_available()

N_Epochs = 1000
LearningRate = 1e-3
BatchSize = 100
Download_MNIST = True   # download the dataset if you don't already have it
# can try a smaller dropout rate or a smaller learning rate

import os.path
dataset_path = os.path.join(os.path.dirname(""), 'mnist')

train_set = torchvision.datasets.MNIST(
    root=dataset_path,
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=Download_MNIST
)

train_size = train_set.train_data.size()[0]
N_Batch = train_size / BatchSize

train_loader = Data.DataLoader(dataset=train_set, batch_size=BatchSize, shuffle=True)

test_set = torchvision.datasets.MNIST(
    root=dataset_path,
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=Download_MNIST
)

test_size = test_set.test_data.size()[0]
compute_accu = lambda pred, true, digits: round((pred == true).mean() * 100, digits)

if __name__ == '__main__':

    # Initialize network
    net = BayesianNN(
      nn_input_size=784, 
      layer_config=[400, 400, 10], 
      activation_config=[ActivationType.RELU, ActivationType.RELU, ActivationType.SOFTMAX], 
      prior_type=PriorType.MIXTURE,
      task_type=TaskType.CLASSIFICATION
    )
  
    if use_cuda:
      net = net.cuda()
    
    optim = torch.optim.SGD(net.parameters(), lr=LearningRate)
    # Main training loop
    train_accu_lst = []
    test_accu_lst = []
       
    for i_ep in range(N_Epochs):

        # Training
        net.train()
        
        start = time.clock()
        
        for X, Y in train_loader:
            batch_X = Variable(X.view(BatchSize, -1))
            batch_Y = Variable(Y.view(BatchSize))
            
            if use_cuda:
              batch_X, batch_Y = batch_X.cuda(), batch_Y.cuda()

            y_pred = net(batch_X)

            # Loss and backprop
            loss, kl, _ = net.cost_function(batch_X, batch_Y, num_samples=2, num_batches = N_Batch)
            optim.zero_grad()
            loss.backward()
            optim.step()
        
        print("one epoch used {} seconds with GPU: {}".format(time.clock()-start, use_cuda))
        
        # Evaluate on training set
        net.eval()
        train_X = Variable(train_set.train_data.view(train_size, -1).type(torch.FloatTensor))
        train_Y = Variable(train_set.train_labels.view(train_size, -1))
        
        if use_cuda:
          train_X, train_Y = train_X.cuda(), train_Y.cuda()
        
        pred_class = net(train_X).data.cpu().numpy().argmax(axis=1)
        true_class = train_Y.data.cpu().numpy().ravel()

        train_accu = compute_accu(pred_class, true_class, 2)
        print('Epoch', i_ep, '|  Training Accuracy:', train_accu, '%')

        train_accu_lst.append(train_accu)

        # Evaluate on test set
        test_X = Variable(test_set.test_data.view(test_size, -1).type(torch.FloatTensor))
        test_Y = Variable(test_set.test_labels.view(test_size, -1))
        
        if use_cuda:
          test_X, test_Y = test_X.cuda(), test_Y.cuda()
        
        pred_class = net(test_X).data.cpu().numpy().argmax(axis=1)
        true_class = test_Y.data.cpu().numpy().ravel()

        test_accu = compute_accu(pred_class, true_class, 2)
        print('Epoch', i_ep, '|  Test Accuracy:', test_accu, '%')

        test_accu_lst.append(test_accu)

    # Plot
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-paper')

    plt.title('Classification Accuracy on MNIST')
    plt.plot(train_accu_lst, label='Train')
    plt.plot(test_accu_lst, label='Test')
    plt.ylabel('Accuracy (%)')
    plt.xlabel('Epochs')
    plt.legend(loc='best')
    plt.tight_layout()
plt.show()

Use GPU: True
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!
one epoch used 31.169053999999996 seconds with GPU: True
Epoch 0 |  Training Accuracy: 22.55 %
Epoch 0 |  Test Accuracy: 23.26 %


KeyboardInterrupt: ignored