In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import random

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [19]:
training_epoch = 10
batch_size = 100
learning_rate = 1e-3
drop_prob=0.3

In [20]:
mnist_train = dsets.MNIST(root = "MNIST_data/",
                                                  train = True,
                                                  transform = transforms.ToTensor(),
                                                  download=False)
mnist_test = dsets.MNIST(root = "MNIST_data/",
                                                  train = False,
                                                  transform = transforms.ToTensor(),
                                                  download=False)

In [21]:
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                                                                 batch_size = batch_size,
                                                                                 shuffle = True,
                                                                                 drop_last=True) #60.000 images, 7.000 batch_size => 4.000 images are left

In [22]:
linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 128, bias=True)
linear3 = torch.nn.Linear(128, 10, bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p = drop_prob)

In [23]:
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.0788,  0.1275, -0.0620,  ..., -0.1377,  0.0723, -0.0535],
        [ 0.1084, -0.0643,  0.0442,  ...,  0.1849,  0.0869,  0.2071],
        [-0.0723, -0.0579,  0.0255,  ...,  0.0880,  0.0079,  0.1402],
        ...,
        [-0.1238, -0.0863,  0.2016,  ...,  0.0507, -0.0489,  0.0633],
        [ 0.1992, -0.1147,  0.1513,  ..., -0.0129, -0.0867, -0.1638],
        [-0.0513,  0.0727,  0.1375,  ...,  0.1846,  0.0769,  0.1023]],
       requires_grad=True)

In [24]:
model = torch.nn.Sequential(linear1, relu, dropout,
                                                       linear2, relu, dropout,
                                                       linear3).to(device)

In [25]:
loss = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [28]:
total_batch = len(data_loader)
model.train()
for epoch in range(training_epoch):
    avg_cost = 0
    
    for X, Y in data_loader:
        X=X.view(-1, 28*28).to(device)
        Y=Y.to(device)
        
        optimizer.zero_grad()
        hypothesis=model(X)
        cost=loss(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost/total_batch
    print('Epoch:', '%04d' % (epoch + 1), 'cost = ', '{:.9f}'.format(avg_cost))
print('Done')

Epoch: 0001 cost =  0.051781423
Epoch: 0002 cost =  0.046194252
Epoch: 0003 cost =  0.043967646
Epoch: 0004 cost =  0.042875208
Epoch: 0005 cost =  0.040348522
Epoch: 0006 cost =  0.035939004
Epoch: 0007 cost =  0.036034819
Epoch: 0008 cost =  0.036279093
Epoch: 0009 cost =  0.032862190
Epoch: 0010 cost =  0.033254270
Done


In [29]:
with torch.no_grad():
    model.eval()
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuray:', accuracy.item())
    
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    
    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())



Accuray: 0.9821999669075012
Label:  2
Prediction:  2
